1 /*========================== begin_copyright_notice ============================
2
3 Copyright (C) 2019-2021 Intel Corporation
4
5 SPDX-License-Identifier: MIT
6
7 ============================= end_copyright_notice ===========================*/
8
9 //
10 /// GenXCisaBuilder
11 /// ---------------
12 ///
/// This file contains two passes: GenXCisaBuilder and GenXFinalizer.
14 ///
/// 1. GenXCisaBuilder transforms LLVM IR to CISA IR via Finalizer's public API.
16 /// It is a FunctionGroupWrapperPass, thus it runs once for each kernel and
17 /// creates CISA IR for it and all its subroutines. Real building of kernels
18 /// is performed by the GenXKernelBuilder class. This splitting is necessary
19 /// because GenXCisaBuilder object lives through all Function Groups, but we
20 /// don't need to keep all Kernel building specific data in such lifetime.
21 ///
/// 2. GenXFinalizer is a module pass, thus it runs once; all it does is run
/// the Finalizer on the kernels created in the GenXCisaBuilder pass.
24 ///
25 //===----------------------------------------------------------------------===//
26
27 #include "FunctionGroup.h"
28 #include "GenX.h"
29 #include "GenXDebugInfo.h"
30 #include "GenXGotoJoin.h"
31 #include "GenXIntrinsics.h"
32 #include "GenXPressureTracker.h"
33 #include "GenXSubtarget.h"
34 #include "GenXTargetMachine.h"
35 #include "GenXUtil.h"
36 #include "GenXVisaRegAlloc.h"
37
38 #include "vc/GenXOpts/Utils/KernelInfo.h"
39 #include "vc/Support/BackendConfig.h"
40 #include "vc/Support/ShaderDump.h"
41 #include "vc/Utils/GenX/Printf.h"
42
43 #include "llvm/GenXIntrinsics/GenXIntrinsicInst.h"
44
45 #include "visaBuilder_interface.h"
46
47 #include "llvm/ADT/IndexedMap.h"
48 #include "llvm/ADT/Statistic.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/CodeGen/TargetPassConfig.h"
52 #include "llvm/IR/DebugInfo.h"
53 #include "llvm/IR/DiagnosticInfo.h"
54 #include "llvm/IR/DiagnosticPrinter.h"
55 #include "llvm/InitializePasses.h"
56 #include "llvm/Support/Error.h"
57 #include "llvm/Support/Path.h"
58 #include "llvm/Support/Regex.h"
59 #include "llvm/Support/ScopedPrinter.h"
60 #include "llvm/Support/StringSaver.h"
61
62 #include "Probe/Assertion.h"
63 #include "llvmWrapper/IR/CallSite.h"
64 #include "llvmWrapper/IR/InstrTypes.h"
65 #include "llvmWrapper/IR/Instructions.h"
66 #include "llvmWrapper/IR/DerivedTypes.h"
67
#include <algorithm>
#include <cstring>
#include <map>
#include <string>
#include <vector>
72
73 using namespace llvm;
74 using namespace genx;
75
76 #define DEBUG_TYPE "GENX_CISA_BUILDER"
77
78 static cl::list<std::string>
79 FinalizerOpts("finalizer-opts", cl::Hidden, cl::ZeroOrMore,
80 cl::desc("Additional options for finalizer."));
81
82 static cl::opt<bool> EmitVisa("emit-visa", cl::init(false), cl::Hidden,
83 cl::desc("Generate Visa instead of fat binary."));
84
85 static cl::opt<std::string> AsmNameOpt("asm-name", cl::init(""), cl::Hidden,
86 cl::desc("Output assembly code to this file during compilation."));
87
88 static cl::opt<bool> ReverseKernels("reverse-kernels", cl::init(false), cl::Hidden,
89 cl::desc("Emit the kernel asm name in reversed order (if user asm name presented)."));
90
91 static cl::opt<bool>
92 PrintFinalizerOptions("cg-print-finalizer-args", cl::init(false), cl::Hidden,
93 cl::desc("Prints options used to invoke finalizer"));
94
95 static cl::opt<bool> SkipNoWiden("skip-widen", cl::init(false), cl::Hidden,
96 cl::desc("Do new emit NoWiden hint"));
97
98 static cl::opt<bool> DisableNoMaskWA(
99 "vc-cg-disable-no-mask-wa", cl::init(false), cl::Hidden,
100 cl::desc("do not apply noMask WA (fusedEU)"));
101
102 static cl::opt<bool> OptStrictI64Check(
103 "genx-cisa-builder-noi64-check", cl::init(false), cl::Hidden,
104 cl::desc("strict check to ensure we produce no 64-bit operations"));
105
106 STATISTIC(NumVisaInsts, "Number of VISA instructions");
107 STATISTIC(NumAsmInsts, "Number of Gen asm instructions");
108 STATISTIC(SpillMemUsed, "Spill memory size used");
109
/// Bitwise XOR-assignment for enumeration types.
/// For VISA_PREDICATE_CONTROL & VISA_PREDICATE_STATE
///
/// \param a enumeration value updated in place.
/// \param b enumeration value XOR-ed into \p a.
/// \returns a reference to \p a.
template <class T> T &operator^=(T &a, T b) {
  // Note: the alias must not be spelled `_T`; identifiers that start with an
  // underscore followed by an uppercase letter are reserved.
  using Underlying = typename std::underlying_type<T>::type;
  static_assert(std::is_integral<Underlying>::value,
                "Wrong operation for non-integral type");
  a = static_cast<T>(static_cast<Underlying>(a) ^ static_cast<Underlying>(b));
  return a;
}
118
/// Bitwise OR-assignment for enumeration types.
///
/// \param a enumeration value updated in place.
/// \param b enumeration value OR-ed into \p a.
/// \returns a reference to \p a, like the built-in compound assignment and
///          the sibling operator^= (it used to return a copy).
template <class T> T &operator|=(T &a, T b) {
  // Note: the alias must not be spelled `_T`; identifiers that start with an
  // underscore followed by an uppercase letter are reserved.
  using Underlying = typename std::underlying_type<T>::type;
  static_assert(std::is_integral<Underlying>::value,
                "Wrong operation for non-integral type");
  a = static_cast<T>(static_cast<Underlying>(a) | static_cast<Underlying>(b));
  return a;
}
126
127 struct DstOpndDesc {
128 Instruction *WrRegion = nullptr;
129 Instruction *GStore = nullptr;
130 Instruction *WrPredefReg = nullptr;
131 genx::BaleInfo WrRegionBI;
132 };
133
134 namespace {
135
136 // Diagnostic information for errors/warnings in the GEN IR building passes.
137 class DiagnosticInfoCisaBuild : public DiagnosticInfo {
138 private:
139 std::string Description;
140 static int KindID;
141
getKindID()142 static int getKindID() {
143 if (KindID == 0)
144 KindID = llvm::getNextAvailablePluginDiagnosticKind();
145 return KindID;
146 }
147
148 public:
DiagnosticInfoCisaBuild(const Twine & Desc,DiagnosticSeverity Severity)149 DiagnosticInfoCisaBuild(const Twine &Desc, DiagnosticSeverity Severity)
150 : DiagnosticInfo(getKindID(), Severity) {
151 Description = (Twine("GENX IR generation error: ") + Desc).str();
152 }
153
DiagnosticInfoCisaBuild(Instruction * Inst,const Twine & Desc,DiagnosticSeverity Severity)154 DiagnosticInfoCisaBuild(Instruction *Inst, const Twine &Desc,
155 DiagnosticSeverity Severity)
156 : DiagnosticInfo(getKindID(), Severity) {
157 std::string Str;
158 llvm::raw_string_ostream(Str) << *Inst;
159 Description =
160 (Twine("CISA builder failed for intruction <") + Str + ">: " + Desc)
161 .str();
162 }
163
print(DiagnosticPrinter & DP) const164 void print(DiagnosticPrinter &DP) const override { DP << Description; }
165
classof(const DiagnosticInfo * DI)166 static bool classof(const DiagnosticInfo *DI) {
167 return DI->getKind() == getKindID();
168 }
169 };
170 int DiagnosticInfoCisaBuild::KindID = 0;
171
172
getExecSizeFromValue(unsigned int Size)173 static VISA_Exec_Size getExecSizeFromValue(unsigned int Size) {
174 int Res = genx::log2(Size);
175 IGC_ASSERT(std::bitset<sizeof(unsigned int) * 8>(Size).count() <= 1);
176 IGC_ASSERT_MESSAGE(Res <= 5,
177 "illegal common ISA execsize (should be 1, 2, 4, 8, 16, 32).");
178 return Res == -1 ? EXEC_SIZE_ILLEGAL : (VISA_Exec_Size)Res;
179 }
180
getCisaOwordNumFromNumber(unsigned num)181 static VISA_Oword_Num getCisaOwordNumFromNumber(unsigned num) {
182 switch (num) {
183 case 1:
184 return OWORD_NUM_1;
185 case 2:
186 return OWORD_NUM_2;
187 case 4:
188 return OWORD_NUM_4;
189 case 8:
190 return OWORD_NUM_8;
191 case 16:
192 return OWORD_NUM_16;
193 default:
194 IGC_ASSERT_MESSAGE(0, "illegal Oword number.");
195 return OWORD_NUM_ILLEGAL;
196 }
197 }
198
convertChannelMaskToVisaType(unsigned Mask)199 VISAChannelMask convertChannelMaskToVisaType(unsigned Mask) {
200 switch (Mask & 0xf) {
201 case 1:
202 return CHANNEL_MASK_R;
203 case 2:
204 return CHANNEL_MASK_G;
205 case 3:
206 return CHANNEL_MASK_RG;
207 case 4:
208 return CHANNEL_MASK_B;
209 case 5:
210 return CHANNEL_MASK_RB;
211 case 6:
212 return CHANNEL_MASK_GB;
213 case 7:
214 return CHANNEL_MASK_RGB;
215 case 8:
216 return CHANNEL_MASK_A;
217 case 9:
218 return CHANNEL_MASK_RA;
219 case 10:
220 return CHANNEL_MASK_GA;
221 case 11:
222 return CHANNEL_MASK_RGA;
223 case 12:
224 return CHANNEL_MASK_BA;
225 case 13:
226 return CHANNEL_MASK_RBA;
227 case 14:
228 return CHANNEL_MASK_GBA;
229 case 15:
230 return CHANNEL_MASK_RGBA;
231 default:
232 IGC_ASSERT_EXIT_MESSAGE(0, "Wrong mask");
233 }
234 }
235
getChannelOutputFormat(uint8_t ChannelOutput)236 CHANNEL_OUTPUT_FORMAT getChannelOutputFormat(uint8_t ChannelOutput) {
237 return (CHANNEL_OUTPUT_FORMAT)((ChannelOutput >> 4) & 0x3);
238 }
239
cutString(const Twine & Str)240 static std::string cutString(const Twine &Str) {
241 // vISA is limited to 64 byte strings. But old fe-compiler seems to ignore
242 // that for source filenames.
243 constexpr size_t MaxVisaLabelLength = 64;
244 auto Result = Str.str();
245 if (Result.size() > MaxVisaLabelLength)
246 Result.erase(MaxVisaLabelLength);
247 return Result;
248 }
249
handleCisaCallError(const Twine & Call,LLVMContext & Ctx)250 void handleCisaCallError(const Twine &Call, LLVMContext &Ctx) {
251 DiagnosticInfoCisaBuild Err(
252 "VISA builder API call failed: " + Call, DS_Error);
253 Ctx.diagnose(Err);
254 }
255
256 /***********************************************************************
257 * Local function for testing one assertion statement.
258 * It returns true if all is ok.
259 * A phi node not generates any code.
260 * The phi node should has no live range because it is part of an indirected
261 * arg/retval in GenXArgIndirection or it is an EM/RM category.
262 */
testPhiNodeHasNoMismatchedRegs(const llvm::PHINode * const Phi,const llvm::GenXLiveness * const Liveness)263 bool testPhiNodeHasNoMismatchedRegs(const llvm::PHINode *const Phi,
264 const llvm::GenXLiveness *const Liveness) {
265 IGC_ASSERT(Phi);
266 IGC_ASSERT(Liveness);
267 bool Result = true;
268 const size_t Count = Phi->getNumIncomingValues();
269 for (size_t i = 0; (i < Count) && Result; ++i) {
270 const llvm::Value *const Incoming = Phi->getIncomingValue(i);
271 if (!isa<UndefValue>(Incoming)) {
272 const genx::SimpleValue SVI(const_cast<llvm::Value *> (Incoming));
273 const genx::LiveRange *const LRI = Liveness->getLiveRangeOrNull(SVI);
274 if (LRI) {
275 if (LRI->getCategory() < RegCategory::NUMREALCATEGORIES) {
276 const genx::SimpleValue SVP(const_cast<llvm::PHINode *> (Phi));
277 const genx::LiveRange *const LRP = Liveness->getLiveRangeOrNull(SVP);
278 Result = (LRI == LRP);
279 IGC_ASSERT_MESSAGE(Result, "mismatched registers in phi node");
280 }
281 }
282 }
283 }
284 return Result;
285 }
286
287 /***********************************************************************
288 * Local function for testing one assertion statement.
289 */
testPredicate(const CmpInst * const Cmp,const DstOpndDesc & DstDesc)290 bool testPredicate(const CmpInst *const Cmp, const DstOpndDesc &DstDesc) {
291 bool Result = (!DstDesc.WrRegion);
292 Result = (Result || (Cmp->getType()->getPrimitiveSizeInBits() != 4));
293 Result = (Result || (Cmp->getOperand(0)->getType()->getScalarType()
294 ->getPrimitiveSizeInBits() == 64));
295 IGC_ASSERT(Result);
296 return Result;
297 }
298
299 } // namespace
300
// Evaluates the vISA builder call `c` exactly once and, when it returns a
// non-zero status, reports the failure through handleCisaCallError() on the
// LLVM context `ctx`. The text of `c` becomes part of the diagnostic.
//
// The expansion deliberately does NOT end with a semicolon: the caller's own
// `;` terminates the do/while(0), so the macro behaves like one statement and
// can be used as the body of an unbraced if/else.
#define CISA_CALL_CTX(c, ctx)                                                  \
  do {                                                                         \
    auto result = c;                                                           \
    if (result != 0) {                                                         \
      handleCisaCallError(#c, (ctx));                                          \
    }                                                                          \
  } while (0)

// Same as CISA_CALL_CTX, with the context taken from getContext() of the
// enclosing scope.
#define CISA_CALL(c) CISA_CALL_CTX(c, getContext())
310
311 namespace llvm {
312
getVisaTypeFromBytesNumber(unsigned BytesNum,bool IsFloat,genx::Signedness Sign)313 static VISA_Type getVisaTypeFromBytesNumber(unsigned BytesNum, bool IsFloat,
314 genx::Signedness Sign) {
315 VISA_Type aliasType;
316 if (IsFloat) {
317 switch (BytesNum) {
318 case 2:
319 aliasType = ISA_TYPE_HF;
320 break;
321 case 4:
322 aliasType = ISA_TYPE_F;
323 break;
324 case 8:
325 aliasType = ISA_TYPE_DF;
326 break;
327 default:
328 report_fatal_error("unknown float type");
329 break;
330 }
331 } else {
332 switch (BytesNum) {
333 case 1:
334 aliasType = (Sign == SIGNED) ? ISA_TYPE_B : ISA_TYPE_UB;
335 break;
336 case 2:
337 aliasType = (Sign == SIGNED) ? ISA_TYPE_W : ISA_TYPE_UW;
338 break;
339 case 4:
340 aliasType = (Sign == SIGNED) ? ISA_TYPE_D : ISA_TYPE_UD;
341 break;
342 case 8:
343 aliasType = (Sign == SIGNED) ? ISA_TYPE_Q : ISA_TYPE_UQ;
344 break;
345 default:
346 report_fatal_error("unknown integer type");
347 break;
348 }
349 }
350 return aliasType;
351 }
352
llvmToVisaType(Type * Type,genx::Signedness Sign=DONTCARESIGNED)353 static VISA_Type llvmToVisaType(Type *Type,
354 genx::Signedness Sign = DONTCARESIGNED) {
355 auto T = Type;
356 IGC_ASSERT(!T->isAggregateType());
357 VISA_Type Result = ISA_TYPE_NUM;
358 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(T);
359 VT && VT->getElementType()->isIntegerTy(1)) {
360 IGC_ASSERT(VT->getNumElements() == 8 || VT->getNumElements() == 16 ||
361 VT->getNumElements() == 32);
362 Result = getVisaTypeFromBytesNumber(VT->getNumElements() / genx::ByteBits,
363 false, Sign);
364 } else {
365 if (T->isVectorTy())
366 T = cast<VectorType>(T)->getElementType();
367 if (T->isPointerTy()) {
368 // we might have used DL to get the type size but that'd
369 // overcomplicate this function's type unnecessarily
370 Result = getVisaTypeFromBytesNumber(visa::BytesPerSVMPtr, false,
371 DONTCARESIGNED);
372 } else {
373 IGC_ASSERT(T->isFloatingPointTy() || T->isIntegerTy());
374 Result = getVisaTypeFromBytesNumber(T->getScalarSizeInBits() / CHAR_BIT,
375 T->isFloatingPointTy(), Sign);
376 }
377 }
378 IGC_ASSERT(Result != ISA_TYPE_NUM);
379 return Result;
380 }
381
llvmToVisaType(Value * V,genx::Signedness Sign=DONTCARESIGNED)382 static VISA_Type llvmToVisaType(Value *V,
383 genx::Signedness Sign = DONTCARESIGNED) {
384 return llvmToVisaType(V->getType(), Sign);
385 }
386
387 // Due to the lack of access to VISA_GenVar internal interfaces (concerning type, size, etc)
388 // some local DS are required to store such info: CisaVariable and GenericCisaVariable.
389
390 //===----------------------------------------------------------------------===//
391 // CisaVariable
392 // ------------------
393 //
394 // CisaVariable keeps VISA_GenVar of a specific VISA_Type and provides accessors
395 // to its byte size and number of elements thus emulating some internal vISA machinery.
396 //
397 //===----------------------------------------------------------------------===//
398 class CisaVariable {
399 VISA_Type Type;
400 unsigned ByteSize = 0;
401 VISA_GenVar *VisaVar = nullptr;
402
403 public:
CisaVariable(VISA_Type T,unsigned BS,VISA_GenVar * V)404 CisaVariable(VISA_Type T, unsigned BS, VISA_GenVar *V)
405 : Type(T), ByteSize(BS), VisaVar(V) {}
406
getType() const407 VISA_Type getType() const { return Type; }
408
getGenVar()409 VISA_GenVar *getGenVar() { return VisaVar; }
410
getByteSize() const411 unsigned getByteSize() const { return ByteSize; }
412
getNumElements() const413 unsigned getNumElements() const {
414 const int size = CISATypeTable[Type].typeSize;
415 IGC_ASSERT(size);
416 IGC_ASSERT(!(ByteSize % size));
417 return ByteSize / size;
418 }
419 };
420
421 //===----------------------------------------------------------------------===//
422 // GenericCisaVariable
423 // ------------------
424 //
425 // GenericCisaVariable describes vISA value that isn't intended to have matching llvm::Value
426 // (e.g. stack regs %arg and %retv). It provides interface to get a VisaVar alias with a specific
427 // vISA type.
428 //
429 //===----------------------------------------------------------------------===//
430 class GenericCisaVariable {
431 const char *Name = "";
432 VISA_GenVar *VisaVar = nullptr;
433 unsigned ByteSize = 0;
434
435 IndexedMap<CisaVariable *> AliasDecls;
436 std::list<CisaVariable> Storage;
437
getNumElements(VISA_Type T) const438 unsigned getNumElements(VISA_Type T) const {
439 const int size = CISATypeTable[T].typeSize;
440 IGC_ASSERT(size);
441 IGC_ASSERT(!(ByteSize % size));
442 return ByteSize / size;
443 }
444
445 public:
GenericCisaVariable(const char * Nm,VISA_GenVar * V,unsigned BS)446 GenericCisaVariable(const char *Nm, VISA_GenVar *V, unsigned BS)
447 : Name(Nm), VisaVar(V), ByteSize(BS) {
448 AliasDecls.grow(ISA_TYPE_NUM);
449 }
450
getAlias(Value * V,VISAKernel * K)451 CisaVariable *getAlias(Value *V, VISAKernel *K) {
452 return getAlias(llvmToVisaType(V), K);
453 }
454
getAlias(VISA_Type T,VISAKernel * K)455 CisaVariable *getAlias(VISA_Type T, VISAKernel *K) {
456 if (!AliasDecls[T]) {
457 VISA_GenVar *VV = nullptr;
458 K->CreateVISAGenVar(VV, Name, getNumElements(T), T, ALIGN_GRF, VisaVar);
459 Storage.push_back(CisaVariable(T, ByteSize, VV));
460 AliasDecls[T] = &Storage.back();
461 }
462 return AliasDecls[T];
463 }
464
getByteSize() const465 unsigned getByteSize() const { return ByteSize; }
466 };
467
468 //===----------------------------------------------------------------------===//
469 /// GenXCisaBuilder
470 /// ------------------
471 ///
472 /// This class encapsulates a creation of vISA kernels.
473 /// It is a FunctionGroupWrapperPass, thus it runs once for each kernel and
474 /// builds vISA kernel via class GenXKernelBuilder.
475 /// All created kernels are stored in CISA Builder object which is provided
476 /// by finalizer.
477 ///
478 //===----------------------------------------------------------------------===//
479 class GenXCisaBuilder : public FGPassImplInterface,
480 public IDMixin<GenXCisaBuilder> {
481 LLVMContext *Ctx = nullptr;
482
483 public:
GenXCisaBuilder()484 explicit GenXCisaBuilder() {}
485
getPassName()486 static StringRef getPassName() { return "GenX CISA construction pass"; }
487 static void getAnalysisUsage(AnalysisUsage &AU);
488 bool runOnFunctionGroup(FunctionGroup &FG) override;
489
getContext()490 LLVMContext &getContext() {
491 IGC_ASSERT(Ctx);
492 return *Ctx;
493 }
494 };
495
496 void initializeGenXCisaBuilderWrapperPass(PassRegistry &);
497 using GenXCisaBuilderWrapper = FunctionGroupWrapperPass<GenXCisaBuilder>;
498
499 //===----------------------------------------------------------------------===//
500 /// GenXKernelBuilder
501 /// ------------------
502 ///
503 /// This class does all the work for creation of vISA kernels.
504 ///
505 //===----------------------------------------------------------------------===//
506 class GenXKernelBuilder {
507 using Register = GenXVisaRegAlloc::Reg;
508
509 VISAKernel *MainKernel = nullptr;
510 VISAFunction *Kernel = nullptr;
511 genx::KernelMetadata TheKernelMetadata;
512 LLVMContext &Ctx;
513 const DataLayout &DL;
514
515 std::map<Function *, VISAFunction *> Func2Kern;
516
517 std::map<std::string, unsigned> StringPool;
518 std::vector<VISA_LabelOpnd *> Labels;
519 std::map<const Value *, unsigned> LabelMap;
520
521 // loop info for each function
522 std::map<Function *, LoopInfoBase<BasicBlock, Loop> *> Loops;
523 ValueMap<Function *, bool> IsInLoopCache;
524
525 // whether kernel has barrier or sbarrier instruction
526 bool HasBarrier = false;
527 bool HasCallable = false;
528 bool HasStackcalls = false;
529 bool HasAlloca = false;
530 bool UseNewStackBuilder = false;
531 // GRF width in unit of byte
532 unsigned GrfByteSize = defaultGRFByteSize;
533
534 unsigned LastLine = 0;
535 unsigned PendingLine = 0;
536 StringRef LastFilename;
537 StringRef PendingFilename;
538 StringRef LastDirectory;
539 StringRef PendingDirectory;
540
541 // function currently being written during constructor
542 Function *Func = nullptr;
543 // function corresponding to VISAKernel currently being written
544 Function *KernFunc = nullptr;
545 PreDefined_Surface StackSurf;
546
547 std::map<Function *, VISA_GenVar *> FPMap;
548 SmallVector<InsertValueInst *, 10> RetvInserts;
549
550 std::map<VISAKernel *, std::map<StringRef, GenericCisaVariable>> CisaVars;
551
552 // The default float control from kernel attribute. Each subroutine may
553 // overrride this control mask, but it should revert back to the default float
554 // control mask before exiting from the subroutine.
555 uint32_t DefaultFloatControl = 0;
556
557 static const uint32_t CR_Mask = 0x1 << 10 | 0x3 << 6 | 0x3 << 4 | 0x1;
558
559 // normally false, set to true if there is any SIMD CF in the func or this is
560 // (indirectly) called inside any SIMD CF.
561 bool NoMask = false;
562
563 genx::AlignmentInfo AI;
564 const Instruction *CurrentInst = nullptr;
565
566 // Map from LLVM Value to pointer to the last used register alias for this
567 // Value.
568 std::map<Value *, Register *> LastUsedAliasMap;
569 unsigned CurrentPadding = 0;
570
571 public:
572 FunctionGroup *FG = nullptr;
573 GenXLiveness *Liveness = nullptr;
574 GenXNumbering *Numbering = nullptr;
575 GenXVisaRegAlloc *RegAlloc = nullptr;
576 FunctionGroupAnalysis *FGA = nullptr;
577 GenXModule *GM = nullptr;
578 LoopInfoGroupWrapperPass *LIs = nullptr;
579 const GenXSubtarget *Subtarget = nullptr;
580 const GenXBackendConfig *BackendConfig = nullptr;
581 GenXBaling *Baling = nullptr;
582 VISABuilder *CisaBuilder = nullptr;
583
584 private:
585 bool allowI64Ops() const;
586 void collectKernelInfo();
587 void buildVariables();
588 void buildInstructions();
589
590 bool buildInstruction(Instruction *Inst);
591 bool buildMainInst(Instruction *Inst, genx::BaleInfo BI, unsigned Mod,
592 const DstOpndDesc &DstDesc);
593 void buildControlRegUpdate(unsigned Mask, bool Clear);
594 void buildJoin(CallInst *Join, BranchInst *Branch);
595 bool buildBranch(BranchInst *Branch);
596 void buildIndirectBr(IndirectBrInst *Br);
597 void buildIntrinsic(CallInst *CI, unsigned IntrinID, genx::BaleInfo BI,
598 unsigned Mod, const DstOpndDesc &DstDesc);
599 void buildInputs(Function *F, bool NeedRetIP);
600
601 void buildFunctionAddr(Instruction *Inst, const DstOpndDesc &DstDesc);
602 void buildLoneWrRegion(const DstOpndDesc &Desc);
603 void buildLoneWrPredRegion(Instruction *Inst, genx::BaleInfo BI);
604 void buildLoneOperand(Instruction *Inst, genx::BaleInfo BI, unsigned Mod,
605 const DstOpndDesc &DstDesc);
606
607 VISA_PredVar *getPredicateVar(Register *Idx);
608 VISA_PredVar *getPredicateVar(Value *V);
609 VISA_PredVar *getZeroedPredicateVar(Value *V);
610 VISA_SurfaceVar *getPredefinedSurfaceVar(GlobalVariable &GV);
611 VISA_EMask_Ctrl getExecMaskFromWrPredRegion(Instruction *WrPredRegion,
612 bool IsNoMask);
613 VISA_EMask_Ctrl getExecMaskFromWrRegion(const DstOpndDesc &DstDesc,
614 bool IsNoMask = false);
615 unsigned getOrCreateLabel(const Value *V, int Kind);
616 int getLabel(const Value *V) const;
617 void setLabel(const Value *V, unsigned Num);
618
619 void emitOptimizationHints();
620
621 Value *getPredicateOperand(Instruction *Inst, unsigned OperandNum,
622 genx::BaleInfo BI, VISA_PREDICATE_CONTROL &Control,
623 VISA_PREDICATE_STATE &PredField,
624 VISA_EMask_Ctrl *MaskCtrl);
625 bool isInLoop(BasicBlock *BB);
626
627 void addLabelInst(const Value *BB);
628 void buildPhiNode(PHINode *Phi);
629 void buildGoto(CallInst *Goto, BranchInst *Branch);
630 void buildCall(CallInst *CI, const DstOpndDesc &DstDesc);
631 void buildStackCall(CallInst *CI, const DstOpndDesc &DstDesc);
632 void buildStackCallLight(CallInst *CI, const DstOpndDesc &DstDesc);
633 void buildInlineAsm(CallInst *CI);
634 void buildPrintIndex(CallInst *CI, unsigned IntrinID, unsigned Mod,
635 const DstOpndDesc &DstDesc);
636 void buildSelectInst(SelectInst *SI, genx::BaleInfo BI, unsigned Mod,
637 const DstOpndDesc &DstDesc);
638 void buildBinaryOperator(BinaryOperator *BO, genx::BaleInfo BI, unsigned Mod,
639 const DstOpndDesc &DstDesc);
640 #if (LLVM_VERSION_MAJOR > 8)
641 void buildUnaryOperator(UnaryOperator *UO, genx::BaleInfo BI, unsigned Mod,
642 const DstOpndDesc &DstDesc);
643 #endif
644 void buildBoolBinaryOperator(BinaryOperator *BO);
645 void buildSymbolInst(CallInst *GAddrInst, unsigned Mod,
646 const DstOpndDesc &DstDesc);
647 void buildCastInst(CastInst *CI, genx::BaleInfo BI, unsigned Mod,
648 const DstOpndDesc &DstDesc);
649 void buildConvertAddr(CallInst *CI, genx::BaleInfo BI, unsigned Mod,
650 const DstOpndDesc &DstDesc);
651 void buildAlloca(CallInst *CI, unsigned IntrinID, unsigned Mod,
652 const DstOpndDesc &DstDesc);
653 void buildWritePredefSurface(CallInst &CI);
654 void buildGetHWID(CallInst *CI, const DstOpndDesc &DstDesc);
655 void addWriteRegionLifetimeStartInst(Instruction *WrRegion);
656 void addLifetimeStartInst(Instruction *Inst);
657 void AddGenVar(Register &Reg);
658 void buildRet(ReturnInst *RI);
659 void buildNoopCast(CastInst *CI, genx::BaleInfo BI, unsigned Mod,
660 const DstOpndDesc &DstDesc);
661 void buildCmp(CmpInst *Cmp, genx::BaleInfo BI, const DstOpndDesc &DstDesc);
662 void buildExtractRetv(ExtractValueInst *Inst);
663 void buildInsertRetv(InsertValueInst *Inst);
664
665 VISA_VectorOpnd *createState(Register *Reg, unsigned Offset, bool IsDst);
666 VISA_Type getVISAImmTy(uint8_t ImmTy);
667
668 VISA_PredOpnd *createPredOperand(VISA_PredVar *PredVar,
669 VISA_PREDICATE_STATE State,
670 VISA_PREDICATE_CONTROL Control);
671
672 VISA_VectorOpnd *createCisaSrcOperand(VISA_GenVar *Decl, VISA_Modifier Mod,
673 unsigned VStride, unsigned Width,
674 unsigned HStride, unsigned ROffset,
675 unsigned COffset);
676
677 VISA_VectorOpnd *createCisaDstOperand(VISA_GenVar *Decl, unsigned HStride,
678 unsigned ROffset, unsigned COffset);
679
680 VISA_VectorOpnd *createDestination(Value *Dest, genx::Signedness Signed,
681 unsigned Mod, const DstOpndDesc &DstDesc,
682 genx::Signedness *SignedRes = nullptr,
683 unsigned *Offset = nullptr);
684 VISA_VectorOpnd *createDestination(CisaVariable *Dest,
685 genx::Signedness Signed,
686 unsigned *Offset = nullptr);
687 VISA_VectorOpnd *createDestination(Value *Dest,
688 genx::Signedness Signed,
689 unsigned *Offset = nullptr);
690 VISA_VectorOpnd *createSourceOperand(Instruction *Inst,
691 genx::Signedness Signed,
692 unsigned OperandNum, genx::BaleInfo BI,
693 unsigned Mod = 0,
694 genx::Signedness *SignedRes = nullptr,
695 unsigned MaxWidth = 16);
696 VISA_VectorOpnd *createSource(CisaVariable *V, genx::Signedness Signed,
697 unsigned MaxWidth = 16,
698 unsigned *Offset = nullptr);
699 VISA_VectorOpnd *createSource(Value *V, genx::Signedness Signed, bool Baled,
700 unsigned Mod = 0,
701 genx::Signedness *SignedRes = nullptr,
702 unsigned MaxWidth = 16,
703 unsigned *Offset = nullptr);
704 VISA_VectorOpnd *createSource(Value *V, genx::Signedness Signed,
705 unsigned MaxWidth = 16,
706 unsigned *Offset = nullptr);
707
708 std::string createInlineAsmOperand(Register *Reg, genx::Region *R, bool IsDst,
709 genx::Signedness Signed,
710 genx::ConstraintType Ty, unsigned Mod);
711
712 std::string createInlineAsmSourceOperand(Value *V, genx::Signedness Signed,
713 bool Baled, genx::ConstraintType Ty,
714 unsigned Mod = 0,
715 unsigned MaxWidth = 16);
716
717 std::string createInlineAsmDestinationOperand(Value *Dest,
718 genx::Signedness Signed,
719 genx::ConstraintType Ty,
720 unsigned Mod,
721 const DstOpndDesc &DstDesc);
722
723 VISA_VectorOpnd *createImmediateOperand(Constant *V, genx::Signedness Signed);
724
725 VISA_PredVar *createPredicateDeclFromSelect(Instruction *SI,
726 genx::BaleInfo BI,
727 VISA_PREDICATE_CONTROL &Control,
728 VISA_PREDICATE_STATE &PredField,
729 VISA_EMask_Ctrl *MaskCtrl);
730
731 VISA_RawOpnd *createRawSourceOperand(const Instruction *Inst,
732 unsigned OperandNum, genx::BaleInfo BI,
733 genx::Signedness Signed);
734 VISA_RawOpnd *createRawDestination(Value *V, const DstOpndDesc &DstDesc,
735 genx::Signedness Signed);
736
737 VISA_VectorOpnd *createAddressOperand(Value *V, bool IsDst);
738
739 void addDebugInfo();
740
741 void deduceRegion(Region *R, bool IsDest, unsigned MaxWidth = 16);
742
743 VISA_VectorOpnd *createGeneralOperand(genx::Region *R, VISA_GenVar *Decl,
744 genx::Signedness Signed, unsigned Mod,
745 bool IsDest, unsigned MaxWidth = 16);
746 VISA_VectorOpnd *createIndirectOperand(genx::Region *R,
747 genx::Signedness Signed, unsigned Mod,
748 bool IsDest, unsigned MaxWidth = 16);
749 VISA_VectorOpnd *createRegionOperand(genx::Region *R, VISA_GenVar *Decl,
750 genx::Signedness Signed, unsigned Mod,
751 bool IsDest, unsigned MaxWidth = 16);
752 VISA_PredOpnd *createPredFromWrRegion(const DstOpndDesc &DstDesc);
753
754 VISA_PredOpnd *createPred(Instruction *Inst, genx::BaleInfo BI,
755 unsigned OperandNum);
756
757 Instruction *getOriginalInstructionForSource(Instruction *CI,
758 genx::BaleInfo BI);
759 void buildConvert(CallInst *CI, genx::BaleInfo BI, unsigned Mod,
760 const DstOpndDesc &DstDesc);
761 std::string buildAsmName() const;
762 void beginFunction(Function *Func);
763 void beginFunctionLight(Function *Func);
764 void endFunction(Function *Func, ReturnInst *RI);
765
766 unsigned getFuncArgsSize(Function *F);
767 unsigned getValueSize(Type *T, unsigned Mod = 32) const;
getValueSize(CisaVariable * V) const768 unsigned getValueSize(CisaVariable *V) const {
769 return V->getByteSize();
770 }
getValueSize(Value * V,unsigned Mod=32) const771 unsigned getValueSize(Value *V, unsigned Mod = 32) const {
772 return getValueSize(V->getType(), Mod);
773 }
774 GenericCisaVariable *createCisaVariable(VISAKernel *Kernel, const char *Name,
775 VISA_GenVar *AliasVar, unsigned ByteSize);
776
777 template <typename T1, typename T2>
778 void emitVectorCopy(
779 T1 *Dst, T2 *Src, unsigned &RowOff, unsigned &ColOff, unsigned &SrcRowOff,
780 unsigned &SrcColOff, int TotalSize, bool DoCopy = true);
781
782 void pushStackArg(VISA_StateOpndHandle *Dst, Value *Src, int TotalSz,
783 unsigned &RowOff, unsigned &ColOff, unsigned &SrcRowOff,
784 unsigned &SrcColOff, bool DoCopy = true);
785 void popStackArg(Value *Dst, VISA_StateOpndHandle *Src, int TotalSz,
786 unsigned &RowOff, unsigned &ColOff, unsigned &SrcRowOff,
787 unsigned &SrcColOff, int &PrevStackOff);
788 Signedness getCommonSignedness(ArrayRef<Value *> Vs) const;
789
790 Register *getLastUsedAlias(Value *V) const;
791
792 template <typename... Args>
793 Register *getRegForValueUntypedAndSaveAlias(Args &&... args);
794 template <typename... Args>
795 Register *getRegForValueOrNullAndSaveAlias(Args &&... args);
796 template <typename... Args>
797 Register *getRegForValueAndSaveAlias(Args &&... args);
798
799 void runOnKernel();
800 void runOnFunction();
801
802 public:
GenXKernelBuilder(FunctionGroup & FG)803 GenXKernelBuilder(FunctionGroup &FG)
804 : TheKernelMetadata(FG.getHead()), Ctx(FG.getContext()),
805 DL(FG.getModule()->getDataLayout()), FG(&FG) {
806 collectKernelInfo();
807 }
~GenXKernelBuilder()808 ~GenXKernelBuilder() { clearLoops(); }
clearLoops()809 void clearLoops() {
810 for (auto i = Loops.begin(), e = Loops.end(); i != e; ++i) {
811 delete i->second;
812 i->second = nullptr;
813 }
814 Loops.clear();
815 }
816
817 bool run();
818
getContext()819 LLVMContext &getContext() { return Ctx; }
820
821 unsigned addStringToPool(StringRef Str);
822 StringRef getStringByIndex(unsigned Val);
823 };
createGenXCisaBuilderWrapperPass()824 ModulePass *createGenXCisaBuilderWrapperPass() {
825 initializeGenXCisaBuilderWrapperPass(*PassRegistry::getPassRegistry());
826 return new GenXCisaBuilderWrapper();
827 }
828
829 } // end namespace llvm
830
// Pass registration for the GenXCisaBuilder wrapper pass. The entries between
// INITIALIZE_PASS_BEGIN and INITIALIZE_PASS_END name the passes that get
// initialized together with this one.
INITIALIZE_PASS_BEGIN(GenXCisaBuilderWrapper, "GenXCisaBuilderPassWrapper",
                      "GenXCisaBuilderPassWrapper", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoGroupWrapperPassWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXGroupBalingWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXLivenessWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXVisaRegAllocWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXModule)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GenXBackendConfig)
INITIALIZE_PASS_END(GenXCisaBuilderWrapper, "GenXCisaBuilderPassWrapper",
                    "GenXCisaBuilderPassWrapper", false, false)
842
843 void GenXCisaBuilder::getAnalysisUsage(AnalysisUsage &AU) {
844 AU.addRequired<LoopInfoGroupWrapperPass>();
845 AU.addRequired<GenXGroupBaling>();
846 AU.addRequired<GenXLiveness>();
847 AU.addRequired<GenXVisaRegAlloc>();
848 AU.addRequired<GenXModule>();
849 AU.addRequired<FunctionGroupAnalysis>();
850 AU.addRequired<TargetPassConfig>();
851 AU.addRequired<GenXBackendConfig>();
852 AU.setPreservesAll();
853 }
854
runOnFunctionGroup(FunctionGroup & FG)855 bool GenXCisaBuilder::runOnFunctionGroup(FunctionGroup &FG) {
856 Ctx = &FG.getContext();
857 std::unique_ptr<GenXKernelBuilder> KernelBuilder(new GenXKernelBuilder(FG));
858 KernelBuilder->FGA = getAnalysisIfAvailable<FunctionGroupAnalysis>();
859 KernelBuilder->GM = getAnalysisIfAvailable<GenXModule>();
860 KernelBuilder->CisaBuilder = KernelBuilder->GM->GetCisaBuilder();
861 KernelBuilder->RegAlloc = getAnalysisIfAvailable<GenXVisaRegAlloc>();
862 KernelBuilder->Baling = &getAnalysis<GenXGroupBaling>();
863 KernelBuilder->LIs = &getAnalysis<LoopInfoGroupWrapperPass>();
864 KernelBuilder->Liveness = &getAnalysis<GenXLiveness>();
865 KernelBuilder->Subtarget = &getAnalysis<TargetPassConfig>()
866 .getTM<GenXTargetMachine>()
867 .getGenXSubtarget();
868 KernelBuilder->BackendConfig = &getAnalysis<GenXBackendConfig>();
869
870 KernelBuilder->run();
871
872 GenXModule *GM = KernelBuilder->GM;
873 if (GM->HasInlineAsm()) {
874 auto VISAAsmTextReader = GM->GetVISAAsmReader();
875 auto VISAText = KernelBuilder->CisaBuilder->GetAsmTextStream().str();
876 CISA_CALL(VISAAsmTextReader->ParseVISAText(VISAText, ""));
877 }
878
879 return false;
880 }
881
isDerivedFromUndef(Constant * C)882 static bool isDerivedFromUndef(Constant *C) {
883 if (isa<UndefValue>(C))
884 return true;
885 if (!isa<ConstantExpr>(C))
886 return false;
887 ConstantExpr *CE = cast<ConstantExpr>(C);
888 for (auto &Opnd : CE->operands())
889 if (isDerivedFromUndef(cast<Constant>(Opnd)))
890 return true;
891 return false;
892 }
893
get8bitPackedFloat(float f)894 static unsigned get8bitPackedFloat(float f) {
895 union {
896 float f;
897 unsigned u;
898 } u;
899
900 u.f = f;
901 unsigned char Sign = (u.u >> 31) << 7;
902 unsigned Exp = (u.u >> 23) & 0xFF;
903 unsigned Frac = u.u & 0x7FFFFF;
904 if (Exp == 0 && Frac == 0)
905 return Sign;
906
907 IGC_ASSERT(Exp >= 124);
908 IGC_ASSERT(Exp <= 131);
909 Exp -= 124;
910 IGC_ASSERT((Frac & 0x780000) == Frac);
911 Frac >>= 19;
912 IGC_ASSERT(!(Exp == 124 && Frac == 0));
913
914 Sign |= (Exp << 4);
915 Sign |= Frac;
916
917 return Sign;
918 }
919
getISatSrcSign(unsigned IID)920 static Signedness getISatSrcSign(unsigned IID) {
921 switch (IID) {
922 case GenXIntrinsic::genx_sstrunc_sat:
923 case GenXIntrinsic::genx_ustrunc_sat:
924 return SIGNED;
925 case GenXIntrinsic::genx_sutrunc_sat:
926 case GenXIntrinsic::genx_uutrunc_sat:
927 return UNSIGNED;
928 default:
929 return DONTCARESIGNED;
930 }
931 }
932
getISatDstSign(unsigned IID)933 static Signedness getISatDstSign(unsigned IID) {
934 switch (IID) {
935 case GenXIntrinsic::genx_sstrunc_sat:
936 case GenXIntrinsic::genx_sutrunc_sat:
937 return SIGNED;
938 case GenXIntrinsic::genx_ustrunc_sat:
939 case GenXIntrinsic::genx_uutrunc_sat:
940 return UNSIGNED;
941 default:
942 return DONTCARESIGNED;
943 }
944 }
945
getISatSrcSign(Value * V)946 static Signedness getISatSrcSign(Value *V) {
947 return getISatSrcSign(GenXIntrinsic::getGenXIntrinsicID(V));
948 }
949
getISatDstSign(Value * V)950 static Signedness getISatDstSign(Value *V) {
951 return getISatDstSign(GenXIntrinsic::getGenXIntrinsicID(V));
952 }
953
954 // isExtOperandBaled : check whether a sext/zext operand is baled.
isExtOperandBaled(Instruction * Inst,unsigned OpIdx,const GenXBaling * Baling)955 static bool isExtOperandBaled(Instruction *Inst, unsigned OpIdx,
956 const GenXBaling *Baling) {
957 BaleInfo InstBI = Baling->getBaleInfo(Inst);
958 if (!InstBI.isOperandBaled(OpIdx))
959 return false;
960
961 auto OpInst = cast<Instruction>(Inst->getOperand(OpIdx));
962 BaleInfo OpBI = Baling->getBaleInfo(OpInst);
963 return OpBI.Type == BaleInfo::ZEXT || OpBI.Type == BaleInfo::SEXT;
964 }
965
isExtOperandBaled(Use & U,const GenXBaling * Baling)966 static bool isExtOperandBaled(Use &U, const GenXBaling *Baling) {
967 return isExtOperandBaled(cast<Instruction>(U.get()), U.getOperandNo(),
968 Baling);
969 }
970
addKernelAttrsFromMetadata(VISAKernel & Kernel,const KernelMetadata & KM,const GenXSubtarget * Subtarget)971 void addKernelAttrsFromMetadata(VISAKernel &Kernel, const KernelMetadata &KM,
972 const GenXSubtarget* Subtarget) {
973 IGC_ASSERT(Subtarget);
974 unsigned SLMSizeInKb = divideCeil(KM.getSLMSize(), 1024);
975 if (SLMSizeInKb > Subtarget->getMaxSlmSize())
976 report_fatal_error("SLM size exceeds target limits");
977 if (!Subtarget->isOCLRuntime() && SLMSizeInKb > 255)
978 report_fatal_error("SLM size greater than 255KB is not supported by CMRT");
979 Kernel.AddKernelAttribute("SLMSize", sizeof(SLMSizeInKb), &SLMSizeInKb);
980
981 // Load thread payload from memory.
982 if (Subtarget->hasThreadPayloadInMemory()) {
983 // The number of GRFs for per thread inputs (thread local IDs)
984 unsigned NumGRFs = 0;
985 bool HasImplicit = false;
986 for (auto Kind : KM.getArgKinds()) {
987 if (Kind & 0x8)
988 HasImplicit = true;
989 }
990 if (Subtarget->isOCLRuntime()) {
991 // When CM kernel is run with OCL runtime, it is dispatched in a
992 // special "SIMD1" mode (aka "Programmable Media Kernels").
993 // This mode implies that we always have a "full" thread payload,
994 // even when CM kernel does *not* have implicit arguments.
995 // Payload format:
996 // | 0-15 | 16 - 31 | 32 - 47 | 46 - 256 |
997 // | localIDX | localIDY | localIDZ | unused |
998 NumGRFs = 1;
999 } else {
1000 // One GRF for per thread input size for CM
1001 NumGRFs = std::max(HasImplicit ? 1U : 0U, NumGRFs);
1002 }
1003
1004 uint16_t Bytes = NumGRFs * Subtarget->getGRFByteSize();
1005 Kernel.AddKernelAttribute("PerThreadInputSize", sizeof(Bytes), &Bytes);
1006 }
1007
1008 }
1009
// Make a name usable as a file name or inside vISA asm: every character that
// is neither alphanumeric nor '_' is replaced with '_'.
static std::string legalizeName(std::string Name) {
  for (char &Ch : Name) {
    const unsigned char UCh = Ch;
    if (!isalnum(UCh) && UCh != '_')
      Ch = '_';
  }
  return Name;
}
1017
buildAsmName() const1018 std::string GenXKernelBuilder::buildAsmName() const {
1019 std::string AsmName;
1020 auto UserAsmName = AsmNameOpt.getValue();
1021 if (UserAsmName.empty()) {
1022 AsmName = vc::legalizeShaderDumpName(TheKernelMetadata.getName());
1023 } else {
1024 int idx = -1;
1025 auto *KernelMDs =
1026 FG->getModule()->getOrInsertNamedMetadata(genx::FunctionMD::GenXKernels);
1027 unsigned E = KernelMDs->getNumOperands();
1028 for (unsigned I = 0; I < E; ++I) {
1029 MDNode *KernelMD = KernelMDs->getOperand(I);
1030 StringRef KernelName =
1031 cast<MDString>(KernelMD->getOperand(genx::KernelMDOp::Name).get())
1032 ->getString();
1033 if (KernelName == TheKernelMetadata.getName()) {
1034 idx = I;
1035 break;
1036 }
1037 }
1038 IGC_ASSERT(idx >= 0);
1039 // Reverse kernel ASM names during codegen.
1040 // This provides an option to match the old compiler's output.
1041 if (ReverseKernels.getValue())
1042 idx = E - idx - 1;
1043 AsmName = (UserAsmName + llvm::Twine('_') + llvm::Twine(idx)).str();
1044 }
1045
1046 // Currently installed shader dumper can provide its own path for
1047 // dumps. Prepend it to generated asm name.
1048 if (!BackendConfig->hasShaderDumper())
1049 return AsmName;
1050
1051 vc::ShaderDumper &Dumper = BackendConfig->getShaderDumper();
1052 return Dumper.composeDumpPath(AsmName);
1053 }
1054
runOnKernel()1055 void GenXKernelBuilder::runOnKernel() {
1056 IGC_ASSERT(TheKernelMetadata.isKernel());
1057
1058 const std::string KernelName = TheKernelMetadata.getName().str();
1059 CisaBuilder->AddKernel(MainKernel, KernelName.c_str());
1060 Kernel = static_cast<VISAFunction *>(MainKernel);
1061 Func2Kern[Func] = Kernel;
1062
1063 IGC_ASSERT_MESSAGE(Kernel, "Kernel initialization failed!");
1064 LLVM_DEBUG(dbgs() << "=== PROCESS KERNEL(" << KernelName << ") ===\n");
1065
1066 addKernelAttrsFromMetadata(*Kernel, TheKernelMetadata, Subtarget);
1067
1068 // Set CM target for all functions produced by VC.
1069 // See visa spec for CMTarget value (section 4, Kernel).
1070 const uint8_t CMTarget = 0;
1071 CISA_CALL(Kernel->AddKernelAttribute("Target", sizeof(CMTarget), &CMTarget));
1072
1073 bool NeedRetIP = false; // Need special return IP variable for FC.
1074 // For a kernel, add an attribute for asm filename for the jitter.
1075 std::string AsmName = buildAsmName();
1076 StringRef AsmNameRef = AsmName;
1077 CISA_CALL(Kernel->AddKernelAttribute("OutputAsmPath", AsmNameRef.size(),
1078 AsmNameRef.begin()));
1079 // Populate variable attributes if any.
1080 unsigned Idx = 0;
1081 bool IsComposable = false;
1082 for (auto &Arg : Func->args()) {
1083 const char *Kind = nullptr;
1084 switch (TheKernelMetadata.getArgInputOutputKind(Idx++)) {
1085 default:
1086 break;
1087 case KernelMetadata::ArgIOKind::Input:
1088 Kind = "Input";
1089 break;
1090 case KernelMetadata::ArgIOKind::Output:
1091 Kind = "Output";
1092 break;
1093 case KernelMetadata::ArgIOKind::InputOutput:
1094 Kind = "Input_Output";
1095 break;
1096 }
1097 if (Kind != nullptr) {
1098 auto R = getRegForValueUntypedAndSaveAlias(Func, &Arg);
1099 IGC_ASSERT(R);
1100 IGC_ASSERT(R->Category == RegCategory::GENERAL);
1101 R->addAttribute(addStringToPool(Kind), "");
1102 IsComposable = true;
1103 }
1104 }
1105 if (IsComposable)
1106 CISA_CALL(Kernel->AddKernelAttribute("Composable", 0, ""));
1107 if (HasCallable) {
1108 CISA_CALL(Kernel->AddKernelAttribute("Caller", 0, ""));
1109 NeedRetIP = true;
1110 }
1111 if (Func->hasFnAttribute("CMCallable")) {
1112 CISA_CALL(Kernel->AddKernelAttribute("Callable", 0, ""));
1113 NeedRetIP = true;
1114 }
1115 if (Func->hasFnAttribute("CMEntry")) {
1116 CISA_CALL(Kernel->AddKernelAttribute("Entry", 0, ""));
1117 }
1118
1119 if (NeedRetIP) {
1120 // Ask RegAlloc to add a special variable RetIP.
1121 RegAlloc->addRetIPArgument();
1122 auto R = RegAlloc->getRetIPArgument();
1123 R->NameStr = "RetIP";
1124 R->addAttribute(addStringToPool("Input_Output"), "");
1125 }
1126
1127 // Emit optimization hints if any.
1128 emitOptimizationHints();
1129
1130 // Build variables
1131 buildVariables();
1132
1133 // Build input variables
1134 buildInputs(Func, NeedRetIP);
1135 }
1136
runOnFunction()1137 void GenXKernelBuilder::runOnFunction() {
1138 VISAFunction *visaFunc = nullptr;
1139
1140 std::string FuncName = Func->getName().str();
1141 CisaBuilder->AddFunction(visaFunc, FuncName.c_str());
1142 std::string AsmName = buildAsmName().append("_").append(FuncName);
1143 CISA_CALL(visaFunc->AddKernelAttribute("OutputAsmPath", AsmName.size(),
1144 AsmName.c_str()));
1145 IGC_ASSERT(visaFunc);
1146 Func2Kern[Func] = visaFunc;
1147 Kernel = visaFunc;
1148 buildVariables();
1149 }
1150
run()1151 bool GenXKernelBuilder::run() {
1152 GrfByteSize = Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize;
1153 StackSurf = Subtarget ? Subtarget->stackSurface() : PREDEFINED_SURFACE_STACK;
1154
1155 UseNewStackBuilder =
1156 BackendConfig->useNewStackBuilder() && Subtarget->isOCLRuntime();
1157
1158 IGC_ASSERT(Subtarget);
1159
1160 Func = FG->getHead();
1161 if (genx::fg::isGroupHead(*Func))
1162 runOnKernel();
1163 else if (genx::fg::isSubGroupHead(*Func))
1164 runOnFunction();
1165 else
1166 llvm_unreachable("unknown function group type");
1167
1168 // Build instructions
1169 buildInstructions();
1170
1171 // Reset Regalloc hook
1172 RegAlloc->SetRegPushHook(nullptr, nullptr);
1173
1174 if (TheKernelMetadata.isKernel()) {
1175 // For a kernel with no barrier instruction, add a NoBarrier attribute.
1176 if (!HasBarrier)
1177 CISA_CALL(Kernel->AddKernelAttribute("NoBarrier", 0, nullptr));
1178 }
1179
1180 NumVisaInsts += Kernel->getvIsaInstCount();
1181
1182 return false;
1183 }
1184
PatchImpArgOffset(Function * F,const GenXSubtarget * ST,const KernelMetadata & KM)1185 static bool PatchImpArgOffset(Function *F, const GenXSubtarget *ST,
1186 const KernelMetadata &KM) {
1187 IGC_ASSERT(ST);
1188 if (ST->isOCLRuntime())
1189 return false;
1190 if (!ST->hasThreadPayloadInMemory())
1191 return false;
1192
1193 unsigned Idx = 0;
1194 for (auto i = F->arg_begin(), e = F->arg_end(); i != e; ++i, ++Idx) {
1195 uint8_t Kind = (KM.getArgKind(Idx));
1196 if (Kind & 0xf8)
1197 return true;
1198 }
1199
1200 return false;
1201 }
1202
getStateVariableSizeInBytes(const Type * Ty,const unsigned ElemSize)1203 static unsigned getStateVariableSizeInBytes(const Type *Ty,
1204 const unsigned ElemSize) {
1205 auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
1206 if (!VTy)
1207 return ElemSize;
1208 return ElemSize * VTy->getNumElements();
1209 }
1210
getInputSizeInBytes(const DataLayout & DL,const unsigned ArgCategory,Type * Ty)1211 static unsigned getInputSizeInBytes(const DataLayout &DL,
1212 const unsigned ArgCategory, Type *Ty) {
1213 switch (ArgCategory) {
1214 case RegCategory::GENERAL:
1215 return DL.getTypeSizeInBits(Ty) / genx::ByteBits;
1216 case RegCategory::SAMPLER:
1217 return getStateVariableSizeInBytes(Ty, genx::SamplerElementBytes);
1218 case RegCategory::SURFACE:
1219 return getStateVariableSizeInBytes(Ty, genx::SurfaceElementBytes);
1220 default:
1221 break;
1222 }
1223 IGC_ASSERT_EXIT_MESSAGE(0, "Unexpected register category for input");
1224 }
1225
// Declare the kernel's vISA input variables.
//
// Three groups are emitted, in this order:
//   1. every kernel argument that the metadata does not mark as skipped;
//   2. a pseudo-input for every module global carrying a positive
//      GenXByteOffset attribute;
//   3. the RetIP variable, when NeedRetIP is set.
//
// F         - the kernel function whose arguments are described.
// NeedRetIP - whether the special return-IP input must be added.
void GenXKernelBuilder::buildInputs(Function *F, bool NeedRetIP) {

  IGC_ASSERT_MESSAGE(F->arg_size() == TheKernelMetadata.getNumArgs(),
                     "Mismatch between metadata for kernel and number of args");

  // Globals to be bound statically, paired with their byte offset.
  std::vector<std::pair<GlobalVariable *, int32_t>> Bindings;
  Module *M = F->getParent();
  for (auto &GV : M->getGlobalList()) {
    // Offset stays 0 unless the attribute string is parsed into it.
    int32_t Offset = 0;
    GV.getAttribute(genx::FunctionMD::GenXByteOffset)
        .getValueAsString()
        .getAsInteger(0, Offset);
    if (Offset > 0)
      Bindings.emplace_back(&GV, Offset);
  }
  // Each argument.
  unsigned Idx = 0;
  bool PatchImpArgOff = PatchImpArgOffset(F, Subtarget, TheKernelMetadata);
  for (auto i = F->arg_begin(), e = F->arg_end(); i != e; ++i, ++Idx) {
    if (TheKernelMetadata.shouldSkipArg(Idx))
      continue;
    Argument *Arg = &*i;
    Register *Reg = getRegForValueUntypedAndSaveAlias(F, Arg);
    IGC_ASSERT(Reg);
    uint8_t Kind = TheKernelMetadata.getArgKind(Idx);
    uint16_t Offset = 0;
    if (!PatchImpArgOff) {
      Offset = TheKernelMetadata.getArgOffset(Idx);
    }
    else {
      // Patched layout: an argument whose (Kind >> 3) equals 3 is placed at
      // exactly one GRF; every other argument is shifted by one GRF.
      if ((Kind >> 3) == 3) {
        Offset = GrfByteSize;
      } else {
        Offset = (TheKernelMetadata.getArgOffset(Idx) + GrfByteSize);
      }
    }
    // Argument size in bytes.
    const unsigned NumBytes = getInputSizeInBytes(
        DL, TheKernelMetadata.getArgCategory(Idx), Arg->getType());

    // The low three bits of Kind select the input category; the remaining
    // bits are forwarded as the last argument of CreateVISAImplicitInputVar.
    switch (Kind & 0x7) {
    case visa::VISA_INPUT_GENERAL:
    case visa::VISA_INPUT_SAMPLER:
    case visa::VISA_INPUT_SURFACE:
      CISA_CALL(Kernel->CreateVISAImplicitInputVar(
          Reg->GetVar<VISA_GenVar>(Kernel), Offset, NumBytes, Kind >> 3));
      break;

    default:
      report_fatal_error("Unknown input category");
      break;
    }
  }
  // Add pseudo-input for global variables with offset attribute.
  for (auto &Item : Bindings) {
    // TODO: sanity check. No overlap with other inputs.
    GlobalVariable *GV = Item.first;
    // NOTE(review): the int32_t offset is narrowed to uint16_t here —
    // confirm that offsets always fit.
    uint16_t Offset = Item.second;
    IGC_ASSERT(Offset > 0);
    uint16_t NumBytes = (GV->getValueType()->getPrimitiveSizeInBits() / 8U);
    uint8_t Kind = KernelMetadata::IMP_PSEUDO_INPUT;
    Register *Reg = getRegForValueUntypedAndSaveAlias(F, GV);
    CISA_CALL(Kernel->CreateVISAImplicitInputVar(Reg->GetVar<VISA_GenVar>(Kernel),
                                                 Offset, NumBytes, Kind >> 3));
  }
  // Add the special RetIP argument.
  // Current assumption in Finalizer is that RetIP should be the last argument,
  // so we add it after generation of all other arguments.
  if (NeedRetIP) {
    Register *Reg = RegAlloc->getRetIPArgument();
    uint16_t Offset = (127 * GrfByteSize + 6 * 4); // r127.6
    uint16_t NumBytes = (64 / 8);
    uint8_t Kind = KernelMetadata::IMP_PSEUDO_INPUT;
    CISA_CALL(Kernel->CreateVISAImplicitInputVar(Reg->GetVar<VISA_GenVar>(Kernel),
                                                 Offset, NumBytes, Kind >> 3));
  }
}
1304
1305 // FIXME: We should use NM by default once code quality issues are addressed
1306 // in vISA compiler.
setNoMaskByDefault(Function * F,std::unordered_set<Function * > & Visited)1307 static bool setNoMaskByDefault(Function *F,
1308 std::unordered_set<Function *> &Visited) {
1309 for (auto &BB : F->getBasicBlockList())
1310 if (GotoJoin::isGotoBlock(&BB))
1311 return true;
1312
1313 // Check if this is subroutine call.
1314 for (auto U : F->users()) {
1315 if (auto CI = dyn_cast<CallInst>(U)) {
1316 Function *G = CI->getFunction();
1317 if (Visited.count(G))
1318 continue;
1319 Visited.insert(G);
1320 if (setNoMaskByDefault(G, Visited))
1321 return true;
1322 }
1323 }
1324
1325 return false;
1326 }
1327
// Emit vISA for every function of the function group.
//
// For each function this selects the vISA kernel/function object to emit
// into, writes the subroutine label and prologue, applies the float control
// attribute if present, and then walks all basic blocks emitting labels and
// every instruction that is not baled into another one.
void GenXKernelBuilder::buildInstructions() {
  for (auto It = FG->begin(), E = FG->end(); It != E; ++It) {
    Func = *It;
    LLVM_DEBUG(dbgs() << "Building IR for func " << Func->getName() << "\n");
    // NoMask is recomputed per function from the goto-block query.
    NoMask = [this]() {
      std::unordered_set<Function *> Visited;
      return setNoMaskByDefault(Func, Visited);
    }();

    LastUsedAliasMap.clear();

    // Pick the function that owns the vISA object: the function itself when
    // it is a main kernel, a stack call or indirectly referenced; otherwise
    // the head of the group that contains it.
    if (Func->hasFnAttribute(genx::FunctionMD::CMGenXMain) ||
        genx::requiresStackCall(Func) || genx::isReferencedIndirectly(Func)) {
      KernFunc = Func;
    } else {
      auto *FuncFG = FGA->getAnyGroup(Func);
      IGC_ASSERT_MESSAGE(FuncFG, "Cannot find the function group");
      KernFunc = FuncFG->getHead();
    }

    IGC_ASSERT(KernFunc);
    Kernel = Func2Kern.at(KernFunc);

    unsigned LabelID = getOrCreateLabel(Func, LABEL_SUBROUTINE);
    CISA_CALL(Kernel->AppendVISACFLabelInst(Labels[LabelID]));
    GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(),
                          "SubRoutine");

    if (UseNewStackBuilder)
      beginFunctionLight(Func);
    else
      beginFunction(Func);
    CurrentPadding = 0;

    // If a float control is specified, emit code to make that happen.
    // Float control contains rounding mode, denorm behaviour and single
    // precision float mode (ALT or IEEE) Relevant bits are already set as
    // defined for VISA control reg in header definition on enums
    if (Func->hasFnAttribute(genx::FunctionMD::CMFloatControl)) {
      uint32_t FloatControl = 0;
      Func->getFnAttribute(genx::FunctionMD::CMFloatControl)
          .getValueAsString()
          .getAsInteger(0, FloatControl);

      // Clear current float control bits to known zero state
      buildControlRegUpdate(CR_Mask, true);

      // Set rounding mode to required state if that isn't zero
      FloatControl &= CR_Mask;
      if (FloatControl) {
        // The group head's setting is remembered as the default.
        if (FG->getHead() == Func)
          DefaultFloatControl = FloatControl;
        buildControlRegUpdate(FloatControl, false);
      }
    }

    // Only output a label for the initial basic block if it is used from
    // somewhere else.
    bool NeedsLabel = !Func->front().use_empty();
    for (Function::iterator fi = Func->begin(), fe = Func->end(); fi != fe;
         ++fi) {
      BasicBlock *BB = &*fi;
      // A non-entry block needs a label when it has no single predecessor
      // or is a join label; NeedsLabel may also already be set from the
      // previous block.
      if (!NeedsLabel && BB != &Func->front()) {
        NeedsLabel = !BB->getSinglePredecessor();
        if (!NeedsLabel)
          NeedsLabel = GotoJoin::isJoinLabel(BB);
      }
      if (NeedsLabel) {
        unsigned LabelID = getOrCreateLabel(BB, LABEL_BLOCK);
        CISA_CALL(Kernel->AppendVISACFLabelInst(Labels[LabelID]));
      }
      // Default for the next block; cleared below when buildInstruction
      // returns true for an instruction of this block.
      NeedsLabel = true;
      for (BasicBlock::iterator bi = BB->begin(), be = BB->end(); bi != be;
           ++bi) {
        Instruction *Inst = &*bi;
        if (Inst->isTerminator()) {
          // Before the terminator inst of a basic block, if there is a single
          // successor and it is the header of a loop, for any vector of at
          // least four GRFs with a phi node where our incoming value is
          // undef, insert a lifetime.start here.
          auto *TI = cast<IGCLLVM::TerminatorInst>(Inst);
          if (TI->getNumSuccessors() == 1) {
            auto Succ = TI->getSuccessor(0);
            if (LIs->getLoopInfo(Succ->getParent())->isLoopHeader(Succ)) {
              // Walk the leading phi nodes; the loop ends at the first
              // non-phi instruction.
              for (auto si = Succ->begin();; ++si) {
                auto Phi = dyn_cast<PHINode>(&*si);
                if (!Phi)
                  break;
                if (Phi->getType()->getPrimitiveSizeInBits() >=
                        (GrfByteSize * 8) * 4 &&
                    isa<UndefValue>(
                        Phi->getIncomingValue(Phi->getBasicBlockIndex(BB))))
                  addLifetimeStartInst(Phi);
              }
            }
          }
        }
        // Build the instruction.
        if (!Baling->isBaled(Inst)) {
          // With the old stack builder the epilogue is emitted before the
          // return instruction itself.
          if (isa<ReturnInst>(Inst) && !UseNewStackBuilder)
            endFunction(Func, cast<ReturnInst>(Inst));
          if (buildInstruction(Inst))
            NeedsLabel = false;
        } else {
          LLVM_DEBUG(dbgs() << "Skip baled inst: " << *Inst << "\n");
        }
      }
    }
  }
}
1438
// Emit vISA for the bale headed by Inst.
//
// The bale is peeled from the outside in: global store, wrregion (and its
// variants), function address, saturate, cmp destination. What remains is
// either a lone operand (rdregion/modifier) or the main instruction.
//
// Returns false for every path handled here directly; otherwise returns
// whatever buildMainInst returns.
bool GenXKernelBuilder::buildInstruction(Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "Build inst: " << *Inst << "\n");
  // Make the source location pending, so it is output as vISA FILE and LOC
  // instructions next time an opcode is written.
  const DebugLoc &DL = Inst->getDebugLoc();
  CurrentInst = Inst;
  if (DL) {
    StringRef Filename = DL->getFilename();
    if (Filename != "") {
      PendingFilename = Filename;
      PendingDirectory = DL->getDirectory();
    }
    PendingLine = DL.getLine();
  }
  // Process the bale that this is the head instruction of.
  BaleInfo BI = Baling->getBaleInfo(Inst);
  LLVM_DEBUG(dbgs() << "Bale type " << BI.Type << "\n");

  DstOpndDesc DstDesc;
  if (BI.Type == BaleInfo::GSTORE) {
    // Inst is a global variable store. It should be baled into a wrr
    // instruction.
    Bale B;
    Baling->buildBale(Inst, &B);
    // This is an identity bale; no code will be emitted.
    if (isIdentityBale(B))
      return false;

    IGC_ASSERT(BI.isOperandBaled(0));
    DstDesc.GStore = Inst;
    // Continue with the value being stored.
    Inst = cast<Instruction>(Inst->getOperand(0));
    BI = Baling->getBaleInfo(Inst);
  }
  if (BI.Type == BaleInfo::REGINTR)
    return false;
  if (BI.Type == BaleInfo::WRREGION || BI.Type == BaleInfo::WRPREDREGION ||
      BI.Type == BaleInfo::WRPREDPREDREGION) {
    // Inst is a wrregion or wrpredregion or wrpredpredregion.
    DstDesc.WrRegion = Inst;
    DstDesc.WrRegionBI = BI;
    // Follow the chain of single-use wrregions that take the previous result
    // as their old-value operand; if the chain ends in a write to a
    // predefined register, remember that write.
    auto *CurInst = Inst;
    while (CurInst->hasOneUse() &&
           GenXIntrinsic::isWrRegion(CurInst->user_back()) &&
           CurInst->use_begin()->getOperandNo() ==
               GenXIntrinsic::GenXRegion::OldValueOperandNum)
      CurInst = CurInst->user_back();
    if (CurInst->hasOneUse() &&
        GenXIntrinsic::isWritePredefReg(CurInst->user_back()))
      DstDesc.WrPredefReg = CurInst->user_back();
    if (isa<UndefValue>(Inst->getOperand(0)) && !DstDesc.GStore) {
      // This is a wrregion, probably a partial write, to an undef value.
      // Write a lifetime start if appropriate to help the jitter's register
      // allocator.
      addWriteRegionLifetimeStartInst(DstDesc.WrRegion);
    }
    // See if it bales in the instruction
    // that generates the subregion/element. That is always operand 1.
    enum { OperandNum = 1 };
    if (!BI.isOperandBaled(OperandNum)) {
      if (BI.Type == BaleInfo::WRPREDREGION) {
        buildLoneWrPredRegion(DstDesc.WrRegion, DstDesc.WrRegionBI);
      } else {
        buildLoneWrRegion(DstDesc);
      }
      return false;
    }
    // Yes, source of wrregion is baled in.
    Inst = cast<Instruction>(DstDesc.WrRegion->getOperand(OperandNum));
    BI = Baling->getBaleInfo(Inst);
  }
  if (BI.Type == BaleInfo::FADDR) {
    buildFunctionAddr(Inst, DstDesc);
    return false;
  }
  unsigned Mod = 0;
  if (BI.Type == BaleInfo::SATURATE) {
    // Inst is a fp saturate. See if it bales in the instruction that
    // generates the value to saturate. That is always operand 0. If
    // not, just treat the saturate as a normal intrinsic.
    if (BI.isOperandBaled(0)) {
      Mod = MODIFIER_SAT;
      Inst = cast<Instruction>(Inst->getOperand(0));
      BI = Baling->getBaleInfo(Inst);
    } else
      BI.Type = BaleInfo::MAININST;
  }
  if (BI.Type == BaleInfo::CMPDST) {
    // Dst of sel instruction is baled in.
    Inst = cast<Instruction>(Inst->getOperand(0));
    IGC_ASSERT_MESSAGE(isa<CmpInst>(Inst), "only bale sel into a cmp instr");
    BI = Baling->getBaleInfo(Inst);
  }
  // No default case: any other bale type falls through to the main
  // instruction path below.
  switch (BI.Type) {
  case BaleInfo::RDREGION:
  case BaleInfo::ABSMOD:
  case BaleInfo::NEGMOD:
  case BaleInfo::NOTMOD:
    // This is a rdregion or modifier not baled in to a main instruction
    // (but possibly baled in to a wrregion or sat modifier).
    buildLoneOperand(Inst, BI, Mod, DstDesc);
    return false;
  }
  IGC_ASSERT(BI.Type == BaleInfo::MAININST || BI.Type == BaleInfo::NOTP ||
             BI.Type == BaleInfo::ZEXT || BI.Type == BaleInfo::SEXT);
  return buildMainInst(Inst, BI, Mod, DstDesc);
}
1545
createPredicateDeclFromSelect(Instruction * SI,BaleInfo BI,VISA_PREDICATE_CONTROL & Control,VISA_PREDICATE_STATE & State,VISA_EMask_Ctrl * MaskCtrl)1546 VISA_PredVar *GenXKernelBuilder::createPredicateDeclFromSelect(
1547 Instruction *SI, BaleInfo BI, VISA_PREDICATE_CONTROL &Control,
1548 VISA_PREDICATE_STATE &State, VISA_EMask_Ctrl *MaskCtrl) {
1549 *MaskCtrl = vISA_EMASK_M1_NM;
1550 // Get the predicate (mask) operand, scanning through baled in
1551 // all/any/not/rdpredregion and setting State and MaskCtrl
1552 // appropriately.
1553 Value *Mask = getPredicateOperand(SI, 0 /*selector operand in select*/, BI,
1554 Control, State, MaskCtrl);
1555 IGC_ASSERT(!isa<Constant>(Mask));
1556 // Variable predicate. Derive the predication field from any baled in
1557 // all/any/not and the predicate register number.
1558 Register *Reg = getRegForValueAndSaveAlias(KernFunc, Mask);
1559 IGC_ASSERT(Reg);
1560 IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
1561 if (NoMask)
1562 *MaskCtrl |= vISA_EMASK_M1_NM;
1563 return getPredicateVar(Reg);
1564 }
1565
1566 VISA_PredOpnd *
createPredFromWrRegion(const DstOpndDesc & DstDesc)1567 GenXKernelBuilder::createPredFromWrRegion(const DstOpndDesc &DstDesc) {
1568 VISA_PredOpnd *result = nullptr;
1569 Instruction *WrRegion = DstDesc.WrRegion;
1570 if (WrRegion) {
1571 // Get the predicate (mask) operand, scanning through baled in
1572 // all/any/not/rdpredregion and setting PredField and MaskCtrl
1573 // appropriately.
1574 VISA_EMask_Ctrl MaskCtrl;
1575 VISA_PREDICATE_CONTROL Control;
1576 VISA_PREDICATE_STATE State;
1577 Value *Mask =
1578 getPredicateOperand(WrRegion, 7 /*mask operand in wrregion*/,
1579 DstDesc.WrRegionBI, Control, State, &MaskCtrl);
1580 if (auto C = dyn_cast<Constant>(Mask)) {
1581 (void)C;
1582 IGC_ASSERT_MESSAGE(C->isAllOnesValue(),
1583 "wrregion mask or predication operand must be const 1 or not constant");
1584 } else {
1585 // Variable predicate. Derive the predication field from any baled in
1586 // all/any/not and the predicate register number. If the predicate has
1587 // not has a register allocated, it must be EM.
1588 Register *Reg = getRegForValueOrNullAndSaveAlias(KernFunc, Mask);
1589 if (Reg) {
1590 IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
1591 result = createPredOperand(getPredicateVar(Reg), State, Control);
1592 }
1593 }
1594 }
1595 return result;
1596 }
1597
1598 /***********************************************************************
1599 * createPred : create predication field from an instruction operand
1600 *
1601 * Enter: Inst = the instruction (0 to write an "always true" pred field)
1602 * BI = BaleInfo for the instruction, so we can see if there is a
1603 * rdpredregion baled in to the mask
1604 * OperandNum = operand number in the instruction
1605 *
1606 * If the operand is not constant 1, then it must be a predicate register.
1607 */
createPred(Instruction * Inst,BaleInfo BI,unsigned OperandNum)1608 VISA_PredOpnd *GenXKernelBuilder::createPred(Instruction *Inst, BaleInfo BI,
1609 unsigned OperandNum) {
1610 VISA_PredOpnd *ResultOperand = nullptr;
1611 VISA_PREDICATE_CONTROL PredControl;
1612 VISA_PREDICATE_STATE Inverse;
1613 VISA_EMask_Ctrl MaskCtrl;
1614 Value *Mask = getPredicateOperand(Inst, OperandNum, BI, PredControl, Inverse,
1615 &MaskCtrl);
1616 if (auto C = dyn_cast<Constant>(Mask)) {
1617 (void)C;
1618 IGC_ASSERT_MESSAGE(C->isAllOnesValue(),
1619 "wrregion mask or predication operand must be const 1 or not constant");
1620 } else {
1621 // Variable predicate. Derive the predication field from any baled in
1622 // all/any/not and the predicate register number. If the predicate has not
1623 // has a register allocated, it must be EM.
1624 Register *Reg = getRegForValueOrNullAndSaveAlias(KernFunc, Mask);
1625 VISA_PredVar *PredVar = nullptr;
1626 if (Reg) {
1627 IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
1628 PredVar = getPredicateVar(Reg);
1629 } else
1630 return nullptr;
1631 ResultOperand = createPredOperand(PredVar, Inverse, PredControl);
1632 }
1633 return ResultOperand;
1634 }
1635
createState(Register * Reg,unsigned Offset,bool IsDst)1636 VISA_VectorOpnd *GenXKernelBuilder::createState(Register *Reg, unsigned Offset,
1637 bool IsDst) {
1638 uint8_t Size = 0;
1639 VISA_VectorOpnd *Op = nullptr;
1640
1641 switch (Reg->Category) {
1642 case RegCategory::SURFACE:
1643 CISA_CALL(Kernel->CreateVISAStateOperand(Op, Reg->GetVar<VISA_SurfaceVar>(Kernel),
1644 Size, Offset, IsDst));
1645 break;
1646 case RegCategory::SAMPLER:
1647 CISA_CALL(Kernel->CreateVISAStateOperand(Op, Reg->GetVar<VISA_SamplerVar>(Kernel),
1648 Size, Offset, IsDst));
1649 break;
1650 default:
1651 IGC_ASSERT_EXIT_MESSAGE(0, "unknown state operand");
1652 }
1653
1654 return Op;
1655 }
1656
createDestination(CisaVariable * Dest,genx::Signedness Signed,unsigned * Offset)1657 VISA_VectorOpnd *GenXKernelBuilder::createDestination(CisaVariable *Dest,
1658 genx::Signedness Signed,
1659 unsigned *Offset) {
1660 Region R(IGCLLVM::FixedVectorType::get(
1661 IntegerType::get(Ctx, CISATypeTable[Dest->getType()].typeSize * CHAR_BIT),
1662 Dest->getNumElements()));
1663 if (Offset)
1664 R.Offset = *Offset;
1665 return createRegionOperand(&R, Dest->getGenVar(), Signed, 0, true);
1666 }
1667
createDestination(Value * Dest,genx::Signedness Signed,unsigned * Offset)1668 VISA_VectorOpnd *GenXKernelBuilder::createDestination(Value *Dest,
1669 genx::Signedness Signed,
1670 unsigned *Offset) {
1671 return createDestination(Dest, Signed, 0, DstOpndDesc(), nullptr, Offset);
1672 }
1673
1674 VISA_VectorOpnd *
createDestination(Value * Dest,genx::Signedness Signed,unsigned Mod,const DstOpndDesc & DstDesc,Signedness * SignedRes,unsigned * Offset)1675 GenXKernelBuilder::createDestination(Value *Dest, genx::Signedness Signed,
1676 unsigned Mod, const DstOpndDesc &DstDesc,
1677 Signedness *SignedRes, unsigned *Offset) {
1678 LLVM_DEBUG(dbgs() << "createDest for value: " << *Dest << ", wrr: ");
1679 if (DstDesc.WrRegion)
1680 LLVM_DEBUG(dbgs() << *(DstDesc.WrRegion));
1681 else
1682 LLVM_DEBUG(dbgs() << "null");
1683 LLVM_DEBUG(dbgs() << "\n");
1684 IGC_ASSERT_MESSAGE(!Dest->getType()->isAggregateType(),
1685 "cannot create destination register of an aggregate type");
1686 if (SignedRes)
1687 *SignedRes = Signed;
1688
1689 Type *OverrideType = nullptr;
1690 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Dest)) {
1691 if (!(isa<Constant>(BCI->getOperand(0))) &&
1692 !(BCI->getType()->getScalarType()->isIntegerTy(1)) &&
1693 (BCI->getOperand(0)->getType()->getScalarType()->isIntegerTy(1))) {
1694 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Dest->getType())) {
1695 unsigned int NumBits = VT->getNumElements() *
1696 VT->getElementType()->getPrimitiveSizeInBits();
1697 OverrideType = IntegerType::get(BCI->getContext(), NumBits);
1698 }
1699 }
1700 }
1701
1702 // Saturation can also change signedness.
1703 if (!Dest->user_empty() && GenXIntrinsic::isIntegerSat(Dest->user_back())) {
1704 Signed = getISatDstSign(Dest->user_back());
1705 }
1706
1707 if (!DstDesc.WrRegion) {
1708 if (Mod) {
1709 // There is a sat modifier. Either it is an fp saturate, which is
1710 // represented by its own intrinsic which this instruction is baled
1711 // into, or it is an int saturate which always comes from this
1712 // instruction's semantics. In the former case, use the value
1713 // that is the result of the saturate. But only if this instruction
1714 // itself is not the sat intrinsic.
1715 if (Dest->getType()->getScalarType()->isFloatingPointTy() &&
1716 GenXIntrinsic::getGenXIntrinsicID(Dest) != GenXIntrinsic::genx_sat)
1717 Dest = cast<Instruction>(Dest->use_begin()->getUser());
1718 }
1719 if ((Mod & MODIFIER_SAT) != 0) {
1720 // Similar for integer saturation.
1721 if (Dest->getType()->getScalarType()->isIntegerTy() &&
1722 !GenXIntrinsic::isIntegerSat(Dest) && GenXIntrinsic::isIntegerSat(Dest->user_back()))
1723 Dest = cast<Instruction>(Dest->user_back());
1724 }
1725 Register *Reg =
1726 getRegForValueAndSaveAlias(KernFunc, Dest, Signed, OverrideType);
1727 if (SignedRes)
1728 *SignedRes = RegAlloc->getSigned(Reg);
1729 // Write the vISA general operand:
1730 if (Reg->Category == RegCategory::GENERAL) {
1731 Region DestR(Dest);
1732 if (Offset)
1733 DestR.Offset = *Offset;
1734 return createRegionOperand(&DestR, Reg->GetVar<VISA_GenVar>(Kernel),
1735 DONTCARESIGNED, Mod, true /*isDest*/);
1736 } else {
1737 IGC_ASSERT(Reg->Category == RegCategory::SURFACE ||
1738 Reg->Category == RegCategory::SAMPLER);
1739
1740 return createState(Reg, 0 /*Offset*/, true /*IsDst*/);
1741 }
1742 }
1743 // We need to allow for the case that there is no register allocated if it
1744 // is an indirected arg, and that is OK because the region is indirect so
1745 // the vISA does not contain the base register.
1746 Register *Reg;
1747
1748 Value *V = nullptr;
1749 if (DstDesc.GStore) {
1750 auto GV = getUnderlyingGlobalVariable(DstDesc.GStore->getOperand(1));
1751 IGC_ASSERT_MESSAGE(GV, "out of sync");
1752 if (OverrideType == nullptr)
1753 OverrideType = DstDesc.GStore->getOperand(0)->getType();
1754 Reg = getRegForValueAndSaveAlias(KernFunc, GV, Signed, OverrideType);
1755 V = GV;
1756 } else {
1757 V = DstDesc.WrPredefReg ? DstDesc.WrPredefReg : DstDesc.WrRegion;
1758 // if (!V->user_empty() && GenXIntrinsic::isWritePredefReg(V->user_back()))
1759 // V = V->user_back();
1760 Reg = getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed, OverrideType);
1761 }
1762
1763 // Write the vISA general operand with region:
1764 Region R = makeRegionFromBaleInfo(DstDesc.WrRegion, DstDesc.WrRegionBI);
1765
1766 if (SignedRes)
1767 *SignedRes = RegAlloc->getSigned(Reg);
1768
1769 if (Reg && (Reg->Category == RegCategory::SAMPLER ||
1770 Reg->Category == RegCategory::SURFACE)) {
1771 IGC_ASSERT(R.ElementBytes);
1772 return createState(Reg, R.Offset / R.ElementBytes, true /*IsDest*/);
1773 } else {
1774 IGC_ASSERT(!Reg || Reg->Category == RegCategory::GENERAL);
1775 auto Decl = Reg ? Reg->GetVar<VISA_GenVar>(Kernel) : nullptr;
1776 return createRegionOperand(&R, Decl, Signed, Mod, true /*IsDest*/);
1777 }
1778 }
1779
createSourceOperand(Instruction * Inst,Signedness Signed,unsigned OperandNum,genx::BaleInfo BI,unsigned Mod,Signedness * SignedRes,unsigned MaxWidth)1780 VISA_VectorOpnd *GenXKernelBuilder::createSourceOperand(
1781 Instruction *Inst, Signedness Signed, unsigned OperandNum,
1782 genx::BaleInfo BI, unsigned Mod, Signedness *SignedRes, unsigned MaxWidth) {
1783 Value *V = Inst->getOperand(OperandNum);
1784 return createSource(V, Signed, BI.isOperandBaled(OperandNum), Mod, SignedRes,
1785 MaxWidth);
1786 }
1787
1788 VISA_PredOpnd *
createPredOperand(VISA_PredVar * PredVar,VISA_PREDICATE_STATE State,VISA_PREDICATE_CONTROL Control)1789 GenXKernelBuilder::createPredOperand(VISA_PredVar *PredVar,
1790 VISA_PREDICATE_STATE State,
1791 VISA_PREDICATE_CONTROL Control) {
1792 VISA_PredOpnd *PredOperand = nullptr;
1793 CISA_CALL(
1794 Kernel->CreateVISAPredicateOperand(PredOperand, PredVar, State, Control));
1795
1796 return PredOperand;
1797 }
1798
createCisaSrcOperand(VISA_GenVar * Decl,VISA_Modifier Mod,unsigned VStride,unsigned Width,unsigned HStride,unsigned ROffset,unsigned COffset)1799 VISA_VectorOpnd *GenXKernelBuilder::createCisaSrcOperand(
1800 VISA_GenVar *Decl, VISA_Modifier Mod, unsigned VStride, unsigned Width,
1801 unsigned HStride, unsigned ROffset, unsigned COffset) {
1802 VISA_VectorOpnd *ResultOperand = nullptr;
1803 CISA_CALL(Kernel->CreateVISASrcOperand(ResultOperand, Decl, Mod, VStride,
1804 Width, HStride, ROffset, COffset));
1805 return ResultOperand;
1806 }
1807
createCisaDstOperand(VISA_GenVar * Decl,unsigned HStride,unsigned ROffset,unsigned COffset)1808 VISA_VectorOpnd *GenXKernelBuilder::createCisaDstOperand(VISA_GenVar *Decl,
1809 unsigned HStride,
1810 unsigned ROffset,
1811 unsigned COffset) {
1812 VISA_VectorOpnd *ResultOperand = nullptr;
1813 CISA_CALL(Kernel->CreateVISADstOperand(ResultOperand, Decl, HStride, ROffset,
1814 COffset));
1815 return ResultOperand;
1816 }
1817
1818 /***********************************************************************
1819 * createAddressOperand : create an address register operand
1820 */
createAddressOperand(Value * V,bool IsDst)1821 VISA_VectorOpnd *GenXKernelBuilder::createAddressOperand(Value *V, bool IsDst) {
1822 VISA_VectorOpnd *ResultOperand = nullptr;
1823 Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, DONTCARESIGNED);
1824 IGC_ASSERT(Reg->Category == RegCategory::ADDRESS);
1825 unsigned Width = 1;
1826 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(V->getType()))
1827 Width = VT->getNumElements();
1828 if (IsDst) {
1829 CISA_CALL(Kernel->CreateVISAAddressDstOperand(
1830 ResultOperand, Reg->GetVar<VISA_AddrVar>(Kernel), 0));
1831 } else {
1832 CISA_CALL(Kernel->CreateVISAAddressSrcOperand(
1833 ResultOperand, Reg->GetVar<VISA_AddrVar>(Kernel), 0, Width));
1834 }
1835 return ResultOperand;
1836 }
1837
getVISAImmTy(uint8_t ImmTy)1838 VISA_Type GenXKernelBuilder::getVISAImmTy(uint8_t ImmTy) {
1839 return static_cast<VISA_Type>(ImmTy & 0xf);
1840 }
1841
createImmediateOperand(Constant * V,Signedness Signed)1842 VISA_VectorOpnd *GenXKernelBuilder::createImmediateOperand(Constant *V,
1843 Signedness Signed) {
1844 if (isDerivedFromUndef(V))
1845 V = Constant::getNullValue(V->getType());
1846
1847 Type *T = V->getType();
1848 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(T)) {
1849 // Vector constant.
1850 auto Splat = V->getSplatValue();
1851 if (!Splat) {
1852 // Non-splatted vector constant. Must be a packed vector.
1853 unsigned NumElements = VT->getNumElements();
1854 if (VT->getElementType()->isIntegerTy()) {
1855 // Packed int vector.
1856 IGC_ASSERT(NumElements <= ImmIntVec::Width);
1857 unsigned Packed = 0;
1858 for (unsigned i = 0; i != NumElements; ++i) {
1859 auto El = dyn_cast<ConstantInt>(V->getAggregateElement(i));
1860 if (!El)
1861 continue; // undef element
1862 int This = El->getSExtValue();
1863 if (This < ImmIntVec::MinUInt) {
1864 IGC_ASSERT_MESSAGE(This >= ImmIntVec::MinSInt,
1865 "too big imm, cannot encode as vector imm");
1866 Signed = SIGNED;
1867 } else if (This > ImmIntVec::MaxSInt) {
1868 IGC_ASSERT_MESSAGE(This <= ImmIntVec::MaxUInt,
1869 "too big imm, cannot encode as vector imm");
1870 Signed = UNSIGNED;
1871 }
1872 Packed |= (This & ImmIntVec::MaxUInt) << (ImmIntVec::ElemSize * i);
1873 }
1874 // For a 2- or 4-wide operand, we need to repeat the vector elements
1875 // as which ones are used depends on the position of the other
1876 // operand in its oword.
1877 switch (NumElements) {
1878 case 2:
1879 Packed = Packed * 0x01010101;
1880 break;
1881 case 4:
1882 Packed = Packed * 0x00010001;
1883 break;
1884 }
1885 auto ImmTy =
1886 static_cast<uint8_t>(Signed == UNSIGNED ? ISA_TYPE_UV : ISA_TYPE_V);
1887 auto VISAImmTy = getVISAImmTy(ImmTy);
1888 VISA_VectorOpnd *ImmOp = nullptr;
1889 CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Packed, VISAImmTy));
1890 return ImmOp;
1891 }
1892 // Packed float vector.
1893 IGC_ASSERT(VT->getElementType()->isFloatTy());
1894 IGC_ASSERT(NumElements == 1 || NumElements == 2 || NumElements == 4);
1895 unsigned Packed = 0;
1896 for (unsigned i = 0; i != 4; ++i) {
1897 auto CFP =
1898 dyn_cast<ConstantFP>(V->getAggregateElement(i % NumElements));
1899 if (!CFP) // Undef
1900 continue;
1901 const APFloat &FP = CFP->getValueAPF();
1902 Packed |= get8bitPackedFloat(FP.convertToFloat()) << (i * 8);
1903 }
1904 auto VISAImmTy = getVISAImmTy(ISA_TYPE_VF);
1905 VISA_VectorOpnd *ImmOp = nullptr;
1906 CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Packed, VISAImmTy));
1907 return ImmOp;
1908 }
1909 // Splatted (or single element) vector. Use the scalar value.
1910 T = VT->getElementType();
1911 V = Splat;
1912 }
1913
1914 if (isDerivedFromUndef(V))
1915 V = Constant::getNullValue(V->getType());
1916 else if (isa<ConstantPointerNull>(V)) {
1917 const DataLayout &DL = Func->getParent()->getDataLayout();
1918 T = DL.getIntPtrType(V->getType());
1919 V = Constant::getNullValue(T);
1920 }
1921
1922 // We have a scalar constant.
1923 if (IntegerType *IT = dyn_cast<IntegerType>(T)) {
1924 ConstantInt *CI = cast<ConstantInt>(V);
1925 // I think we need to use the appropriate one of getZExtValue or
1926 // getSExtValue to avoid an assertion failure on very large 64 bit values...
1927 int64_t Val = Signed == UNSIGNED ? CI->getZExtValue() : CI->getSExtValue();
1928 visa::TypeDetails TD(Func->getParent()->getDataLayout(), IT, Signed);
1929 VISA_VectorOpnd *ImmOp = nullptr;
1930 CISA_CALL(
1931 Kernel->CreateVISAImmediate(ImmOp, &Val, getVISAImmTy(TD.VisaType)));
1932 return ImmOp;
1933 } if (isa<Function>(V)) {
1934 IGC_ASSERT_MESSAGE(0, "Not baled function address");
1935 return nullptr;
1936 } else {
1937 VISA_VectorOpnd *ImmOp = nullptr;
1938 ConstantFP *CF = cast<ConstantFP>(V);
1939 if (T->isFloatTy()) {
1940 union {
1941 float f;
1942 uint32_t i;
1943 } Val;
1944 Val.f = CF->getValueAPF().convertToFloat();
1945 auto VISAImmTy = getVISAImmTy(ISA_TYPE_F);
1946 CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Val.i, VISAImmTy));
1947 } else if (T->isHalfTy()) {
1948 uint16_t Val(
1949 (uint16_t)(CF->getValueAPF().bitcastToAPInt().getZExtValue()));
1950 auto VISAImmTy = getVISAImmTy(ISA_TYPE_HF);
1951 auto Val32 = static_cast<uint32_t>(Val);
1952 CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Val32, VISAImmTy));
1953 } else {
1954 IGC_ASSERT(T->isDoubleTy());
1955 union {
1956 double f;
1957 uint64_t i;
1958 } Val;
1959 Val.f = CF->getValueAPF().convertToDouble();
1960 auto VISAImmTy = getVISAImmTy(ISA_TYPE_DF);
1961 CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Val.i, VISAImmTy));
1962 }
1963 return ImmOp;
1964 }
1965 }
1966
1967 /***********************************************************************
1968 * getOriginalInstructionForSource : trace a source operand back through
1969 * its bale (if any), given a starting instruction.
1970 *
1971 * Enter: Inst = The instruction to start tracing from.
1972 * BI = BaleInfo for Inst
1973 */
1974 Instruction *
getOriginalInstructionForSource(Instruction * Inst,BaleInfo BI)1975 GenXKernelBuilder::getOriginalInstructionForSource(Instruction *Inst,
1976 BaleInfo BI) {
1977 while (!isa<Constant>(Inst->getOperand(0)) && BI.isOperandBaled(0)) {
1978 Inst = cast<Instruction>(Inst->getOperand(0));
1979 BI = Baling->getBaleInfo(Inst);
1980 }
1981
1982 return Inst;
1983 }
1984
buildConvert(CallInst * CI,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)1985 void GenXKernelBuilder::buildConvert(CallInst *CI, BaleInfo BI, unsigned Mod,
1986 const DstOpndDesc &DstDesc) {
1987 Register *DstReg = getRegForValueAndSaveAlias(KernFunc, CI, UNSIGNED);
1988 if (!isa<Constant>(CI->getOperand(0))) {
1989 Instruction *OrigInst = getOriginalInstructionForSource(CI, BI);
1990 Register *SrcReg =
1991 getRegForValueAndSaveAlias(KernFunc, OrigInst->getOperand(0));
1992 const bool SrcCategory = (SrcReg->Category != RegCategory::GENERAL);
1993 const bool DstCategory = (DstReg->Category != RegCategory::GENERAL);
1994 const bool Categories = (SrcCategory || DstCategory);
1995 IGC_ASSERT_MESSAGE(Categories, "expected a category conversion");
1996 (void)Categories;
1997 }
1998
1999 if (DstReg->Category != RegCategory::ADDRESS) {
2000 // State copy.
2001 int ExecSize = 1;
2002 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType())) {
2003 ExecSize = VT->getNumElements();
2004 }
2005
2006 auto ISAExecSize = static_cast<VISA_Exec_Size>(genx::log2(ExecSize));
2007 auto Dst = createDestination(CI, UNSIGNED, 0, DstDesc);
2008 auto Src = createSourceOperand(CI, UNSIGNED, 0, BI);
2009 addDebugInfo();
2010 CISA_CALL(Kernel->AppendVISADataMovementInst(
2011 ISA_MOVS, nullptr /*Pred*/, false /*Mod*/,
2012 NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1, ISAExecSize, Dst, Src));
2013 return;
2014 }
2015
2016 // Destination is address register.
2017 int ExecSize = 1;
2018 if (VectorType *VT = dyn_cast<VectorType>(CI->getType())) {
2019 DiagnosticInfoCisaBuild Err{CI, "vector of addresses not implemented",
2020 DS_Error};
2021 getContext().diagnose(Err);
2022 }
2023
2024 auto ISAExecSize = static_cast<VISA_Exec_Size>(genx::log2(ExecSize));
2025 Register *SrcReg = getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0));
2026 IGC_ASSERT(SrcReg->Category == RegCategory::ADDRESS);
2027
2028 (void)SrcReg;
2029 // This is an address->address copy, inserted due to coalescing failure of
2030 // the address for an indirected arg in GenXArgIndirection.
2031 // (A conversion to address is handled in buildConvertAddr below.)
2032 // Write the addr_add instruction.
2033 Value *SrcOp0 = CI->getOperand(0);
2034 unsigned Src0Width = 1;
2035 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(SrcOp0->getType()))
2036 Src0Width = VT->getNumElements();
2037
2038 Register *RegDst = getRegForValueAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
2039 Register *RegSrc0 =
2040 getRegForValueAndSaveAlias(KernFunc, SrcOp0, DONTCARESIGNED);
2041
2042 VISA_VectorOpnd *Dst = nullptr, *Src0 = nullptr, *Src1 = nullptr;
2043
2044 CISA_CALL(Kernel->CreateVISAAddressDstOperand(
2045 Dst, RegDst->GetVar<VISA_AddrVar>(Kernel), 0));
2046 CISA_CALL(Kernel->CreateVISAAddressSrcOperand(
2047 Src0, RegSrc0->GetVar<VISA_AddrVar>(Kernel), 0, Src0Width));
2048 Src1 =
2049 createImmediateOperand(Constant::getNullValue(CI->getType()), UNSIGNED);
2050
2051 addDebugInfo();
2052 CISA_CALL(Kernel->AppendVISAAddrAddInst(vISA_EMASK_M1_NM, ISAExecSize, Dst,
2053 Src0, Src1));
2054 }
2055
createSource(CisaVariable * V,Signedness Signed,unsigned MaxWidth,unsigned * Offset)2056 VISA_VectorOpnd *GenXKernelBuilder::createSource(CisaVariable *V,
2057 Signedness Signed,
2058 unsigned MaxWidth,
2059 unsigned *Offset) {
2060 Region R(IGCLLVM::FixedVectorType::get(
2061 IntegerType::get(Ctx, CISATypeTable[V->getType()].typeSize * CHAR_BIT),
2062 V->getNumElements()));
2063 if (Offset)
2064 R.Offset = *Offset;
2065 return createRegionOperand(&R, V->getGenVar(), Signed, 0, false, MaxWidth);
2066 }
2067
createSource(Value * V,Signedness Signed,unsigned MaxWidth,unsigned * Offset)2068 VISA_VectorOpnd *GenXKernelBuilder::createSource(Value *V, Signedness Signed,
2069 unsigned MaxWidth,
2070 unsigned *Offset) {
2071 return createSource(V, Signed, false, 0, nullptr, MaxWidth, Offset);
2072 }
2073
createSource(Value * V,Signedness Signed,bool Baled,unsigned Mod,Signedness * SignedRes,unsigned MaxWidth,unsigned * Offset)2074 VISA_VectorOpnd *GenXKernelBuilder::createSource(Value *V, Signedness Signed,
2075 bool Baled, unsigned Mod,
2076 Signedness *SignedRes,
2077 unsigned MaxWidth,
2078 unsigned *Offset) {
2079 LLVM_DEBUG(dbgs() << "createSource for "
2080 << (Baled ? "baled" : "non-baled") << " value: ");
2081 LLVM_DEBUG(V->dump());
2082 LLVM_DEBUG(dbgs() << "\n");
2083 if (SignedRes)
2084 *SignedRes = Signed;
2085 if (auto C = dyn_cast<Constant>(V)) {
2086 if (Mod) {
2087 // Need to negate constant.
2088 IGC_ASSERT_MESSAGE(Mod == MODIFIER_NEG, "unexpected modifier");
2089 if (C->getType()->isIntOrIntVectorTy())
2090 C = ConstantExpr::getNeg(C);
2091 else
2092 C = ConstantExpr::getFNeg(C);
2093 }
2094 return createImmediateOperand(C, Signed);
2095 }
2096 if (!Baled) {
2097 Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
2098 IGC_ASSERT(Reg->Category == RegCategory::GENERAL ||
2099 Reg->Category == RegCategory::SURFACE ||
2100 Reg->Category == RegCategory::SAMPLER);
2101 // Write the vISA general operand.
2102 Region R(V);
2103 if (Offset)
2104 R.Offset = *Offset;
2105 if (R.NumElements == 1)
2106 R.VStride = R.Stride = 0;
2107 if (SignedRes)
2108 *SignedRes = RegAlloc->getSigned(Reg);
2109 if (Reg->Category == RegCategory::GENERAL) {
2110 return createRegionOperand(&R, Reg->GetVar<VISA_GenVar>(Kernel), Signed, Mod,
2111 false /*IsDst*/, MaxWidth);
2112 } else {
2113 return createState(Reg, R.Offset >> 2, false /*IsDst*/);
2114 };
2115 }
2116
2117 Instruction *Inst = cast<Instruction>(V);
2118 BaleInfo BI(Baling->getBaleInfo(Inst));
2119 unsigned Idx = 0;
2120 switch (BI.Type) {
2121 case BaleInfo::RDREGION: {
2122 // The source operand has a rdregion baled in. We need to allow for the
2123 // case that there is no register allocated if it is an indirected arg,
2124 // and that is OK because the region is indirect so the vISA does not
2125 // contain the base register.
2126 Value *V = Inst->getOperand(0);
2127 Register *Reg = getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed);
2128
2129 // Ensure we pick a non-DONTCARESIGNED signedness here, as, for an
2130 // indirect region and DONTCARESIGNED, writeRegion arbitrarily picks a
2131 // signedness as it is attached to the operand, unlike a direct region
2132 // where it is attached to the vISA register.
2133 if (Reg)
2134 Signed = RegAlloc->getSigned(Reg);
2135 else if (Signed == DONTCARESIGNED)
2136 Signed = SIGNED;
2137 // Write the vISA general operand with region.
2138 Region R = makeRegionFromBaleInfo(Inst, Baling->getBaleInfo(Inst));
2139 if (Offset)
2140 R.Offset = *Offset;
2141 if (R.NumElements == 1)
2142 R.VStride = 0;
2143 if (R.Width == 1)
2144 R.Stride = 0;
2145 if (!Reg || Reg->Category == RegCategory::GENERAL || R.Indirect) {
2146 if (SignedRes)
2147 *SignedRes = Signed;
2148 return createRegionOperand(&R, Reg ? Reg->GetVar<VISA_GenVar>(Kernel) : nullptr,
2149 Signed, Mod, false, MaxWidth);
2150 } else {
2151 if (SignedRes)
2152 *SignedRes = Signed;
2153 return createState(Reg, R.Offset >> 2, false /*IsDst*/);
2154 }
2155 }
2156 case BaleInfo::ABSMOD:
2157 Signed = SIGNED;
2158 Mod |= MODIFIER_ABS;
2159 break;
2160 case BaleInfo::NEGMOD:
2161 #if LLVM_VERSION_MAJOR > 8
2162 if (Inst->getOpcode() == Instruction::FNeg) {
2163 Mod ^= MODIFIER_NEG;
2164 break;
2165 }
2166 #endif
2167 if (!(Mod & MODIFIER_ABS))
2168 Mod ^= MODIFIER_NEG;
2169 Idx = 1; // the input we want in "0-x" is x, not 0.
2170 break;
2171 case BaleInfo::NOTMOD:
2172 Mod ^= MODIFIER_NOT;
2173 break;
2174 case BaleInfo::ZEXT:
2175 Signed = UNSIGNED;
2176 break;
2177 case BaleInfo::SEXT:
2178 Signed = SIGNED;
2179 break;
2180 default:
2181 IGC_ASSERT_EXIT_MESSAGE(0, "unknown bale type");
2182 break;
2183 }
2184 return createSource(Inst->getOperand(Idx), Signed, BI.isOperandBaled(Idx),
2185 Mod, SignedRes, MaxWidth);
2186 }
2187
createInlineAsmOperand(Register * Reg,genx::Region * R,bool IsDst,genx::Signedness Signed,genx::ConstraintType Ty,unsigned Mod)2188 std::string GenXKernelBuilder::createInlineAsmOperand(
2189 Register *Reg, genx::Region *R, bool IsDst, genx::Signedness Signed,
2190 genx::ConstraintType Ty, unsigned Mod) {
2191 deduceRegion(R, IsDst);
2192
2193 VISA_VectorOpnd *ResultOperand = nullptr;
2194 switch (Ty) {
2195 default:
2196 IGC_ASSERT_EXIT_MESSAGE(0, "constraint unhandled");
2197 case ConstraintType::Constraint_cr: {
2198 IGC_ASSERT(Reg);
2199 IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
2200 VISA_PredVar *PredVar = getPredicateVar(Reg);
2201 VISA_PredOpnd *PredOperand =
2202 createPredOperand(PredVar, PredState_NO_INVERSE, PRED_CTRL_NON);
2203 return Kernel->getPredicateOperandName(PredOperand);
2204 }
2205 case ConstraintType::Constraint_rw:
2206 return Kernel->getVarName(Reg->GetVar<VISA_GenVar>(Kernel));
2207 case ConstraintType::Constraint_r:
2208 ResultOperand =
2209 createGeneralOperand(R, Reg->GetVar<VISA_GenVar>(Kernel), Signed, Mod, IsDst);
2210 break;
2211 case ConstraintType::Constraint_a:
2212 if (R->Indirect)
2213 ResultOperand = createIndirectOperand(R, Signed, Mod, IsDst);
2214 else
2215 ResultOperand = createGeneralOperand(R, Reg->GetVar<VISA_GenVar>(Kernel),
2216 Signed, Mod, IsDst);
2217 break;
2218 }
2219 return Kernel->getVectorOperandName(ResultOperand, true);
2220 }
2221
createInlineAsmDestinationOperand(Value * Dest,genx::Signedness Signed,genx::ConstraintType Ty,unsigned Mod,const DstOpndDesc & DstDesc)2222 std::string GenXKernelBuilder::createInlineAsmDestinationOperand(
2223 Value *Dest, genx::Signedness Signed, genx::ConstraintType Ty, unsigned Mod,
2224 const DstOpndDesc &DstDesc) {
2225
2226 Type *OverrideType = nullptr;
2227
2228 // Saturation can also change signedness.
2229 if (!Dest->user_empty() && GenXIntrinsic::isIntegerSat(Dest->user_back())) {
2230 Signed = getISatDstSign(Dest->user_back());
2231 }
2232
2233 if (!DstDesc.WrRegion) {
2234 Register *Reg =
2235 getRegForValueAndSaveAlias(KernFunc, Dest, Signed, OverrideType);
2236
2237 Region DestR(Dest);
2238 return createInlineAsmOperand(Reg, &DestR, true /*IsDst*/, DONTCARESIGNED,
2239 Ty, Mod);
2240 }
2241 // We need to allow for the case that there is no register allocated if it is
2242 // an indirected arg, and that is OK because the region is indirect so the
2243 // vISA does not contain the base register.
2244 Register *Reg;
2245
2246 Value *V = nullptr;
2247 if (DstDesc.GStore) {
2248 auto GV = getUnderlyingGlobalVariable(DstDesc.GStore->getOperand(1));
2249 IGC_ASSERT_MESSAGE(GV, "out of sync");
2250 if (OverrideType == nullptr)
2251 OverrideType = DstDesc.GStore->getOperand(0)->getType();
2252 Reg = getRegForValueAndSaveAlias(KernFunc, GV, Signed, OverrideType);
2253 V = GV;
2254 } else {
2255 V = DstDesc.WrRegion;
2256 Reg = getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed, OverrideType);
2257 }
2258
2259 IGC_ASSERT(!Reg || Reg->Category == RegCategory::GENERAL);
2260
2261 // Write the vISA general operand with region:
2262 Region R = makeRegionFromBaleInfo(DstDesc.WrRegion, DstDesc.WrRegionBI);
2263
2264 return createInlineAsmOperand(Reg, &R, true /*IsDst*/, Signed, Ty, Mod);
2265 }
2266
createInlineAsmSourceOperand(Value * V,genx::Signedness Signed,bool Baled,genx::ConstraintType Ty,unsigned Mod,unsigned MaxWidth)2267 std::string GenXKernelBuilder::createInlineAsmSourceOperand(
2268 Value *V, genx::Signedness Signed, bool Baled, genx::ConstraintType Ty,
2269 unsigned Mod, unsigned MaxWidth) {
2270
2271 if (auto C = dyn_cast<Constant>(V)) {
2272 if (Ty != genx::ConstraintType::Constraint_n) {
2273 if (Mod) {
2274 // Need to negate constant.
2275 IGC_ASSERT_MESSAGE(Mod == MODIFIER_NEG, "unexpected modifier");
2276 if (C->getType()->isIntOrIntVectorTy())
2277 C = ConstantExpr::getNeg(C);
2278 else
2279 C = ConstantExpr::getFNeg(C);
2280 }
2281 VISA_VectorOpnd *ImmOp = createImmediateOperand(C, Signed);
2282 return Kernel->getVectorOperandName(ImmOp, false);
2283 } else {
2284 ConstantInt *CI = cast<ConstantInt>(C);
2285 return llvm::to_string(CI->getSExtValue());
2286 }
2287 }
2288
2289 if (!Baled) {
2290 Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
2291 Region R(V);
2292 if (R.NumElements == 1)
2293 R.VStride = R.Stride = 0;
2294
2295 return createInlineAsmOperand(Reg, &R, false /*IsDst*/, Signed, Ty, Mod);
2296 }
2297
2298 Instruction *Inst = cast<Instruction>(V);
2299 BaleInfo BI(Baling->getBaleInfo(Inst));
2300 IGC_ASSERT(BI.Type == BaleInfo::RDREGION);
2301 // The source operand has a rdregion baled in. We need to allow for the
2302 // case that there is no register allocated if it is an indirected arg,
2303 // and that is OK because the region is indirect so the vISA does not
2304 // contain the base register.
2305 V = Inst->getOperand(0);
2306 Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
2307
2308 // Ensure we pick a non-DONTCARESIGNED signedness here, as, for an
2309 // indirect region and DONTCARESIGNED, writeRegion arbitrarily picks a
2310 // signedness as it is attached to the operand, unlike a direct region
2311 // where it is attached to the vISA register.
2312 if (Signed == DONTCARESIGNED)
2313 Signed = SIGNED;
2314 // Write the vISA general operand with region.
2315 Region R = makeRegionFromBaleInfo(Inst, Baling->getBaleInfo(Inst));
2316 if (R.NumElements == 1)
2317 R.VStride = 0;
2318 if (R.Width == 1)
2319 R.Stride = 0;
2320
2321 IGC_ASSERT(Reg->Category == RegCategory::GENERAL || R.Indirect);
2322
2323 return createInlineAsmOperand(Reg, &R, false /*IsDst*/, Signed, Ty, Mod);
2324 }
2325
2326 /***********************************************************************
2327 * getPredicateVar : get predicate var from value
2328 */
getPredicateVar(Value * V)2329 VISA_PredVar *GenXKernelBuilder::getPredicateVar(Value *V) {
2330 auto Reg = getRegForValueAndSaveAlias(KernFunc, V, DONTCARESIGNED);
2331 IGC_ASSERT(Reg);
2332 IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
2333 return getPredicateVar(Reg);
2334 }
2335
2336 /***********************************************************************
2337 * getZeroedPredicateVar : get predicate var from value with zeroing it
2338 */
getZeroedPredicateVar(Value * V)2339 VISA_PredVar *GenXKernelBuilder::getZeroedPredicateVar(Value *V) {
2340 auto Reg = getRegForValueAndSaveAlias(KernFunc, V, DONTCARESIGNED);
2341 IGC_ASSERT(Reg);
2342 IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
2343 auto PredVar = getPredicateVar(Reg);
2344 unsigned Size = V->getType()->getPrimitiveSizeInBits();
2345 auto C = Constant::getNullValue(V->getType());
2346 CISA_CALL(Kernel->AppendVISASetP(
2347 vISA_EMASK_M1_NM, VISA_Exec_Size(genx::log2(Size)),
2348 PredVar, createImmediateOperand(C, DONTCARESIGNED)));
2349
2350 return PredVar;
2351 }
2352
2353 /***********************************************************************
2354 * getPredicateVar : get predicate var from register
2355 */
getPredicateVar(Register * R)2356 VISA_PredVar *GenXKernelBuilder::getPredicateVar(Register *R) {
2357 IGC_ASSERT(R);
2358 return R->Num >= visa::VISA_NUM_RESERVED_PREDICATES
2359 ? R->GetVar<VISA_PredVar>(Kernel)
2360 : nullptr;
2361 }
2362
buildSelectInst(SelectInst * SI,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2363 void GenXKernelBuilder::buildSelectInst(SelectInst *SI, BaleInfo BI,
2364 unsigned Mod,
2365 const DstOpndDesc &DstDesc) {
2366 unsigned ExecSize = 1;
2367 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(SI->getType()))
2368 ExecSize = VT->getNumElements();
2369 // Get the predicate (mask) operand, scanning through baled in
2370 // all/any/not/rdpredregion and setting PredField and MaskCtrl
2371 // appropriately.
2372 VISA_EMask_Ctrl MaskCtrl;
2373 VISA_PREDICATE_CONTROL Control;
2374 VISA_PREDICATE_STATE State;
2375
2376 VISA_PredVar *PredDecl =
2377 createPredicateDeclFromSelect(SI, BI, Control, State, &MaskCtrl);
2378 VISA_PredOpnd* PredOp = createPredOperand(PredDecl, State, Control);
2379
2380 VISA_VectorOpnd *Dst = createDestination(SI, DONTCARESIGNED, Mod, DstDesc);
2381 VISA_VectorOpnd *Src0 = createSourceOperand(SI, DONTCARESIGNED, 1, BI);
2382 VISA_VectorOpnd *Src1 = createSourceOperand(SI, DONTCARESIGNED, 2, BI);
2383
2384 addDebugInfo();
2385 CISA_CALL(Kernel->AppendVISADataMovementInst(
2386 ISA_SEL, PredOp, Mod & MODIFIER_SAT, MaskCtrl,
2387 getExecSizeFromValue(ExecSize), Dst, Src0, Src1));
2388 }
2389
buildNoopCast(CastInst * CI,genx::BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2390 void GenXKernelBuilder::buildNoopCast(CastInst *CI, genx::BaleInfo BI,
2391 unsigned Mod, const DstOpndDesc &DstDesc) {
2392 IGC_ASSERT_MESSAGE(isMaskPacking(CI) || !BI.Bits,
2393 "non predicate bitcast should not be baled with anything");
2394 IGC_ASSERT_MESSAGE(isMaskPacking(CI) || !Mod,
2395 "non predicate bitcast should not be baled with anything");
2396 IGC_ASSERT_MESSAGE(isMaskPacking(CI) || !DstDesc.WrRegion,
2397 "non predicate bitcast should not be baled with anything");
2398
2399 // ignore bitcasts of volatile globals
2400 // (they used to be a part of load/store as a constexpr)
2401 if ((isa<GlobalVariable>(CI->getOperand(0)) &&
2402 cast<GlobalVariable>(CI->getOperand(0))
2403 ->hasAttribute(VCModuleMD::VCVolatile)))
2404 return;
2405
2406 if (CI->getType()->getScalarType()->isIntegerTy(1)) {
2407 if (CI->getOperand(0)->getType()->getScalarType()->isIntegerTy(1)) {
2408 if (auto C = dyn_cast<Constant>(CI->getOperand(0))) {
2409 auto Reg =
2410 getRegForValueOrNullAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
2411 if (!Reg)
2412 return; // write to EM/RM value, ignore
2413 // We can move a constant predicate to a predicate register
2414 // using setp, if we get the constant predicate as a single int.
2415 unsigned IntVal = getPredicateConstantAsInt(C);
2416 unsigned Size = C->getType()->getPrimitiveSizeInBits();
2417 C = ConstantInt::get(
2418 Type::getIntNTy(CI->getContext(), std::max(Size, 8U)), IntVal);
2419
2420 addDebugInfo();
2421 CISA_CALL(Kernel->AppendVISASetP(
2422 vISA_EMASK_M1_NM, VISA_Exec_Size(genx::log2(Size)),
2423 getPredicateVar(Reg), createSourceOperand(CI, UNSIGNED, 0, BI)));
2424 return;
2425 }
2426 // There does not appear to be a vISA instruction to move predicate
2427 // to predicate. GenXCoalescing avoids this by moving in two steps
2428 // via a general register. So the only pred->pred bitcast that arrives
2429 // here should be one from GenXLowering, and it should have been copy
2430 // coalesced in GenXCoalescing.
2431 const Register *const Reg1 =
2432 getRegForValueAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
2433 const Register *const Reg2 =
2434 getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0), DONTCARESIGNED);
2435 IGC_ASSERT_MESSAGE(Reg1 == Reg2, "uncoalesced phi move of predicate");
2436 (void) Reg1;
2437 (void) Reg2;
2438 return;
2439 }
2440
2441 VISA_PredVar *PredVar = getPredicateVar(CI);
2442
2443 addDebugInfo();
2444 CISA_CALL(Kernel->AppendVISASetP(
2445 vISA_EMASK_M1_NM,
2446 VISA_Exec_Size(
2447 genx::log2(CI->getType()->getPrimitiveSizeInBits())),
2448 PredVar, createSourceOperand(CI, UNSIGNED, 0, BI)));
2449 return;
2450 }
2451 if (isa<Constant>(CI->getOperand(0))) {
2452 if (isa<UndefValue>(CI->getOperand(0)))
2453 return; // undef source, generate no code
2454 // Source is constant.
2455 int ExecSize = 1;
2456 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType()))
2457 ExecSize = VT->getNumElements();
2458
2459 VISA_EMask_Ctrl ctrlMask = getExecMaskFromWrRegion(DstDesc, true);
2460 VISA_Exec_Size execSize = getExecSizeFromValue(ExecSize);
2461 addDebugInfo();
2462 CISA_CALL(Kernel->AppendVISADataMovementInst(
2463 ISA_MOV, createPredFromWrRegion(DstDesc), Mod & MODIFIER_SAT, ctrlMask,
2464 execSize, createDestination(CI, DONTCARESIGNED, Mod, DstDesc),
2465 createSourceOperand(CI, DONTCARESIGNED, 0, BI)));
2466 return;
2467 }
2468 if (CI->getOperand(0)->getType()->getScalarType()->isIntegerTy(1)) {
2469 // Bitcast from predicate to scalar int
2470 Register *PredReg =
2471 getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0), DONTCARESIGNED);
2472 IGC_ASSERT(PredReg->Category == RegCategory::PREDICATE);
2473 addDebugInfo();
2474 CISA_CALL(Kernel->AppendVISAPredicateMove(
2475 createDestination(CI, UNSIGNED, 0, DstDesc),
2476 PredReg->GetVar<VISA_PredVar>(Kernel)));
2477
2478 return;
2479 }
2480
2481 if (Liveness->isNoopCastCoalesced(CI))
2482 return; // cast was coalesced away
2483
2484 // Here we always choose minimal (in size) type in order to avoid issues
2485 // with alignment. We expect that execution size should still be valid
2486 Type *Ty = CI->getSrcTy();
2487 if (Ty->getScalarType()->getPrimitiveSizeInBits() >
2488 CI->getDestTy()->getScalarType()->getPrimitiveSizeInBits())
2489 Ty = CI->getDestTy();
2490
2491 Register *DstReg =
2492 getRegForValueAndSaveAlias(KernFunc, CI, DONTCARESIGNED, Ty);
2493 // Give dest and source the same signedness for byte mov.
2494 auto Signed = RegAlloc->getSigned(DstReg);
2495 Register *SrcReg =
2496 getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0), Signed, Ty);
2497 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
2498 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Ty))
2499 ExecSize = getExecSizeFromValue(VT->getNumElements());
2500 IGC_ASSERT_MESSAGE(ExecSize >= EXEC_SIZE_1,
2501 "illegal exec size in bitcast: should have been coalesced away");
2502 IGC_ASSERT_MESSAGE(ExecSize <= EXEC_SIZE_32,
2503 "illegal exec size in bitcast: should have been coalesced away");
2504 // destination
2505 Region DestR(CI);
2506 // source
2507 Region SourceR(CI->getOperand(0));
2508
2509 VISA_EMask_Ctrl ctrlMask = NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
2510 addDebugInfo();
2511 CISA_CALL(Kernel->AppendVISADataMovementInst(
2512 ISA_MOV, nullptr, Mod, ctrlMask, ExecSize,
2513 createRegionOperand(&DestR, DstReg->GetVar<VISA_GenVar>(Kernel), DONTCARESIGNED,
2514 0, true),
2515 createRegionOperand(&SourceR, SrcReg->GetVar<VISA_GenVar>(Kernel), Signed, 0,
2516 false)));
2517 }
2518
buildFunctionAddr(Instruction * Inst,const DstOpndDesc & DstDesc)2519 void GenXKernelBuilder::buildFunctionAddr(Instruction *Inst,
2520 const DstOpndDesc &DstDesc) {
2521 auto *CI = dyn_cast<CallInst>(Inst);
2522 IGC_ASSERT(CI);
2523 IGC_ASSERT_MESSAGE(GenXIntrinsic::getGenXIntrinsicID(CI) == GenXIntrinsic::genx_faddr,
2524 "genx.faddr expected in a FADDR bale");
2525 auto *Dst = createDestination(Inst, DONTCARESIGNED, MODIFIER_NONE, DstDesc);
2526 IGC_ASSERT(Dst);
2527 auto *F = cast<Function>(Inst->getOperand(0));
2528 CISA_CALL(Kernel->AppendVISACFSymbolInst(F->getName().str(), Dst));
2529 }
2530
2531 /***********************************************************************
2532 * buildLoneWrRegion : build a lone wrregion
2533 */
buildLoneWrRegion(const DstOpndDesc & DstDesc)2534 void GenXKernelBuilder::buildLoneWrRegion(const DstOpndDesc &DstDesc) {
2535 enum { OperandNum = 1 };
2536 Value *Input = DstDesc.WrRegion->getOperand(OperandNum);
2537 if (isa<UndefValue>(Input))
2538 return; // No code if input is undef
2539 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
2540 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Input->getType()))
2541 ExecSize = getExecSizeFromValue(VT->getNumElements());
2542
2543 VISA_EMask_Ctrl ExecMask = getExecMaskFromWrRegion(DstDesc, true);
2544
2545 // TODO: fix signedness of the source
2546 addDebugInfo();
2547 auto *Src = createSource(Input, DONTCARESIGNED, false, 0);
2548 auto *Dst = createDestination(Input, DONTCARESIGNED, 0, DstDesc);
2549 CISA_CALL(Kernel->AppendVISADataMovementInst(
2550 ISA_MOV, createPredFromWrRegion(DstDesc), false, ExecMask, ExecSize,
2551 Dst, Src));
2552 }
2553
2554 /***********************************************************************
2555 * buildLoneWrPredRegion : build a lone wrpredregion
2556 */
buildLoneWrPredRegion(Instruction * Inst,BaleInfo BI)2557 void GenXKernelBuilder::buildLoneWrPredRegion(Instruction *Inst, BaleInfo BI) {
2558 IGC_ASSERT_MESSAGE(isWrPredRegionLegalSetP(*cast<CallInst>(Inst)),
2559 "wrpredregion cannot be legally represented as SETP instruction");
2560 enum { OperandNum = 1 };
2561 Value *Input = Inst->getOperand(OperandNum);
2562 IGC_ASSERT_MESSAGE(isa<Constant>(Input), "only immediate case is supported");
2563 auto *C = cast<Constant>(Input);
2564 unsigned Size = C->getType()->getPrimitiveSizeInBits();
2565
2566 VISA_EMask_Ctrl ctrlMask = getExecMaskFromWrPredRegion(Inst, true);
2567 VISA_Exec_Size execSize = getExecSizeFromValue(Size);
2568
2569 unsigned IntVal = getPredicateConstantAsInt(C);
2570 C = ConstantInt::get(Type::getIntNTy(Inst->getContext(), std::max(Size, 8U)),
2571 IntVal);
2572 addDebugInfo();
2573 CISA_CALL(Kernel->AppendVISASetP(ctrlMask, execSize, getPredicateVar(Inst),
2574 createImmediateOperand(C, UNSIGNED)));
2575 }
2576
2577 /***********************************************************************
2578 * buildLoneOperand : build a rdregion or modifier that is not baled in to
2579 * a main instruction
2580 *
 * Enter:   Inst = the rdregion or modifier instruction
 *          BI = BaleInfo for Inst
 *          Mod = modifier for destination
 *          DstDesc = destination descriptor: DstDesc.WrRegion is 0 else the
 *                    wrregion for destination, DstDesc.WrRegionBI is the
 *                    BaleInfo for that wrregion (possibly baling in
 *                    variable index add)
2587 */
buildLoneOperand(Instruction * Inst,genx::BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2588 void GenXKernelBuilder::buildLoneOperand(Instruction *Inst, genx::BaleInfo BI,
2589 unsigned Mod,
2590 const DstOpndDesc &DstDesc) {
2591 Instruction *WrRegion = DstDesc.WrRegion;
2592 BaleInfo WrRegionBI = DstDesc.WrRegionBI;
2593
2594 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
2595 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Inst->getType()))
2596 ExecSize = getExecSizeFromValue(VT->getNumElements());
2597 ISA_Opcode Opcode = ISA_MOV;
2598 bool Baled = true;
2599 VISA_EMask_Ctrl ExecMask = getExecMaskFromWrRegion(DstDesc);
2600 // Default source from Inst
2601 Value *Src = Inst;
2602
2603 // Give dest and source the same signedness for byte mov.
2604 auto Signed = DONTCARESIGNED;
2605 // destination
2606 auto Dest = createDestination(Inst, Signed, Mod, DstDesc, &Signed);
2607
2608 // source
2609 if ((Mod & MODIFIER_SAT) != 0 &&
2610 Inst->getType()->getScalarType()->isIntegerTy() &&
2611 GenXIntrinsic::isIntegerSat(Inst->user_back()))
2612 Signed = getISatSrcSign(Inst->user_back());
2613
2614 if (BI.Type == BaleInfo::NOTMOD) {
2615 // A lone "not" is implemented as a not instruction, rather than a mov
2616 // with a not modifier. A mov only allows an arithmetic modifier.
2617 Opcode = ISA_NOT;
2618 Baled = BI.isOperandBaled(0);
2619 // In this case the src is actually operand 0 of the noti intrinsic
2620 Src = Inst->getOperand(0);
2621 } else if (BI.Type == BaleInfo::RDREGION && !Mod) {
2622 Register *DstReg;
2623 if (WrRegion) {
2624 DstReg =
2625 getRegForValueOrNullAndSaveAlias(KernFunc, WrRegion, DONTCARESIGNED);
2626 } else {
2627 DstReg = getRegForValueAndSaveAlias(KernFunc, Inst, DONTCARESIGNED);
2628 }
2629 if (DstReg && (DstReg->Category == RegCategory::SURFACE ||
2630 DstReg->Category == RegCategory::SAMPLER)) {
2631 Opcode = ISA_MOVS;
2632 }
2633 }
2634 // TODO: mb need to get signed from dest for src and then modify that
2635 addDebugInfo();
2636 CISA_CALL(Kernel->AppendVISADataMovementInst(
2637 Opcode, (Opcode != ISA_MOVS ? createPredFromWrRegion(DstDesc) : nullptr),
2638 Mod & MODIFIER_SAT, ExecMask, ExecSize, Dest,
2639 createSource(Src, Signed, Baled, 0)));
2640 }
2641
getResultedTypeSize(Type * Ty,const DataLayout & DL)2642 static unsigned getResultedTypeSize(Type *Ty, const DataLayout& DL) {
2643 unsigned TySz = 0;
2644 if (auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty))
2645 TySz =
2646 VTy->getNumElements() * getResultedTypeSize(VTy->getElementType(), DL);
2647 else if (Ty->isArrayTy())
2648 TySz = Ty->getArrayNumElements() *
2649 getResultedTypeSize(Ty->getArrayElementType(), DL);
2650 else if (Ty->isStructTy()) {
2651 StructType *STy = dyn_cast<StructType>(Ty);
2652 IGC_ASSERT(STy);
2653 for (Type *Ty : STy->elements())
2654 TySz += getResultedTypeSize(Ty, DL);
2655 } else if (Ty->isPointerTy())
2656 TySz = DL.getPointerSize();
2657 else {
2658 TySz = Ty->getPrimitiveSizeInBits() / CHAR_BIT;
2659 IGC_ASSERT_MESSAGE(TySz, "Ty is not primitive?");
2660 }
2661
2662 return TySz;
2663 }
2664
2665 // Check if we're trying to form return value of a structure type
2666 // TODO: should check full insert/extract chain (for failed coalescing cases),
2667 // e.g. after failed coalescing we may end up having a bunch of
2668 // extractvalue, insertvalue and bitcasts inst where only the last one
2669 // should be actually lowered
checkInsertToRetv(InsertValueInst * Inst)2670 static bool checkInsertToRetv(InsertValueInst *Inst) {
2671 if (auto IVI = dyn_cast<InsertValueInst>(Inst->use_begin()->getUser()))
2672 return checkInsertToRetv(IVI);
2673
2674 if (auto RI = dyn_cast<ReturnInst>(Inst->use_begin()->getUser())) {
2675 const auto *F = RI->getFunction();
2676 return genx::requiresStackCall(F) || genx::isReferencedIndirectly(F);
2677 }
2678
2679 return false;
2680 }
2681
2682 /***********************************************************************
2683 * buildMainInst : build a main instruction
2684 *
 * Enter:   Inst = the main instruction
 *          BI = BaleInfo for Inst
 *          Mod = modifier bits for destination
 *          DstDesc = destination descriptor: DstDesc.WrRegion is 0 else the
 *                    wrregion for destination, DstDesc.WrRegionBI is the
 *                    BaleInfo for that wrregion (possibly baling in
 *                    variable index add)
2691 *
2692 * Return: true if terminator inst that falls through to following block
2693 */
buildMainInst(Instruction * Inst,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2694 bool GenXKernelBuilder::buildMainInst(Instruction *Inst, BaleInfo BI,
2695 unsigned Mod,
2696 const DstOpndDesc &DstDesc) {
2697 if (PHINode *Phi = dyn_cast<PHINode>(Inst))
2698 buildPhiNode(Phi);
2699 else if (ReturnInst *RI = dyn_cast<ReturnInst>(Inst)) {
2700 buildRet(RI);
2701 } else if (BranchInst *BR = dyn_cast<BranchInst>(Inst)) {
2702 return buildBranch(BR);
2703 } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(Inst)) {
2704 buildIndirectBr(IBR);
2705 } else if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
2706 buildCmp(Cmp, BI, DstDesc);
2707 } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Inst)) {
2708 if (!BO->getType()->getScalarType()->isIntegerTy(1)) {
2709 buildBinaryOperator(BO, BI, Mod, DstDesc);
2710 } else {
2711 IGC_ASSERT(!Mod);
2712 IGC_ASSERT(!DstDesc.WrRegion);
2713 IGC_ASSERT(!BI.isOperandBaled(0));
2714 IGC_ASSERT(!BI.isOperandBaled(1));
2715 buildBoolBinaryOperator(BO);
2716 }
2717 } else if (auto EVI = dyn_cast<ExtractValueInst>(Inst)) {
2718 if (auto *CI = dyn_cast<CallInst>(Inst->getOperand(0)))
2719 // translate extraction of structured type from retv
2720 if (!UseNewStackBuilder && !CI->isInlineAsm() &&
2721 (genx::requiresStackCall(CI->getCalledFunction()) ||
2722 IGCLLVM::isIndirectCall(*CI)))
2723 buildExtractRetv(EVI);
2724 // no code generated
2725 } else if (auto IVI = dyn_cast<InsertValueInst>(Inst)) {
2726 if (!UseNewStackBuilder && checkInsertToRetv(IVI) &&
2727 // TODO: safely remove this tmp workaround for failed coalescing cases
2728 // and insert-extract-insert chains
2729 !isa<BitCastInst>(Inst->getOperand(1)))
2730 RetvInserts.push_back(IVI);
2731 // no code generated
2732 } else if (CastInst *CI = dyn_cast<CastInst>(Inst)) {
2733 if (genx::isNoopCast(CI))
2734 buildNoopCast(CI, BI, Mod, DstDesc);
2735 else
2736 buildCastInst(CI, BI, Mod, DstDesc);
2737 } else if (auto SI = dyn_cast<SelectInst>(Inst)) {
2738 buildSelectInst(SI, BI, Mod, DstDesc);
2739 } else if (auto LI = dyn_cast<LoadInst>(Inst)) {
2740 (void)LI; // no code generated
2741 } else if (auto GEPI = dyn_cast<GetElementPtrInst>(Inst)) {
2742 // Skip genx.print.format.index GEP here.
2743 IGC_ASSERT_MESSAGE(vc::isLegalPrintFormatIndexGEP(*GEPI),
2744 "only genx.print.format.index src GEP can still be "
2745 "present at this stage");
2746 #if (LLVM_VERSION_MAJOR > 8)
2747 } else if (UnaryOperator *UO = dyn_cast<UnaryOperator>(Inst)) {
2748 buildUnaryOperator(UO, BI, Mod, DstDesc);
2749 #endif
2750 } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
2751 if (CI->isInlineAsm())
2752 buildInlineAsm(CI);
2753 else if (IGCLLVM::isIndirectCall(*CI)) {
2754 IGC_ASSERT_MESSAGE(!Mod,
2755 "cannot bale subroutine call into anything");
2756 IGC_ASSERT_MESSAGE(!DstDesc.WrRegion,
2757 "cannot bale subroutine call into anything");
2758 buildCall(CI, DstDesc);
2759 } else {
2760 Function *Callee = CI->getCalledFunction();
2761 unsigned IntrinID = GenXIntrinsic::getAnyIntrinsicID(Callee);
2762 switch (IntrinID) {
2763 case Intrinsic::dbg_value:
2764 case Intrinsic::dbg_declare:
2765 addDebugInfo();
2766 break;
2767 case GenXIntrinsic::genx_predefined_surface:
2768 case GenXIntrinsic::genx_output:
2769 case GenXIntrinsic::genx_output_1:
2770 case GenXIntrinsic::genx_jump_table:
2771 // ignore
2772 break;
2773 case GenXIntrinsic::genx_simdcf_goto:
2774 // A goto that is not baled into a branch (via an extractvalue)
2775 buildGoto(CI, nullptr);
2776 break;
2777 case GenXIntrinsic::genx_simdcf_join:
2778 // A join that is not baled into a branch (via an extractvalue)
2779 buildJoin(CI, nullptr);
2780 break;
2781 case GenXIntrinsic::genx_convert:
2782 buildConvert(CI, BI, Mod, DstDesc);
2783 break;
2784 case GenXIntrinsic::genx_print_format_index:
2785 buildPrintIndex(CI, IntrinID, Mod, DstDesc);
2786 break;
2787 case GenXIntrinsic::genx_convert_addr:
2788 buildConvertAddr(CI, BI, Mod, DstDesc);
2789 break;
2790 case GenXIntrinsic::genx_alloca:
2791 if (!UseNewStackBuilder)
2792 buildAlloca(CI, IntrinID, Mod, DstDesc);
2793 break;
2794 case GenXIntrinsic::genx_gaddr:
2795 buildSymbolInst(CI, Mod, DstDesc);
2796 break;
2797 case GenXIntrinsic::genx_write_predef_surface:
2798 buildWritePredefSurface(*CI);
2799 break;
2800 case GenXIntrinsic::genx_get_hwid:
2801 buildGetHWID(CI, DstDesc);
2802 break;
2803 case GenXIntrinsic::genx_constanti:
2804 case GenXIntrinsic::genx_constantf:
2805 case GenXIntrinsic::genx_constantpred:
2806 if (isa<UndefValue>(CI->getOperand(0)))
2807 return false; // Omit llvm.genx.constant with undef operand.
2808 if (!DstDesc.WrRegion &&
2809 !getRegForValueOrNullAndSaveAlias(KernFunc, CI))
2810 return false; // Omit llvm.genx.constantpred that is EM or RM and so
2811 // does not have a register allocated.
2812 // fall through...
2813 default:
2814 if (!(CI->user_empty() &&
2815 GenXIntrinsic::getAnyIntrinsicID(CI->getCalledFunction()) ==
2816 GenXIntrinsic::genx_any))
2817 buildIntrinsic(CI, IntrinID, BI, Mod, DstDesc);
2818 break;
2819 case GenXIntrinsic::not_any_intrinsic:
2820 IGC_ASSERT_MESSAGE(!Mod,
2821 "cannot bale subroutine call into anything");
2822 IGC_ASSERT_MESSAGE(!DstDesc.WrRegion,
2823 "cannot bale subroutine call into anything");
2824 buildCall(CI, DstDesc);
2825 break;
2826 }
2827 }
2828 } else if (isa<UnreachableInst>(Inst))
2829 ; // no code generated
2830 else {
2831 DiagnosticInfoCisaBuild Err{Inst, "main inst not implemented", DS_Error};
2832 getContext().diagnose(Err);
2833 }
2834
2835 return false;
2836 }
2837
2838 /***********************************************************************
2839 * buildPhiNode : build code for a phi node
2840 *
2841 * A phi node generates no code because coalescing has ensured that all
2842 * incomings and the result are in the same register. This function just
2843 * asserts that that is the case.
2844 */
void GenXKernelBuilder::buildPhiNode(PHINode *Phi) {
  // Assertion only: nothing is appended to the kernel for a phi node.
  IGC_ASSERT(testPhiNodeHasNoMismatchedRegs(Phi, Liveness));
}
2848
2849 /***********************************************************************
2850 * buildGoto : translate a goto
2851 *
2852 * Enter: Goto = goto instruction that is baled into an extractvalue of
2853 * field 2 (the !any(EM) value), that is baled into Branch
2854 * Branch = branch instruction, 0 if this is a goto that is not
2855 * baled into a branch, which happens when the goto is
2856 * followed by a join point so the goto's JIP points there,
2857 * and LLVM changes the resulting conditional branch with
2858 * both successors the same into an unconditional branch
2859 */
buildGoto(CallInst * Goto,BranchInst * Branch)2860 void GenXKernelBuilder::buildGoto(CallInst *Goto, BranchInst *Branch) {
2861 // GenXSimdCFConformance and GenXTidyControlFlow ensure that we have either
2862 // 1. a forward goto, where the false successor is fallthrough; or
2863 // 2. a backward goto, where the UIP (the join whose RM the goto updates)
2864 // and the true successor are both fallthrough, and the false successor
2865 // is the top of the loop.
2866 // (1) generates a vISA forward goto, but the condition has the wrong sense
2867 // so we need to invert it.
2868 // (2) generates a vISA backward goto.
2869 Value *BranchTarget = nullptr;
2870 VISA_PREDICATE_STATE StateInvert = PredState_NO_INVERSE;
2871 if (!Branch ||
2872 Branch->getSuccessor(1) == Branch->getParent()->getNextNode()) {
2873 // Forward goto. Find the join.
2874 auto Join = GotoJoin::findJoin(Goto);
2875 IGC_ASSERT_MESSAGE(Join, "join not found");
2876 BranchTarget = Join;
2877 StateInvert = PredState_INVERSE;
2878 } else {
2879 IGC_ASSERT_MESSAGE(Branch->getSuccessor(0) == Branch->getParent()->getNextNode(),
2880 "bad goto structure");
2881 // Backward branch.
2882 BranchTarget = Branch->getSuccessor(1);
2883 }
2884 // Get the condition.
2885 VISA_EMask_Ctrl Mask = vISA_EMASK_M1;
2886 VISA_PREDICATE_CONTROL Control = PRED_CTRL_NON;
2887 VISA_PREDICATE_STATE State = PredState_NO_INVERSE;
2888
2889 Value *Pred = getPredicateOperand(
2890 Goto, 2 /*OperandNum*/, Baling->getBaleInfo(Goto), Control, State, &Mask);
2891 IGC_ASSERT_MESSAGE(!Mask, "cannot have rdpredregion baled into goto");
2892
2893 Instruction *Not = dyn_cast<Instruction>(Pred);
2894 if (Not && isPredNot(Not)) {
2895 // Eliminate excess NOT
2896 // %P1 = ...
2897 // %P2 = not %P1
2898 // (!%P2) goto
2899 // Transforms into
2900 // (%P1) goto
2901 StateInvert = (StateInvert == PredState_NO_INVERSE) ? PredState_INVERSE
2902 : PredState_NO_INVERSE;
2903 Pred = getPredicateOperand(Not, 0 /*OperandNum*/, Baling->getBaleInfo(Not),
2904 Control, State, &Mask);
2905 IGC_ASSERT_MESSAGE(!Mask, "cannot have rdpredregion baled into goto");
2906 }
2907
2908 Register *PredReg = nullptr;
2909 if (auto C = dyn_cast<Constant>(Pred)) {
2910 (void)C;
2911 if (StateInvert)
2912 IGC_ASSERT_MESSAGE(C->isNullValue(),
2913 "predication operand must be constant 0 or not constant");
2914 else
2915 IGC_ASSERT_MESSAGE(C->isAllOnesValue(),
2916 "predication operand must be constant 1 or not constant");
2917 } else {
2918 State ^= StateInvert;
2919 PredReg = getRegForValueOrNullAndSaveAlias(KernFunc, Pred);
2920 IGC_ASSERT(PredReg);
2921 IGC_ASSERT(PredReg->Category == RegCategory::PREDICATE);
2922 }
2923
2924 uint8_t execSize = genx::log2(
2925 cast<IGCLLVM::FixedVectorType>(Pred->getType())->getNumElements());
2926
2927 // Visa decoder part
2928 VISA_EMask_Ctrl emask =
2929 VISA_EMask_Ctrl((execSize >> 0x4) & 0xF);
2930 VISA_Exec_Size esize = (VISA_Exec_Size)((execSize)&0xF);
2931
2932 VISA_PredOpnd *pred = nullptr;
2933 if (PredReg) {
2934 VISA_PredVar *Decl = getPredicateVar(PredReg);
2935 VISA_PredOpnd *opnd = createPredOperand(Decl, State, Control);
2936 pred = opnd;
2937 }
2938
2939 unsigned LabelID = getOrCreateLabel(BranchTarget, LABEL_BLOCK);
2940
2941 VISA_LabelOpnd *label = Labels[LabelID];
2942 addDebugInfo();
2943 CISA_CALL(Kernel->AppendVISACFGotoInst(pred, emask, esize, label));
2944 }
2945
2946 // Convert predicate offset to EM offset according to
2947 // vISA spec 3.3.1 Execution Mask.
getVisaEMOffset(unsigned PredOffset)2948 static VISA_EMask_Ctrl getVisaEMOffset(unsigned PredOffset) {
2949 switch (PredOffset) {
2950 case 0:
2951 return vISA_EMASK_M1;
2952 case 4:
2953 return vISA_EMASK_M2;
2954 case 8:
2955 return vISA_EMASK_M3;
2956 case 12:
2957 return vISA_EMASK_M4;
2958 case 16:
2959 return vISA_EMASK_M5;
2960 case 20:
2961 return vISA_EMASK_M6;
2962 case 24:
2963 return vISA_EMASK_M7;
2964 case 28:
2965 return vISA_EMASK_M8;
2966 }
2967 IGC_ASSERT_EXIT_MESSAGE(0, "Unexpected EM offset");
2968 }
2969
2970 /***********************************************************************
2971 * getPredicateOperand : get predicate operand, scanning through any baled
2972 * in rdpredregion, all, any, not instructions to derive the mask control
2973 * field and the predication field
2974 *
2975 * Enter: Inst = instruction to get predicate operand from
2976 * OperandNum = operand number in Inst
2977 * BI = bale info for Inst
 *          Control = where to write control information about predicate
 *                    (passed by reference)
 *          State = where to write state information about predicate
 *                  (passed by reference)
 *          *MaskCtrl = where to write mask control field (bits 7..4)
 *
 * Return:  Value of mask after scanning through baled in instructions
 *          Control, State and *MaskCtrl set
2984 */
getPredicateOperand(Instruction * Inst,unsigned OperandNum,BaleInfo BI,VISA_PREDICATE_CONTROL & Control,VISA_PREDICATE_STATE & State,VISA_EMask_Ctrl * MaskCtrl)2985 Value *GenXKernelBuilder::getPredicateOperand(
2986 Instruction *Inst, unsigned OperandNum, BaleInfo BI,
2987 VISA_PREDICATE_CONTROL &Control, VISA_PREDICATE_STATE &State,
2988 VISA_EMask_Ctrl *MaskCtrl) {
2989 State = PredState_NO_INVERSE;
2990 *MaskCtrl = vISA_EMASK_M1;
2991 Control = PRED_CTRL_NON;
2992 Value *Mask = Inst->getOperand(OperandNum);
2993 // Check for baled in all/any/notp/rdpredregion.
2994 while (BI.isOperandBaled(OperandNum)) {
2995 Instruction *Inst = dyn_cast<Instruction>(Mask);
2996 if (isNot(Inst)) {
2997 if (Control != PRED_CTRL_NON) {
2998 // switch any<->all as well as invert bit
2999 Control ^= (VISA_PREDICATE_CONTROL)(PRED_CTRL_ANY | PRED_CTRL_ALL);
3000 State ^= PredState_INVERSE;
3001 } else {
3002 // all/any not set, just invert invert bit
3003 State ^= PredState_INVERSE;
3004 }
3005 OperandNum = 0;
3006 IGC_ASSERT(Inst);
3007 Mask = Inst->getOperand(OperandNum);
3008 BI = Baling->getBaleInfo(Inst);
3009 continue;
3010 }
3011 switch (GenXIntrinsic::getGenXIntrinsicID(Inst)) {
3012 case GenXIntrinsic::genx_all:
3013 Control |= PRED_CTRL_ALL; // predicate combine field = "all"
3014 OperandNum = 0;
3015 Mask = Inst->getOperand(OperandNum);
3016 BI = Baling->getBaleInfo(Inst);
3017 continue;
3018 case GenXIntrinsic::genx_any:
3019 Control |= PRED_CTRL_ANY; // predicate combine field = "any"
3020 OperandNum = 0;
3021 Mask = Inst->getOperand(OperandNum);
3022 BI = Baling->getBaleInfo(Inst);
3023 continue;
3024 case GenXIntrinsic::genx_rdpredregion: {
3025 // Baled in rdpredregion. Use its constant offset for the mask control
3026 // field.
3027 unsigned MaskOffset =
3028 cast<ConstantInt>(Inst->getOperand(1))->getSExtValue();
3029 *MaskCtrl = getVisaEMOffset(MaskOffset);
3030 Mask = Inst->getOperand(0);
3031 break;
3032 }
3033 default:
3034 break;
3035 }
3036 // Baled shufflepred. Mask offset is deduced from initial value of slice.
3037 if (auto *SVI = dyn_cast<ShuffleVectorInst>(Inst)) {
3038 unsigned MaskOffset =
3039 ShuffleVectorAnalyzer::getReplicatedSliceDescriptor(SVI)
3040 .InitialOffset;
3041 *MaskCtrl = getVisaEMOffset(MaskOffset);
3042 Mask = SVI->getOperand(0);
3043 }
3044 break;
3045 }
3046 return Mask;
3047 }
3048
AddGenVar(Register & Reg)3049 void GenXKernelBuilder::AddGenVar(Register &Reg) {
3050 auto &DL = FG->getModule()->getDataLayout();
3051
3052 VISA_GenVar *parentDecl = nullptr;
3053 VISA_GenVar *Decl = nullptr;
3054
3055 if (!Reg.AliasTo) {
3056 LLVM_DEBUG(dbgs() << "GenXKernelBuilder::AddGenVar: "; Reg.print(dbgs()); dbgs() << "\n");
3057 // This is not an aliased register. Go through all the aliases and
3058 // determine the biggest alignment required. If the register is at least
3059 // as big as a GRF, make the alignment GRF.
3060 unsigned Alignment = getLogAlignment(
3061 VISA_Align::ALIGN_GRF, Subtarget ? Subtarget->getGRFByteSize()
3062 : defaultGRFByteSize); // GRF alignment
3063 Type *Ty = Reg.Ty;
3064 unsigned NBits = Ty->isPointerTy() ? DL.getPointerSizeInBits()
3065 : Ty->getPrimitiveSizeInBits();
3066 LLVM_DEBUG(dbgs() << "RegTy " << *Ty << ", nbits = " << NBits << "\n");
3067 if (NBits < GrfByteSize * 8 /* bits in GRF */) {
3068 Alignment = 0;
3069 for (Register *AliasReg = &Reg; AliasReg;
3070 AliasReg = AliasReg->NextAlias[KernFunc]) {
3071 LLVM_DEBUG(dbgs() << "Alias reg " << AliasReg->Num << ", ty "
3072 << *(AliasReg->Ty) << "\n");
3073 Type *AliasTy = AliasReg->Ty->getScalarType();
3074 unsigned ThisElementBytes = AliasTy->isPointerTy()
3075 ? DL.getPointerTypeSize(AliasTy)
3076 : AliasTy->getPrimitiveSizeInBits() / 8;
3077 unsigned LogThisElementBytes = genx::log2(ThisElementBytes);
3078 if (LogThisElementBytes > Alignment)
3079 Alignment = LogThisElementBytes;
3080 if (AliasReg->Alignment > Alignment)
3081 Alignment = AliasReg->Alignment;
3082 }
3083 }
3084 LLVM_DEBUG(dbgs() << "Final alignment of " << Alignment << " for reg "
3085 << Reg.Num << "\n");
3086 for (Register *AliasReg = &Reg; AliasReg; AliasReg = AliasReg->NextAlias[KernFunc]) {
3087 if (AliasReg->Alignment < Alignment) {
3088 AliasReg->Alignment = Alignment;
3089 LLVM_DEBUG(dbgs() << "Setting alignment of " << Alignment << " for reg "
3090 << AliasReg->Num << "\n");
3091 }
3092 }
3093 } else {
3094 if (Reg.AliasTo->Num < visa::VISA_NUM_RESERVED_REGS) {
3095 LLVM_DEBUG(dbgs() << "GenXKernelBuilder::AddGenVar alias: " << Reg.AliasTo->Num << "\n");
3096 CISA_CALL(Kernel->GetPredefinedVar(parentDecl,
3097 (PreDefined_Vars)Reg.AliasTo->Num));
3098 IGC_ASSERT_MESSAGE(parentDecl, "Predefeined variable is null");
3099 } else {
3100 parentDecl = Reg.AliasTo->GetVar<VISA_GenVar>(Kernel);
3101 LLVM_DEBUG(dbgs() << "GenXKernelBuilder::AddGenVar decl: " << parentDecl << "\n");
3102 IGC_ASSERT_MESSAGE(parentDecl, "Refers to undefined var");
3103 }
3104 }
3105
3106 visa::TypeDetails TD(DL, Reg.Ty, Reg.Signed);
3107 LLVM_DEBUG(dbgs() << "Resulting #of elements: " << TD.NumElements << "\n");
3108
3109 VISA_Align VA = getVISA_Align(
3110 Reg.Alignment, Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize);
3111 CISA_CALL(Kernel->CreateVISAGenVar(Decl, Reg.NameStr.c_str(), TD.NumElements,
3112 static_cast<VISA_Type>(TD.VisaType), VA,
3113 parentDecl, 0));
3114
3115 Reg.SetVar(Kernel, Decl);
3116 LLVM_DEBUG(dbgs() << "Resulting decl: " << Decl << "\n");
3117
3118 for (auto &Attr : Reg.Attributes) {
3119 CISA_CALL(Kernel->AddAttributeToVar(
3120 Decl, getStringByIndex(Attr.first).begin(), Attr.second.size(),
3121 (void *)(Attr.second.c_str())));
3122 }
3123 }
3124
allowI64Ops() const3125 bool GenXKernelBuilder::allowI64Ops() const {
3126 IGC_ASSERT(Subtarget);
3127 if (!Subtarget->hasLongLong())
3128 return false;
3129 return true;
3130 }
3131 /**************************************************************************************************
3132 * Scan ir to collect information about whether kernel has callable function or
3133 * barrier.
3134 */
collectKernelInfo()3135 void GenXKernelBuilder::collectKernelInfo() {
3136 for (auto It = FG->begin(), E = FG->end(); It != E; ++It) {
3137 auto Func = *It;
3138 HasStackcalls |=
3139 genx::requiresStackCall(Func) || genx::isReferencedIndirectly(Func);
3140 for (auto &BB : *Func) {
3141 for (auto &I : BB) {
3142 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3143 if (CI->isInlineAsm())
3144 continue;
3145 if (GenXIntrinsicInst *II = dyn_cast<GenXIntrinsicInst>(CI)) {
3146 auto IID = II->getIntrinsicID();
3147 if (IID == GenXIntrinsic::genx_barrier ||
3148 IID == GenXIntrinsic::genx_sbarrier)
3149 HasBarrier = true;
3150 else if (IID == GenXIntrinsic::genx_alloca)
3151 HasAlloca = true;
3152 } else {
3153 Function *Callee = CI->getCalledFunction();
3154 if (Callee && Callee->hasFnAttribute("CMCallable"))
3155 HasCallable = true;
3156 }
3157 }
3158 }
3159 }
3160 }
3161 }
3162 /**************************************************************************************************
3163 * Build variables
3164 */
buildVariables()3165 void GenXKernelBuilder::buildVariables() {
3166 RegAlloc->SetRegPushHook(this, [](void *Object, GenXVisaRegAlloc::Reg &Reg) {
3167 static_cast<GenXKernelBuilder *>(Object)->AddGenVar(Reg);
3168 });
3169
3170 for (auto &It : RegAlloc->getRegStorage()) {
3171 Register *Reg = &(It);
3172 switch (Reg->Category) {
3173 case RegCategory::GENERAL:
3174 if (Reg->Num >= visa::VISA_NUM_RESERVED_REGS)
3175 AddGenVar(*Reg);
3176 break;
3177
3178 case RegCategory::ADDRESS: {
3179 VISA_AddrVar *Decl = nullptr;
3180 unsigned NumElements = 1;
3181 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3182 NumElements = VT->getNumElements();
3183 CISA_CALL(
3184 Kernel->CreateVISAAddrVar(Decl, Reg->NameStr.c_str(), NumElements));
3185 Reg->SetVar(Kernel, Decl);
3186 for (auto &Attr : Reg->Attributes) {
3187 CISA_CALL(Kernel->AddAttributeToVar(
3188 Decl, getStringByIndex(Attr.first).begin(), Attr.second.size(),
3189 (void *)(Attr.second.c_str())));
3190 }
3191 } break;
3192
3193 case RegCategory::PREDICATE: {
3194 VISA_PredVar *Decl = nullptr;
3195 unsigned NumElements = 1;
3196 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3197 NumElements = VT->getNumElements();
3198 CISA_CALL(
3199 Kernel->CreateVISAPredVar(Decl, Reg->NameStr.c_str(), NumElements));
3200 Reg->SetVar(Kernel, Decl);
3201 for (auto &Attr : Reg->Attributes) {
3202 CISA_CALL(Kernel->AddAttributeToVar(
3203 Decl, getStringByIndex(Attr.first).begin(), Attr.second.size(),
3204 (void *)(Attr.second.c_str())));
3205 }
3206 } break;
3207
3208 case RegCategory::SAMPLER: {
3209 unsigned NumElements = 1;
3210 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3211 NumElements = VT->getNumElements();
3212 VISA_SamplerVar *Decl = nullptr;
3213 CISA_CALL(Kernel->CreateVISASamplerVar(Decl, Reg->NameStr.c_str(),
3214 NumElements));
3215 Reg->SetVar(Kernel, Decl);
3216 } break;
3217
3218 case RegCategory::SURFACE: {
3219 VISA_SurfaceVar *Decl = nullptr;
3220 if (Reg->Num < visa::VISA_NUM_RESERVED_SURFACES) {
3221 Kernel->GetPredefinedSurface(Decl, (PreDefined_Surface)Reg->Num);
3222 } else {
3223 unsigned NumElements = 1;
3224 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3225 NumElements = VT->getNumElements();
3226
3227 CISA_CALL(Kernel->CreateVISASurfaceVar(Decl, Reg->NameStr.c_str(),
3228 NumElements));
3229 }
3230 Reg->SetVar(Kernel, Decl);
3231 } break;
3232
3233 default:
3234 report_fatal_error("Unknown category for register");
3235 break;
3236 }
3237 }
3238
3239 VISA_GenVar *ArgDecl = nullptr, *RetDecl = nullptr;
3240 Kernel->GetPredefinedVar(ArgDecl, PREDEFINED_ARG);
3241 Kernel->GetPredefinedVar(RetDecl, PREDEFINED_RET);
3242 createCisaVariable(Kernel, "argv", ArgDecl,
3243 visa::ArgRegSizeInGRFs * GrfByteSize);
3244 createCisaVariable(Kernel, "retv", RetDecl,
3245 visa::RetRegSizeInGRFs * GrfByteSize);
3246 }
3247
3248 /***********************************************************************
3249 * getExecMaskFromWrPredRegion : write exec size field from wrpredregion
3250 * or wrpredpredregion instruction
3251 *
 * Enter:   WrPredRegion = 0 else wrpredregion instruction
 *          IsNoMask = true to request the NoMask form of the mask control
3254 *
3255 * The exec size byte includes the mask control field, which we need to set
3256 * up from the wrpredregion/wrpredpredregion.
3257 */
3258 VISA_EMask_Ctrl
getExecMaskFromWrPredRegion(Instruction * WrPredRegion,bool IsNoMask)3259 GenXKernelBuilder::getExecMaskFromWrPredRegion(Instruction *WrPredRegion,
3260 bool IsNoMask) {
3261 VISA_EMask_Ctrl MaskCtrl =
3262 (IsNoMask | NoMask) ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3263 if (WrPredRegion) {
3264 // Get the mask control field from the offset in the wrpredregion.
3265 unsigned MaskOffset =
3266 cast<ConstantInt>(WrPredRegion->getOperand(2))->getSExtValue();
3267 IGC_ASSERT_MESSAGE(MaskOffset < 32, "unexpected mask offset");
3268 MaskCtrl = static_cast<VISA_EMask_Ctrl>(MaskOffset >> 2);
3269 }
3270
3271 // Set to NoMask if requested. Otherwise use the default NM mode
3272 // when WrPredRegion is null.
3273 if ((IsNoMask && MaskCtrl < vISA_EMASK_M1_NM) ||
3274 (!WrPredRegion && NoMask && MaskCtrl < vISA_EMASK_M1_NM))
3275 MaskCtrl = static_cast<VISA_EMask_Ctrl>(
3276 static_cast<unsigned>(MaskCtrl) + vISA_EMASK_M1_NM);
3277
3278 return MaskCtrl;
3279 }
3280
3281 /***********************************************************************
3282 * getExecMaskFromWrRegion : get exec size field from wrregion instruction
3283 *
 * Enter:   DstDesc = destination descriptor: DstDesc.WrRegion is 0 else the
 *                    wrregion instruction, DstDesc.WrRegionBI is the BaleInfo
 *                    for that wrregion, so we can see if there is a
 *                    rdpredregion baled in to the mask
 *          IsNoMask = true to start from the NoMask form of the mask control
3288 *
3289 * If WrRegion != 0, and it has a mask that is not constant 1, then the
3290 * mask must be a predicate register.
3291 *
3292 * The exec size byte includes the mask control field, which we need to set
3293 * up from any rdpredregion baled in to a predicated wrregion.
3294 *
3295 * If the predicate has no register allocated, it must be EM, and we set the
3296 * instruction to be masked. Otherwise we set nomask.
3297 */
3298 VISA_EMask_Ctrl
getExecMaskFromWrRegion(const DstOpndDesc & DstDesc,bool IsNoMask)3299 GenXKernelBuilder::getExecMaskFromWrRegion(const DstOpndDesc &DstDesc,
3300 bool IsNoMask) {
3301 // Override mask control if requested.
3302 auto MaskCtrl = (IsNoMask | NoMask) ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3303
3304 if (DstDesc.WrRegion) {
3305 // Get the predicate (mask) operand, scanning through baled in
3306 // all/any/not/rdpredregion and setting PredField and MaskCtrl
3307 // appropriately.
3308 VISA_PREDICATE_CONTROL Control = PRED_CTRL_NON;
3309 VISA_PREDICATE_STATE State = PredState_NO_INVERSE;
3310 Value *Mask =
3311 getPredicateOperand(DstDesc.WrRegion, 7 /*mask operand in wrregion*/,
3312 DstDesc.WrRegionBI, Control, State, &MaskCtrl);
3313 if ((isa<Constant>(Mask) ||
3314 getRegForValueOrNullAndSaveAlias(KernFunc, Mask)) &&
3315 NoMask)
3316 MaskCtrl |= vISA_EMASK_M1_NM;
3317 }
3318 return MaskCtrl;
3319 }
3320
3321 /***********************************************************************
3322 * buildIntrinsic : build code for an intrinsic
3323 *
3324 * Enter: CI = the CallInst
3325 * IntrinID = intrinsic ID
3326 * BI = BaleInfo for the instruction
3327 * Mod = modifier bits for destination
3328 * WrRegion = 0 else wrregion for destination
3329 * WrRegionBI = BaleInfo for WrRegion
3330 */
buildIntrinsic(CallInst * CI,unsigned IntrinID,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)3331 void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
3332 BaleInfo BI, unsigned Mod,
3333 const DstOpndDesc &DstDesc) {
3334 using II = GenXIntrinsicInfo;
3335 LLVM_DEBUG(dbgs() << "buildIntrinsic: " << *CI << "\n");
3336
3337 int MaxRawOperands = std::numeric_limits<int>::max();
3338
3339 // TODO: replace lambdas by methods
3340
3341 auto GetUnsignedValue = [&](II::ArgInfo AI) {
3342 ConstantInt *Const =
3343 dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3344 if (!Const) {
3345 DiagnosticInfoCisaBuild Err{CI, "Incorrect args to intrinsic call",
3346 DS_Error};
3347 getContext().diagnose(Err);
3348 }
3349 unsigned val = Const->getSExtValue();
3350 LLVM_DEBUG(dbgs() << "GetUnsignedValue from op #" << AI.getArgIdx()
3351 << " yields: " << val << "\n");
3352 return val;
3353 };
3354
3355 auto CreateSurfaceOperand = [&](II::ArgInfo AI) {
3356 LLVM_DEBUG(dbgs() << "CreateSurfaceOperand\n");
3357 llvm::Value *Arg = CI->getArgOperand(AI.getArgIdx());
3358 VISA_SurfaceVar *SurfDecl = nullptr;
3359 int Index = visa::convertToSurfaceIndex(Arg);
3360 if (visa::isReservedSurfaceIndex(Index)) {
3361 Kernel->GetPredefinedSurface(SurfDecl, visa::getReservedSurface(Index));
3362 } else {
3363 Register *Reg = getRegForValueAndSaveAlias(KernFunc, Arg);
3364 IGC_ASSERT_MESSAGE(Reg->Category == RegCategory::SURFACE,
3365 "Expected surface register");
3366 SurfDecl = Reg->GetVar<VISA_SurfaceVar>(Kernel);
3367 }
3368 VISA_StateOpndHandle *ResultOperand = nullptr;
3369 CISA_CALL(Kernel->CreateVISAStateOperandHandle(ResultOperand, SurfDecl));
3370 return ResultOperand;
3371 };
3372
3373 auto CreatePredefSurfaceOperand = [&](II::ArgInfo AI) {
3374 LLVM_DEBUG(dbgs() << "CreatePredefinedSurfaceOperand\n");
3375 auto *Arg = cast<GlobalVariable>(CI->getArgOperand(AI.getArgIdx()));
3376 VISA_SurfaceVar *SurfVar = getPredefinedSurfaceVar(*Arg);
3377 VISA_StateOpndHandle *ResultOperand = nullptr;
3378 CISA_CALL(Kernel->CreateVISAStateOperandHandle(ResultOperand, SurfVar));
3379 return ResultOperand;
3380 };
3381
3382 auto CreateSamplerOperand = [&](II::ArgInfo AI) {
3383 LLVM_DEBUG(dbgs() << "CreateSamplerOperand\n");
3384 Register *Reg =
3385 getRegForValueAndSaveAlias(KernFunc, CI->getArgOperand(AI.getArgIdx()));
3386 IGC_ASSERT_MESSAGE(Reg->Category == RegCategory::SAMPLER,
3387 "Expected sampler register");
3388 VISA_StateOpndHandle *ResultOperand = nullptr;
3389 CISA_CALL(Kernel->CreateVISAStateOperandHandle(
3390 ResultOperand, Reg->GetVar<VISA_SamplerVar>(Kernel)));
3391 return ResultOperand;
3392 };
3393
3394 auto GetMediaHeght = [&](II::ArgInfo AI) {
3395 LLVM_DEBUG(dbgs() << "GetMediaHeght\n");
3396 // constant byte for media height that we need to infer from the
3397 // media width and the return type or final arg
3398 ConstantInt *Const =
3399 dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3400 IGC_ASSERT_MESSAGE(Const, "Incorrect args to intrinsic call");
3401 unsigned Width = Const->getZExtValue();
3402 IGC_ASSERT_MESSAGE(Width > 0 && Width <= 64, "Invalid media width");
3403 unsigned RoundedWidth = roundedVal(Width, 4u);
3404 Type *DataType = CI->getType();
3405 if (DataType->isVoidTy())
3406 DataType = CI->getOperand(CI->getNumArgOperands() - 1)->getType();
3407 unsigned DataSize;
3408 if (VectorType *VT = dyn_cast<VectorType>(DataType))
3409 DataSize = DL.getTypeSizeInBits(VT) / genx::ByteBits;
3410 else
3411 DataSize = DL.getTypeSizeInBits(DataType) / genx::ByteBits;
3412 if (DataSize <= RoundedWidth && DataSize >= Width)
3413 return static_cast<uint8_t>(1);
3414 IGC_ASSERT_MESSAGE(RoundedWidth && (DataSize % RoundedWidth == 0),
3415 "Invalid media width");
3416 return static_cast<uint8_t>(DataSize / RoundedWidth);
3417 };
3418
3419 auto ChooseSign = [&](ArrayRef<unsigned> SrcIdxs) {
3420 IGC_ASSERT_MESSAGE(!SrcIdxs.empty(), "Expected at least one source index");
3421
3422 bool hasExt = std::any_of(SrcIdxs.begin(), SrcIdxs.end(),
3423 [CI, B = Baling](unsigned Idx) {
3424 return isExtOperandBaled(CI, Idx, B);
3425 });
3426
3427 // Keep the old behavior.
3428 if (hasExt)
3429 return DONTCARESIGNED;
3430
3431 SmallVector<Value *, 4> SrcValues;
3432 std::transform(SrcIdxs.begin(), SrcIdxs.end(),
3433 std::back_inserter(SrcValues),
3434 [CI](unsigned Idx) { return CI->getOperand(Idx); });
3435
3436 return getCommonSignedness(SrcValues);
3437 };
3438
3439 auto CreateOperand = [&](II::ArgInfo AI, Signedness Signed = DONTCARESIGNED) {
3440 LLVM_DEBUG(dbgs() << "CreateOperand from arg #" << AI.getArgIdx() << "\n");
3441 VISA_VectorOpnd *ResultOperand = nullptr;
3442 IGC_ASSERT_MESSAGE(Signed == DONTCARESIGNED ||
3443 !(AI.needsSigned() || AI.needsUnsigned()),
3444 "Signedness was set in two different ways.");
3445 if (AI.needsSigned())
3446 Signed = SIGNED;
3447 else if (AI.needsUnsigned())
3448 Signed = UNSIGNED;
3449 if (AI.isRet()) {
3450 if (AI.getSaturation() == II::SATURATION_SATURATE)
3451 Mod |= MODIFIER_SAT;
3452 ResultOperand = createDestination(CI, Signed, Mod, DstDesc);
3453 } else {
3454 unsigned MaxWidth = 16;
3455 if (AI.getRestriction() == II::TWICEWIDTH) {
3456 // For a TWICEWIDTH operand, do not allow width bigger than the
3457 // execution size.
3458 MaxWidth =
3459 cast<IGCLLVM::FixedVectorType>(CI->getType())->getNumElements();
3460 }
3461 if ((IntrinID == GenXIntrinsic::genx_dpas) ||
3462 (IntrinID == GenXIntrinsic::genx_dpas2) ||
3463 (IntrinID == GenXIntrinsic::genx_dpasw) ||
3464 (IntrinID == GenXIntrinsic::genx_dpas_nosrc0) ||
3465 (IntrinID == GenXIntrinsic::genx_dpasw_nosrc0)) {
3466 MaxWidth = Subtarget->dpasWidth();
3467 }
3468 ResultOperand = createSourceOperand(CI, Signed, AI.getArgIdx(), BI, 0,
3469 nullptr, MaxWidth);
3470 }
3471 return ResultOperand;
3472 };
3473
3474 auto CreateRawOperand = [&](II::ArgInfo AI) {
3475 LLVM_DEBUG(dbgs() << "CreateRawOperand from "
3476 << (AI.isRet() ? "Dest" : "Src")
3477 << " op #" << AI.getArgIdx() << "\n");
3478 VISA_RawOpnd *ResultOperand = nullptr;
3479 auto Signed = DONTCARESIGNED;
3480 if (AI.needsSigned())
3481 Signed = SIGNED;
3482 else if (AI.needsUnsigned())
3483 Signed = UNSIGNED;
3484 if (AI.isRet()) {
3485 IGC_ASSERT(!Mod);
3486 ResultOperand = createRawDestination(CI, DstDesc, Signed);
3487 } else if (AI.getArgIdx() < MaxRawOperands)
3488 ResultOperand = createRawSourceOperand(CI, AI.getArgIdx(), BI, Signed);
3489 return ResultOperand;
3490 };
3491
3492 auto CreateRawOperands = [&](II::ArgInfo AI, VISA_RawOpnd **Operands) {
3493 LLVM_DEBUG(dbgs() << "CreateRawOperands\n");
3494 IGC_ASSERT_MESSAGE(MaxRawOperands != std::numeric_limits<int>::max(),
3495 "MaxRawOperands must be defined");
3496 for (int i = 0; i < AI.getArgIdx() + MaxRawOperands; ++i) {
3497 Operands[i] = CreateRawOperand(II::ArgInfo(II::RAW | (AI.Info + i)));
3498 }
3499 };
3500
3501 auto GetOwords = [&](II::ArgInfo AI) {
3502 LLVM_DEBUG(dbgs() << "GetOwords\n");
3503 // constant byte for log2 number of owords
3504 Value *Arg = CI;
3505 if (!AI.isRet())
3506 Arg = CI->getOperand(AI.getArgIdx());
3507 auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType());
3508 if (!VT)
3509 report_fatal_error("Invalid number of owords");
3510 int DataSize = VT->getNumElements() *
3511 DL.getTypeSizeInBits(VT->getElementType()) / 8;
3512 DataSize = std::max(0, genx::exactLog2(DataSize) - 4);
3513 if (DataSize > 4)
3514 report_fatal_error("Invalid number of words");
3515 return static_cast<VISA_Oword_Num>(DataSize);
3516 };
3517
3518 auto GetExecSize = [&](II::ArgInfo AI, VISA_EMask_Ctrl *Mask) {
3519 LLVM_DEBUG(dbgs() << "GetExecSize\n");
3520 int ExecSize = GenXIntrinsicInfo::getOverridedExecSize(CI, Subtarget);
3521 if (ExecSize == 0) {
3522 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType())) {
3523 ExecSize = VT->getNumElements();
3524 } else {
3525 ExecSize = 1;
3526 }
3527 }
3528 bool IsNoMask = AI.getCategory() == II::EXECSIZE_NOMASK;
3529 *Mask = getExecMaskFromWrRegion(DstDesc, IsNoMask);
3530 return getExecSizeFromValue(ExecSize);
3531 };
3532
3533 auto GetBitWidth = [&](II::ArgInfo AI) {
3534 LLVM_DEBUG(dbgs() << "GetBitWidth\n");
3535 #ifndef NDEBUG
3536 // Only SVM atomics have this field
3537 auto ID = GenXIntrinsic::getGenXIntrinsicID(CI);
3538 switch (ID)
3539 {
3540 case llvm::GenXIntrinsic::genx_svm_atomic_add:
3541 case llvm::GenXIntrinsic::genx_svm_atomic_and:
3542 case llvm::GenXIntrinsic::genx_svm_atomic_cmpxchg:
3543 case llvm::GenXIntrinsic::genx_svm_atomic_dec:
3544 case llvm::GenXIntrinsic::genx_svm_atomic_fcmpwr:
3545 case llvm::GenXIntrinsic::genx_svm_atomic_fmax:
3546 case llvm::GenXIntrinsic::genx_svm_atomic_fmin:
3547 case llvm::GenXIntrinsic::genx_svm_atomic_imax:
3548 case llvm::GenXIntrinsic::genx_svm_atomic_imin:
3549 case llvm::GenXIntrinsic::genx_svm_atomic_inc:
3550 case llvm::GenXIntrinsic::genx_svm_atomic_max:
3551 case llvm::GenXIntrinsic::genx_svm_atomic_min:
3552 case llvm::GenXIntrinsic::genx_svm_atomic_or:
3553 case llvm::GenXIntrinsic::genx_svm_atomic_sub:
3554 case llvm::GenXIntrinsic::genx_svm_atomic_xchg:
3555 case llvm::GenXIntrinsic::genx_svm_atomic_xor:
3556 break;
3557 default:
3558 IGC_ASSERT(false &&
3559 "Trying to get bit width for non-svm atomic inst");
3560 break;
3561 }
3562 #endif // !NDEBUG
3563 auto* T = AI.isRet() ? CI->getType() : CI->getArgOperand(AI.getArgIdx())->getType();
3564 unsigned short Width = T->getScalarType()->getPrimitiveSizeInBits();
3565 return Width;
3566 };
3567
3568 auto GetExecSizeFromArg = [&](II::ArgInfo AI,
3569 VISA_EMask_Ctrl *ExecMask) {
3570 LLVM_DEBUG(dbgs() << "GetExecSizeFromArg\n");
3571 // exec_size inferred from width of predicate arg, defaulting to 16 if
3572 // it is scalar i1 (as can happen in raw send). Also get M3 etc flag
3573 // if the predicate has a baled in rdpredregion, and mark as nomask if
3574 // the predicate is not EM.
3575 int ExecSize;
3576 *ExecMask = NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3577 // Get the predicate (mask) operand, scanning through baled in
3578 // all/any/not/rdpredregion and setting PredField and MaskCtrl
3579 // appropriately.
3580 VISA_PREDICATE_CONTROL Control;
3581 VISA_PREDICATE_STATE State;
3582 Value *Mask =
3583 getPredicateOperand(CI, AI.getArgIdx(), BI, Control, State, ExecMask);
3584 if (isa<Constant>(Mask) || getRegForValueOrNullAndSaveAlias(KernFunc, Mask))
3585 *ExecMask |= NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3586 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(
3587 CI->getOperand(AI.getArgIdx())->getType()))
3588 ExecSize = VT->getNumElements();
3589 else
3590 ExecSize = GenXIntrinsicInfo::getOverridedExecSize(CI, Subtarget);
3591 return getExecSizeFromValue(ExecSize);
3592 };
3593
3594 auto GetExecSizeFromByte = [&](II::ArgInfo AI, VISA_EMask_Ctrl *Mask) {
3595 LLVM_DEBUG(dbgs() << "GetExecSizeFromByte\n");
3596 ConstantInt *Const =
3597 dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3598 if (!Const) {
3599 DiagnosticInfoCisaBuild Err{CI, "Incorrect args to intrinsic call",
3600 DS_Error};
3601 getContext().diagnose(Err);
3602 }
3603 unsigned Byte = Const->getSExtValue() & 0xFF;
3604 *Mask = (VISA_EMask_Ctrl)(Byte >> 4);
3605 unsigned Res = Byte & 0xF;
3606 if (Res > 5) {
3607 DiagnosticInfoCisaBuild Err{
3608 CI, "illegal common ISA execsize (should be 1, 2, 4, 8, 16, 32)",
3609 DS_Error};
3610 getContext().diagnose(Err);
3611 }
3612 return (VISA_Exec_Size)Res;
3613 };
3614
3615 auto CreateImplicitPredication = [&](II::ArgInfo AI) {
3616 LLVM_DEBUG(dbgs() << "CreateImplicitPredication\n");
3617 return createPredFromWrRegion(DstDesc);
3618 };
3619
3620 auto CreatePredication = [&](II::ArgInfo AI) {
3621 LLVM_DEBUG(dbgs() << "CreatePredication\n");
3622 return createPred(CI, BI, AI.getArgIdx());
3623 };
3624
3625 auto GetPredicateVar = [&](II::ArgInfo AI) {
3626 LLVM_DEBUG(dbgs() << "GetPredicateVar\n");
3627 if (AI.isRet())
3628 return getPredicateVar(CI);
3629 else
3630 return getPredicateVar(CI->getArgOperand(AI.getArgIdx()));
3631 };
3632
3633 auto GetZeroedPredicateVar = [&](II::ArgInfo AI) {
3634 LLVM_DEBUG(dbgs() << "GetZeroedPredicateVar\n");
3635 if (AI.isRet())
3636 return getZeroedPredicateVar(CI);
3637 else
3638 return getZeroedPredicateVar(CI->getArgOperand(AI.getArgIdx()));
3639 };
3640
3641 auto CreateNullRawOperand = [&](II::ArgInfo AI) {
3642 LLVM_DEBUG(dbgs() << "CreateNullRawOperand\n");
3643 VISA_RawOpnd *ResultOperand = nullptr;
3644 CISA_CALL(Kernel->CreateVISANullRawOperand(ResultOperand, false));
3645 return ResultOperand;
3646 };
3647
3648 auto ProcessTwoAddr = [&](II::ArgInfo AI) {
3649 LLVM_DEBUG(dbgs() << "ProcessTwoAddr\n");
3650 if (AI.getCategory() != II::TWOADDR)
3651 return;
3652 auto Reg = getRegForValueOrNullAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
3653 if (isa<UndefValue>(CI->getArgOperand(AI.getArgIdx())) && Reg &&
3654 isInLoop(CI->getParent()))
3655 addLifetimeStartInst(CI);
3656 };
3657
3658 // Constant vector of i1 (or just scalar i1) as i32 (used in setp)
3659 auto ConstVi1Asi32 = [&](II::ArgInfo AI) {
3660 LLVM_DEBUG(dbgs() << "ConstVi1Asi32\n");
3661 VISA_VectorOpnd *ResultOperand = nullptr;
3662 auto C = cast<Constant>(CI->getArgOperand(AI.getArgIdx()));
3663 // Get the bit value of the vXi1 constant.
3664 unsigned IntVal = getPredicateConstantAsInt(C);
3665 // unsigned i32 constant source operand
3666 CISA_CALL(Kernel->CreateVISAImmediate(ResultOperand, &IntVal, ISA_TYPE_UD));
3667 return ResultOperand;
3668 };
3669
3670 auto CreateAddressOperand = [&](II::ArgInfo AI) {
3671 LLVM_DEBUG(dbgs() << "CreateAddressOperand\n");
3672 if (AI.isRet())
3673 return createAddressOperand(CI, true);
3674 else
3675 return createAddressOperand(CI->getArgOperand(AI.getArgIdx()), false);
3676 };
3677
3678 auto GetArgCount = [&](II::ArgInfo AI) {
3679 LLVM_DEBUG(dbgs() << "GetArgCount\n");
3680 auto BaseArg = AI.getArgIdx();
3681 MaxRawOperands = BaseArg;
3682
3683 for (unsigned Idx = BaseArg; Idx < CI->getNumArgOperands(); ++Idx) {
3684 if (auto CA = dyn_cast<Constant>(CI->getArgOperand(Idx))) {
3685 if (CA->isNullValue())
3686 continue;
3687 }
3688 MaxRawOperands = Idx + 1;
3689 }
3690
3691 if (MaxRawOperands < BaseArg + AI.getArgCountMin())
3692 MaxRawOperands = BaseArg + AI.getArgCountMin();
3693
3694 return MaxRawOperands - AI.getArgIdx();
3695 };
3696
3697 auto GetNumGrfs = [&](II::ArgInfo AI) {
3698 LLVM_DEBUG(dbgs() << "GetNumGrfs\n");
3699 // constant byte for number of GRFs
3700 Value *Arg = CI;
3701 if (!AI.isRet())
3702 Arg = CI->getOperand(AI.getArgIdx());
3703 auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType());
3704 if (!VT) {
3705 DiagnosticInfoCisaBuild Err{CI, "Invalid number of GRFs", DS_Error};
3706 getContext().diagnose(Err);
3707 }
3708 int DataSize = VT->getNumElements() *
3709 VT->getElementType()->getPrimitiveSizeInBits() / 8;
3710 return (uint8_t)((DataSize + (GrfByteSize - 1)) / GrfByteSize);
3711 };
3712
3713 auto GetSampleChMask = [&](II::ArgInfo AI) {
3714 LLVM_DEBUG(dbgs() << "GetSampleChMask\n");
3715 ConstantInt *Const =
3716 dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3717 if (!Const) {
3718 DiagnosticInfoCisaBuild Err{CI, "Incorrect args to intrinsic call",
3719 DS_Error};
3720 getContext().diagnose(Err);
3721 }
3722 unsigned Byte = Const->getSExtValue() & 15;
3723 // Find the U_offset arg. It is the first vector arg after this one.
3724 IGCLLVM::FixedVectorType *VT;
3725 for (unsigned Idx = AI.getArgIdx() + 1;
3726 !(VT = dyn_cast<IGCLLVM::FixedVectorType>(
3727 CI->getOperand(Idx)->getType()));
3728 ++Idx)
3729 ;
3730 unsigned Width = VT->getNumElements();
3731 if (Width != 8 && Width != 16) {
3732 DiagnosticInfoCisaBuild Err{CI, "Invalid execution size for load/sample",
3733 DS_Error};
3734 getContext().diagnose(Err);
3735 }
3736 Byte |= Width & 16;
3737 return Byte;
3738 };
3739
3740 auto GetSvmBlockSizeNum = [&](II::ArgInfo Sz, II::ArgInfo Num) {
3741 LLVM_DEBUG(dbgs() << "SVM gather/scatter element size and num blocks\n");
3742 // svm gather/scatter "block size" field, set to reflect the element
3743 // type of the data
3744 Value *V = CI;
3745 if (!Sz.isRet())
3746 V = CI->getArgOperand(Sz.getArgIdx());
3747 auto *EltType = V->getType()->getScalarType();
3748 if (auto *MDType = CI->getMetadata(InstMD::SVMBlockType))
3749 EltType = cast<ValueAsMetadata>(MDType->getOperand(0).get())->getType();
3750 ConstantInt *LogOp = cast<ConstantInt>(CI->getArgOperand(Num.getArgIdx()));
3751 unsigned LogNum = LogOp->getZExtValue();
3752 unsigned ElBytes = getResultedTypeSize(EltType, DL);
3753 switch (ElBytes) {
3754 // For N = 2 byte data type, use block size 1 and block count x2
3755 // Otherwise, use block size N and original block count.
3756 case 2:
3757 ElBytes = 0;
3758 IGC_ASSERT(LogNum < 4);
3759 // This is correct but I can not merge this in while ISPC not fixed
3760 // LogNum += 1;
3761
3762 // this is incorrect temporary solution
3763 LogNum = 1;
3764 break;
3765 case 1:
3766 ElBytes = 0;
3767 break;
3768 case 4:
3769 ElBytes = 1;
3770 break;
3771 case 8:
3772 ElBytes = 2;
3773 break;
3774 default:
3775 DiagnosticInfoCisaBuild Err{CI, "Bad element type for SVM scatter/gather",
3776 DS_Error};
3777 getContext().diagnose(Err);
3778 }
3779 return std::make_pair(ElBytes, LogNum);
3780 };
3781
3782 auto CreateOpndPredefinedSrc = [&](PreDefined_Vars RegId, unsigned ROffset,
3783 unsigned COffset, unsigned VStride,
3784 unsigned Width, unsigned HStride) {
3785 LLVM_DEBUG(dbgs() << "CreateOpndPredefinedSrc\n");
3786 VISA_GenVar *Decl = nullptr;
3787 CISA_CALL(Kernel->GetPredefinedVar(Decl, RegId));
3788 VISA_VectorOpnd *ResultOperand = nullptr;
3789 CISA_CALL(Kernel->CreateVISASrcOperand(ResultOperand, Decl,
3790 (VISA_Modifier)Mod, VStride, Width,
3791 HStride, ROffset, COffset));
3792 return ResultOperand;
3793 };
3794
3795 auto CreateOpndPredefinedDst = [&](PreDefined_Vars RegId, unsigned ROffset,
3796 unsigned COffset, unsigned HStride) {
3797 LLVM_DEBUG(dbgs() << "CreateOpndPredefinedDst\n");
3798 VISA_GenVar *Decl = nullptr;
3799 CISA_CALL(Kernel->GetPredefinedVar(Decl, RegId));
3800 VISA_VectorOpnd *ResultOperand = nullptr;
3801 CISA_CALL(Kernel->CreateVISADstOperand(ResultOperand, Decl, HStride,
3802 ROffset, COffset));
3803 return ResultOperand;
3804 };
3805
3806 auto CreateImmOpndFromUInt = [&](VISA_Type ImmType, unsigned Val) {
3807 LLVM_DEBUG(dbgs() << "CreateImmOpndFromUInt\n");
3808 VISA_VectorOpnd *src = nullptr;
3809 CISA_CALL(Kernel->CreateVISAImmediate(src, &Val, ImmType));
3810
3811 return src;
3812 };
3813
3814 auto MakeSubbAddcDestination =
3815 [&](GenXIntrinsic::GenXResult::ResultIndexes MemberIdx) {
3816 LLVM_DEBUG(dbgs() << "MakeSubbAddcDestination\n");
3817 IGC_ASSERT(GenXIntrinsic::getGenXIntrinsicID(CI) ==
3818 llvm::GenXIntrinsic::genx_addc ||
3819 GenXIntrinsic::getGenXIntrinsicID(CI) ==
3820 llvm::GenXIntrinsic::genx_subb);
3821 IGC_ASSERT(IndexFlattener::getNumElements(CI->getType()) == 2);
3822
3823 auto SV = SimpleValue(CI, MemberIdx);
3824 auto *DstType = SV.getType();
3825
3826 IGC_ASSERT(DstType->getScalarType()->isIntegerTy(genx::DWordBits));
3827
3828 auto *Reg = getRegForValueAndSaveAlias(KernFunc, SV, UNSIGNED);
3829
3830 const auto TypeSize = CISATypeTable[ISA_TYPE_UD].typeSize;
3831 auto Elements = 1;
3832 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(DstType))
3833 Elements = VT->getNumElements();
3834
3835 Region R(IGCLLVM::FixedVectorType::get(
3836 IntegerType::get(Ctx, TypeSize * genx::ByteBits), Elements));
3837 return createRegionOperand(&R, Reg->GetVar<VISA_GenVar>(Kernel),
3838 UNSIGNED, Mod, true /* Dst */);
3839 };
3840
3841
3842 VISA_EMask_Ctrl exec_mask;
3843 addDebugInfo();
3844 #include "GenXIntrinsicsBuildMap.inc"
3845 }
3846
3847 /**************************************************************************************************
3848 * buildControlRegUpdate : generate an instruction to apply a mask to
3849 * the control register (V14).
3850 *
3851 * Enter: Mask = the mask to apply
3852 * Clear = false if bits set in Mask should be set in V14,
3853 * true if bits set in Mask should be cleared in V14.
3854 */
buildControlRegUpdate(unsigned Mask,bool Clear)3855 void GenXKernelBuilder::buildControlRegUpdate(unsigned Mask, bool Clear) {
3856 ISA_Opcode Opcode;
3857 // write opcode
3858 if (Clear) {
3859 Opcode = ISA_AND;
3860 Mask = ~Mask;
3861 } else
3862 Opcode = ISA_OR;
3863
3864 Region Single = Region(1, 4);
3865
3866 VISA_GenVar *Decl = nullptr;
3867 CISA_CALL(Kernel->GetPredefinedVar(Decl, PREDEFINED_CR0));
3868 VISA_VectorOpnd *dst =
3869 createRegionOperand(&Single, Decl, DONTCARESIGNED, 0, true);
3870 VISA_VectorOpnd *src0 =
3871 createRegionOperand(&Single, Decl, DONTCARESIGNED, 0, false);
3872
3873 VISA_VectorOpnd *src1 = nullptr;
3874 CISA_CALL(Kernel->CreateVISAImmediate(src1, &Mask, ISA_TYPE_UD));
3875
3876 addDebugInfo();
3877 CISA_CALL(Kernel->AppendVISALogicOrShiftInst(Opcode, nullptr, false,
3878 vISA_EMASK_M1, EXEC_SIZE_1, dst,
3879 src0, src1, nullptr, nullptr));
3880 }
3881
3882 /***********************************************************************
3883 * buildBranch : build a conditional or unconditional branch
3884 *
3885 * Return: true if fell through to successor
3886 */
buildBranch(BranchInst * Branch)3887 bool GenXKernelBuilder::buildBranch(BranchInst *Branch) {
3888 BasicBlock *Next = Branch->getParent()->getNextNode();
3889 if (Branch->isUnconditional()) {
3890 // Unconditional branch
3891 if (Branch->getOperand(0) == Next)
3892 return true; // fall through to successor
3893 auto labelId = getOrCreateLabel(Branch->getSuccessor(0), LABEL_BLOCK);
3894 addDebugInfo();
3895 CISA_CALL(Kernel->AppendVISACFJmpInst(nullptr, Labels[labelId]));
3896 return false;
3897 }
3898 // Conditional branch.
3899 // First check if it is a baled in goto/join, via an extractvalue.
3900 auto BI = Baling->getBaleInfo(Branch);
3901 if (BI.isOperandBaled(0 /*condition*/)) {
3902 if (auto Extract = dyn_cast<ExtractValueInst>(Branch->getCondition())) {
3903 auto GotoJoin = cast<CallInst>(Extract->getAggregateOperand());
3904 if (GenXIntrinsic::getGenXIntrinsicID(GotoJoin) == GenXIntrinsic::genx_simdcf_goto) {
3905 buildGoto(GotoJoin, Branch);
3906 } else {
3907 IGC_ASSERT_MESSAGE(GotoJoin::isValidJoin(GotoJoin),
3908 "extra unexpected code in join block");
3909 buildJoin(GotoJoin, Branch);
3910 }
3911 return true;
3912 }
3913 }
3914 // Normal conditional branch.
3915 VISA_EMask_Ctrl MaskCtrl;
3916 VISA_PREDICATE_CONTROL Control = PRED_CTRL_NON;
3917 VISA_PREDICATE_STATE State = PredState_NO_INVERSE;
3918 Value *Pred = getPredicateOperand(Branch, 0, BI, Control, State, &MaskCtrl);
3919 IGC_ASSERT_MESSAGE(!isa<VectorType>(Branch->getCondition()->getType()),
3920 "branch must have scalar condition");
3921 BasicBlock *True = Branch->getSuccessor(0);
3922 BasicBlock *False = Branch->getSuccessor(1);
3923 if (True == Next) {
3924 State ^= PredState_INVERSE; // invert bit in predicate field
3925 True = False;
3926 False = Next;
3927 }
3928 // Write the conditional branch.
3929 VISA_PredVar *PredVar = getPredicateVar(Pred);
3930 VISA_PredOpnd *PredOperand = createPredOperand(PredVar, State, Control);
3931 addDebugInfo();
3932 CISA_CALL(Kernel->AppendVISACFJmpInst(
3933 PredOperand, Labels[getOrCreateLabel(True, LABEL_BLOCK)]));
3934 // If the other successor is not the next block, write an unconditional
3935 // jmp to that.
3936 if (False == Next)
3937 return true; // fall through to successor
3938 addDebugInfo();
3939 CISA_CALL(Kernel->AppendVISACFJmpInst(
3940 nullptr, Labels[getOrCreateLabel(False, LABEL_BLOCK)]));
3941 return false;
3942 }
3943
3944 /***********************************************************************
3945 * buildIndirectBr : build an indirect branch
3946 *
3947 * Indirectbr instructions are used only for jump tables.
3948 *
3949 * Enter: Br = indirect branch inst
3950 */
buildIndirectBr(IndirectBrInst * Br)3951 void GenXKernelBuilder::buildIndirectBr(IndirectBrInst *Br) {
3952 IGC_ASSERT(Subtarget->hasSwitchjmp());
3953 Value *Addr = Br->getAddress();
3954 auto JumpTable = cast<IntrinsicInst>(Addr);
3955 unsigned IID = GenXIntrinsic::getAnyIntrinsicID(JumpTable);
3956 IGC_ASSERT(IID == GenXIntrinsic::genx_jump_table);
3957 Value *Idx = JumpTable->getArgOperand(0);
3958
3959 VISA_VectorOpnd *JMPIdx = createSource(Idx, UNSIGNED);
3960 unsigned NumDest = Br->getNumDestinations();
3961 std::vector<VISA_LabelOpnd *> JMPLabels(NumDest, nullptr);
3962 for (unsigned I = 0; I < NumDest; ++I)
3963 JMPLabels[I] = Labels[getOrCreateLabel(Br->getDestination(I), LABEL_BLOCK)];
3964
3965 addDebugInfo();
3966 CISA_CALL(
3967 Kernel->AppendVISACFSwitchJMPInst(JMPIdx, NumDest, JMPLabels.data()));
3968 }
3969
3970 /***********************************************************************
3971 * buildJoin : build a join
3972 *
3973 * Enter: Join = join instruction that is baled into an extractvalue of
3974 * field 1 (the !any(EM) value), that is baled into Branch,
3975 * if Branch is non-zero
3976 * Branch = branch instruction, or 0 for a join that is not baled
3977 * in to a branch because it always ends up with at least
3978 * one channel enabled
3979 */
buildJoin(CallInst * Join,BranchInst * Branch)3980 void GenXKernelBuilder::buildJoin(CallInst *Join, BranchInst *Branch) {
3981 // A join needs a label. (If the join is at the start of its block, then
3982 // this gets merged into the block label.)
3983 addLabelInst(Join);
3984 // There is no join instruction in vISA -- the finalizer derives it by
3985 // looking for gotos targeting the basic block's label.
3986 }
3987
3988 #if (LLVM_VERSION_MAJOR > 8)
3989 /***********************************************************************
3990 * buildUnaryOperator : build code for an unary operator
3991 *
3992 * Enter: UO = the UnaryOperator
3993 * BI = BaleInfo for UO
3994 * Mod = modifier bits for destination
3995 * WrRegion = 0 else wrregion for destination
3996 * WrRegionBI = BaleInfo for WrRegion
3997 */
buildUnaryOperator(UnaryOperator * UO,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)3998 void GenXKernelBuilder::buildUnaryOperator(UnaryOperator *UO, BaleInfo BI,
3999 unsigned Mod,
4000 const DstOpndDesc &DstDesc) {
4001 ISA_Opcode Opcode = ISA_RESERVED_0;
4002 Signedness DstSigned = SIGNED;
4003 Signedness SrcSigned = SIGNED;
4004 unsigned Mod1 = 0;
4005 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4006 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(UO->getType()))
4007 ExecSize = getExecSizeFromValue(VT->getNumElements());
4008
4009 switch (UO->getOpcode()) {
4010 case Instruction::FNeg:
4011 Opcode = ISA_MOV;
4012 Mod1 ^= MODIFIER_NEG;
4013 break;
4014 default:
4015 report_fatal_error("buildUnaryOperator: unimplemented unary operator");
4016 }
4017
4018 VISA_VectorOpnd *Dst = createDestination(UO, DstSigned, Mod, DstDesc);
4019
4020 VISA_VectorOpnd *Src0 = nullptr;
4021 VISA_PredOpnd *Pred = createPredFromWrRegion(DstDesc);
4022
4023 Src0 = createSourceOperand(UO, SrcSigned, 0, BI, Mod1);
4024
4025 auto ExecMask = getExecMaskFromWrRegion(DstDesc);
4026
4027 addDebugInfo();
4028
4029 if (Opcode == ISA_MOV) {
4030 CISA_CALL(Kernel->AppendVISADataMovementInst(
4031 ISA_MOV, Pred, Mod1 & MODIFIER_SAT, ExecMask, ExecSize, Dst, Src0, NULL));
4032 return;
4033 }
4034 report_fatal_error("buildUnaryOperator: unimplemented opcode");
4035 }
4036 #endif
4037
4038 /***********************************************************************
4039 * getCommonSignedness : predict the most suitable sign of a instruction based
4040 * on incoming values.
4041 *
4042 * Enter: Vs = incoming values to use for signedness prediction
4043 */
getCommonSignedness(ArrayRef<Value * > Vs) const4044 Signedness GenXKernelBuilder::getCommonSignedness(ArrayRef<Value *> Vs) const {
4045 // Expect the first value is always set.
4046 IGC_ASSERT(!Vs.empty());
4047 std::vector<Register *> Regs;
4048 std::transform(
4049 Vs.begin(), Vs.end(), std::back_inserter(Regs), [this](Value *V) { return getLastUsedAlias(V); });
4050 // If there is no register allocated for Value, getLastUsedAlias returns
4051 // nullptr. Remove such nodes.
4052 Regs.erase(std::remove(Regs.begin(), Regs.end(), nullptr), Regs.end());
4053
4054 if (Regs.empty())
4055 // Use SIGNED by default if there are no registers for the values.
4056 return SIGNED;
4057
4058 bool hasSigned = std::any_of(Regs.begin(), Regs.end(),
4059 [](Register *R) { return R->Signed == SIGNED; });
4060 bool hasUnsigned = std::any_of(Regs.begin(), Regs.end(), [](Register *R) {
4061 return R->Signed == UNSIGNED;
4062 });
4063 // If there is at least one UNSIGNED and others are UNSIGNED or DONTCARESIGNED
4064 // (absence of a register also means DONTCARESIGNED), UNSIGNED must be used.
4065 // Otherwise, SIGNED.
4066 if (hasUnsigned && !hasSigned)
4067 return UNSIGNED;
4068 return SIGNED;
4069 }
4070
4071 /***********************************************************************
4072 * getLastUsedAlias : get the last used alias of a vISA virtual register
4073 * for a value. Nullptr if none.
4074 */
getLastUsedAlias(Value * V) const4075 GenXKernelBuilder::Register *GenXKernelBuilder::getLastUsedAlias(Value *V) const {
4076 auto Res = LastUsedAliasMap.find(V);
4077 if (Res == LastUsedAliasMap.end())
4078 return nullptr;
4079 return Res->second;
4080 }
4081
4082 /***********************************************************************
4083 * getRegForValueUntypedAndSaveAlias : a wrapper for
4084 * GenXVisaRegAlloc::getRegForValueUntyped which also saves the register alias
4085 * in a special map.
4086 *
4087 * Enter: args = the wrapped function parameters.
4088 */
4089 template <typename... Args>
4090 GenXKernelBuilder::Register *
getRegForValueUntypedAndSaveAlias(Args &&...args)4091 GenXKernelBuilder::getRegForValueUntypedAndSaveAlias(Args &&... args) {
4092 Register *R = RegAlloc->getRegForValueUntyped(std::forward<Args>(args)...);
4093 SimpleValue SV = std::get<1>(std::make_tuple(args...));
4094 if (R)
4095 LastUsedAliasMap[SV.getValue()] = R;
4096 return R;
4097 }
4098
4099 /***********************************************************************
4100 * getRegForValueOrNullAndSaveAlias : a wrapper for
4101 * GenXVisaRegAlloc::getRegForValueOrNull which also saves the register alias in
4102 * a special map.
4103 *
4104 * Enter: args = the wrapped function parameters.
4105 */
4106 template <typename... Args>
4107 GenXKernelBuilder::Register *
getRegForValueOrNullAndSaveAlias(Args &&...args)4108 GenXKernelBuilder::getRegForValueOrNullAndSaveAlias(Args &&... args) {
4109 Register *R = RegAlloc->getRegForValueOrNull(std::forward<Args>(args)...);
4110 SimpleValue SV = std::get<1>(std::make_tuple(args...));
4111 if (R)
4112 LastUsedAliasMap[SV.getValue()] = R;
4113 return R;
4114 }
4115
4116 /***********************************************************************
4117 * getRegForValueAndSaveAlias : a wrapper for GenXVisaRegAlloc::getRegForValue
4118 * which also saves the register alias in a special map.
4119 *
4120 * Enter: args = the wrapped function parameters.
4121 */
4122 template <typename... Args>
4123 GenXKernelBuilder::Register *
getRegForValueAndSaveAlias(Args &&...args)4124 GenXKernelBuilder::getRegForValueAndSaveAlias(Args &&... args) {
4125 Register *R = RegAlloc->getRegForValue(std::forward<Args>(args)...);
4126 SimpleValue SV = std::get<1>(std::make_tuple(args...));
4127 IGC_ASSERT_MESSAGE(R, "getRegForValue must return non-nullptr register");
4128 LastUsedAliasMap[SV.getValue()] = R;
4129 return R;
4130 }
4131
4132 /***********************************************************************
4133 * buildBinaryOperator : build code for a binary operator
4134 *
4135 * Enter: BO = the BinaryOperator
4136 * BI = BaleInfo for BO
4137 * Mod = modifier bits for destination
4138 * WrRegion = 0 else wrregion for destination
4139 * WrRegionBI = BaleInfo for WrRegion
4140 */
buildBinaryOperator(BinaryOperator * BO,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)4141 void GenXKernelBuilder::buildBinaryOperator(BinaryOperator *BO, BaleInfo BI,
4142 unsigned Mod,
4143 const DstOpndDesc &DstDesc) {
4144 bool IsLogic = false;
4145 ISA_Opcode Opcode = ISA_RESERVED_0;
4146
4147 Signedness SrcSigned = DONTCARESIGNED;
4148 Signedness DstSigned = DONTCARESIGNED;
4149 unsigned Mod1 = 0;
4150 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4151 auto hasConstantIntFitsInWord = [BO]() {
4152 return std::any_of(BO->op_begin(), BO->op_end(), [](Value *V) {
4153 auto C = dyn_cast<ConstantInt>(V);
4154 if (!C)
4155 return false;
4156 return C->getValue().getMinSignedBits() <= genx::WordBits;
4157 });
4158 };
4159 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(BO->getType()))
4160 ExecSize = getExecSizeFromValue(VT->getNumElements());
4161 switch (BO->getOpcode()) {
4162 case Instruction::Add:
4163 case Instruction::FAdd:
4164 Opcode = ISA_ADD;
4165 break;
4166 case Instruction::Sub:
4167 case Instruction::FSub:
4168 Opcode = ISA_ADD;
4169 Mod1 ^= MODIFIER_NEG;
4170 break;
4171 case Instruction::Mul:
4172 case Instruction::FMul:
4173 Opcode = ISA_MUL;
4174 // Check if there is a possibility to truncate the integer constant further
4175 // that will help to generate better code. In this case SIGNED type must be
4176 // used.
4177 if (hasConstantIntFitsInWord())
4178 DstSigned = SrcSigned = SIGNED;
4179 break;
4180 case Instruction::Shl:
4181 Opcode = ISA_SHL;
4182 IsLogic = true;
4183 break;
4184 case Instruction::AShr:
4185 Opcode = ISA_ASR;
4186 DstSigned = SrcSigned = SIGNED;
4187 IsLogic = true;
4188 break;
4189 case Instruction::LShr:
4190 Opcode = ISA_SHR;
4191 DstSigned = SrcSigned = UNSIGNED;
4192 IsLogic = true;
4193 break;
4194 case Instruction::UDiv:
4195 Opcode = ISA_DIV;
4196 DstSigned = SrcSigned = UNSIGNED;
4197 break;
4198 case Instruction::SDiv:
4199 Opcode = ISA_DIV;
4200 DstSigned = SrcSigned = SIGNED;
4201 break;
4202 case Instruction::FDiv: {
4203 Opcode = ISA_DIV;
4204 if (Constant *Op0 = dyn_cast<Constant>(BO->getOperand(0))) {
4205 if (Op0->getType()->isVectorTy())
4206 Op0 = Op0->getSplatValue();
4207 ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Op0);
4208 if (CFP && CFP->isExactlyValue(1.0))
4209 Opcode = ISA_INV;
4210 }
4211 } break;
4212 case Instruction::URem:
4213 Opcode = ISA_MOD;
4214 DstSigned = SrcSigned = UNSIGNED;
4215 break;
4216 case Instruction::SRem:
4217 DstSigned = SrcSigned = SIGNED;
4218 Opcode = ISA_MOD;
4219 break;
4220 case Instruction::FRem:
4221 Opcode = ISA_MOD;
4222 break;
4223 case Instruction::And:
4224 Opcode = ISA_AND;
4225 IsLogic = true;
4226 break;
4227 case Instruction::Or:
4228 Opcode = ISA_OR;
4229 IsLogic = true;
4230 break;
4231 case Instruction::Xor:
4232 Opcode = ISA_XOR;
4233 IsLogic = true;
4234 break;
4235 default:
4236 report_fatal_error("buildBinaryOperator: unimplemented binary operator");
4237 break;
4238 }
4239
4240 // If signedness wasn't set explicitly earlier and destination modifier isn't
4241 // set.
4242 if (SrcSigned == DONTCARESIGNED && DstSigned == DONTCARESIGNED) {
4243
4244 bool hasExt =
4245 std::any_of(BO->use_begin(), BO->use_end(),
4246 [B = Baling](Use &U) { return isExtOperandBaled(U, B); });
4247
4248 if (Mod == MODIFIER_NONE && !hasExt) {
4249 Value *Op0 = BO->getOperand(0);
4250 Value *Op1 = BO->getOperand(1);
4251 if (Opcode == ISA_INV)
4252 SrcSigned = DstSigned = getCommonSignedness({Op1});
4253 else
4254 SrcSigned = DstSigned = getCommonSignedness({Op0, Op1});
4255 } else
4256 // If the modifier is set or SEXT, ZEXT is baled, use old behavior.
4257 SrcSigned = DstSigned = SIGNED;
4258 }
4259
4260 VISA_VectorOpnd *Dst = createDestination(BO, DstSigned, Mod, DstDesc);
4261
4262 VISA_VectorOpnd *Src0 = nullptr;
4263 VISA_VectorOpnd *Src1 = nullptr;
4264 VISA_PredOpnd *Pred = createPredFromWrRegion(DstDesc);
4265
4266 if (Opcode == ISA_INV) {
4267 Src0 = createSourceOperand(BO, SrcSigned, 1, BI, Mod1); // source 0
4268 } else {
4269 Src0 = createSourceOperand(BO, SrcSigned, 0, BI); // source 0
4270 Src1 = createSourceOperand(BO, SrcSigned, 1, BI, Mod1); // source 1
4271 }
4272
4273 auto ExecMask = getExecMaskFromWrRegion(DstDesc);
4274
4275 addDebugInfo();
4276 if (IsLogic) {
4277 CISA_CALL(Kernel->AppendVISALogicOrShiftInst(
4278 Opcode, Pred, Mod, ExecMask, ExecSize, Dst, Src0, Src1, NULL, NULL));
4279 } else {
4280 if (Opcode == ISA_ADDC || Opcode == ISA_SUBB) {
4281 IGC_ASSERT(0);
4282 } else {
4283 CISA_CALL(Kernel->AppendVISAArithmeticInst(
4284 Opcode, Pred, Mod, ExecMask, ExecSize, Dst, Src0, Src1, NULL));
4285 }
4286 }
4287 }
4288
4289 /***********************************************************************
4290 * buildBoolBinaryOperator : build code for a binary operator acting on
4291 * i1 or vector of i1
4292 *
4293 * Enter: BO = the BinaryOperator
4294 */
buildBoolBinaryOperator(BinaryOperator * BO)4295 void GenXKernelBuilder::buildBoolBinaryOperator(BinaryOperator *BO) {
4296 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4297 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(BO->getType()))
4298 ExecSize = getExecSizeFromValue(VT->getNumElements());
4299 ISA_Opcode Opcode = ISA_RESERVED_0;
4300 switch (BO->getOpcode()) {
4301 case Instruction::And:
4302 Opcode = ISA_AND;
4303 break;
4304 case Instruction::Or:
4305 Opcode = ISA_OR;
4306 break;
4307 case Instruction::Xor:
4308 Opcode = ISA_XOR;
4309 if (isNot(BO))
4310 Opcode = ISA_NOT;
4311 break;
4312 default:
4313 report_fatal_error(
4314 "buildBoolBinaryOperator: unimplemented binary operator");
4315 break;
4316 }
4317
4318 if (isPredNot(BO) && BO->hasOneUse()) {
4319 // If this NOT predicate is a goto operand and it has only one use, then we
4320 // won't emit it. %P1 = ... %P2 = not %P1
4321 // (!%P2) goto
4322 // Transforms into
4323 // (%P1) goto
4324
4325 auto Goto = dyn_cast<CallInst>(*BO->user_begin());
4326 if (Goto && GenXIntrinsic::getGenXIntrinsicID(Goto) ==
4327 GenXIntrinsic::genx_simdcf_goto)
4328 return;
4329 }
4330
4331 VISA_PredVar *Dst = getPredicateVar(BO);
4332 VISA_PredVar *Src0 = getPredicateVar(BO->getOperand(0));
4333 VISA_PredVar *Src1 =
4334 Opcode != ISA_NOT ? getPredicateVar(BO->getOperand(1)) : nullptr;
4335
4336 addDebugInfo();
4337 CISA_CALL(Kernel->AppendVISALogicOrShiftInst(
4338 Opcode, NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1, ExecSize, Dst, Src0,
4339 Src1));
4340 }
4341
buildSymbolInst(CallInst * GAddrInst,unsigned Mod,const DstOpndDesc & DstDesc)4342 void GenXKernelBuilder::buildSymbolInst(CallInst *GAddrInst, unsigned Mod,
4343 const DstOpndDesc &DstDesc) {
4344 IGC_ASSERT_MESSAGE(GAddrInst, "wrong argument: nullptr is unallowed");
4345 IGC_ASSERT_MESSAGE(GenXIntrinsic::getGenXIntrinsicID(GAddrInst) ==
4346 GenXIntrinsic::genx_gaddr,
4347 "wrong argument: genx.addr intrinsic is expected");
4348 auto *GV = cast<GlobalValue>(GAddrInst->getOperand(0));
4349 VISA_VectorOpnd *Dst = createDestination(GAddrInst, UNSIGNED, Mod, DstDesc);
4350 CISA_CALL(Kernel->AppendVISACFSymbolInst(GV->getName().str(), Dst));
4351 }
4352
4353 /***********************************************************************
4354 * buildWritePredefSurface : get predefined visa surface variable
4355 *
4356 * Enter: GV = global that denotes predefined variable
4357 *
4358 * Return: visa surface variable, non-null
4359 *
4360 */
4361 VISA_SurfaceVar *
getPredefinedSurfaceVar(GlobalVariable & GV)4362 GenXKernelBuilder::getPredefinedSurfaceVar(GlobalVariable &GV) {
4363 StringRef SurfName = GV.getName();
4364 PreDefined_Surface VisaSurfName =
4365 StringSwitch<PreDefined_Surface>(SurfName)
4366 .Case(genx::BSSVariableName, PREDEFINED_SURFACE_T252)
4367 .Default(PREDEFINED_SURFACE_LAST);
4368 IGC_ASSERT_MESSAGE(VisaSurfName != PREDEFINED_SURFACE_LAST,
4369 "Unexpected predefined surface");
4370 VISA_SurfaceVar *SurfVar = nullptr;
4371 CISA_CALL(Kernel->GetPredefinedSurface(SurfVar, VisaSurfName));
4372 return SurfVar;
4373 }
4374
4375 /***********************************************************************
4376 * buildWritePredefSurface : build code to write to predefined surface
4377 *
4378 * Enter: CI = write_predef_surface intrinsic
4379 *
4380 */
buildWritePredefSurface(CallInst & CI)4381 void GenXKernelBuilder::buildWritePredefSurface(CallInst &CI) {
4382 IGC_ASSERT_MESSAGE(GenXIntrinsic::getGenXIntrinsicID(&CI) ==
4383 GenXIntrinsic::genx_write_predef_surface,
4384 "Expected predefined surface write intrinsic");
4385 auto *PredefSurf = cast<GlobalVariable>(CI.getArgOperand(0));
4386 VISA_SurfaceVar *SurfVar = getPredefinedSurfaceVar(*PredefSurf);
4387 VISA_VectorOpnd *SurfOpnd = nullptr;
4388 CISA_CALL(Kernel->CreateVISAStateOperand(SurfOpnd, SurfVar, /*offset=*/0,
4389 /*useAsDst=*/true));
4390 VISA_VectorOpnd *SrcOpnd = createSource(CI.getArgOperand(1), genx::UNSIGNED);
4391 CISA_CALL(Kernel->AppendVISADataMovementInst(
4392 ISA_MOVS, /*pred=*/nullptr, /*satMod=*/false, vISA_EMASK_M1_NM,
4393 EXEC_SIZE_1, SurfOpnd, SrcOpnd));
4394 }
4395
4396 /***********************************************************************
4397 * buildCastInst : build code for a cast (other than a no-op cast)
4398 *
4399 * Enter: CI = the CastInst
4400 * BI = BaleInfo for CI
4401 * Mod = modifier bits for destination
4402 * WrRegion = 0 else wrregion for destination
4403 * WrRegionBI = BaleInfo for WrRegion
4404 */
buildCastInst(CastInst * CI,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)4405 void GenXKernelBuilder::buildCastInst(CastInst *CI, BaleInfo BI, unsigned Mod,
4406 const DstOpndDesc &DstDesc) {
4407 Signedness InSigned = DONTCARESIGNED;
4408 Signedness OutSigned = DONTCARESIGNED;
4409 switch (CI->getOpcode()) {
4410 case Instruction::UIToFP:
4411 InSigned = UNSIGNED;
4412 break;
4413 case Instruction::SIToFP:
4414 InSigned = SIGNED;
4415 break;
4416 case Instruction::FPToUI:
4417 OutSigned = UNSIGNED;
4418 break;
4419 case Instruction::FPToSI:
4420 OutSigned = SIGNED;
4421 break;
4422 case Instruction::ZExt:
4423 InSigned = UNSIGNED;
4424 break;
4425 case Instruction::SExt:
4426 InSigned = SIGNED;
4427 break;
4428 case Instruction::FPTrunc:
4429 case Instruction::FPExt:
4430 break;
4431 case Instruction::PtrToInt:
4432 case Instruction::IntToPtr:
4433 break;
4434 case Instruction::AddrSpaceCast:
4435 break;
4436 case Instruction::Trunc:
4437 break;
4438 default:
4439 DiagnosticInfoCisaBuild Err{CI, "buildCastInst: unimplemented cast",
4440 DS_Error};
4441 getContext().diagnose(Err);
4442 }
4443
4444 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4445 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType()))
4446 ExecSize = getExecSizeFromValue(VT->getNumElements());
4447
4448 auto ExecMask = getExecMaskFromWrRegion(DstDesc);
4449
4450 VISA_PredOpnd *Pred = createPredFromWrRegion(DstDesc);
4451 // Give dest and source the same signedness for byte mov.
4452 VISA_VectorOpnd *Dst = createDestination(CI, OutSigned, Mod, DstDesc);
4453
4454 if (InSigned == DONTCARESIGNED)
4455 InSigned = OutSigned;
4456 VISA_VectorOpnd *Src0 = createSourceOperand(CI, InSigned, 0, BI);
4457
4458 addDebugInfo();
4459 CISA_CALL(Kernel->AppendVISADataMovementInst(
4460 ISA_MOV, Pred, Mod & MODIFIER_SAT, ExecMask, ExecSize, Dst, Src0, NULL));
4461 }
4462
4463 /***********************************************************************
4464 * buildCmp : build code for a compare
4465 *
4466 * Enter: Cmp = the compare instruction
4467 * BI = BaleInfo for Cmp
4468 * WrRegion = 0 else wrpredregion, wrpredpredregion, or wrregion for
4469 * destination
4470 */
buildCmp(CmpInst * Cmp,BaleInfo BI,const DstOpndDesc & DstDesc)4471 void GenXKernelBuilder::buildCmp(CmpInst *Cmp, BaleInfo BI,
4472 const DstOpndDesc &DstDesc) {
4473 IGC_ASSERT_MESSAGE(testPredicate(Cmp, DstDesc),
4474 "write predicate size 4 only allowed for double/longlong type");
4475 Signedness Signed = DONTCARESIGNED;
4476 VISA_Cond_Mod opSpec;
4477 switch (Cmp->getPredicate()) {
4478 case CmpInst::FCMP_ONE:
4479 case CmpInst::FCMP_ORD:
4480 case CmpInst::FCMP_UEQ:
4481 case CmpInst::FCMP_UGT:
4482 case CmpInst::FCMP_UGE:
4483 case CmpInst::FCMP_ULT:
4484 case CmpInst::FCMP_ULE:
4485 case CmpInst::FCMP_UNO:
4486 IGC_ASSERT_MESSAGE(0, "unsupported fcmp predicate");
4487 break;
4488 case CmpInst::FCMP_OEQ:
4489 case CmpInst::ICMP_EQ:
4490 opSpec = ISA_CMP_E;
4491 break;
4492 case CmpInst::FCMP_UNE:
4493 case CmpInst::ICMP_NE:
4494 opSpec = ISA_CMP_NE;
4495 break;
4496 case CmpInst::FCMP_OGT:
4497 opSpec = ISA_CMP_G;
4498 break;
4499 case CmpInst::ICMP_UGT:
4500 opSpec = ISA_CMP_G;
4501 Signed = UNSIGNED;
4502 break;
4503 case CmpInst::ICMP_SGT:
4504 opSpec = ISA_CMP_G;
4505 Signed = SIGNED;
4506 break;
4507 case CmpInst::FCMP_OGE:
4508 opSpec = ISA_CMP_GE;
4509 break;
4510 case CmpInst::ICMP_UGE:
4511 opSpec = ISA_CMP_GE;
4512 Signed = UNSIGNED;
4513 break;
4514 case CmpInst::ICMP_SGE:
4515 opSpec = ISA_CMP_GE;
4516 Signed = SIGNED;
4517 break;
4518 case CmpInst::FCMP_OLT:
4519 opSpec = ISA_CMP_L;
4520 break;
4521 case CmpInst::ICMP_ULT:
4522 opSpec = ISA_CMP_L;
4523 Signed = UNSIGNED;
4524 break;
4525 case CmpInst::ICMP_SLT:
4526 opSpec = ISA_CMP_L;
4527 Signed = SIGNED;
4528 break;
4529 case CmpInst::FCMP_OLE:
4530 opSpec = ISA_CMP_LE;
4531 break;
4532 case CmpInst::ICMP_ULE:
4533 opSpec = ISA_CMP_LE;
4534 Signed = UNSIGNED;
4535 break;
4536 case CmpInst::ICMP_SLE:
4537 opSpec = ISA_CMP_LE;
4538 Signed = SIGNED;
4539 break;
4540 default:
4541 DiagnosticInfoCisaBuild Err{Cmp, "unknown predicate", DS_Error};
4542 getContext().diagnose(Err);
4543 }
4544
4545 // Check if this is to write to a predicate desination or a GRF desination.
4546 bool WriteToPred = true;
4547 if (Cmp->hasOneUse()) {
4548 Instruction *UI = Cmp->user_back();
4549 BaleInfo UserBI = Baling->getBaleInfo(UI);
4550 if (UserBI.Type == BaleInfo::CMPDST)
4551 WriteToPred = false;
4552 }
4553
4554 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4555 VISA_EMask_Ctrl ctrlMask = vISA_EMASK_M1;
4556 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Cmp->getType()))
4557 ExecSize = getExecSizeFromValue(VT->getNumElements());
4558
4559 VISA_VectorOpnd *Dst = nullptr;
4560 genx::Signedness SignedSrc0;
4561 VISA_VectorOpnd *Src0 =
4562 createSourceOperand(Cmp, Signed, 0, BI, 0, &SignedSrc0);
4563 VISA_VectorOpnd *Src1 = createSourceOperand(Cmp, SignedSrc0, 1, BI);
4564
4565 if (WriteToPred) {
4566 ctrlMask = getExecMaskFromWrPredRegion(DstDesc.WrRegion, false);
4567 VISA_PredVar *PredVar =
4568 getPredicateVar(DstDesc.WrRegion ? DstDesc.WrRegion : Cmp);
4569 addDebugInfo();
4570 CISA_CALL(Kernel->AppendVISAComparisonInst(opSpec, ctrlMask, ExecSize,
4571 PredVar, Src0, Src1));
4572 } else {
4573 ctrlMask = getExecMaskFromWrRegion(DstDesc);
4574 Value *Val = DstDesc.WrRegion ? DstDesc.WrRegion : Cmp->user_back();
4575 Dst = createDestination(Val, Signed, 0, DstDesc);
4576 addDebugInfo();
4577 CISA_CALL(Kernel->AppendVISAComparisonInst(opSpec, ctrlMask, ExecSize, Dst,
4578 Src0, Src1));
4579 }
4580 }
4581
4582 /***********************************************************************
4583 * buildConvertAddr : build code for conversion to address
4584 *
4585 * Enter: CI = the CallInst
4586 * BI = BaleInfo for CI
4587 * Mod = modifier bits for destination
4588 * WrRegion = 0 else wrregion for destination
4589 * WrRegionBI = BaleInfo for WrRegion
4590 */
buildConvertAddr(CallInst * CI,genx::BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)4591 void GenXKernelBuilder::buildConvertAddr(CallInst *CI, genx::BaleInfo BI,
4592 unsigned Mod,
4593 const DstOpndDesc &DstDesc) {
4594 IGC_ASSERT(!DstDesc.WrRegion);
4595 Value *Base = Liveness->getAddressBase(CI);
4596 VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4597 VISA_EMask_Ctrl MaskCtrl = NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
4598
4599 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType()))
4600 ExecSize = getExecSizeFromValue(VT->getNumElements());
4601 // If the offset is less aligned than the base register element type, then
4602 // we need a different type.
4603 Type *OverrideTy = nullptr;
4604 Type *BaseTy = Base->getType();
4605 if (BaseTy->isPointerTy())
4606 BaseTy = BaseTy->getPointerElementType();
4607 unsigned ElementBytes =
4608 BaseTy->getScalarType()->getPrimitiveSizeInBits() >> 3;
4609 int Offset = cast<ConstantInt>(CI->getArgOperand(1))->getSExtValue();
4610 if ((ElementBytes - 1) & Offset) {
4611 OverrideTy = IGCLLVM::FixedVectorType::get(
4612 Type::getInt8Ty(CI->getContext()),
4613 cast<IGCLLVM::FixedVectorType>(BaseTy)->getNumElements() *
4614 ElementBytes);
4615 ElementBytes = 1;
4616 }
4617 Register *BaseReg =
4618 getRegForValueAndSaveAlias(KernFunc, Base, DONTCARESIGNED, OverrideTy);
4619
4620 VISA_VectorOpnd *Dst = createAddressOperand(CI, true);
4621 VISA_VectorOpnd *Src1 = nullptr;
4622
4623 if (BaseReg->Category == RegCategory::SURFACE ||
4624 BaseReg->Category == RegCategory::SAMPLER) {
4625 uint8_t offset = Offset >> 2;
4626 switch (BaseReg->Category) {
4627 case RegCategory::SURFACE: {
4628 VISA_SurfaceVar *Decl = BaseReg->GetVar<VISA_SurfaceVar>(Kernel);
4629 unsigned int offsetB = offset * 2; // 2 is bytes size of UW
4630 CISA_CALL(Kernel->CreateVISAAddressOfOperand(Src1, Decl, offsetB));
4631 break;
4632 }
4633 case RegCategory::SAMPLER: {
4634 VISA_SurfaceVar *Decl = BaseReg->GetVar<VISA_SurfaceVar>(Kernel);
4635 unsigned int offsetB = offset * 2; // 2 is bytes size of UW
4636 CISA_CALL(Kernel->CreateVISAAddressOfOperand(Src1, Decl, offsetB));
4637 break;
4638 }
4639 default:
4640 DiagnosticInfoCisaBuild Err{
4641 CI,
4642 "Invalid state operand class: only surface, vme, and "
4643 "sampler are supported.",
4644 DS_Error};
4645 getContext().diagnose(Err);
4646 }
4647 } else {
4648 uint8_t rowOffset = Offset >> genx::log2(GrfByteSize);
4649 uint8_t colOffset = (Offset & (GrfByteSize - 1)) >> Log2_32(ElementBytes);
4650 auto TypeSize = BaseReg->Ty->getScalarType()->getPrimitiveSizeInBits() >> 3;
4651 unsigned int offset = colOffset * TypeSize + rowOffset * GrfByteSize;
4652
4653 if (BaseReg->Category == RegCategory::ADDRESS) {
4654 VISA_AddrVar *Decl = BaseReg->GetVar<VISA_AddrVar>(Kernel);
4655 unsigned Width = 1;
4656 CISA_CALL(Kernel->CreateVISAAddressSrcOperand(Src1, Decl, offset, Width));
4657 } else {
4658 VISA_GenVar *Decl = BaseReg->GetVar<VISA_GenVar>(Kernel);
4659 CISA_CALL(Kernel->CreateVISAAddressOfOperand(Src1, Decl, offset));
4660 }
4661 }
4662 VISA_VectorOpnd *Src2 = createSourceOperand(CI, UNSIGNED, 0, BI);
4663 addDebugInfo();
4664 CISA_CALL(Kernel->AppendVISAAddrAddInst(MaskCtrl, ExecSize, Dst, Src1, Src2));
4665 }
4666
4667 /***********************************************************************
4668 * buildAlloca : build code for allocating in thread-private memory
4669 *
4670 * Enter: CI = the CallInst
4671 *
4672 */
buildAlloca(CallInst * CI,unsigned IntrinID,unsigned Mod,const DstOpndDesc & DstDesc)4673 void GenXKernelBuilder::buildAlloca(CallInst *CI, unsigned IntrinID,
4674 unsigned Mod, const DstOpndDesc &DstDesc) {
4675 LLVM_DEBUG(dbgs() << "Building alloca " << *CI << "\n");
4676 VISA_GenVar *Sp = nullptr;
4677 CISA_CALL(Kernel->GetPredefinedVar(Sp, PreDefined_Vars::PREDEFINED_FE_SP));
4678 if (!allowI64Ops())
4679 CISA_CALL(Kernel->CreateVISAGenVar(Sp, "Sp", 1, ISA_TYPE_UD, ALIGN_DWORD, Sp));
4680
4681 Value *AllocaOff = CI->getOperand(0);
4682 Type *AllocaOffTy = AllocaOff->getType();
4683
4684 if (CurrentPadding) {
4685 // padd the current alloca the comply with gather/scatter alignment rules
4686 // unsigned LastOff = getResultedTypeSize(LastAlloca->getOperand(0)->getType(), DL);
4687 auto *AllocaEltTy = AllocaOffTy->getScalarType();
4688 if (AllocaOffTy->isArrayTy())
4689 AllocaEltTy = AllocaOffTy->getArrayElementType();
4690 unsigned Padding = DL.getTypeSizeInBits(AllocaEltTy) / genx::ByteBits;
4691 Padding = (Padding - CurrentPadding) % Padding;
4692 if (Padding) {
4693 VISA_VectorOpnd *SpSrc = nullptr;
4694 CISA_CALL(Kernel->CreateVISASrcOperand(SpSrc, Sp, MODIFIER_NONE, 0, 1, 0,
4695 0, 0));
4696 VISA_VectorOpnd *PaddImm = nullptr;
4697 CISA_CALL(Kernel->CreateVISAImmediate(PaddImm, &Padding, ISA_TYPE_D));
4698 VISA_VectorOpnd *DstSp = nullptr;
4699 CISA_CALL(Kernel->CreateVISADstOperand(
4700 DstSp, static_cast<VISA_GenVar *>(Sp), 1, 0, 0));
4701
4702 CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
4703 vISA_EMASK_M1, EXEC_SIZE_1,
4704 DstSp, SpSrc, PaddImm));
4705 CurrentPadding += Padding;
4706 }
4707 }
4708
4709 VISA_VectorOpnd *SpSrc = nullptr;
4710 CISA_CALL(
4711 Kernel->CreateVISASrcOperand(SpSrc, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
4712
4713 unsigned OffVal = getResultedTypeSize(AllocaOffTy, DL);
4714 CurrentPadding = (CurrentPadding + OffVal) %
4715 (DL.getLargestLegalIntTypeSizeInBits() / genx::ByteBits);
4716
4717 VISA_VectorOpnd *Imm = nullptr;
4718 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_D));
4719
4720 if (IntrinID == llvm::GenXIntrinsic::genx_alloca) {
4721 VISA_VectorOpnd *Src = nullptr;
4722 CISA_CALL(Kernel->CreateVISASrcOperand(Src, static_cast<VISA_GenVar *>(Sp),
4723 MODIFIER_NONE, 0, 1, 0, 0, 0));
4724 VISA_VectorOpnd *Dst = createDestination(CI, DONTCARESIGNED, Mod, DstDesc);
4725 CISA_CALL(Kernel->AppendVISADataMovementInst(
4726 ISA_MOV, nullptr, false, vISA_EMASK_M1, EXEC_SIZE_1, Dst, Src));
4727 }
4728
4729 VISA_VectorOpnd *DstSp = nullptr;
4730 CISA_CALL(Kernel->CreateVISADstOperand(DstSp, static_cast<VISA_GenVar *>(Sp),
4731 1, 0, 0));
4732
4733 CISA_CALL(Kernel->AppendVISAArithmeticInst(
4734 ISA_ADD, nullptr, false, vISA_EMASK_M1, EXEC_SIZE_1, DstSp, SpSrc, Imm));
4735 }
4736
4737 /***********************************************************************
4738 * buildPrintIndex : build code for storing constant format strins as metadata
4739 * and returning idx for that string
4740 *
4741 * Enter: CI = the CallInst
4742 *
4743 */
buildPrintIndex(CallInst * CI,unsigned IntrinID,unsigned Mod,const DstOpndDesc & DstDesc)4744 void GenXKernelBuilder::buildPrintIndex(CallInst *CI, unsigned IntrinID,
4745 unsigned Mod,
4746 const DstOpndDesc &DstDesc) {
4747 // create move with constant
4748 VISA_VectorOpnd *Imm = nullptr;
4749 Module* M = CI->getModule();
4750 NamedMDNode *NMD = M->getOrInsertNamedMetadata("cm_print_strings");
4751 unsigned NumOp = NMD->getNumOperands();
4752 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &NumOp, ISA_TYPE_UD));
4753 VISA_VectorOpnd *Dst = createDestination(CI, DONTCARESIGNED, Mod, DstDesc);
4754 CISA_CALL(Kernel->AppendVISADataMovementInst(
4755 ISA_MOV, nullptr, false, vISA_EMASK_M1_NM,
4756 EXEC_SIZE_1, Dst, Imm));
4757
4758 // access string
4759 StringRef UnderlyingCStr =
4760 vc::getConstStringFromOperand(*CI->getArgOperand(0));
4761
4762 // store metadata
4763 LLVMContext &Context = CI->getContext();
4764 MDNode* N = MDNode::get(Context, MDString::get(Context, UnderlyingCStr));
4765 NMD->addOperand(N);
4766 }
4767
deduceRegion(Region * R,bool IsDest,unsigned MaxWidth)4768 void GenXKernelBuilder::deduceRegion(Region *R, bool IsDest,
4769 unsigned MaxWidth) {
4770 IGC_ASSERT(Subtarget);
4771 if (!IsDest && !R->is2D() && R->Indirect &&
4772 Subtarget->hasIndirectGRFCrossing()) {
4773 // For a source 1D indirect region that might possibly cross a GRF
4774 // (because we are on SKL+ so a single GRF crossing is allowed), make it
4775 // Nx1 instead of 1xN to avoid crossing a GRF within a row.
4776 R->VStride = R->Stride;
4777 R->Width = 1;
4778 R->Stride = 0;
4779 }
4780 // another case of converting to <N;1,0> region format
4781 if (!IsDest &&
4782 (R->VStride == (int)R->Width * R->Stride || R->Width == R->NumElements)) {
4783 R->Width = 1;
4784 R->VStride = R->Stride;
4785 R->Stride = 0;
4786 } else if (R->Width > MaxWidth) {
4787 // A Width of more than 16 (or whatever MaxWidth is) is not allowed. If it
4788 // is more than 16, then legalization has ensured that either there is one
4789 // row or the rows are contiguous (VStride == Width * Stride) and we can
4790 // increase the number of rows. (Note that Width and VStride are ignored
4791 // in a destination operand; legalization ensures that there is only one
4792 // row.)
4793 R->Width = MaxWidth;
4794 R->VStride = R->Width * R->Stride;
4795 }
4796
4797 if (R->Width == R->NumElements) {
4798 // Use VStride 0 on a 1D region. This is necessary for src0 in line or
4799 // pln, so we may as well do it for everything.
4800 R->VStride = 0;
4801 }
4802
4803 if (R->Indirect) {
4804 R->IndirectAddrOffset = 0;
4805 if (GenXIntrinsic::isRdRegion(R->Indirect)) {
4806 auto AddrRdR = cast<Instruction>(R->Indirect);
4807 Region AddrR = makeRegionFromBaleInfo(AddrRdR, BaleInfo());
4808 IGC_ASSERT_MESSAGE(!AddrR.Indirect,
4809 "cannot have address rdregion that is indirect");
4810 R->IndirectAddrOffset =
4811 AddrR.Offset / 2; // address element is always 2 byte
4812 }
4813 }
4814 }
4815
4816 VISA_VectorOpnd *
createGeneralOperand(Region * R,VISA_GenVar * Decl,Signedness Signed,unsigned Mod,bool IsDest,unsigned MaxWidth)4817 GenXKernelBuilder::createGeneralOperand(Region *R, VISA_GenVar *Decl,
4818 Signedness Signed, unsigned Mod,
4819 bool IsDest, unsigned MaxWidth) {
4820 VISA_VectorOpnd *ResultOperand = nullptr;
4821 // Write the vISA general operand, canonicalizing the
4822 // region parameters where applicable.
4823 IGC_ASSERT_MESSAGE(Decl, "no register allocated for this value");
4824 if (!IsDest) {
4825 ResultOperand = createCisaSrcOperand(
4826 Decl, static_cast<VISA_Modifier>(Mod), R->VStride, R->Width, R->Stride,
4827 R->Offset >> genx::log2(GrfByteSize),
4828 (R->Offset & (GrfByteSize - 1)) / R->ElementBytes);
4829 } else {
4830 ResultOperand = createCisaDstOperand(
4831 Decl, R->Stride, R->Offset >> genx::log2(GrfByteSize),
4832 (R->Offset & (GrfByteSize - 1)) / R->ElementBytes);
4833 }
4834 return ResultOperand;
4835 }
4836
createIndirectOperand(Region * R,Signedness Signed,unsigned Mod,bool IsDest,unsigned MaxWidth)4837 VISA_VectorOpnd *GenXKernelBuilder::createIndirectOperand(Region *R,
4838 Signedness Signed,
4839 unsigned Mod,
4840 bool IsDest,
4841 unsigned MaxWidth) {
4842 VISA_VectorOpnd *ResultOperand = nullptr;
4843 // Check if the indirect operand is a baled in rdregion.
4844 Value *Indirect = R->Indirect;
4845 if (GenXIntrinsic::isRdRegion(Indirect)) {
4846 auto AddrRdR = cast<Instruction>(Indirect);
4847 Indirect = AddrRdR->getOperand(0);
4848 }
4849 // Write the vISA indirect operand.
4850 Register *IdxReg =
4851 getRegForValueAndSaveAlias(KernFunc, Indirect, DONTCARESIGNED);
4852 IGC_ASSERT(IdxReg->Category == RegCategory::ADDRESS);
4853
4854 bool NotCrossGrf = !(R->Offset & (GrfByteSize - 1));
4855 if (!NotCrossGrf) {
4856 // Determine the NotCrossGrf bit setting (whether we can guarantee
4857 // that adding an indirect region's constant offset does not cause
4858 // a carry out of bit 4)
4859 // by looking at the partial constant for the index
4860 // before the constant is added on.
4861 // This only works for a scalar index.
4862 if (auto IndirInst = dyn_cast<Instruction>(R->Indirect)) {
4863 auto A = AI.get(IndirInst);
4864 unsigned Mask = (1U << std::min(5U, A.getLogAlign())) - 1;
4865 if (Mask) {
4866 if ((A.getExtraBits() & Mask) + (R->Offset & Mask) <= Mask &&
4867 (unsigned)(R->Offset & (GrfByteSize - 1)) <= Mask) {
4868 // The alignment and extrabits are such that adding R->Offset
4869 // cannot cause a carry from bit 4 to bit 5.
4870 NotCrossGrf = true;
4871 }
4872 }
4873 }
4874 }
4875 visa::TypeDetails TD(Func->getParent()->getDataLayout(), R->ElementTy,
4876 Signed);
4877 unsigned VStride = R->VStride;
4878 if (isa<VectorType>(R->Indirect->getType()))
4879 // multi indirect (vector index), set vstride
4880 VStride = 0x8000; // field to null
4881 VISA_AddrVar *AddrDecl = IdxReg->GetVar<VISA_AddrVar>(Kernel);
4882 if (IsDest) {
4883 CISA_CALL(Kernel->CreateVISAIndirectDstOperand(
4884 ResultOperand, AddrDecl, R->IndirectAddrOffset, R->Offset, R->Stride,
4885 (VISA_Type)TD.VisaType));
4886 } else {
4887 CISA_CALL(Kernel->CreateVISAIndirectSrcOperand(
4888 ResultOperand, AddrDecl, static_cast<VISA_Modifier>(Mod),
4889 R->IndirectAddrOffset, R->Offset, VStride, R->Width, R->Stride,
4890 (VISA_Type)TD.VisaType));
4891 }
4892 return ResultOperand;
4893 }
4894
4895
4896 /***********************************************************************
4897 * createRegionOperand : create a vISA region operand
4898 *
4899 * Enter: R = Region
4900 * RegNum = vISA register number (ignored if region is indirect)
4901 * Signed = whether signed or unsigned required (only used for
4902 * indirect operand)
4903 * Mod = modifiers
4904 * IsDest = true if destination operand
4905 * MaxWidth = maximum width (used to stop TWICEWIDTH operand
4906 * getting a width bigger than the execution size, but
4907 * for other uses defaults to 16)
4908 */
4909 VISA_VectorOpnd *
createRegionOperand(Region * R,VISA_GenVar * Decl,Signedness Signed,unsigned Mod,bool IsDest,unsigned MaxWidth)4910 GenXKernelBuilder::createRegionOperand(Region *R, VISA_GenVar *Decl,
4911 Signedness Signed, unsigned Mod,
4912 bool IsDest, unsigned MaxWidth) {
4913 deduceRegion(R, IsDest, MaxWidth);
4914
4915 if (R->Indirect)
4916 return createIndirectOperand(R, Signed, Mod, IsDest, MaxWidth);
4917 else
4918 return createGeneralOperand(R, Decl, Signed, Mod, IsDest, MaxWidth);
4919 }
4920
4921
isInLoop(BasicBlock * BB)4922 bool GenXKernelBuilder::isInLoop(BasicBlock *BB) {
4923 Function *BBFunc = BB->getParent();
4924 // Cannot predict for stack calls and indirectly called functions.
4925 // Let's assume the function is in a loop.
4926 if (genx::requiresStackCall(BBFunc) || genx::isReferencedIndirectly(BBFunc))
4927 return true;
4928
4929 IGC_ASSERT(LIs->getLoopInfo(BBFunc));
4930 if (LIs->getLoopInfo(BBFunc)->getLoopFor(BB))
4931 return true; // inside loop in this function
4932 // Now we need to see if this function is called from inside a loop.
4933 // First check the cache.
4934 auto i = IsInLoopCache.find(BBFunc);
4935 if (i != IsInLoopCache.end())
4936 return i->second;
4937 // Now check all call sites. This recurses as deep as the depth of the call
4938 // graph, which must be acyclic as GenX does not allow recursion.
4939 bool InLoop = false;
4940 for (auto ui = BBFunc->use_begin(), ue = BBFunc->use_end(); ui != ue; ++ui) {
4941 auto CI = dyn_cast<CallInst>(ui->getUser());
4942 if (!checkFunctionCall(CI, BBFunc))
4943 continue;
4944 IGC_ASSERT(ui->getOperandNo() == CI->getNumArgOperands());
4945 if (CI->getFunction() == BBFunc)
4946 continue;
4947 if (isInLoop(CI->getParent())) {
4948 InLoop = true;
4949 break;
4950 }
4951 }
4952 IsInLoopCache[BBFunc] = InLoop;
4953 return InLoop;
4954 }
4955
addWriteRegionLifetimeStartInst(Instruction * WrRegion)4956 void GenXKernelBuilder::addWriteRegionLifetimeStartInst(Instruction *WrRegion) {
4957 if (!GenXIntrinsic::isWrRegion(WrRegion))
4958 return; // No lifetime start for wrpredregion.
4959 // See if the wrregion is in a loop.
4960 auto BB = WrRegion->getParent();
4961 if (!isInLoop(BB))
4962 return; // not in loop
4963 // See if the wrregion is the first of a sequence in the same basic block
4964 // that together write the whole register. We assume that each region is
4965 // contiguous, and the regions are written in ascending offset order, as
4966 // that is what legalization does if the original write was to the whole
4967 // register.
4968 unsigned NumElementsSoFar = 0;
4969 unsigned TotalNumElements = 1;
4970 if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(WrRegion->getType()))
4971 TotalNumElements = VT->getNumElements();
4972 Instruction *ThisWr = WrRegion;
4973 for (;;) {
4974 Region R = makeRegionFromBaleInfo(ThisWr, BaleInfo());
4975 if (R.Indirect)
4976 break;
4977 if ((unsigned)R.Offset != NumElementsSoFar * R.ElementBytes)
4978 break;
4979 if (R.Stride != 1 && R.Width != 1)
4980 break;
4981 if (R.Width != R.NumElements)
4982 break;
4983 NumElementsSoFar += R.NumElements;
4984 if (NumElementsSoFar == TotalNumElements)
4985 return; // whole register is written
4986 // Go on to next wrregion in the same basic block if any.
4987 if (!ThisWr->hasOneUse())
4988 break;
4989 ThisWr = cast<Instruction>(ThisWr->use_begin()->getUser());
4990 if (!GenXIntrinsic::isWrRegion(ThisWr))
4991 break;
4992 if (ThisWr->getParent() != BB)
4993 break;
4994 }
4995 // The wrregion is in a loop and is not the first in a sequence in the same
4996 // basic block that writes the whole register. Write a lifetime start.
4997 addLifetimeStartInst(WrRegion);
4998 }
4999
5000 /**************************************************************************************************
5001 * addLifetimeStartInst : add a lifetime.start instruction
5002 *
5003 * Enter: Inst = value to use in lifetime.start
5004 */
addLifetimeStartInst(Instruction * Inst)5005 void GenXKernelBuilder::addLifetimeStartInst(Instruction *Inst) {
5006 VISA_VectorOpnd *opnd = nullptr;
5007 auto Reg = getRegForValueOrNullAndSaveAlias(KernFunc, Inst);
5008 if (!Reg)
5009 return; // no register allocated such as being indirected.
5010
5011 switch (Reg->Category) {
5012 case RegCategory::GENERAL:
5013 opnd = createCisaDstOperand(Reg->GetVar<VISA_GenVar>(Kernel), 1, 0, 0);
5014 break;
5015 case RegCategory::ADDRESS:
5016 CISA_CALL(Kernel->CreateVISAAddressDstOperand(
5017 opnd, Reg->GetVar<VISA_AddrVar>(Kernel), 0));
5018 break;
5019 #if 0 // Not currently used.
5020 case RegCategory::PREDICATE:
5021 break;
5022 #endif // 0
5023 default:
5024 report_fatal_error("createLifetimeStartInst: Invalid register category");
5025 break;
5026 }
5027 addDebugInfo();
5028 CISA_CALL(Kernel->AppendVISALifetime(LIFETIME_START, opnd));
5029 }
5030
5031 /***********************************************************************
5032 * addDebugInfo : add debug infromation
5033 */
addDebugInfo()5034 void GenXKernelBuilder::addDebugInfo() {
5035 // Check if we have a pending debug location.
5036 if (PendingLine) {
5037 // Do the source location debug info with vISA FILE and LOC instructions.
5038 if (PendingFilename != "" && (PendingFilename != LastFilename ||
5039 PendingDirectory != LastDirectory)) {
5040 SmallString<256> Filename;
5041 // Bodge here to detect Windows absolute path even when built on cygwin.
5042 if (sys::path::is_absolute(PendingFilename) ||
5043 (PendingFilename.size() > 2 && PendingFilename[1] == ':'))
5044 Filename = PendingFilename;
5045 else {
5046 Filename = PendingDirectory;
5047 sys::path::append(Filename, PendingFilename);
5048 }
5049 CISA_CALL(Kernel->AppendVISAMiscFileInst(Filename.c_str()));
5050 GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(),
5051 "FILE");
5052 LastDirectory = PendingDirectory;
5053 LastFilename = PendingFilename;
5054 }
5055 if (PendingLine != LastLine) {
5056 LLVM_DEBUG(dbgs() << "LOC instruction appended:" << PendingLine << "\n");
5057 CISA_CALL(Kernel->AppendVISAMiscLOC(PendingLine));
5058 GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(),
5059 "LOC");
5060 LastLine = PendingLine;
5061 PendingLine = 0;
5062 }
5063 }
5064 // +1 since we update debug info BEFORE appending the instruction
5065 GM->updateVisaMapping(KernFunc, CurrentInst, Kernel->getvIsaInstCount() + 1,
5066 CurrentInst ? CurrentInst->getName() : "Init_Special");
5067 }
5068
emitOptimizationHints()5069 void GenXKernelBuilder::emitOptimizationHints() {
5070 if (skipOptWithLargeBlock(*FG))
5071 return;
5072
5073 const auto &DL = FG->getModule()->getDataLayout();
5074 // Track rp considering byte variable widening.
5075 PressureTracker RP(DL, *FG, Liveness, /*ByteWidening*/ true);
5076 const std::vector<genx::LiveRange *> &WidenLRs = RP.getWidenVariables();
5077
5078 if (!SkipNoWiden) {
5079 for (auto LR : WidenLRs) {
5080 SimpleValue SV = *LR->value_begin();
5081 auto *R = getRegForValueOrNullAndSaveAlias(FG->getHead(), SV);
5082 // This variable is being used in or crossing a high register pressure
5083 // region. Set an optimization hint not to widen it.
5084 if (R && RP.intersectWithRedRegion(LR)) {
5085 R->addAttribute(addStringToPool("NoWidening"), "");
5086 RP.decreasePressure(LR);
5087 }
5088 }
5089 }
5090 }
5091
5092 /***********************************************************************
5093 * addLabelInst : add a label instruction for a basic block or join
5094 */
addLabelInst(const Value * BB)5095 void GenXKernelBuilder::addLabelInst(const Value *BB) {
5096 GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(), "LBL");
5097 auto LabelID = getOrCreateLabel(BB, LABEL_BLOCK);
5098 IGC_ASSERT(LabelID < Labels.size());
5099 CISA_CALL(Kernel->AppendVISACFLabelInst(Labels[LabelID]));
5100 }
5101
5102 /***********************************************************************
5103 * getOrCreateLabel : get/create label number for a Function or BasicBlock
5104 */
getOrCreateLabel(const Value * V,int Kind)5105 unsigned GenXKernelBuilder::getOrCreateLabel(const Value *V, int Kind) {
5106 int Num = getLabel(V);
5107 if (Num >= 0)
5108 return Num;
5109 Num = Labels.size();
5110 setLabel(V, Num);
5111 VISA_LabelOpnd *Decl = nullptr;
5112
5113 // Replicate the functionality of the old compiler and make the first label
5114 // for a function contain the name (makes sure the function label is unique)
5115 // It's not clear this is strictly necessary any more (but doesn't do any
5116 // harm and may even make reading the intermediate forms easier)
5117 if (Kind == LABEL_SUBROUTINE) {
5118 StringRef N = TheKernelMetadata.getName();
5119 std::string NameBuf;
5120 if (V != FG->getHead()) {
5121 // This is a subroutine, not the kernel/function at the head of the
5122 // FunctionGroup. Use the name of the subroutine.
5123 N = V->getName();
5124 } else {
5125 // For a kernel/function name, fix illegal characters. The jitter uses
5126 // the same name for the label in the .asm file, and aubload does not
5127 // like the illegal characters.
5128 NameBuf = legalizeName(N.str());
5129 N = NameBuf;
5130 }
5131 auto SubroutineLabel =
5132 cutString(Twine(N) + Twine("_BB_") + Twine(Labels.size()));
5133 LLVM_DEBUG(dbgs() << "creating SubroutineLabel: " << SubroutineLabel
5134 << "\n");
5135 CISA_CALL(Kernel->CreateVISALabelVar(Decl, SubroutineLabel.c_str(),
5136 VISA_Label_Kind(Kind)));
5137 } else if (Kind == LABEL_BLOCK) {
5138 auto BlockLabel = cutString(Twine("BB_") + Twine(Labels.size()));
5139 LLVM_DEBUG(dbgs() << "creating BlockLabel: " << BlockLabel << "\n");
5140 CISA_CALL(Kernel->CreateVISALabelVar(Decl, BlockLabel.c_str(),
5141 VISA_Label_Kind(Kind)));
5142 } else if (Kind == LABEL_FC) {
5143 const auto *F = cast<Function>(V);
5144 IGC_ASSERT(F->hasFnAttribute("CMCallable"));
5145 StringRef N(F->getName());
5146 auto FCLabel = cutString(Twine(N));
5147 LLVM_DEBUG(dbgs() << "creating FCLabel: " << FCLabel << "\n");
5148 CISA_CALL(Kernel->CreateVISALabelVar(Decl, FCLabel.c_str(),
5149 VISA_Label_Kind(Kind)));
5150 } else {
5151 StringRef N = V->getName();
5152 auto Label =
5153 cutString(Twine("_") + Twine(N) + Twine("_") + Twine(Labels.size()));
5154 LLVM_DEBUG(dbgs() << "creating Label: " << Label << "\n");
5155 CISA_CALL(
5156 Kernel->CreateVISALabelVar(Decl, Label.c_str(), VISA_Label_Kind(Kind)));
5157 }
5158 IGC_ASSERT(Decl);
5159 Labels.push_back(Decl);
5160 return Num;
5161 }
5162
buildInlineAsm(CallInst * CI)5163 void GenXKernelBuilder::buildInlineAsm(CallInst *CI) {
5164 IGC_ASSERT_MESSAGE(CI->isInlineAsm(), "Inline asm expected");
5165 InlineAsm *IA = dyn_cast<InlineAsm>(IGCLLVM::getCalledValue(CI));
5166 std::string AsmStr(IA->getAsmString());
5167 std::stringstream &AsmTextStream = CisaBuilder->GetAsmTextStream();
5168
5169 // Nothing to substitute if no constraints provided
5170 if (IA->getConstraintString().empty()) {
5171 AsmTextStream << AsmStr << std::endl;
5172 return;
5173 }
5174
5175 unsigned NumOutputs = genx::getInlineAsmNumOutputs(CI);
5176 auto ConstraintsInfo = genx::getGenXInlineAsmInfo(CI);
5177
5178 // Scan asm string in reverse direction to match larger numbers first
5179 for (int ArgNo = ConstraintsInfo.size() - 1; ArgNo >= 0; ArgNo--) {
5180 // Regexp to match number of operand
5181 Regex R("\\$+" + llvm::to_string(ArgNo));
5182 if (!R.match(AsmStr))
5183 continue;
5184 // Operand that must be substituded into inline assembly string
5185 Value *InlasmOp = nullptr;
5186 std::string InlasmOpAsString;
5187 // For output collect destination descriptor with
5188 // baling info and WrRegion instruction
5189 DstOpndDesc DstDesc;
5190 auto Info = ConstraintsInfo[ArgNo];
5191 if (Info.isOutput()) {
5192 // If result is a struct than inline assembly
5193 // instruction has multiple outputs
5194 if (isa<StructType>(CI->getType())) {
5195 // Go through all users of a result and find extractelement with
5196 // ArgNo indice: ArgNo is a number of a constraint in constraint
5197 // list
5198 for (auto ui = CI->use_begin(), ue = CI->use_end(); ui != ue; ++ui) {
5199 auto EV = dyn_cast<ExtractValueInst>(ui->getUser());
5200 if (EV && (EV->getIndices()[0] == ArgNo)) {
5201 InlasmOp = EV;
5202 break;
5203 }
5204 }
5205 } else
5206 // Single output
5207 InlasmOp = CI;
5208
5209 if (InlasmOp) {
5210 Instruction *Inst = cast<Instruction>(InlasmOp);
5211 Instruction *Head = Baling->getBaleHead(Inst);
5212 BaleInfo BI = Baling->getBaleInfo(Head);
5213 // If head is g_store than change head to store's
5214 // operand and check if it's baled wrr
5215 if (BI.Type == BaleInfo::GSTORE) {
5216 DstDesc.GStore = Head;
5217 Head = cast<Instruction>(Head->getOperand(0));
5218 BI = Baling->getBaleInfo(Head);
5219 }
5220 if (BI.Type == BaleInfo::WRREGION) {
5221 DstDesc.WrRegion = Head;
5222 DstDesc.WrRegionBI = BI;
5223 }
5224 InlasmOpAsString = createInlineAsmDestinationOperand(
5225 InlasmOp, DONTCARESIGNED, Info.getConstraintType(), 0, DstDesc);
5226 } else {
5227 // Can't deduce output operand because there are no users
5228 // but we have register allocated. If region is needed we can use
5229 // default one based one type.
5230 SimpleValue SV(CI, ArgNo);
5231 Register *Reg =
5232 getRegForValueAndSaveAlias(KernFunc, SV, DONTCARESIGNED);
5233 Region R(SV.getType());
5234 InlasmOpAsString =
5235 createInlineAsmOperand(Reg, &R, true /*IsDst*/, DONTCARESIGNED,
5236 Info.getConstraintType(), 0);
5237 }
5238 } else {
5239 // Input of inline assembly
5240 InlasmOp = CI->getArgOperand(ArgNo - NumOutputs);
5241 bool IsBaled = false;
5242 if (GenXIntrinsic::isRdRegion(InlasmOp)) {
5243 Instruction *RdR = cast<Instruction>(InlasmOp);
5244 IsBaled = Baling->isBaled(RdR);
5245 }
5246 InlasmOpAsString = createInlineAsmSourceOperand(
5247 InlasmOp, DONTCARESIGNED, IsBaled, Info.getConstraintType());
5248 }
5249 // Substitute string name of the variable until
5250 // there are no possible sustitutions. Do-while
5251 // since first match was checked in the beginning
5252 // of the loop.
5253 do {
5254 AsmStr = R.sub(InlasmOpAsString, AsmStr);
5255 } while (R.match(AsmStr));
5256 }
5257
5258 AsmTextStream << "\n// INLASM BEGIN\n"
5259 << AsmStr << "\n// INLASM END\n"
5260 << std::endl;
5261 }
5262
buildCall(CallInst * CI,const DstOpndDesc & DstDesc)5263 void GenXKernelBuilder::buildCall(CallInst *CI, const DstOpndDesc &DstDesc) {
5264 LLVM_DEBUG(dbgs() << CI << "\n");
5265 Function *Callee = CI->getCalledFunction();
5266 IGC_ASSERT_MESSAGE(
5267 !Callee || !Callee->isDeclaration(),
5268 "Currently VC backend does not support modules with external functions");
5269
5270 if (!Callee || genx::requiresStackCall(Callee)) {
5271 if (UseNewStackBuilder)
5272 buildStackCallLight(CI, DstDesc);
5273 else
5274 buildStackCall(CI, DstDesc);
5275 return;
5276 }
5277
5278 unsigned LabelKind = LABEL_SUBROUTINE;
5279 if (Callee->hasFnAttribute("CMCallable"))
5280 LabelKind = LABEL_FC;
5281 else
5282 IGC_ASSERT_MESSAGE(FG == FG->getParent()->getAnyGroup(Callee),
5283 "unexpected call to outside FunctionGroup");
5284
5285 // Check whether the called function has a predicate arg that is EM.
5286 int EMOperandNum = -1;
5287 for (auto ai = Callee->arg_begin(), ae = Callee->arg_end(); ai != ae; ++ai) {
5288 auto Arg = &*ai;
5289 if (!Arg->getType()->getScalarType()->isIntegerTy(1))
5290 continue;
5291 if (Liveness->getLiveRange(Arg)->getCategory() == RegCategory::EM) {
5292 EMOperandNum = Arg->getArgNo();
5293 break;
5294 }
5295 }
5296
5297 if (EMOperandNum < 0) {
5298 addDebugInfo();
5299 // Scalar calls must be marked with NoMask
5300 CISA_CALL(Kernel->AppendVISACFCallInst(
5301 nullptr, vISA_EMASK_M1_NM, EXEC_SIZE_1,
5302 Labels[getOrCreateLabel(Callee, LabelKind)]));
5303 } else {
5304 auto PredicateOpnd = NoMask ? nullptr : createPred(CI, BaleInfo(), EMOperandNum);
5305 addDebugInfo();
5306 auto *VTy = cast<IGCLLVM::FixedVectorType>(
5307 CI->getArgOperand(EMOperandNum)->getType());
5308 VISA_Exec_Size ExecSize = getExecSizeFromValue(VTy->getNumElements());
5309 CISA_CALL(Kernel->AppendVISACFCallInst(
5310 PredicateOpnd, vISA_EMASK_M1, ExecSize,
5311 Labels[getOrCreateLabel(Callee, LabelKind)]));
5312 }
5313 }
5314
buildRet(ReturnInst * RI)5315 void GenXKernelBuilder::buildRet(ReturnInst *RI) {
5316 uint32_t FloatControl = 0;
5317 auto F = RI->getFunction();
5318 F->getFnAttribute(genx::FunctionMD::CMFloatControl)
5319 .getValueAsString()
5320 .getAsInteger(0, FloatControl);
5321 FloatControl &= CR_Mask;
5322 if (FloatControl != DefaultFloatControl) {
5323 buildControlRegUpdate(CR_Mask, true);
5324 if (DefaultFloatControl)
5325 buildControlRegUpdate(DefaultFloatControl, false);
5326 }
5327 addDebugInfo();
5328 if (!genx::isKernel(F) &&
5329 (genx::requiresStackCall(Func) || genx::isReferencedIndirectly(F))) {
5330 CISA_CALL(Kernel->AppendVISACFFunctionRetInst(nullptr, vISA_EMASK_M1,
5331 EXEC_SIZE_16));
5332 } else {
5333 CISA_CALL(Kernel->AppendVISACFRetInst(nullptr, vISA_EMASK_M1, EXEC_SIZE_1));
5334 }
5335 }
5336
buildGetHWID(CallInst * CI,const DstOpndDesc & DstDesc)5337 void GenXKernelBuilder::buildGetHWID(CallInst *CI, const DstOpndDesc &DstDesc) {
5338 IGC_ASSERT(Subtarget);
5339 if (Subtarget->getsHWTIDFromPredef()) {
5340 // Use predefined variable
5341 VISA_GenVar *hwid = nullptr;
5342 CISA_CALL(Kernel->GetPredefinedVar(hwid, PREDEFINED_HW_TID));
5343
5344 VISA_VectorOpnd *dst = createDestination(CI, DONTCARESIGNED, 0, DstDesc);
5345 VISA_VectorOpnd *src = nullptr;
5346 CISA_CALL(
5347 Kernel->CreateVISASrcOperand(src, hwid, MODIFIER_NONE, 0, 1, 0, 0, 0));
5348 CISA_CALL(Kernel->AppendVISADataMovementInst(
5349 ISA_MOV, nullptr /*Pred*/, false /*Mod*/, vISA_EMASK_M1_NM, EXEC_SIZE_1,
5350 dst, src));
5351
5352 return;
5353 }
5354
5355 // Build HWTID from sr0
5356
5357 // Initialize temporary regs
5358 VISA_GenVar *HwtidTmp0 = nullptr, *HwtidTmp1 = nullptr, *HwtidSR0 = nullptr;
5359 CISA_CALL(Kernel->CreateVISAGenVar(HwtidTmp0, "hwtid_tmp0", 1, ISA_TYPE_UD,
5360 ALIGN_DWORD));
5361 CISA_CALL(Kernel->CreateVISAGenVar(HwtidTmp1, "hwtid_tmp1", 1, ISA_TYPE_UD,
5362 ALIGN_DWORD));
5363 CISA_CALL(Kernel->CreateVISAGenVar(HwtidSR0, "hwtid_sr0", 1, ISA_TYPE_UD,
5364 ALIGN_DWORD));
5365
5366 // Local helper for instruction generation
5367 auto generateLogicOrShift = [this](ISA_Opcode Opcode, VISA_GenVar *Dst,
5368 VISA_GenVar *Left, uint32_t RightImm,
5369 VISA_GenVar *Right = nullptr) -> void {
5370 VISA_VectorOpnd *LeftOp = nullptr, *RightOp = nullptr, *DstOp = nullptr;
5371 CISA_CALL(Kernel->CreateVISASrcOperand(LeftOp, Left, MODIFIER_NONE, 0, 1, 0,
5372 0, 0));
5373 if (Right) {
5374 CISA_CALL(Kernel->CreateVISASrcOperand(RightOp, Right, MODIFIER_NONE, 0,
5375 1, 0, 0, 0));
5376 } else {
5377 CISA_CALL(Kernel->CreateVISAImmediate(RightOp, &RightImm,
5378 getVISAImmTy(ISA_TYPE_UD)));
5379 }
5380 CISA_CALL(Kernel->CreateVISADstOperand(DstOp, Dst, 1, 0, 0));
5381 CISA_CALL(Kernel->AppendVISALogicOrShiftInst(
5382 Opcode, nullptr /*Pred*/, false /*Mod*/, vISA_EMASK_M1_NM, EXEC_SIZE_1,
5383 DstOp, LeftOp, RightOp));
5384 };
5385
5386 // Local helper for masked sr0 value load
5387 auto loadMaskedSR0 = [this, generateLogicOrShift,
5388 HwtidSR0](unsigned MaskBits) -> void {
5389 auto SR0Mask = maskTrailingOnes<uint32_t>(MaskBits);
5390
5391 VISA_GenVar *sr0 = nullptr;
5392 CISA_CALL(Kernel->GetPredefinedVar(sr0, PREDEFINED_SR0));
5393 generateLogicOrShift(ISA_AND, HwtidSR0, sr0, SR0Mask);
5394 };
5395
5396 // Local helper for reserved bits elimination
5397 auto removeBitRange = [this, generateLogicOrShift, HwtidTmp0, HwtidTmp1,
5398 HwtidSR0](unsigned RemoveBit, unsigned Range) -> void {
5399 // src = (src & mask) | ((src >> range) & ~mask)
5400 auto TmpMask = maskTrailingOnes<uint32_t>(RemoveBit);
5401 // tmp0 = (src & mask)
5402 generateLogicOrShift(ISA_AND, HwtidTmp0, HwtidSR0, TmpMask);
5403 // tmp1 = (src >> range)
5404 generateLogicOrShift(ISA_SHR, HwtidTmp1, HwtidSR0, Range);
5405 // tmp1 = (tmp1 & ~mask)
5406 generateLogicOrShift(ISA_AND, HwtidTmp1, HwtidTmp1, ~TmpMask);
5407 // src = (tmp0 | tmp1)
5408 generateLogicOrShift(ISA_OR, HwtidSR0, HwtidTmp0, 0 /*RightImm*/,
5409 HwtidTmp1);
5410 };
5411
5412 // Local helper for passing final hwtid to the dst
5413 auto writeHwtidToDst = [this, &DstDesc, HwtidSR0, CI](void) -> void {
5414 VISA_VectorOpnd *src = nullptr, *dst = nullptr;
5415 CISA_CALL(Kernel->CreateVISASrcOperand(src, HwtidSR0, MODIFIER_NONE, 0, 1,
5416 0, 0, 0));
5417 dst = createDestination(CI, DONTCARESIGNED, 0, DstDesc);
5418 CISA_CALL(Kernel->AppendVISADataMovementInst(
5419 ISA_MOV, nullptr, false, vISA_EMASK_M1_NM, EXEC_SIZE_1, dst, src));
5420 };
5421
5422 // XeHP_SDV
5423 // [13:11] Slice ID.
5424 // [10:9] Dual - SubSlice ID
5425 // [8] SubSlice ID.
5426 // [7] : EUID[2]
5427 // [6] : Reserved
5428 // [5:4] EUID[1:0]
5429 // [3] : Reserved MBZ
5430 // [2:0] : TID
5431 //
5432 // HWTID is calculated using a concatenation of TID:EUID:SubSliceID:SliceID
5433
5434 // Load sr0 with [13:0] mask
5435 loadMaskedSR0(14);
5436
5437 // Remove reserved bits
5438 removeBitRange(6, 1);
5439 removeBitRange(3, 1);
5440
5441 // Store final value
5442 writeHwtidToDst();
5443 }
5444
5445 /***********************************************************************
5446 * createRawSourceOperand : create raw source operand of instruction
5447 *
5448 * Enter: Inst = instruction to get source operand from
5449 * OperandNum = operand number
5450 * BI = BaleInfo for Inst (so we can tell whether a rdregion
5451 * or modifier is bundled in)
5452 */
createRawSourceOperand(const Instruction * Inst,unsigned OperandNum,BaleInfo BI,Signedness Signed)5453 VISA_RawOpnd *GenXKernelBuilder::createRawSourceOperand(const Instruction *Inst,
5454 unsigned OperandNum,
5455 BaleInfo BI,
5456 Signedness Signed) {
5457 VISA_RawOpnd *ResultOperand = nullptr;
5458 Value *V = Inst->getOperand(OperandNum);
5459 if (isa<UndefValue>(V)) {
5460 CISA_CALL(Kernel->CreateVISANullRawOperand(ResultOperand, false));
5461 } else {
5462 unsigned ByteOffset = 0;
5463 bool Baled = Baling->getBaleInfo(Inst).isOperandBaled(OperandNum);
5464 if (Baled) {
5465 Instruction *RdRegion = cast<Instruction>(V);
5466 Region R = makeRegionFromBaleInfo(RdRegion, BaleInfo());
5467 ByteOffset = R.Offset;
5468 V = RdRegion->getOperand(0);
5469 }
5470 LLVM_DEBUG(dbgs() << "createRawSourceOperand for "
5471 << (Baled ? "baled" : "non-baled") << " value: ");
5472 LLVM_DEBUG(V->dump());
5473 LLVM_DEBUG(dbgs() << "\n");
5474 Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
5475 IGC_ASSERT(Reg->Category == RegCategory::GENERAL);
5476 LLVM_DEBUG(dbgs() << "CreateVISARawOperand: "; Reg->print(dbgs()); dbgs() << "\n");
5477 CISA_CALL(Kernel->CreateVISARawOperand(
5478 ResultOperand, Reg->GetVar<VISA_GenVar>(Kernel), ByteOffset));
5479 }
5480 return ResultOperand;
5481 }
5482
5483 /***********************************************************************
5484 * createRawDestination : create raw destination operand
5485 *
5486 * Enter: Inst = destination value
5487 * WrRegion = 0 else wrregion that destination is baled into
5488 *
5489 * A raw destination can be baled into a wrregion, but only if the region
5490 * is direct and its start index is GRF aligned.
5491 */
5492 VISA_RawOpnd *
createRawDestination(Value * V,const DstOpndDesc & DstDesc,Signedness Signed)5493 GenXKernelBuilder::createRawDestination(Value *V, const DstOpndDesc &DstDesc,
5494 Signedness Signed) {
5495 VISA_RawOpnd *ResultOperand = nullptr;
5496 unsigned ByteOffset = 0;
5497 if (DstDesc.WrRegion) {
5498 V = DstDesc.WrRegion;
5499 Region R = makeRegionFromBaleInfo(DstDesc.WrRegion, BaleInfo());
5500 ByteOffset = R.Offset;
5501 }
5502 Type *OverrideType = nullptr;
5503 if (DstDesc.GStore) {
5504 V = getUnderlyingGlobalVariable(DstDesc.GStore->getOperand(1));
5505 IGC_ASSERT_MESSAGE(V, "out of sync");
5506 OverrideType = DstDesc.GStore->getOperand(0)->getType();
5507 }
5508 LLVM_DEBUG(dbgs() << "createRawDestination for "
5509 << (DstDesc.GStore ? "global" : "non-global") << " value: ");
5510 LLVM_DEBUG(V->dump());
5511 LLVM_DEBUG(dbgs() << "\n");
5512 if (DstDesc.WrPredefReg)
5513 V = DstDesc.WrPredefReg;
5514 Register *Reg =
5515 getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed, OverrideType);
5516 if (!Reg) {
5517 // No register assigned. This happens to an unused raw result where the
5518 // result is marked as RAW_NULLALLOWED in GenXIntrinsics.
5519 CISA_CALL(Kernel->CreateVISANullRawOperand(ResultOperand, true));
5520 } else {
5521 IGC_ASSERT(Reg->Category == RegCategory::GENERAL);
5522 LLVM_DEBUG(dbgs() << "CreateVISARawOperand: "; Reg->print(dbgs()); dbgs() << "\n");
5523 CISA_CALL(Kernel->CreateVISARawOperand(
5524 ResultOperand, Reg->GetVar<VISA_GenVar>(Kernel), ByteOffset));
5525 }
5526 return ResultOperand;
5527 }
5528
5529 /***********************************************************************
5530 * getLabel : get label number for a Function or BasicBlock
5531 *
5532 * Return: label number, -1 if none found
5533 */
getLabel(const Value * V) const5534 int GenXKernelBuilder::getLabel(const Value *V) const {
5535 auto It = LabelMap.find(V);
5536 if (It != LabelMap.end())
5537 return It->second;
5538 return -1;
5539 }
5540
5541 /***********************************************************************
5542 * setLabel : set the label number for a Function or BasicBlock
5543 */
setLabel(const Value * V,unsigned Num)5544 void GenXKernelBuilder::setLabel(const Value *V, unsigned Num) {
5545 LabelMap[V] = Num;
5546 }
5547
addStringToPool(StringRef Str)5548 unsigned GenXKernelBuilder::addStringToPool(StringRef Str) {
5549 auto val = std::pair<std::string, unsigned>(Str.begin(), StringPool.size());
5550 auto Res = StringPool.insert(val);
5551 return Res.first->second;
5552 }
5553
getStringByIndex(unsigned Val)5554 StringRef GenXKernelBuilder::getStringByIndex(unsigned Val) {
5555 for (const auto &it : StringPool) {
5556 if (it.second == Val)
5557 return it.first;
5558 }
5559 IGC_ASSERT_EXIT_MESSAGE(0, "Can't find string by index.");
5560 }
5561
5562 /***********************************************************************
5563 * Get size of the argument of type 'type' in bytes considering layout of
5564 * subtypes of aggregate type in units of size 'mod'
5565 * mod is typically 32 (GRF) or 16 (oword)
5566 */
getValueSize(Type * T,unsigned Mod) const5567 unsigned GenXKernelBuilder::getValueSize(Type *T, unsigned Mod) const {
5568 unsigned Result = 0;
5569 if (T->isAggregateType()) {
5570 for (unsigned i = 0; i < T->getStructNumElements(); i++) {
5571 Result += getValueSize(T->getContainedType(i)) / Mod +
5572 (getValueSize(T->getContainedType(i)) % Mod ? 1 : 0);
5573 }
5574 Result *= Mod;
5575 } else
5576 Result = FG->getModule()->getDataLayout().getTypeSizeInBits(T) / 8;
5577 return Result;
5578 }
5579
getFuncArgsSize(llvm::Function * F)5580 unsigned GenXKernelBuilder::getFuncArgsSize(llvm::Function *F) {
5581 unsigned Result = 0;
5582 for (auto &Arg : F->args())
5583 Result += getValueSize(&Arg);
5584 return Result;
5585 }
5586
5587 GenericCisaVariable *
createCisaVariable(VISAKernel * Kernel,const char * Name,VISA_GenVar * AliasVar,unsigned ByteSize)5588 GenXKernelBuilder::createCisaVariable(VISAKernel *Kernel, const char *Name,
5589 VISA_GenVar *AliasVar,
5590 unsigned ByteSize) {
5591 auto it = CisaVars[Kernel].find(Name);
5592 if (it != CisaVars[Kernel].end())
5593 it->second = GenericCisaVariable(Name, AliasVar, ByteSize);
5594 else
5595 CisaVars[Kernel].insert(
5596 std::make_pair(Name, GenericCisaVariable(Name, AliasVar, ByteSize)));
5597 return &(CisaVars[Kernel].at(Name));
5598 }
5599
deduceByteSize(Value * V,const DataLayout & DL)5600 static unsigned deduceByteSize(Value *V, const DataLayout &DL) {
5601 return DL.getTypeSizeInBits(V->getType()->getScalarType()) / 8;
5602 }
5603
deduceByteSize(CisaVariable * V,const DataLayout & DL)5604 static unsigned deduceByteSize(CisaVariable *V, const DataLayout &DL) {
5605 IGC_ASSERT(V->getType() < ISA_TYPE_NUM);
5606 return CISATypeTable[V->getType()].typeSize;
5607 }
5608
5609 /**************************************************************************************************
5610 * emitVectorCopy : emit vISA that performs copying form Dst to Src
5611 *
5612 * Emit sufficient amount of MOVs from Dst to Src picking size in a greedy manner
5613 *
5614 * T1 and T2 should be llvm::Value and CisaVariable or vice-versa,
5615 * CisaVariable=>CisaVariable or Value=>Value copying is not supported here
5616 *
5617 */
5618 template <typename T1, typename T2>
emitVectorCopy(T1 * Dst,T2 * Src,unsigned & RowOff,unsigned & ColOff,unsigned & SrcRowOff,unsigned & SrcColOff,int TotalSize,bool DoCopy)5619 void GenXKernelBuilder::emitVectorCopy(T1 *Dst, T2 *Src, unsigned &RowOff,
5620 unsigned &ColOff, unsigned &SrcRowOff,
5621 unsigned &SrcColOff, int TotalSize,
5622 bool DoCopy) {
5623 IGC_ASSERT(Subtarget);
5624 auto partCopy = [&, GRFWidth = Subtarget->getGRFByteSize()](int Sz) {
5625 int ByteSz = Sz * deduceByteSize(Dst, DL);
5626 IGC_ASSERT(ByteSz);
5627
5628 unsigned Start = SrcRowOff;
5629 unsigned End = (SrcRowOff * GRFWidth + SrcColOff + ByteSz) / GRFWidth;
5630
5631 // mov is prohibited to span across >2 GRF
5632 if (End - Start >= 2) {
5633 IGC_ASSERT(Sz > 1);
5634 return;
5635 }
5636
5637 while (TotalSize >= ByteSz) {
5638 VISA_VectorOpnd *ArgSrc = nullptr, *ArgDst = nullptr;
5639 unsigned Offset = SrcRowOff * GrfByteSize + SrcColOff;
5640 ArgSrc = createSource(Src, UNSIGNED, Sz, &Offset);
5641 SrcRowOff += (SrcColOff + ByteSz) / GrfByteSize;
5642 SrcColOff = (SrcColOff + ByteSz) % GrfByteSize;
5643
5644 Offset = RowOff * GrfByteSize + ColOff;
5645 ArgDst = createDestination(Dst, UNSIGNED, &Offset);
5646 RowOff += (ColOff + ByteSz) / GrfByteSize;
5647 ColOff = (ColOff + ByteSz) % GrfByteSize;
5648
5649 if (DoCopy)
5650 CISA_CALL(Kernel->AppendVISADataMovementInst(
5651 ISA_MOV, nullptr, false,
5652 (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5653 getExecSizeFromValue(Sz), ArgDst, ArgSrc));
5654 TotalSize -= ByteSz;
5655 }
5656 };
5657 partCopy(16);
5658 partCopy(8);
5659 partCopy(4);
5660 partCopy(2);
5661 partCopy(1);
5662 }
5663
pushStackArg(VISA_StateOpndHandle * Dst,Value * Src,int TotalSz,unsigned & RowOff,unsigned & ColOff,unsigned & SrcRowOff,unsigned & SrcColOff,bool DoCopy)5664 void GenXKernelBuilder::pushStackArg(VISA_StateOpndHandle *Dst, Value *Src,
5665 int TotalSz, unsigned &RowOff,
5666 unsigned &ColOff, unsigned &SrcRowOff,
5667 unsigned &SrcColOff, bool DoCopy) {
5668 VISA_GenVar *StackOff = nullptr, *Sp = nullptr;
5669
5670 auto StackTmp = createCisaVariable(Kernel, "stackTmp", nullptr, TotalSz);
5671
5672 auto TmpType = llvmToVisaType(Src->getType());
5673 auto TmpVar = StackTmp->getAlias(TmpType, Kernel);
5674
5675 CISA_CALL(Kernel->CreateVISAGenVar(StackOff, "stackOff", 1, ISA_TYPE_UQ,
5676 ALIGN_OWORD));
5677 unsigned RawOff = 0;
5678 auto partCopy = [&](int Sz) {
5679 // TODO: mb we have some constant for oword size
5680 int ByteSz = Sz * visa::BytesPerOword;
5681 int CopySz = std::min(ByteSz, TotalSz);
5682
5683 while (TotalSz - ByteSz >= 0 || (TotalSz > 0 && Sz == 1)) {
5684 CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
5685 VISA_VectorOpnd *SpOpSrc1 = nullptr;
5686 VISA_VectorOpnd *SpOpSrc2 = nullptr;
5687 VISA_VectorOpnd *SpOpDst = nullptr;
5688 CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
5689 CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc1, Sp, MODIFIER_NONE, 0, 1,
5690 0, 0, 0));
5691 CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc2, Sp, MODIFIER_NONE, 0, 1,
5692 0, 0, 0));
5693
5694 VISA_VectorOpnd *TmpOffDst = nullptr, *TmpOffSrc = nullptr;
5695 CISA_CALL(Kernel->CreateVISADstOperand(TmpOffDst, StackOff, 1, 0, 0));
5696 CISA_CALL(Kernel->CreateVISASrcOperand(TmpOffSrc, StackOff, MODIFIER_NONE,
5697 0, 1, 0, 0, 0));
5698
5699 emitVectorCopy(TmpVar, Src, RowOff, ColOff, SrcRowOff, SrcColOff, CopySz,
5700 DoCopy);
5701 VISA_VectorOpnd *Imm = nullptr;
5702 unsigned OffVal = Sz;
5703 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
5704 VISA_RawOpnd *RawSrc = nullptr;
5705 CISA_CALL(
5706 Kernel->CreateVISARawOperand(RawSrc, TmpVar->getGenVar(), RawOff));
5707 RawOff += Sz * visa::BytesPerOword;
5708
5709 if (DoCopy) {
5710 CISA_CALL(Kernel->AppendVISADataMovementInst(ISA_MOV, nullptr, false,
5711 vISA_EMASK_M1, EXEC_SIZE_1,
5712 TmpOffDst, SpOpSrc1));
5713 CISA_CALL(Kernel->AppendVISASurfAccessOwordLoadStoreInst(
5714 ISA_OWORD_ST, vISA_EMASK_M1, Dst, getCisaOwordNumFromNumber(Sz),
5715 TmpOffSrc, RawSrc));
5716 }
5717 CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
5718 vISA_EMASK_M1, EXEC_SIZE_1,
5719 SpOpDst, SpOpSrc2, Imm));
5720 TotalSz -= ByteSz;
5721 }
5722 };
5723
5724 partCopy(8);
5725 partCopy(4);
5726 partCopy(2);
5727 partCopy(1);
5728 }
5729
popStackArg(llvm::Value * Dst,VISA_StateOpndHandle * Src,int TotalSz,unsigned & RowOff,unsigned & ColOff,unsigned & SrcRowOff,unsigned & SrcColOff,int & PrevStackOff)5730 void GenXKernelBuilder::popStackArg(llvm::Value *Dst, VISA_StateOpndHandle *Src,
5731 int TotalSz, unsigned &RowOff,
5732 unsigned &ColOff, unsigned &SrcRowOff,
5733 unsigned &SrcColOff, int &PrevStackOff) {
5734 VISA_GenVar *StackOff = nullptr, *Sp = nullptr;
5735
5736 auto StackTmp = createCisaVariable(Kernel, "stackTmp", nullptr, TotalSz);
5737
5738 auto TmpType = llvmToVisaType(Dst->getType());
5739 auto TmpVar = StackTmp->getAlias(TmpType, Kernel);
5740
5741 CISA_CALL(Kernel->CreateVISAGenVar(StackOff, "stackOff", 1, ISA_TYPE_UQ,
5742 ALIGN_OWORD));
5743 auto partCopy = [&](int Sz) {
5744 // TODO: mb we have some constant for oword size
5745 int ByteSz = Sz * visa::BytesPerOword;
5746 while (TotalSz - ByteSz >= 0 || (TotalSz > 0 && Sz == 1)) {
5747 CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
5748 VISA_VectorOpnd *SpOpSrc = nullptr;
5749 CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1,
5750 0, 0, 0));
5751
5752 VISA_VectorOpnd *TmpOffDst = nullptr;
5753 VISA_VectorOpnd *TmpOffSrc = nullptr;
5754 CISA_CALL(Kernel->CreateVISADstOperand(TmpOffDst, StackOff, 1, 0, 0));
5755 CISA_CALL(Kernel->CreateVISASrcOperand(TmpOffSrc, StackOff, MODIFIER_NONE,
5756 0, 1, 0, 0, 0));
5757
5758 VISA_VectorOpnd *Imm = nullptr;
5759 int OffVal = PrevStackOff;
5760 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
5761 PrevStackOff += Sz;
5762 VISA_RawOpnd *RawSrc = nullptr;
5763 CISA_CALL(Kernel->CreateVISARawOperand(RawSrc, TmpVar->getGenVar(), 0));
5764
5765 CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
5766 vISA_EMASK_M1, EXEC_SIZE_1,
5767 TmpOffDst, SpOpSrc, Imm));
5768 CISA_CALL(Kernel->AppendVISASurfAccessOwordLoadStoreInst(
5769 ISA_OWORD_LD, vISA_EMASK_M1, Src, getCisaOwordNumFromNumber(Sz),
5770 TmpOffSrc, RawSrc));
5771 int CopySz = std::min(ByteSz, TotalSz);
5772 SrcRowOff = SrcColOff = 0;
5773 emitVectorCopy(Dst, TmpVar, RowOff, ColOff, SrcRowOff, SrcColOff, CopySz);
5774 TotalSz -= ByteSz;
5775 }
5776 SrcRowOff = SrcColOff = 0;
5777 };
5778
5779 partCopy(8);
5780 partCopy(4);
5781 partCopy(2);
5782 partCopy(1);
5783 }
5784
5785 /**************************************************************************************************
5786 * beginFunction : emit function prologue and arguments passing code
5787 *
5788 * Emit stack-related function prologue if Func is a kernel and there're
5789 * stackcalls or Func is a stack function.
5790 *
5791 * Prologue performs Sp and Fp initialization (both for kernel and stack
5792 * function). For stack functions arguments passing code is generated as well,
5793 * %arg and stackmem passing is supported.
5794 */
beginFunction(Function * Func)5795 void GenXKernelBuilder::beginFunction(Function *Func) {
5796 VISA_GenVar *Sp = nullptr, *Fp = nullptr, *Hwtid = nullptr;
5797 CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
5798 CISA_CALL(Kernel->GetPredefinedVar(Fp, PREDEFINED_FE_FP));
5799 // TODO: consider removing the if for local stack
5800 if (!allowI64Ops()) {
5801 CISA_CALL(Kernel->CreateVISAGenVar(Sp, "Sp", 1, ISA_TYPE_UD, ALIGN_DWORD, Sp));
5802 CISA_CALL(Kernel->CreateVISAGenVar(Fp, "Fp", 1, ISA_TYPE_UD, ALIGN_DWORD, Fp));
5803 }
5804 CISA_CALL(Kernel->GetPredefinedVar(Hwtid, PREDEFINED_HW_TID));
5805
5806 VISA_VectorOpnd *SpOpSrc = nullptr;
5807 VISA_VectorOpnd *SpOpSrc1 = nullptr;
5808 VISA_VectorOpnd *SpOpDst = nullptr;
5809 VISA_VectorOpnd *SpOpDst1 = nullptr;
5810 VISA_VectorOpnd *FpOpDst = nullptr;
5811 VISA_VectorOpnd *FpOpSrc = nullptr;
5812 VISA_VectorOpnd *Imm = nullptr;
5813
5814 CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
5815 CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst1, Sp, 1, 0, 0));
5816 CISA_CALL(Kernel->CreateVISADstOperand(FpOpDst, Fp, 1, 0, 0));
5817
5818 CISA_CALL(
5819 Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
5820 CISA_CALL(
5821 Kernel->CreateVISASrcOperand(SpOpSrc1, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
5822
5823 CISA_CALL(
5824 Kernel->CreateVISASrcOperand(FpOpSrc, Fp, MODIFIER_NONE, 0, 1, 0, 0, 0));
5825
5826 if (genx::isKernel(Func) && (HasStackcalls || HasAlloca)) {
5827 // init kernel stack
5828 VISA_GenVar *Hwtid = nullptr;
5829 CISA_CALL(Kernel->GetPredefinedVar(Hwtid, PREDEFINED_HW_TID));
5830
5831 VISA_VectorOpnd *HwtidOp = nullptr;
5832
5833 // probably here would be better calculate exact stack size required
5834 // by the kernel, but legacy stack builder is to be dropped away soon
5835 uint32_t Val = visa::StackPerThreadScratch;
5836
5837 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &Val, ISA_TYPE_UD));
5838 CISA_CALL(Kernel->CreateVISASrcOperand(HwtidOp, Hwtid, MODIFIER_NONE, 0, 1,
5839 0, 0, 0));
5840
5841 if (StackSurf == PREDEFINED_SURFACE_STACK) {
5842 CISA_CALL(Kernel->AppendVISAArithmeticInst(
5843 ISA_MUL, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5844 EXEC_SIZE_1, SpOpDst, HwtidOp, Imm));
5845 } else {
5846 VISA_GenVar *Tmp = nullptr;
5847
5848 CISA_CALL(Kernel->CreateVISAGenVar(
5849 Tmp, "SpOff", 1, allowI64Ops() ? ISA_TYPE_UQ : ISA_TYPE_UD, ALIGN_DWORD));
5850
5851 VISA_VectorOpnd *OffOpDst = nullptr;
5852 VISA_VectorOpnd *OffOpSrc = nullptr;
5853 CISA_CALL(Kernel->CreateVISADstOperand(OffOpDst, Tmp, 1, 0, 0));
5854 CISA_CALL(Kernel->CreateVISASrcOperand(OffOpSrc, Tmp, MODIFIER_NONE, 0, 1,
5855 0, 0, 0));
5856 CISA_CALL(Kernel->AppendVISAArithmeticInst(
5857 ISA_MUL, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5858 EXEC_SIZE_1, OffOpDst, HwtidOp, Imm));
5859
5860 VISA_VectorOpnd *OpSrc = nullptr;
5861 VISA_GenVar *R0 = nullptr;
5862 CISA_CALL(Kernel->GetPredefinedVar(R0, PREDEFINED_R0));
5863 CISA_CALL(Kernel->CreateVISASrcOperand(OpSrc, R0, MODIFIER_NONE, 0, 1, 0,
5864 0, 5));
5865 if (OptStrictI64Check)
5866 report_fatal_error("CisaBuilder should not produce 64-bit instructions"
5867 " add64", false);
5868 CISA_CALL(Kernel->AppendVISADataMovementInst(
5869 ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5870 EXEC_SIZE_1, SpOpDst, OpSrc));
5871 Kernel->AppendVISAArithmeticInst(
5872 ISA_ADD, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5873 EXEC_SIZE_1, SpOpDst1, SpOpSrc1, OffOpSrc);
5874 }
5875 CISA_CALL(Kernel->AppendVISADataMovementInst(
5876 ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5877 EXEC_SIZE_1, FpOpDst, SpOpSrc));
5878 unsigned SMO = BackendConfig->getStackSurfaceMaxSize();
5879 Kernel->AddKernelAttribute("SpillMemOffset", 4, &SMO);
5880 } else if (genx::requiresStackCall(Func) ||
5881 genx::isReferencedIndirectly(Func)) {
5882 if (genx::isReferencedIndirectly(Func)) {
5883 int ExtVal = 1;
5884 Kernel->AddKernelAttribute("Extern", 4, &ExtVal);
5885 }
5886 // stack function prologue
5887 VISA_GenVar *FpTmp = nullptr;
5888
5889 auto *ArgVar = &CisaVars[Kernel].at("argv");
5890 auto *RetVar = &CisaVars[Kernel].at("retv");
5891
5892 if (FPMap.count(Func) == 0) {
5893 CISA_CALL(
5894 Kernel->CreateVISAGenVar(FpTmp, "tmp", 1, ISA_TYPE_UQ, ALIGN_DWORD));
5895 FPMap.insert(std::pair<Function *, VISA_GenVar *>(Func, FpTmp));
5896 } else
5897 FpTmp = FPMap[Func];
5898
5899 // init func stack pointers
5900 VISA_VectorOpnd *TmpOp = nullptr;
5901 CISA_CALL(Kernel->CreateVISADstOperand(TmpOp, FpTmp, 1, 0, 0));
5902
5903 Kernel->AppendVISADataMovementInst(
5904 ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5905 EXEC_SIZE_1, TmpOp, FpOpSrc);
5906 Kernel->AppendVISADataMovementInst(
5907 ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5908 EXEC_SIZE_1, FpOpDst, SpOpSrc);
5909
5910 // unpack args
5911 int Sz = 0, StackOff = 0;
5912 unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
5913 bool StackStarted = false;
5914 unsigned NoStackSize = 0;
5915 // NOTE: using reverse iterators for args would be much better we don't have
5916 // any though
5917 for (auto &FArg : Func->args()) {
5918 if (Liveness->getLiveRange(&FArg) &&
5919 Liveness->getLiveRange(&FArg)->getCategory() == RegCategory::EM)
5920 continue;
5921
5922 RowOff = 0, ColOff = 0;
5923 unsigned ArgSize = getValueSize(FArg.getType());
5924 if (SrcColOff &&
5925 (FArg.getType()->isVectorTy() || ArgSize > (GrfByteSize - ColOff))) {
5926 SrcRowOff++;
5927 SrcColOff = 0;
5928 NoStackSize++;
5929 }
5930 if (Liveness->getLiveRange(&FArg)->getCategory() ==
5931 RegCategory::PREDICATE) {
5932 VISA_VectorOpnd *argSrc = nullptr;
5933 Kernel->CreateVISASrcOperand(
5934 argSrc,
5935 ArgVar->getAlias(llvmToVisaType(FArg.getType()), Kernel)
5936 ->getGenVar(),
5937 MODIFIER_NONE, 0, 1, 0, SrcRowOff, SrcColOff);
5938 auto *PReg =
5939 getRegForValueOrNullAndSaveAlias(KernFunc, SimpleValue(&FArg));
5940 IGC_ASSERT(PReg);
5941 Kernel->AppendVISASetP(vISA_EMASK_M1_NM, EXEC_SIZE_1,
5942 PReg->GetVar<VISA_PredVar>(Kernel), argSrc);
5943 } else {
5944 if ((int)ArgVar->getByteSize() - SrcRowOff * GrfByteSize >= ArgSize &&
5945 !StackStarted) {
5946 emitVectorCopy(&FArg, ArgVar->getAlias(&FArg, Kernel), RowOff, ColOff,
5947 SrcRowOff, SrcColOff, getValueSize(&FArg));
5948 NoStackSize = RowOff;
5949 } else {
5950 StackStarted = true;
5951 VISA_StateOpndHandle *stackSurf = nullptr;
5952 VISA_SurfaceVar *stackSurfVar = nullptr;
5953 CISA_CALL(Kernel->GetPredefinedSurface(stackSurfVar, StackSurf));
5954 CISA_CALL(
5955 Kernel->CreateVISAStateOperandHandle(stackSurf, stackSurfVar));
5956 popStackArg(&FArg, stackSurf, ArgSize, RowOff, ColOff, SrcRowOff,
5957 SrcColOff, StackOff);
5958 }
5959 }
5960 Sz += ArgSize;
5961 }
5962 if (!StackStarted && ColOff)
5963 NoStackSize++;
5964 auto *StackCallee = Func2Kern[Func];
5965 auto *FuncTy = Func->getFunctionType();
5966 int RetSize =
5967 (FuncTy->getReturnType()->isVoidTy() ||
5968 getValueSize(FuncTy->getReturnType()) > RetVar->getByteSize())
5969 ? 0
5970 : (getValueSize(FuncTy->getReturnType()) + GrfByteSize - 1) /
5971 GrfByteSize;
5972
5973 StackCallee->SetFunctionInputSize(NoStackSize);
5974 StackCallee->SetFunctionReturnSize(RetSize);
5975 StackCallee->AddKernelAttribute("ArgSize", 1, &NoStackSize);
5976 StackCallee->AddKernelAttribute("RetValSize", 1, &RetSize);
5977 }
5978 }
5979
beginFunctionLight(Function * Func)5980 void GenXKernelBuilder::beginFunctionLight(Function *Func) {
5981 if (genx::isKernel(Func))
5982 return;
5983 if (!genx::requiresStackCall(Func) && !genx::isReferencedIndirectly(Func))
5984 return;
5985 if (genx::isReferencedIndirectly(Func)) {
5986 int ExtVal = 1;
5987 Kernel->AddKernelAttribute("Extern", 4, &ExtVal);
5988 }
5989 // stack function prologue
5990 auto *MDArg = Func->getMetadata(InstMD::FuncArgSize);
5991 auto *MDRet = Func->getMetadata(InstMD::FuncRetSize);
5992 IGC_ASSERT(MDArg && MDRet);
5993 auto ArgSize =
5994 cast<ConstantInt>(
5995 cast<ConstantAsMetadata>(MDArg->getOperand(0).get())->getValue())
5996 ->getZExtValue();
5997 auto RetSize =
5998 cast<ConstantInt>(
5999 cast<ConstantAsMetadata>(MDRet->getOperand(0).get())->getValue())
6000 ->getZExtValue();
6001
6002 auto *StackCallee = Func2Kern[Func];
6003 StackCallee->SetFunctionInputSize(ArgSize);
6004 StackCallee->SetFunctionReturnSize(RetSize);
6005 StackCallee->AddKernelAttribute("ArgSize", 1, &ArgSize);
6006 StackCallee->AddKernelAttribute("RetValSize", 1, &RetSize);
6007 }
6008
6009 /**************************************************************************************************
6010 * endFunction : emit function epilogue and return value passing code
6011 *
6012 * Emit stack-related function epilogue if Func is a stack function.
6013 *
6014 * Epilogue restores Sp and Fp. Return value may be passed either visa %retval
6015 * arg or stackmem, both scalar/vector and aggregate types are supported (please
6016 * also see build[Extract|Insert]Value).
6017 */
endFunction(Function * Func,ReturnInst * RI)6018 void GenXKernelBuilder::endFunction(Function *Func, ReturnInst *RI) {
6019 if (!genx::isKernel(Func) &&
6020 (genx::requiresStackCall(Func) || genx::isReferencedIndirectly(Func))) {
6021 VISA_GenVar *Sp = nullptr, *Fp = nullptr;
6022 CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
6023 CISA_CALL(Kernel->GetPredefinedVar(Fp, PREDEFINED_FE_FP));
6024
6025 VISA_VectorOpnd *SpOpSrc = nullptr;
6026 VISA_VectorOpnd *SpOpDst = nullptr;
6027 VISA_VectorOpnd *FpOpDst = nullptr;
6028 VISA_VectorOpnd *FpOpSrc = nullptr;
6029
6030 CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
6031 CISA_CALL(Kernel->CreateVISADstOperand(FpOpDst, Fp, 1, 0, 0));
6032 CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1,
6033 0, 0, 0));
6034 CISA_CALL(Kernel->CreateVISASrcOperand(FpOpSrc, Fp, MODIFIER_NONE, 0, 1,
6035 0, 0, 0));
6036
6037 VISA_VectorOpnd *TmpOp = nullptr;
6038 CISA_CALL(Kernel->CreateVISASrcOperand(TmpOp, FPMap[Func], MODIFIER_NONE,
6039 0, 1, 0, 0, 0));
6040
6041 Kernel->AppendVISADataMovementInst(
6042 ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6043 EXEC_SIZE_1, SpOpDst, FpOpSrc);
6044 Kernel->AppendVISADataMovementInst(
6045 ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6046 EXEC_SIZE_1, FpOpDst, TmpOp);
6047
6048 VISA_GenVar *Ret = nullptr;
6049 CISA_CALL(Kernel->GetPredefinedVar(Ret, PREDEFINED_RET));
6050
6051 if (!Func->getReturnType()->isVoidTy() &&
6052 !Func->getReturnType()->isAggregateType() &&
6053 Liveness->getLiveRangeOrNull(RI->getReturnValue()) &&
6054 (Liveness->getLiveRange(RI->getReturnValue())->getCategory() !=
6055 RegCategory::EM &&
6056 Liveness->getLiveRange(RI->getReturnValue())->getCategory() !=
6057 RegCategory::PREDICATE)) {
6058 GenericCisaVariable *RetVar = &CisaVars[Kernel].at("retv");
6059 IGC_ASSERT(!Func->getReturnType()->isAggregateType());
6060
6061 // pack retval
6062 unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
6063 if (getValueSize(Func->getReturnType()) <=
6064 RetVar->getByteSize()) {
6065 unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
6066 emitVectorCopy(RetVar->getAlias(RI->getReturnValue(), Kernel), RI->getReturnValue(),
6067 RowOff, ColOff, SrcRowOff,
6068 SrcColOff, getValueSize(RI->getReturnValue()));
6069 } else {
6070 VISA_StateOpndHandle *StackSurfOp = nullptr;
6071 VISA_SurfaceVar *StackSurfVar = nullptr;
6072 CISA_CALL(Kernel->GetPredefinedSurface(StackSurfVar,
6073 StackSurf));
6074 CISA_CALL(
6075 Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6076 pushStackArg(StackSurfOp, RI->getReturnValue(),
6077 getValueSize(Func->getReturnType()), RowOff, ColOff,
6078 SrcRowOff, SrcColOff);
6079 }
6080 }
6081 for (auto II : RetvInserts)
6082 buildInsertRetv(II);
6083 RetvInserts.clear();
6084 }
6085 }
6086
buildExtractRetv(ExtractValueInst * Inst)6087 void GenXKernelBuilder::buildExtractRetv(ExtractValueInst *Inst) {
6088 auto T = Inst->getOperand(0)->getType();
6089 auto *RetVar = &CisaVars[Kernel].at("retv");
6090
6091 bool UseStack = getValueSize(T) > RetVar->getByteSize();
6092
6093 auto Index = Inst->getIndices().front();
6094 if (T->getContainedType(Index)->isVectorTy() &&
6095 cast<VectorType>(T->getContainedType(Index))
6096 ->getElementType()
6097 ->isIntegerTy(1))
6098 // elements of <N x i1> type should be ignored
6099 return;
6100
6101 unsigned RowOff = 0, ColOff = 0;
6102 unsigned SrcRowOff = 0, SrcColOff = 0;
6103 for (unsigned i = 0; i < Index; i++) {
6104 int Mod = UseStack ? visa::BytesPerOword : GrfByteSize;
6105 SrcRowOff += (getValueSize(T->getContainedType(i)) + Mod - 1) / Mod;
6106 }
6107
6108 if (UseStack) {
6109 int Prev = SrcRowOff;
6110 VISA_StateOpndHandle *StackSurfOp = nullptr;
6111 VISA_SurfaceVar *StackSurfVar = nullptr;
6112 CISA_CALL(
6113 Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6114 CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6115 popStackArg(Inst, StackSurfOp, getValueSize(T->getContainedType(Index)),
6116 RowOff, ColOff, SrcRowOff, SrcColOff, Prev);
6117 } else
6118 emitVectorCopy(Inst, RetVar->getAlias(Inst, Kernel), RowOff, ColOff,
6119 SrcRowOff, SrcColOff, getValueSize(Inst));
6120 }
6121
buildInsertRetv(InsertValueInst * Inst)6122 void GenXKernelBuilder::buildInsertRetv(InsertValueInst *Inst) {
6123 auto T = Inst->getOperand(0)->getType();
6124 auto *RetVar = &CisaVars[Kernel].at("retv");
6125
6126 bool UseStack = getValueSize(T) > RetVar->getByteSize();
6127
6128 auto Index = Inst->getIndices().front();
6129 if (T->getContainedType(Index)->isVectorTy() &&
6130 cast<VectorType>(T->getContainedType(Index))
6131 ->getElementType()
6132 ->isIntegerTy(1)) {
6133 // elements of <N x i1> type should be ignored
6134 return;
6135 }
6136
6137 unsigned RowOff = 0, ColOff = 0;
6138 unsigned SrcRowOff = 0, SrcColOff = 0;
6139
6140 if (!UseStack)
6141 for (unsigned i = 0; i < Index; i++)
6142 RowOff += (getValueSize(T->getContainedType(i)) + GrfByteSize - 1) /
6143 GrfByteSize;
6144
6145 if (UseStack) {
6146 VISA_StateOpndHandle *StackSurfOp = nullptr;
6147 VISA_SurfaceVar *StackSurfVar = nullptr;
6148 CISA_CALL(
6149 Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6150 CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6151 pushStackArg(StackSurfOp, Inst->getOperand(1),
6152 getValueSize(T->getContainedType(Index)), RowOff, ColOff,
6153 SrcRowOff, SrcColOff);
6154 } else
6155 emitVectorCopy(RetVar->getAlias(Inst->getOperand(1), Kernel),
6156 Inst->getOperand(1), RowOff, ColOff, SrcRowOff, SrcColOff,
6157 getValueSize(Inst->getOperand(1)));
6158 }
6159
buildStackCallLight(CallInst * CI,const DstOpndDesc & DstDesc)6160 void GenXKernelBuilder::buildStackCallLight(CallInst *CI,
6161 const DstOpndDesc &DstDesc) {
6162 LLVM_DEBUG(dbgs() << "Build stack call " << *CI << "\n");
6163 Function *Callee = CI->getCalledFunction();
6164
6165 // Check whether the called function has a predicate arg that is EM.
6166 auto *EMArg = std::find_if(CI->arg_begin(), CI->arg_end(), [this](Use &Arg) {
6167 return Arg->getType()->getScalarType()->isIntegerTy(1) &&
6168 Liveness->getLiveRange(Arg)->getCategory() == RegCategory::EM;
6169 });
6170 VISA_PredOpnd *Pred = nullptr;
6171 VISA_Exec_Size Esz = EXEC_SIZE_16;
6172 if (EMArg != CI->arg_end()) {
6173 auto EMOperandNum = EMArg->getOperandNo();
6174 Pred = createPred(CI, BaleInfo(), EMOperandNum);
6175 auto *VTy = cast<IGCLLVM::FixedVectorType>(
6176 CI->getArgOperand(EMOperandNum)->getType());
6177 Esz = getExecSizeFromValue(VTy->getNumElements());
6178 }
6179 addDebugInfo();
6180 auto *MDArg = CI->getMetadata(InstMD::FuncArgSize);
6181 auto *MDRet = CI->getMetadata(InstMD::FuncRetSize);
6182 IGC_ASSERT(MDArg && MDRet);
6183 auto ArgSize =
6184 cast<ConstantInt>(
6185 cast<ConstantAsMetadata>(MDArg->getOperand(0).get())->getValue())
6186 ->getZExtValue();
6187 auto RetSize =
6188 cast<ConstantInt>(
6189 cast<ConstantAsMetadata>(MDRet->getOperand(0).get())->getValue())
6190 ->getZExtValue();
6191 if (Callee) {
6192 CISA_CALL(Kernel->AppendVISACFFunctionCallInst(
6193 Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6194 Callee->getName().str(), ArgSize, RetSize));
6195 } else {
6196 auto *FuncAddr = createSource(IGCLLVM::getCalledValue(CI), DONTCARESIGNED);
6197 IGC_ASSERT(FuncAddr);
6198 CISA_CALL(Kernel->AppendVISACFIndirectFuncCallInst(
6199 Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6200 FuncAddr, ArgSize, RetSize));
6201 }
6202 }
6203
buildStackCall(CallInst * CI,const DstOpndDesc & DstDesc)6204 void GenXKernelBuilder::buildStackCall(CallInst *CI,
6205 const DstOpndDesc &DstDesc) {
6206 LLVM_DEBUG(dbgs() << "Build stack call " << *CI << "\n");
6207 Function *Callee = CI->getCalledFunction();
6208 auto *FuncTy = CI->getFunctionType();
6209
6210 // Check whether the called function has a predicate arg that is EM.
6211 int EMOperandNum = -1, EMIdx = -1;
6212 for (auto &Arg : CI->arg_operands()) {
6213 ++EMIdx;
6214 if (!Arg->getType()->getScalarType()->isIntegerTy(1))
6215 continue;
6216 if (Liveness->getLiveRange(Arg)->getCategory() == RegCategory::EM) {
6217 EMOperandNum = EMIdx;
6218 break;
6219 }
6220 }
6221
6222 int TotalArgSize = 0;
6223 for (auto &CallArg : CI->arg_operands())
6224 TotalArgSize += getValueSize(CallArg->getType());
6225
6226 VISA_GenVar *Sp = nullptr, *Arg = nullptr, *Ret = nullptr;
6227 CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
6228 CISA_CALL(Kernel->GetPredefinedVar(Arg, PREDEFINED_ARG));
6229 CISA_CALL(Kernel->GetPredefinedVar(Ret, PREDEFINED_RET));
6230
6231 unsigned ColOff = 0, RowOff = 0, SrcRowOff = 0, SrcColOff = 0;
6232
6233 int Sz = 0, NoStackSize = 0, StackArgSz = 0;
6234 uint64_t StackOff = 0;
6235 bool StackStarted = false;
6236 // pack arguments
6237 for (auto &CallArg : CI->arg_operands()) {
6238 auto *CallArgLR = Liveness->getLiveRangeOrNull(CallArg.get());
6239 if (CallArgLR && CallArgLR->getCategory() == RegCategory::EM)
6240 continue;
6241
6242 IGC_ASSERT(!CallArg->getType()->isAggregateType());
6243 SrcRowOff = 0, SrcColOff = 0;
6244 unsigned ArgSize = getValueSize(CallArg->getType());
6245
6246 if (ColOff && (CallArg->getType()->isVectorTy() ||
6247 ArgSize > (GrfByteSize - ColOff))) {
6248 RowOff++;
6249 ColOff = 0;
6250 // adjust size if we use only a part the last used GRF
6251 NoStackSize++;
6252 }
6253
6254 bool IsUndef = isa<UndefValue>(CallArg);
6255 auto *ArgVar = &CisaVars[Kernel].at("argv");
6256 if ((int)ArgVar->getByteSize() - RowOff * GrfByteSize >= ArgSize &&
6257 !StackStarted) {
6258 IGC_ASSERT_MESSAGE(ArgSize <= Sz - ArgVar->getByteSize(),
6259 "cannot pass arg via stack and %arg as well");
6260
6261 SrcRowOff = 0, SrcColOff = 0;
6262 if (!IsUndef && CallArgLR->getCategory() == RegCategory::PREDICATE) {
6263 VISA_VectorOpnd *PredDst = nullptr;
6264 Kernel->CreateVISADstOperand(
6265 PredDst,
6266 ArgVar->getAlias(llvmToVisaType(CallArg->getType()), Kernel)
6267 ->getGenVar(),
6268 1, RowOff, ColOff);
6269 auto PReg =
6270 getRegForValueOrNullAndSaveAlias(KernFunc, SimpleValue(CallArg));
6271 IGC_ASSERT(PReg);
6272 Kernel->AppendVISAPredicateMove(PredDst,
6273 PReg->GetVar<VISA_PredVar>(Kernel));
6274 ColOff += ArgSize;
6275 } else
6276 emitVectorCopy<CisaVariable, Value>(
6277 ArgVar->getAlias(CallArg, Kernel), CallArg, RowOff, ColOff,
6278 SrcRowOff, SrcColOff, getValueSize(CallArg), !IsUndef);
6279 Sz += ArgSize;
6280 NoStackSize = RowOff;
6281 } else {
6282 StackStarted = true;
6283 RowOff = ColOff = 0;
6284 SrcRowOff = SrcColOff = 0;
6285 VISA_StateOpndHandle *StackSurfOp = nullptr;
6286 VISA_SurfaceVar *StackSurfVar = nullptr;
6287 CISA_CALL(
6288 Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6289 CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6290 pushStackArg(StackSurfOp, CallArg, ArgSize, RowOff, ColOff, SrcRowOff,
6291 SrcColOff, !IsUndef);
6292
6293 StackArgSz += (ArgSize / visa::BytesPerOword) +
6294 (ArgSize % visa::BytesPerOword ? 1 : 0);
6295 StackOff = -StackArgSz;
6296 }
6297 }
6298 if (!StackStarted && ColOff)
6299 NoStackSize++;
6300
6301 VISA_VectorOpnd *SpOpSrc = nullptr, *SpOpDst = nullptr, *Imm = nullptr;
6302 if (StackOff) {
6303 CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
6304 CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0,
6305 0, 0));
6306 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &StackOff, ISA_TYPE_UQ));
6307 CISA_CALL(Kernel->AppendVISAArithmeticInst(
6308 ISA_ADD, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6309 EXEC_SIZE_1, SpOpDst, SpOpSrc, Imm));
6310 }
6311
6312 VISA_PredOpnd *Pred = nullptr;
6313 VISA_Exec_Size Esz = EXEC_SIZE_16;
6314 if (EMOperandNum >= 0) {
6315 Pred = createPred(CI, BaleInfo(), EMOperandNum);
6316 auto *VTy = cast<IGCLLVM::FixedVectorType>(
6317 CI->getArgOperand(EMOperandNum)->getType());
6318 Esz = getExecSizeFromValue(VTy->getNumElements());
6319 }
6320 addDebugInfo();
6321
6322 auto *RetVar = &CisaVars[Kernel].at("retv");
6323 bool ProcessRet = !FuncTy->getReturnType()->isVoidTy() &&
6324 !FuncTy->getReturnType()->isAggregateType() &&
6325 !(FuncTy->getReturnType()->isVectorTy() &&
6326 cast<VectorType>(FuncTy->getReturnType())
6327 ->getElementType()
6328 ->isIntegerTy(1));
6329
6330 // cannot use processRet here since aggr/em args should be co
6331 int RetSize =
6332 (FuncTy->getReturnType()->isVoidTy() ||
6333 getValueSize(FuncTy->getReturnType()) > RetVar->getByteSize())
6334 ? 0
6335 : (getValueSize(FuncTy->getReturnType()) + GrfByteSize - 1) /
6336 GrfByteSize;
6337 if (Callee) {
6338 CISA_CALL(Kernel->AppendVISACFFunctionCallInst(
6339 Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6340 Callee->getName().str(), NoStackSize, RetSize));
6341 } else {
6342 auto *FuncAddr = createSource(IGCLLVM::getCalledValue(CI), DONTCARESIGNED);
6343 IGC_ASSERT(FuncAddr);
6344 CISA_CALL(Kernel->AppendVISACFIndirectFuncCallInst(
6345 Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6346 FuncAddr, NoStackSize, RetSize));
6347 }
6348
6349 unsigned StackRetSz = 0;
6350 if (!FuncTy->getReturnType()->isVoidTy() &&
6351 getValueSize(FuncTy->getReturnType()) > RetVar->getByteSize())
6352 StackRetSz = (getValueSize(FuncTy->getReturnType(), visa::BytesPerOword) /
6353 visa::BytesPerOword +
6354 ((getValueSize(FuncTy->getReturnType(), visa::BytesPerOword) %
6355 visa::BytesPerOword)
6356 ? 1
6357 : 0));
6358 // unpack retval
6359 if (ProcessRet && Liveness->getLiveRange(CI) &&
6360 Liveness->getLiveRange(CI)->getCategory() != RegCategory::EM) {
6361 unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
6362 if (getValueSize(FuncTy->getReturnType()) <= RetVar->getByteSize()) {
6363 emitVectorCopy(CI, RetVar->getAlias(CI, Kernel), RowOff, ColOff,
6364 SrcRowOff, SrcColOff, getValueSize(CI));
6365 } else {
6366 int StackOffVal = -StackRetSz;
6367 VISA_StateOpndHandle *StackSurfOp = nullptr;
6368 VISA_SurfaceVar *StackSurfVar = nullptr;
6369 CISA_CALL(
6370 Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6371 CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6372 popStackArg(CI, StackSurfOp, getValueSize(Callee->getReturnType()), RowOff,
6373 ColOff, SrcRowOff, SrcColOff, StackOffVal);
6374 }
6375 }
6376 // restore Sp
6377 CISA_CALL(
6378 Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
6379 CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
6380 uint64_t OffVal = -StackRetSz;
6381 IGC_ASSERT(OffVal <= std::numeric_limits<uint32_t>::max());
6382
6383 if (OffVal) {
6384 CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
6385 CISA_CALL(Kernel->AppendVISAArithmeticInst(
6386 ISA_ADD, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6387 EXEC_SIZE_1, SpOpDst, SpOpSrc, Imm));
6388 }
6389 }
6390
6391 namespace {
6392
6393 class GenXFinalizer : public ModulePass {
6394 raw_pwrite_stream &Out;
6395 LLVMContext *Ctx = nullptr;
6396
6397 public:
6398 static char ID;
GenXFinalizer(raw_pwrite_stream & o)6399 explicit GenXFinalizer(raw_pwrite_stream &o) : ModulePass(ID), Out(o) {}
6400
getPassName() const6401 StringRef getPassName() const override { return "GenX Finalizer"; }
6402
getContext()6403 LLVMContext &getContext() {
6404 IGC_ASSERT(Ctx);
6405 return *Ctx;
6406 }
6407
getAnalysisUsage(AnalysisUsage & AU) const6408 void getAnalysisUsage(AnalysisUsage &AU) const override {
6409 AU.addRequired<GenXModule>();
6410 AU.addRequired<FunctionGroupAnalysis>();
6411 AU.addRequired<TargetPassConfig>();
6412 AU.addRequired<GenXBackendConfig>();
6413 AU.setPreservesAll();
6414 }
6415
runOnModule(Module & M)6416 bool runOnModule(Module &M) override {
6417 Ctx = &M.getContext();
6418
6419 auto BC = &getAnalysis<GenXBackendConfig>();
6420 auto &FGA = getAnalysis<FunctionGroupAnalysis>();
6421 auto &GM = getAnalysis<GenXModule>();
6422 std::stringstream ss;
6423 VISABuilder *CisaBuilder = GM.GetCisaBuilder();
6424 if (GM.HasInlineAsm())
6425 CisaBuilder = GM.GetVISAAsmReader();
6426 CISA_CALL(CisaBuilder->Compile("genxir", &ss, EmitVisa));
6427
6428 if (!BC->isDisableFinalizerMsg())
6429 dbgs() << CisaBuilder->GetCriticalMsg();
6430
6431 Out << ss.str();
6432
6433 // Collect some useful statistics
6434 for (auto *FG: FGA) {
6435 VISAKernel *Kernel = CisaBuilder->GetVISAKernel(FG->getName().str());
6436 IGC_ASSERT(Kernel);
6437 FINALIZER_INFO *jitInfo = nullptr;
6438 CISA_CALL(Kernel->GetJitInfo(jitInfo));
6439 IGC_ASSERT(jitInfo);
6440 NumAsmInsts += jitInfo->numAsmCount;
6441 SpillMemUsed += jitInfo->spillMemUsed;
6442 }
6443 return false;
6444 }
6445 };
6446 } // end anonymous namespace.
6447
6448 char GenXFinalizer::ID = 0;
6449
createGenXFinalizerPass(raw_pwrite_stream & o)6450 ModulePass *llvm::createGenXFinalizerPass(raw_pwrite_stream &o) {
6451 return new GenXFinalizer(o);
6452 }
6453
6454 static SmallVector<const char *, 8>
collectFinalizerArgs(StringSaver & Saver,const GenXSubtarget & ST,bool EmitDebugInformation,const GenXBackendConfig & BC)6455 collectFinalizerArgs(StringSaver &Saver, const GenXSubtarget &ST,
6456 bool EmitDebugInformation, const GenXBackendConfig &BC) {
6457 SmallVector<const char *, 8> Argv;
6458 auto addArgument = [&Argv, &Saver](StringRef Arg) {
6459 // String saver guarantees that string is null-terminated.
6460 Argv.push_back(Saver.save(Arg).data());
6461 };
6462
6463 // enable preemption if we have SKL+ and option switched on
6464 if (BC.enablePreemption() && ST.hasPreemption())
6465 addArgument("-enablePreemption");
6466
6467 addArgument("-dumpvisa");
6468 for (const auto &Fos : FinalizerOpts)
6469 cl::TokenizeGNUCommandLine(Fos, Saver, Argv);
6470
6471 if (EmitDebugInformation)
6472 addArgument("-generateDebugInfo");
6473 if (BC.passDebugToFinalizer())
6474 addArgument("-debug");
6475 if (BC.emitDebuggableKernels()) {
6476 addArgument("-addKernelID");
6477 addArgument("-setstartbp");
6478 }
6479 if (BC.asmDumpsEnabled()) {
6480 addArgument("-dumpcommonisa");
6481 addArgument("-output");
6482 addArgument("-binary");
6483 }
6484 if (ST.needsWANoMaskFusedEU() && !DisableNoMaskWA) {
6485 addArgument("-noMaskWA");
6486 addArgument("2");
6487 }
6488 if (BC.isLargeGRFMode()) {
6489 addArgument("-TotalGRFNum");
6490 addArgument("256");
6491 }
6492 return Argv;
6493 }
6494
dumpFinalizerArgs(const SmallVectorImpl<const char * > & Argv,StringRef CPU)6495 static void dumpFinalizerArgs(const SmallVectorImpl<const char *> &Argv,
6496 StringRef CPU) {
6497 // NOTE: CPU is not the Platform used by finalizer
6498 // The mapping is described by getVisaPlatform from GenXSubtarget.h
6499 outs() << "GenXCpu: " << CPU << "\n";
6500 outs() << "Finalizer Parameters:\n\t";
6501 std::for_each(Argv.begin(), Argv.end(),
6502 [](const char *Arg) { outs() << " " << Arg; });
6503 outs() << "\n";
6504 }
6505
/// Return the LLVM context referenced by Ctx. Ctx must be non-null, which is
/// asserted.
LLVMContext &GenXModule::getContext() {
  IGC_ASSERT(Ctx);
  return *Ctx;
}
6510
createVISABuilder(const GenXSubtarget & ST,const GenXBackendConfig & BC,bool EmitDebugInformation,vISABuilderMode Mode,LLVMContext & Ctx,BumpPtrAllocator & Alloc)6511 static VISABuilder *createVISABuilder(const GenXSubtarget &ST,
6512 const GenXBackendConfig &BC,
6513 bool EmitDebugInformation,
6514 vISABuilderMode Mode, LLVMContext &Ctx,
6515 BumpPtrAllocator &Alloc) {
6516 auto Platform = ST.getVisaPlatform();
6517 // Use SKL for unknown platforms
6518 if (Platform == TARGET_PLATFORM::GENX_NONE)
6519 Platform = TARGET_PLATFORM::GENX_SKL;
6520
6521 // Prepare array of arguments for Builder API.
6522 StringSaver Saver{Alloc};
6523 SmallVector<const char *, 8> Argv =
6524 collectFinalizerArgs(Saver, ST, EmitDebugInformation, BC);
6525
6526 if (PrintFinalizerOptions)
6527 dumpFinalizerArgs(Argv, ST.getCPU());
6528
6529 // Special error processing here related to strange case where on Windows
6530 // machines only we had failures, reproducible only when shader dumps are
6531 // off. This code is to diagnose such cases simpler.
6532 VISABuilder *VB = nullptr;
6533 int Result = CreateVISABuilder(
6534 VB, Mode, EmitVisa ? VISA_BUILDER_VISA : VISA_BUILDER_BOTH, Platform,
6535 Argv.size(), Argv.data(), BC.getWATable());
6536 if (Result != 0 || VB == nullptr) {
6537 std::string Str;
6538 llvm::raw_string_ostream Os(Str);
6539 Os << "VISA builder creation failed\n";
6540 Os << "Mode: " << Mode << "\n";
6541 Os << "Args:\n";
6542 for (const char *Arg : Argv)
6543 Os << Arg << " ";
6544 Os << "Visa only: " << (EmitVisa ? "yes" : "no") << "\n";
6545 Os << "Platform: " << ST.getVisaPlatform() << "\n";
6546 DiagnosticInfoCisaBuild Err(Os.str(), DS_Error);
6547 Ctx.diagnose(Err);
6548 }
6549 return VB;
6550 }
6551
InitCISABuilder()6552 void GenXModule::InitCISABuilder() {
6553 IGC_ASSERT(ST);
6554 const vISABuilderMode Mode = HasInlineAsm() ? vISA_ASM_WRITER : vISA_DEFAULT;
6555 CisaBuilder = createVISABuilder(*ST, *BC, EmitDebugInformation, Mode,
6556 getContext(), ArgStorage);
6557 }
6558
GetCisaBuilder()6559 VISABuilder *GenXModule::GetCisaBuilder() {
6560 if (!CisaBuilder)
6561 InitCISABuilder();
6562 return CisaBuilder;
6563 }
6564
DestroyCISABuilder()6565 void GenXModule::DestroyCISABuilder() {
6566 if (CisaBuilder) {
6567 CISA_CALL(DestroyVISABuilder(CisaBuilder));
6568 CisaBuilder = nullptr;
6569 }
6570 }
6571
InitVISAAsmReader()6572 void GenXModule::InitVISAAsmReader() {
6573 IGC_ASSERT(ST);
6574 VISAAsmTextReader =
6575 createVISABuilder(*ST, *BC, EmitDebugInformation, vISA_ASM_READER,
6576 getContext(), ArgStorage);
6577 }
6578
GetVISAAsmReader()6579 VISABuilder *GenXModule::GetVISAAsmReader() {
6580 if (!VISAAsmTextReader)
6581 InitVISAAsmReader();
6582 return VISAAsmTextReader;
6583 }
6584
DestroyVISAAsmReader()6585 void GenXModule::DestroyVISAAsmReader() {
6586 if (VISAAsmTextReader) {
6587 CISA_CALL(DestroyVISABuilder(VISAAsmTextReader));
6588 VISAAsmTextReader = nullptr;
6589 }
6590 }
6591