1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2019-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 //
10 /// GenXCisaBuilder
11 /// ---------------
12 ///
/// This file contains two passes: GenXCisaBuilder and GenXFinalizer.
14 ///
/// 1. GenXCisaBuilder transforms LLVM IR to CISA IR via Finalizer's public API.
16 ///    It is a FunctionGroupWrapperPass, thus it runs once for each kernel and
17 ///    creates CISA IR for it and all its subroutines. Real building of kernels
18 ///    is performed by the GenXKernelBuilder class. This splitting is necessary
19 ///    because GenXCisaBuilder object lives through all Function Groups, but we
20 ///    don't need to keep all Kernel building specific data in such lifetime.
21 ///
22 /// 2. GenXFinalizer is a module pass, thus it runs once and all that it does
23 ///    is a running of Finalizer for kernels created in GenXCisaBuilder pass.
24 ///
25 //===----------------------------------------------------------------------===//
26 
27 #include "FunctionGroup.h"
28 #include "GenX.h"
29 #include "GenXDebugInfo.h"
30 #include "GenXGotoJoin.h"
31 #include "GenXIntrinsics.h"
32 #include "GenXPressureTracker.h"
33 #include "GenXSubtarget.h"
34 #include "GenXTargetMachine.h"
35 #include "GenXUtil.h"
36 #include "GenXVisaRegAlloc.h"
37 
38 #include "vc/GenXOpts/Utils/KernelInfo.h"
39 #include "vc/Support/BackendConfig.h"
40 #include "vc/Support/ShaderDump.h"
41 #include "vc/Utils/GenX/Printf.h"
42 
43 #include "llvm/GenXIntrinsics/GenXIntrinsicInst.h"
44 
45 #include "visaBuilder_interface.h"
46 
47 #include "llvm/ADT/IndexedMap.h"
48 #include "llvm/ADT/Statistic.h"
49 #include "llvm/ADT/StringExtras.h"
50 #include "llvm/Analysis/LoopInfo.h"
51 #include "llvm/CodeGen/TargetPassConfig.h"
52 #include "llvm/IR/DebugInfo.h"
53 #include "llvm/IR/DiagnosticInfo.h"
54 #include "llvm/IR/DiagnosticPrinter.h"
55 #include "llvm/InitializePasses.h"
56 #include "llvm/Support/Error.h"
57 #include "llvm/Support/Path.h"
58 #include "llvm/Support/Regex.h"
59 #include "llvm/Support/ScopedPrinter.h"
60 #include "llvm/Support/StringSaver.h"
61 
62 #include "Probe/Assertion.h"
63 #include "llvmWrapper/IR/CallSite.h"
64 #include "llvmWrapper/IR/InstrTypes.h"
65 #include "llvmWrapper/IR/Instructions.h"
66 #include "llvmWrapper/IR/DerivedTypes.h"
67 
68 #include <algorithm>
69 #include <map>
70 #include <string>
71 #include <vector>
72 
73 using namespace llvm;
74 using namespace genx;
75 
76 #define DEBUG_TYPE "GENX_CISA_BUILDER"
77 
78 static cl::list<std::string>
79     FinalizerOpts("finalizer-opts", cl::Hidden, cl::ZeroOrMore,
80                   cl::desc("Additional options for finalizer."));
81 
82 static cl::opt<bool> EmitVisa("emit-visa", cl::init(false), cl::Hidden,
83                               cl::desc("Generate Visa instead of fat binary."));
84 
85 static cl::opt<std::string> AsmNameOpt("asm-name", cl::init(""), cl::Hidden,
86     cl::desc("Output assembly code to this file during compilation."));
87 
88 static cl::opt<bool> ReverseKernels("reverse-kernels", cl::init(false), cl::Hidden,
89     cl::desc("Emit the kernel asm name in reversed order (if user asm name presented)."));
90 
91 static cl::opt<bool>
92     PrintFinalizerOptions("cg-print-finalizer-args", cl::init(false), cl::Hidden,
93                           cl::desc("Prints options used to invoke finalizer"));
94 
95 static cl::opt<bool> SkipNoWiden("skip-widen", cl::init(false), cl::Hidden,
96                                  cl::desc("Do new emit NoWiden hint"));
97 
98 static cl::opt<bool> DisableNoMaskWA(
99     "vc-cg-disable-no-mask-wa", cl::init(false), cl::Hidden,
100     cl::desc("do not apply noMask WA (fusedEU)"));
101 
102 static cl::opt<bool> OptStrictI64Check(
103         "genx-cisa-builder-noi64-check", cl::init(false), cl::Hidden,
104         cl::desc("strict check to ensure we produce no 64-bit operations"));
105 
// Counters declared through LLVM's STATISTIC macro; each takes the variable
// name and a human-readable description.
STATISTIC(NumVisaInsts, "Number of VISA instructions");
STATISTIC(NumAsmInsts, "Number of Gen asm instructions");
STATISTIC(SpillMemUsed, "Spill memory size used");
109 
/// Bitwise XOR-assignment for enum types; intended for
/// VISA_PREDICATE_CONTROL & VISA_PREDICATE_STATE.
///
/// Both operands are converted to the enum's underlying integral type, XOR-ed
/// together, and the result is stored back into \p a.
///
/// \returns a reference to \p a.
template <class T> T &operator^=(T &a, T b) {
  using Underlying = typename std::underlying_type<T>::type;
  static_assert(std::is_integral<Underlying>::value,
                "Wrong operation for non-integral type");
  const Underlying Lhs = static_cast<Underlying>(a);
  const Underlying Rhs = static_cast<Underlying>(b);
  a = static_cast<T>(Lhs ^ Rhs);
  return a;
}
118 
/// Bitwise OR-assignment for enum types.
///
/// Both operands are converted to the enum's underlying integral type, OR-ed
/// together, and the result is stored back into \p a.
///
/// \returns a reference to \p a, as compound assignment operators
/// conventionally do. (The operator used to return a copy, so the result of
/// the expression could not be used as an lvalue.)
template <class T> T &operator|=(T &a, T b) {
  using Underlying = typename std::underlying_type<T>::type;
  static_assert(std::is_integral<Underlying>::value,
                "Wrong operation for non-integral type");
  a = static_cast<T>(static_cast<Underlying>(a) | static_cast<Underlying>(b));
  return a;
}
126 
127 struct DstOpndDesc {
128   Instruction *WrRegion = nullptr;
129   Instruction *GStore = nullptr;
130   Instruction *WrPredefReg = nullptr;
131   genx::BaleInfo WrRegionBI;
132 };
133 
134 namespace {
135 
136 // Diagnostic information for errors/warnings in the GEN IR building passes.
137 class DiagnosticInfoCisaBuild : public DiagnosticInfo {
138 private:
139   std::string Description;
140   static int KindID;
141 
getKindID()142   static int getKindID() {
143     if (KindID == 0)
144       KindID = llvm::getNextAvailablePluginDiagnosticKind();
145     return KindID;
146   }
147 
148 public:
DiagnosticInfoCisaBuild(const Twine & Desc,DiagnosticSeverity Severity)149   DiagnosticInfoCisaBuild(const Twine &Desc, DiagnosticSeverity Severity)
150       : DiagnosticInfo(getKindID(), Severity) {
151     Description = (Twine("GENX IR generation error: ") + Desc).str();
152   }
153 
DiagnosticInfoCisaBuild(Instruction * Inst,const Twine & Desc,DiagnosticSeverity Severity)154   DiagnosticInfoCisaBuild(Instruction *Inst, const Twine &Desc,
155                           DiagnosticSeverity Severity)
156       : DiagnosticInfo(getKindID(), Severity) {
157     std::string Str;
158     llvm::raw_string_ostream(Str) << *Inst;
159     Description =
160         (Twine("CISA builder failed for intruction <") + Str + ">: " + Desc)
161             .str();
162   }
163 
print(DiagnosticPrinter & DP) const164   void print(DiagnosticPrinter &DP) const override { DP << Description; }
165 
classof(const DiagnosticInfo * DI)166   static bool classof(const DiagnosticInfo *DI) {
167     return DI->getKind() == getKindID();
168   }
169 };
170 int DiagnosticInfoCisaBuild::KindID = 0;
171 
172 
getExecSizeFromValue(unsigned int Size)173 static VISA_Exec_Size getExecSizeFromValue(unsigned int Size) {
174   int Res = genx::log2(Size);
175   IGC_ASSERT(std::bitset<sizeof(unsigned int) * 8>(Size).count() <= 1);
176   IGC_ASSERT_MESSAGE(Res <= 5,
177          "illegal common ISA execsize (should be 1, 2, 4, 8, 16, 32).");
178   return Res == -1 ? EXEC_SIZE_ILLEGAL : (VISA_Exec_Size)Res;
179 }
180 
getCisaOwordNumFromNumber(unsigned num)181 static VISA_Oword_Num getCisaOwordNumFromNumber(unsigned num) {
182   switch (num) {
183   case 1:
184     return OWORD_NUM_1;
185   case 2:
186     return OWORD_NUM_2;
187   case 4:
188     return OWORD_NUM_4;
189   case 8:
190     return OWORD_NUM_8;
191   case 16:
192     return OWORD_NUM_16;
193   default:
194     IGC_ASSERT_MESSAGE(0, "illegal Oword number.");
195     return OWORD_NUM_ILLEGAL;
196   }
197 }
198 
convertChannelMaskToVisaType(unsigned Mask)199 VISAChannelMask convertChannelMaskToVisaType(unsigned Mask) {
200   switch (Mask & 0xf) {
201   case 1:
202     return CHANNEL_MASK_R;
203   case 2:
204     return CHANNEL_MASK_G;
205   case 3:
206     return CHANNEL_MASK_RG;
207   case 4:
208     return CHANNEL_MASK_B;
209   case 5:
210     return CHANNEL_MASK_RB;
211   case 6:
212     return CHANNEL_MASK_GB;
213   case 7:
214     return CHANNEL_MASK_RGB;
215   case 8:
216     return CHANNEL_MASK_A;
217   case 9:
218     return CHANNEL_MASK_RA;
219   case 10:
220     return CHANNEL_MASK_GA;
221   case 11:
222     return CHANNEL_MASK_RGA;
223   case 12:
224     return CHANNEL_MASK_BA;
225   case 13:
226     return CHANNEL_MASK_RBA;
227   case 14:
228     return CHANNEL_MASK_GBA;
229   case 15:
230     return CHANNEL_MASK_RGBA;
231   default:
232     IGC_ASSERT_EXIT_MESSAGE(0, "Wrong mask");
233   }
234 }
235 
getChannelOutputFormat(uint8_t ChannelOutput)236 CHANNEL_OUTPUT_FORMAT getChannelOutputFormat(uint8_t ChannelOutput) {
237   return (CHANNEL_OUTPUT_FORMAT)((ChannelOutput >> 4) & 0x3);
238 }
239 
cutString(const Twine & Str)240 static std::string cutString(const Twine &Str) {
241   // vISA is limited to 64 byte strings. But old fe-compiler seems to ignore
242   // that for source filenames.
243   constexpr size_t MaxVisaLabelLength = 64;
244   auto Result = Str.str();
245   if (Result.size() > MaxVisaLabelLength)
246     Result.erase(MaxVisaLabelLength);
247   return Result;
248 }
249 
handleCisaCallError(const Twine & Call,LLVMContext & Ctx)250 void handleCisaCallError(const Twine &Call, LLVMContext &Ctx) {
251   DiagnosticInfoCisaBuild Err(
252       "VISA builder API call failed: " + Call, DS_Error);
253   Ctx.diagnose(Err);
254 }
255 
256 /***********************************************************************
257  * Local function for testing one assertion statement.
258  * It returns true if all is ok.
259  * A phi node not generates any code.
260  * The phi node should has no live range because it is part of an indirected
261  * arg/retval in GenXArgIndirection or it is an EM/RM category.
262  */
testPhiNodeHasNoMismatchedRegs(const llvm::PHINode * const Phi,const llvm::GenXLiveness * const Liveness)263 bool testPhiNodeHasNoMismatchedRegs(const llvm::PHINode *const Phi,
264   const llvm::GenXLiveness *const Liveness) {
265   IGC_ASSERT(Phi);
266   IGC_ASSERT(Liveness);
267   bool Result = true;
268   const size_t Count = Phi->getNumIncomingValues();
269   for (size_t i = 0; (i < Count) && Result; ++i) {
270     const llvm::Value *const Incoming = Phi->getIncomingValue(i);
271     if (!isa<UndefValue>(Incoming)) {
272       const genx::SimpleValue SVI(const_cast<llvm::Value *> (Incoming));
273       const genx::LiveRange *const LRI = Liveness->getLiveRangeOrNull(SVI);
274       if (LRI) {
275         if (LRI->getCategory() < RegCategory::NUMREALCATEGORIES) {
276           const genx::SimpleValue SVP(const_cast<llvm::PHINode *> (Phi));
277           const genx::LiveRange *const LRP = Liveness->getLiveRangeOrNull(SVP);
278           Result = (LRI == LRP);
279           IGC_ASSERT_MESSAGE(Result, "mismatched registers in phi node");
280         }
281       }
282     }
283   }
284   return Result;
285 }
286 
287 /***********************************************************************
288  * Local function for testing one assertion statement.
289  */
testPredicate(const CmpInst * const Cmp,const DstOpndDesc & DstDesc)290 bool testPredicate(const CmpInst *const Cmp, const DstOpndDesc &DstDesc) {
291   bool Result = (!DstDesc.WrRegion);
292   Result = (Result || (Cmp->getType()->getPrimitiveSizeInBits() != 4));
293   Result = (Result || (Cmp->getOperand(0)->getType()->getScalarType()
294     ->getPrimitiveSizeInBits() == 64));
295   IGC_ASSERT(Result);
296   return Result;
297 }
298 
299 } // namespace
300 
// Evaluates the VISA builder API call expression `c` once and, when the result
// is non-zero, reports it through handleCisaCallError, passing the stringified
// text of `c` and the LLVMContext expression `ctx`.
// NOTE(review): the definition ends in `while (0);`, i.e. the macro already
// carries its own terminating semicolon. An invocation followed by `;`
// therefore expands to two statements, which matters inside an unbraced
// if/else. Also note that `c` must not refer to a variable named `result`.
#define CISA_CALL_CTX(c, ctx)                                                  \
  do {                                                                         \
    auto result = c;                                                           \
    if (result != 0) {                                                         \
      handleCisaCallError(#c, (ctx));                                          \
    }                                                                          \
  } while (0);

// Same as CISA_CALL_CTX with the context taken from getContext(), which must
// be callable at the point of use.
#define CISA_CALL(c) CISA_CALL_CTX(c, getContext())
310 
311 namespace llvm {
312 
getVisaTypeFromBytesNumber(unsigned BytesNum,bool IsFloat,genx::Signedness Sign)313 static VISA_Type getVisaTypeFromBytesNumber(unsigned BytesNum, bool IsFloat,
314                                             genx::Signedness Sign) {
315   VISA_Type aliasType;
316   if (IsFloat) {
317     switch (BytesNum) {
318     case 2:
319       aliasType = ISA_TYPE_HF;
320       break;
321     case 4:
322       aliasType = ISA_TYPE_F;
323       break;
324     case 8:
325       aliasType = ISA_TYPE_DF;
326       break;
327     default:
328       report_fatal_error("unknown float type");
329       break;
330     }
331   } else {
332     switch (BytesNum) {
333     case 1:
334       aliasType = (Sign == SIGNED) ? ISA_TYPE_B : ISA_TYPE_UB;
335       break;
336     case 2:
337       aliasType = (Sign == SIGNED) ? ISA_TYPE_W : ISA_TYPE_UW;
338       break;
339     case 4:
340       aliasType = (Sign == SIGNED) ? ISA_TYPE_D : ISA_TYPE_UD;
341       break;
342     case 8:
343       aliasType = (Sign == SIGNED) ? ISA_TYPE_Q : ISA_TYPE_UQ;
344       break;
345     default:
346       report_fatal_error("unknown integer type");
347       break;
348     }
349   }
350   return aliasType;
351 }
352 
llvmToVisaType(Type * Type,genx::Signedness Sign=DONTCARESIGNED)353 static VISA_Type llvmToVisaType(Type *Type,
354                                 genx::Signedness Sign = DONTCARESIGNED) {
355   auto T = Type;
356   IGC_ASSERT(!T->isAggregateType());
357   VISA_Type Result = ISA_TYPE_NUM;
358   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(T);
359       VT && VT->getElementType()->isIntegerTy(1)) {
360     IGC_ASSERT(VT->getNumElements() == 8 || VT->getNumElements() == 16 ||
361                VT->getNumElements() == 32);
362     Result = getVisaTypeFromBytesNumber(VT->getNumElements() / genx::ByteBits,
363                                         false, Sign);
364   } else {
365     if (T->isVectorTy())
366       T = cast<VectorType>(T)->getElementType();
367     if (T->isPointerTy()) {
368       // we might have used DL to get the type size but that'd
369       // overcomplicate this function's type unnecessarily
370       Result = getVisaTypeFromBytesNumber(visa::BytesPerSVMPtr, false,
371                                           DONTCARESIGNED);
372     } else {
373       IGC_ASSERT(T->isFloatingPointTy() || T->isIntegerTy());
374       Result = getVisaTypeFromBytesNumber(T->getScalarSizeInBits() / CHAR_BIT,
375                                           T->isFloatingPointTy(), Sign);
376     }
377   }
378   IGC_ASSERT(Result != ISA_TYPE_NUM);
379   return Result;
380 }
381 
llvmToVisaType(Value * V,genx::Signedness Sign=DONTCARESIGNED)382 static VISA_Type llvmToVisaType(Value *V,
383                                 genx::Signedness Sign = DONTCARESIGNED) {
384   return llvmToVisaType(V->getType(), Sign);
385 }
386 
387 // Due to the lack of access to VISA_GenVar internal interfaces (concerning type, size, etc)
388 // some local DS are required to store such info: CisaVariable and GenericCisaVariable.
389 
390 //===----------------------------------------------------------------------===//
391 // CisaVariable
392 // ------------------
393 //
394 // CisaVariable keeps VISA_GenVar of a specific VISA_Type and provides accessors
395 // to its byte size and number of elements thus emulating some internal vISA machinery.
396 //
397 //===----------------------------------------------------------------------===//
398 class CisaVariable {
399   VISA_Type Type;
400   unsigned ByteSize = 0;
401   VISA_GenVar *VisaVar = nullptr;
402 
403 public:
CisaVariable(VISA_Type T,unsigned BS,VISA_GenVar * V)404   CisaVariable(VISA_Type T, unsigned BS, VISA_GenVar *V)
405       : Type(T), ByteSize(BS), VisaVar(V) {}
406 
getType() const407   VISA_Type getType() const { return Type; }
408 
getGenVar()409   VISA_GenVar *getGenVar() { return VisaVar; }
410 
getByteSize() const411   unsigned getByteSize() const { return ByteSize; }
412 
getNumElements() const413   unsigned getNumElements() const {
414     const int size = CISATypeTable[Type].typeSize;
415     IGC_ASSERT(size);
416     IGC_ASSERT(!(ByteSize % size));
417     return ByteSize / size;
418   }
419 };
420 
421 //===----------------------------------------------------------------------===//
422 // GenericCisaVariable
423 // ------------------
424 //
425 // GenericCisaVariable describes vISA value that isn't intended to have matching llvm::Value
426 // (e.g. stack regs %arg and %retv). It provides interface to get a VisaVar alias with a specific
427 // vISA type.
428 //
429 //===----------------------------------------------------------------------===//
430 class GenericCisaVariable {
431   const char *Name = "";
432   VISA_GenVar *VisaVar = nullptr;
433   unsigned ByteSize = 0;
434 
435   IndexedMap<CisaVariable *> AliasDecls;
436   std::list<CisaVariable> Storage;
437 
getNumElements(VISA_Type T) const438   unsigned getNumElements(VISA_Type T) const {
439     const int size = CISATypeTable[T].typeSize;
440     IGC_ASSERT(size);
441     IGC_ASSERT(!(ByteSize % size));
442     return ByteSize / size;
443   }
444 
445 public:
GenericCisaVariable(const char * Nm,VISA_GenVar * V,unsigned BS)446   GenericCisaVariable(const char *Nm, VISA_GenVar *V, unsigned BS)
447       : Name(Nm), VisaVar(V), ByteSize(BS) {
448     AliasDecls.grow(ISA_TYPE_NUM);
449   }
450 
getAlias(Value * V,VISAKernel * K)451   CisaVariable *getAlias(Value *V, VISAKernel *K) {
452     return getAlias(llvmToVisaType(V), K);
453   }
454 
getAlias(VISA_Type T,VISAKernel * K)455   CisaVariable *getAlias(VISA_Type T, VISAKernel *K) {
456     if (!AliasDecls[T]) {
457       VISA_GenVar *VV = nullptr;
458       K->CreateVISAGenVar(VV, Name, getNumElements(T), T, ALIGN_GRF, VisaVar);
459       Storage.push_back(CisaVariable(T, ByteSize, VV));
460       AliasDecls[T] = &Storage.back();
461     }
462     return AliasDecls[T];
463   }
464 
getByteSize() const465   unsigned getByteSize() const { return ByteSize; }
466 };
467 
468 //===----------------------------------------------------------------------===//
469 /// GenXCisaBuilder
470 /// ------------------
471 ///
472 /// This class encapsulates a creation of vISA kernels.
473 /// It is a FunctionGroupWrapperPass, thus it runs once for each kernel and
474 /// builds vISA kernel via class GenXKernelBuilder.
475 /// All created kernels are stored in CISA Builder object which is provided
476 /// by finalizer.
477 ///
478 //===----------------------------------------------------------------------===//
479 class GenXCisaBuilder : public FGPassImplInterface,
480                         public IDMixin<GenXCisaBuilder> {
481   LLVMContext *Ctx = nullptr;
482 
483 public:
GenXCisaBuilder()484   explicit GenXCisaBuilder() {}
485 
getPassName()486   static StringRef getPassName() { return "GenX CISA construction pass"; }
487   static void getAnalysisUsage(AnalysisUsage &AU);
488   bool runOnFunctionGroup(FunctionGroup &FG) override;
489 
getContext()490   LLVMContext &getContext() {
491     IGC_ASSERT(Ctx);
492     return *Ctx;
493   }
494 };
495 
496 void initializeGenXCisaBuilderWrapperPass(PassRegistry &);
497 using GenXCisaBuilderWrapper = FunctionGroupWrapperPass<GenXCisaBuilder>;
498 
499 //===----------------------------------------------------------------------===//
500 /// GenXKernelBuilder
501 /// ------------------
502 ///
503 /// This class does all the work for creation of vISA kernels.
504 ///
505 //===----------------------------------------------------------------------===//
506 class GenXKernelBuilder {
507   using Register = GenXVisaRegAlloc::Reg;
508 
509   VISAKernel *MainKernel = nullptr;
510   VISAFunction *Kernel = nullptr;
511   genx::KernelMetadata TheKernelMetadata;
512   LLVMContext &Ctx;
513   const DataLayout &DL;
514 
515   std::map<Function *, VISAFunction *> Func2Kern;
516 
517   std::map<std::string, unsigned> StringPool;
518   std::vector<VISA_LabelOpnd *> Labels;
519   std::map<const Value *, unsigned> LabelMap;
520 
521   // loop info for each function
522   std::map<Function *, LoopInfoBase<BasicBlock, Loop> *> Loops;
523   ValueMap<Function *, bool> IsInLoopCache;
524 
525   // whether kernel has barrier or sbarrier instruction
526   bool HasBarrier = false;
527   bool HasCallable = false;
528   bool HasStackcalls = false;
529   bool HasAlloca = false;
530   bool UseNewStackBuilder = false;
531   // GRF width in unit of byte
532   unsigned GrfByteSize = defaultGRFByteSize;
533 
534   unsigned LastLine = 0;
535   unsigned PendingLine = 0;
536   StringRef LastFilename;
537   StringRef PendingFilename;
538   StringRef LastDirectory;
539   StringRef PendingDirectory;
540 
541   // function currently being written during constructor
542   Function *Func = nullptr;
543   // function corresponding to VISAKernel currently being written
544   Function *KernFunc = nullptr;
545   PreDefined_Surface StackSurf;
546 
547   std::map<Function *, VISA_GenVar *> FPMap;
548   SmallVector<InsertValueInst *, 10> RetvInserts;
549 
550   std::map<VISAKernel *, std::map<StringRef, GenericCisaVariable>> CisaVars;
551 
  // The default float control from kernel attribute. Each subroutine may
  // override this control mask, but it should revert back to the default float
  // control mask before exiting from the subroutine.
555   uint32_t DefaultFloatControl = 0;
556 
557   static const uint32_t CR_Mask = 0x1 << 10 | 0x3 << 6 | 0x3 << 4 | 0x1;
558 
559   // normally false, set to true if there is any SIMD CF in the func or this is
560   // (indirectly) called inside any SIMD CF.
561   bool NoMask = false;
562 
563   genx::AlignmentInfo AI;
564   const Instruction *CurrentInst = nullptr;
565 
566   // Map from LLVM Value to pointer to the last used register alias for this
567   // Value.
568   std::map<Value *, Register *> LastUsedAliasMap;
569   unsigned CurrentPadding = 0;
570 
571 public:
572   FunctionGroup *FG = nullptr;
573   GenXLiveness *Liveness = nullptr;
574   GenXNumbering *Numbering = nullptr;
575   GenXVisaRegAlloc *RegAlloc = nullptr;
576   FunctionGroupAnalysis *FGA = nullptr;
577   GenXModule *GM = nullptr;
578   LoopInfoGroupWrapperPass *LIs = nullptr;
579   const GenXSubtarget *Subtarget = nullptr;
580   const GenXBackendConfig *BackendConfig = nullptr;
581   GenXBaling *Baling = nullptr;
582   VISABuilder *CisaBuilder = nullptr;
583 
584 private:
585   bool allowI64Ops() const;
586   void collectKernelInfo();
587   void buildVariables();
588   void buildInstructions();
589 
590   bool buildInstruction(Instruction *Inst);
591   bool buildMainInst(Instruction *Inst, genx::BaleInfo BI, unsigned Mod,
592                      const DstOpndDesc &DstDesc);
593   void buildControlRegUpdate(unsigned Mask, bool Clear);
594   void buildJoin(CallInst *Join, BranchInst *Branch);
595   bool buildBranch(BranchInst *Branch);
596   void buildIndirectBr(IndirectBrInst *Br);
597   void buildIntrinsic(CallInst *CI, unsigned IntrinID, genx::BaleInfo BI,
598                       unsigned Mod, const DstOpndDesc &DstDesc);
599   void buildInputs(Function *F, bool NeedRetIP);
600 
601   void buildFunctionAddr(Instruction *Inst, const DstOpndDesc &DstDesc);
602   void buildLoneWrRegion(const DstOpndDesc &Desc);
603   void buildLoneWrPredRegion(Instruction *Inst, genx::BaleInfo BI);
604   void buildLoneOperand(Instruction *Inst, genx::BaleInfo BI, unsigned Mod,
605                         const DstOpndDesc &DstDesc);
606 
607   VISA_PredVar *getPredicateVar(Register *Idx);
608   VISA_PredVar *getPredicateVar(Value *V);
609   VISA_PredVar *getZeroedPredicateVar(Value *V);
610   VISA_SurfaceVar *getPredefinedSurfaceVar(GlobalVariable &GV);
611   VISA_EMask_Ctrl getExecMaskFromWrPredRegion(Instruction *WrPredRegion,
612                                                      bool IsNoMask);
613   VISA_EMask_Ctrl getExecMaskFromWrRegion(const DstOpndDesc &DstDesc,
614                                                  bool IsNoMask = false);
615   unsigned getOrCreateLabel(const Value *V, int Kind);
616   int getLabel(const Value *V) const;
617   void setLabel(const Value *V, unsigned Num);
618 
619   void emitOptimizationHints();
620 
621   Value *getPredicateOperand(Instruction *Inst, unsigned OperandNum,
622                              genx::BaleInfo BI, VISA_PREDICATE_CONTROL &Control,
623                              VISA_PREDICATE_STATE &PredField,
624                              VISA_EMask_Ctrl *MaskCtrl);
625   bool isInLoop(BasicBlock *BB);
626 
627   void addLabelInst(const Value *BB);
628   void buildPhiNode(PHINode *Phi);
629   void buildGoto(CallInst *Goto, BranchInst *Branch);
630   void buildCall(CallInst *CI, const DstOpndDesc &DstDesc);
631   void buildStackCall(CallInst *CI, const DstOpndDesc &DstDesc);
632   void buildStackCallLight(CallInst *CI, const DstOpndDesc &DstDesc);
633   void buildInlineAsm(CallInst *CI);
634   void buildPrintIndex(CallInst *CI, unsigned IntrinID, unsigned Mod,
635                        const DstOpndDesc &DstDesc);
636   void buildSelectInst(SelectInst *SI, genx::BaleInfo BI, unsigned Mod,
637                        const DstOpndDesc &DstDesc);
638   void buildBinaryOperator(BinaryOperator *BO, genx::BaleInfo BI, unsigned Mod,
639                            const DstOpndDesc &DstDesc);
640 #if (LLVM_VERSION_MAJOR > 8)
641   void buildUnaryOperator(UnaryOperator *UO, genx::BaleInfo BI, unsigned Mod,
642                           const DstOpndDesc &DstDesc);
643 #endif
644   void buildBoolBinaryOperator(BinaryOperator *BO);
645   void buildSymbolInst(CallInst *GAddrInst, unsigned Mod,
646                        const DstOpndDesc &DstDesc);
647   void buildCastInst(CastInst *CI, genx::BaleInfo BI, unsigned Mod,
648                      const DstOpndDesc &DstDesc);
649   void buildConvertAddr(CallInst *CI, genx::BaleInfo BI, unsigned Mod,
650                         const DstOpndDesc &DstDesc);
651   void buildAlloca(CallInst *CI, unsigned IntrinID, unsigned Mod,
652                    const DstOpndDesc &DstDesc);
653   void buildWritePredefSurface(CallInst &CI);
654   void buildGetHWID(CallInst *CI, const DstOpndDesc &DstDesc);
655   void addWriteRegionLifetimeStartInst(Instruction *WrRegion);
656   void addLifetimeStartInst(Instruction *Inst);
657   void AddGenVar(Register &Reg);
658   void buildRet(ReturnInst *RI);
659   void buildNoopCast(CastInst *CI, genx::BaleInfo BI, unsigned Mod,
660                      const DstOpndDesc &DstDesc);
661   void buildCmp(CmpInst *Cmp, genx::BaleInfo BI, const DstOpndDesc &DstDesc);
662   void buildExtractRetv(ExtractValueInst *Inst);
663   void buildInsertRetv(InsertValueInst *Inst);
664 
665   VISA_VectorOpnd *createState(Register *Reg, unsigned Offset, bool IsDst);
666   VISA_Type getVISAImmTy(uint8_t ImmTy);
667 
668   VISA_PredOpnd *createPredOperand(VISA_PredVar *PredVar,
669                                    VISA_PREDICATE_STATE State,
670                                    VISA_PREDICATE_CONTROL Control);
671 
672   VISA_VectorOpnd *createCisaSrcOperand(VISA_GenVar *Decl, VISA_Modifier Mod,
673                                         unsigned VStride, unsigned Width,
674                                         unsigned HStride, unsigned ROffset,
675                                         unsigned COffset);
676 
677   VISA_VectorOpnd *createCisaDstOperand(VISA_GenVar *Decl, unsigned HStride,
678                                         unsigned ROffset, unsigned COffset);
679 
680   VISA_VectorOpnd *createDestination(Value *Dest, genx::Signedness Signed,
681                                      unsigned Mod, const DstOpndDesc &DstDesc,
682                                      genx::Signedness *SignedRes = nullptr,
683                                      unsigned *Offset = nullptr);
684   VISA_VectorOpnd *createDestination(CisaVariable *Dest,
685                                      genx::Signedness Signed,
686                                      unsigned *Offset = nullptr);
687   VISA_VectorOpnd *createDestination(Value *Dest,
688                                      genx::Signedness Signed,
689                                      unsigned *Offset = nullptr);
690   VISA_VectorOpnd *createSourceOperand(Instruction *Inst,
691                                        genx::Signedness Signed,
692                                        unsigned OperandNum, genx::BaleInfo BI,
693                                        unsigned Mod = 0,
694                                        genx::Signedness *SignedRes = nullptr,
695                                        unsigned MaxWidth = 16);
696   VISA_VectorOpnd *createSource(CisaVariable *V, genx::Signedness Signed,
697                                 unsigned MaxWidth = 16,
698                                 unsigned *Offset = nullptr);
699   VISA_VectorOpnd *createSource(Value *V, genx::Signedness Signed, bool Baled,
700                                 unsigned Mod = 0,
701                                 genx::Signedness *SignedRes = nullptr,
702                                 unsigned MaxWidth = 16,
703                                 unsigned *Offset = nullptr);
704   VISA_VectorOpnd *createSource(Value *V, genx::Signedness Signed,
705                                 unsigned MaxWidth = 16,
706                                 unsigned *Offset = nullptr);
707 
708   std::string createInlineAsmOperand(Register *Reg, genx::Region *R, bool IsDst,
709                                      genx::Signedness Signed,
710                                      genx::ConstraintType Ty, unsigned Mod);
711 
712   std::string createInlineAsmSourceOperand(Value *V, genx::Signedness Signed,
713                                            bool Baled, genx::ConstraintType Ty,
714                                            unsigned Mod = 0,
715                                            unsigned MaxWidth = 16);
716 
717   std::string createInlineAsmDestinationOperand(Value *Dest,
718                                                 genx::Signedness Signed,
719                                                 genx::ConstraintType Ty,
720                                                 unsigned Mod,
721                                                 const DstOpndDesc &DstDesc);
722 
723   VISA_VectorOpnd *createImmediateOperand(Constant *V, genx::Signedness Signed);
724 
725   VISA_PredVar *createPredicateDeclFromSelect(Instruction *SI,
726                                               genx::BaleInfo BI,
727                                               VISA_PREDICATE_CONTROL &Control,
728                                               VISA_PREDICATE_STATE &PredField,
729                                               VISA_EMask_Ctrl *MaskCtrl);
730 
731   VISA_RawOpnd *createRawSourceOperand(const Instruction *Inst,
732                                        unsigned OperandNum, genx::BaleInfo BI,
733                                        genx::Signedness Signed);
734   VISA_RawOpnd *createRawDestination(Value *V, const DstOpndDesc &DstDesc,
735                                      genx::Signedness Signed);
736 
737   VISA_VectorOpnd *createAddressOperand(Value *V, bool IsDst);
738 
739   void addDebugInfo();
740 
741   void deduceRegion(Region *R, bool IsDest, unsigned MaxWidth = 16);
742 
743   VISA_VectorOpnd *createGeneralOperand(genx::Region *R, VISA_GenVar *Decl,
744                                         genx::Signedness Signed, unsigned Mod,
745                                         bool IsDest, unsigned MaxWidth = 16);
746   VISA_VectorOpnd *createIndirectOperand(genx::Region *R,
747                                          genx::Signedness Signed, unsigned Mod,
748                                          bool IsDest, unsigned MaxWidth = 16);
749   VISA_VectorOpnd *createRegionOperand(genx::Region *R, VISA_GenVar *Decl,
750                                        genx::Signedness Signed, unsigned Mod,
751                                        bool IsDest, unsigned MaxWidth = 16);
752   VISA_PredOpnd *createPredFromWrRegion(const DstOpndDesc &DstDesc);
753 
754   VISA_PredOpnd *createPred(Instruction *Inst, genx::BaleInfo BI,
755                             unsigned OperandNum);
756 
757   Instruction *getOriginalInstructionForSource(Instruction *CI,
758                                                genx::BaleInfo BI);
759   void buildConvert(CallInst *CI, genx::BaleInfo BI, unsigned Mod,
760                     const DstOpndDesc &DstDesc);
761   std::string buildAsmName() const;
762   void beginFunction(Function *Func);
763   void beginFunctionLight(Function *Func);
764   void endFunction(Function *Func, ReturnInst *RI);
765 
766   unsigned getFuncArgsSize(Function *F);
767   unsigned getValueSize(Type *T, unsigned Mod = 32) const;
getValueSize(CisaVariable * V) const768   unsigned getValueSize(CisaVariable *V) const {
769     return V->getByteSize();
770   }
getValueSize(Value * V,unsigned Mod=32) const771   unsigned getValueSize(Value *V, unsigned Mod = 32) const {
772     return getValueSize(V->getType(), Mod);
773   }
774   GenericCisaVariable *createCisaVariable(VISAKernel *Kernel, const char *Name,
775                                    VISA_GenVar *AliasVar, unsigned ByteSize);
776 
777   template <typename T1, typename T2>
778   void emitVectorCopy(
779       T1 *Dst, T2 *Src, unsigned &RowOff, unsigned &ColOff, unsigned &SrcRowOff,
780       unsigned &SrcColOff, int TotalSize, bool DoCopy = true);
781 
782   void pushStackArg(VISA_StateOpndHandle *Dst, Value *Src, int TotalSz,
783                     unsigned &RowOff, unsigned &ColOff, unsigned &SrcRowOff,
784                     unsigned &SrcColOff, bool DoCopy = true);
785   void popStackArg(Value *Dst, VISA_StateOpndHandle *Src, int TotalSz,
786                    unsigned &RowOff, unsigned &ColOff, unsigned &SrcRowOff,
787                    unsigned &SrcColOff, int &PrevStackOff);
788   Signedness getCommonSignedness(ArrayRef<Value *> Vs) const;
789 
790   Register *getLastUsedAlias(Value *V) const;
791 
792   template <typename... Args>
793   Register *getRegForValueUntypedAndSaveAlias(Args &&... args);
794   template <typename... Args>
795   Register *getRegForValueOrNullAndSaveAlias(Args &&... args);
796   template <typename... Args>
797   Register *getRegForValueAndSaveAlias(Args &&... args);
798 
799   void runOnKernel();
800   void runOnFunction();
801 
802 public:
  /// Construct a builder for the function group \p FG.
  ///
  /// Kernel metadata is read from the head function of the group, the LLVM
  /// context is taken from the group and the data layout from its module. A
  /// pointer to the group is kept, and collectKernelInfo() runs before the
  /// constructor returns.
  GenXKernelBuilder(FunctionGroup &FG)
      : TheKernelMetadata(FG.getHead()), Ctx(FG.getContext()),
        DL(FG.getModule()->getDataLayout()), FG(&FG) {
    collectKernelInfo();
  }
~GenXKernelBuilder()808   ~GenXKernelBuilder() { clearLoops(); }
clearLoops()809   void clearLoops() {
810     for (auto i = Loops.begin(), e = Loops.end(); i != e; ++i) {
811       delete i->second;
812       i->second = nullptr;
813     }
814     Loops.clear();
815   }
816 
817   bool run();
818 
getContext()819   LLVMContext &getContext() { return Ctx; }
820 
821   unsigned addStringToPool(StringRef Str);
822   StringRef getStringByIndex(unsigned Val);
823 };
createGenXCisaBuilderWrapperPass()824 ModulePass *createGenXCisaBuilderWrapperPass() {
825   initializeGenXCisaBuilderWrapperPass(*PassRegistry::getPassRegistry());
826   return new GenXCisaBuilderWrapper();
827 }
828 
829 } // end namespace llvm
830 
// Registration of the GenXCisaBuilderWrapper pass under the name
// "GenXCisaBuilderPassWrapper", together with the passes that are declared
// as its initialization dependencies.
INITIALIZE_PASS_BEGIN(GenXCisaBuilderWrapper, "GenXCisaBuilderPassWrapper",
                      "GenXCisaBuilderPassWrapper", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfoGroupWrapperPassWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXGroupBalingWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXLivenessWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXVisaRegAllocWrapper)
INITIALIZE_PASS_DEPENDENCY(GenXModule)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GenXBackendConfig)
INITIALIZE_PASS_END(GenXCisaBuilderWrapper, "GenXCisaBuilderPassWrapper",
                    "GenXCisaBuilderPassWrapper", false, false)
842 
/// Declare the analyses this pass requires and state that it preserves all
/// analyses. The required set matches what runOnFunctionGroup() fetches to
/// configure the kernel builder.
void GenXCisaBuilder::getAnalysisUsage(AnalysisUsage &AU) {
  AU.addRequired<LoopInfoGroupWrapperPass>();
  AU.addRequired<GenXGroupBaling>();
  AU.addRequired<GenXLiveness>();
  AU.addRequired<GenXVisaRegAlloc>();
  AU.addRequired<GenXModule>();
  AU.addRequired<FunctionGroupAnalysis>();
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<GenXBackendConfig>();
  AU.setPreservesAll();
}
854 
runOnFunctionGroup(FunctionGroup & FG)855 bool GenXCisaBuilder::runOnFunctionGroup(FunctionGroup &FG) {
856   Ctx = &FG.getContext();
857   std::unique_ptr<GenXKernelBuilder> KernelBuilder(new GenXKernelBuilder(FG));
858   KernelBuilder->FGA = getAnalysisIfAvailable<FunctionGroupAnalysis>();
859   KernelBuilder->GM = getAnalysisIfAvailable<GenXModule>();
860   KernelBuilder->CisaBuilder = KernelBuilder->GM->GetCisaBuilder();
861   KernelBuilder->RegAlloc = getAnalysisIfAvailable<GenXVisaRegAlloc>();
862   KernelBuilder->Baling = &getAnalysis<GenXGroupBaling>();
863   KernelBuilder->LIs = &getAnalysis<LoopInfoGroupWrapperPass>();
864   KernelBuilder->Liveness = &getAnalysis<GenXLiveness>();
865   KernelBuilder->Subtarget = &getAnalysis<TargetPassConfig>()
866                                   .getTM<GenXTargetMachine>()
867                                   .getGenXSubtarget();
868   KernelBuilder->BackendConfig = &getAnalysis<GenXBackendConfig>();
869 
870   KernelBuilder->run();
871 
872   GenXModule *GM = KernelBuilder->GM;
873   if (GM->HasInlineAsm()) {
874     auto VISAAsmTextReader = GM->GetVISAAsmReader();
875     auto VISAText = KernelBuilder->CisaBuilder->GetAsmTextStream().str();
876     CISA_CALL(VISAAsmTextReader->ParseVISAText(VISAText, ""));
877   }
878 
879   return false;
880 }
881 
isDerivedFromUndef(Constant * C)882 static bool isDerivedFromUndef(Constant *C) {
883   if (isa<UndefValue>(C))
884     return true;
885   if (!isa<ConstantExpr>(C))
886     return false;
887   ConstantExpr *CE = cast<ConstantExpr>(C);
888   for (auto &Opnd : CE->operands())
889     if (isDerivedFromUndef(cast<Constant>(Opnd)))
890       return true;
891   return false;
892 }
893 
/// Pack \p f into the 8-bit float layout produced here: bit 7 is the sign,
/// bits 6..4 are the single-precision biased exponent minus 124, and bits
/// 3..0 are the four most significant fraction bits.
///
/// Positive and negative zero are returned as 0x00 and 0x80 respectively.
/// Any other input is expected (and asserted) to have a biased exponent in
/// [124, 131], no fraction bits below the top four, and not to collapse onto
/// the all-zero exponent/fraction pattern that is used for zero.
///
/// \returns the packed byte in the low 8 bits of the result.
static unsigned get8bitPackedFloat(float f) {
  union {
    float f;
    unsigned u;
  } u;

  u.f = f;
  unsigned char Sign = (u.u >> 31) << 7;
  unsigned Exp = (u.u >> 23) & 0xFF;
  unsigned Frac = u.u & 0x7FFFFF;
  // +0.0 / -0.0: only the sign bit is emitted.
  if (Exp == 0 && Frac == 0)
    return Sign;

  IGC_ASSERT(Exp >= 124);
  IGC_ASSERT(Exp <= 131);
  Exp -= 124;
  IGC_ASSERT((Frac & 0x780000) == Frac);
  Frac >>= 19;
  // Exp is already rebased to 0..7 at this point, so the former comparison
  // against 124 could never be true. A non-zero input whose rebased exponent
  // and fraction are both zero would be encoded exactly like zero above, so
  // that is the combination that has to be rejected.
  IGC_ASSERT(!(Exp == 0 && Frac == 0));

  Sign |= (Exp << 4);
  Sign |= Frac;

  return Sign;
}
919 
getISatSrcSign(unsigned IID)920 static Signedness getISatSrcSign(unsigned IID) {
921   switch (IID) {
922   case GenXIntrinsic::genx_sstrunc_sat:
923   case GenXIntrinsic::genx_ustrunc_sat:
924     return SIGNED;
925   case GenXIntrinsic::genx_sutrunc_sat:
926   case GenXIntrinsic::genx_uutrunc_sat:
927     return UNSIGNED;
928   default:
929     return DONTCARESIGNED;
930   }
931 }
932 
getISatDstSign(unsigned IID)933 static Signedness getISatDstSign(unsigned IID) {
934   switch (IID) {
935   case GenXIntrinsic::genx_sstrunc_sat:
936   case GenXIntrinsic::genx_sutrunc_sat:
937     return SIGNED;
938   case GenXIntrinsic::genx_ustrunc_sat:
939   case GenXIntrinsic::genx_uutrunc_sat:
940     return UNSIGNED;
941   default:
942     return DONTCARESIGNED;
943   }
944 }
945 
getISatSrcSign(Value * V)946 static Signedness getISatSrcSign(Value *V) {
947   return getISatSrcSign(GenXIntrinsic::getGenXIntrinsicID(V));
948 }
949 
getISatDstSign(Value * V)950 static Signedness getISatDstSign(Value *V) {
951   return getISatDstSign(GenXIntrinsic::getGenXIntrinsicID(V));
952 }
953 
954 // isExtOperandBaled : check whether a sext/zext operand is baled.
isExtOperandBaled(Instruction * Inst,unsigned OpIdx,const GenXBaling * Baling)955 static bool isExtOperandBaled(Instruction *Inst, unsigned OpIdx,
956                               const GenXBaling *Baling) {
957   BaleInfo InstBI = Baling->getBaleInfo(Inst);
958   if (!InstBI.isOperandBaled(OpIdx))
959     return false;
960 
961   auto OpInst = cast<Instruction>(Inst->getOperand(OpIdx));
962   BaleInfo OpBI = Baling->getBaleInfo(OpInst);
963   return OpBI.Type == BaleInfo::ZEXT || OpBI.Type == BaleInfo::SEXT;
964 }
965 
isExtOperandBaled(Use & U,const GenXBaling * Baling)966 static bool isExtOperandBaled(Use &U, const GenXBaling *Baling) {
967   return isExtOperandBaled(cast<Instruction>(U.get()), U.getOperandNo(),
968                            Baling);
969 }
970 
addKernelAttrsFromMetadata(VISAKernel & Kernel,const KernelMetadata & KM,const GenXSubtarget * Subtarget)971 void addKernelAttrsFromMetadata(VISAKernel &Kernel, const KernelMetadata &KM,
972                                 const GenXSubtarget* Subtarget) {
973   IGC_ASSERT(Subtarget);
974   unsigned SLMSizeInKb = divideCeil(KM.getSLMSize(), 1024);
975   if (SLMSizeInKb > Subtarget->getMaxSlmSize())
976     report_fatal_error("SLM size exceeds target limits");
977   if (!Subtarget->isOCLRuntime() && SLMSizeInKb > 255)
978     report_fatal_error("SLM size greater than 255KB is not supported by CMRT");
979   Kernel.AddKernelAttribute("SLMSize", sizeof(SLMSizeInKb), &SLMSizeInKb);
980 
981   // Load thread payload from memory.
982   if (Subtarget->hasThreadPayloadInMemory()) {
983     // The number of GRFs for per thread inputs (thread local IDs)
984     unsigned NumGRFs = 0;
985     bool HasImplicit = false;
986     for (auto Kind : KM.getArgKinds()) {
987       if (Kind & 0x8)
988         HasImplicit = true;
989     }
990     if (Subtarget->isOCLRuntime()) {
991       // When CM kernel is run with OCL runtime, it is dispatched in a
992       // special "SIMD1" mode (aka "Programmable Media Kernels").
993       // This mode implies that we always have a "full" thread payload,
994       // even when CM kernel does *not* have implicit arguments.
995       // Payload format:
996       // | 0-15     | 16 - 31  | 32 - 47  | 46 - 256 |
997       // | localIDX | localIDY | localIDZ | unused   |
998       NumGRFs = 1;
999     } else {
1000       // One GRF for per thread input size for CM
1001       NumGRFs = std::max(HasImplicit ? 1U : 0U, NumGRFs);
1002     }
1003 
1004     uint16_t Bytes = NumGRFs * Subtarget->getGRFByteSize();
1005     Kernel.AddKernelAttribute("PerThreadInputSize", sizeof(Bytes), &Bytes);
1006   }
1007 
1008 }
1009 
// Legalize a name for use as a filename or in visa asm: every character that
// is neither alphanumeric nor '_' is replaced by '_'.
static std::string legalizeName(std::string Name) {
  for (char &Ch : Name) {
    const unsigned char Code = Ch;
    if (!isalnum(Code) && Code != '_')
      Ch = '_';
  }
  return Name;
}
1017 
buildAsmName() const1018 std::string GenXKernelBuilder::buildAsmName() const {
1019   std::string AsmName;
1020   auto UserAsmName = AsmNameOpt.getValue();
1021   if (UserAsmName.empty()) {
1022     AsmName = vc::legalizeShaderDumpName(TheKernelMetadata.getName());
1023   } else {
1024     int idx = -1;
1025     auto *KernelMDs =
1026         FG->getModule()->getOrInsertNamedMetadata(genx::FunctionMD::GenXKernels);
1027     unsigned E = KernelMDs->getNumOperands();
1028     for (unsigned I = 0; I < E; ++I) {
1029       MDNode *KernelMD = KernelMDs->getOperand(I);
1030       StringRef KernelName =
1031           cast<MDString>(KernelMD->getOperand(genx::KernelMDOp::Name).get())
1032               ->getString();
1033       if (KernelName == TheKernelMetadata.getName()) {
1034         idx = I;
1035         break;
1036       }
1037     }
1038     IGC_ASSERT(idx >= 0);
1039     // Reverse kernel ASM names during codegen.
1040     // This provides an option to match the old compiler's output.
1041     if (ReverseKernels.getValue())
1042       idx = E - idx - 1;
1043     AsmName = (UserAsmName + llvm::Twine('_') + llvm::Twine(idx)).str();
1044   }
1045 
1046   // Currently installed shader dumper can provide its own path for
1047   // dumps. Prepend it to generated asm name.
1048   if (!BackendConfig->hasShaderDumper())
1049     return AsmName;
1050 
1051   vc::ShaderDumper &Dumper = BackendConfig->getShaderDumper();
1052   return Dumper.composeDumpPath(AsmName);
1053 }
1054 
/// Set up the vISA kernel for a function group whose head is a kernel.
///
/// Creates the kernel in the CISA builder, records it in Func2Kern for the
/// current function, and attaches kernel attributes: those derived from the
/// kernel metadata, "Target", "OutputAsmPath", and, depending on argument
/// I/O kinds and function attributes, "Composable", "Caller", "Callable" and
/// "Entry". It then emits optimization hints, builds the variables and
/// finally the kernel inputs.
void GenXKernelBuilder::runOnKernel() {
  IGC_ASSERT(TheKernelMetadata.isKernel());

  const std::string KernelName = TheKernelMetadata.getName().str();
  CisaBuilder->AddKernel(MainKernel, KernelName.c_str());
  Kernel = static_cast<VISAFunction *>(MainKernel);
  Func2Kern[Func] = Kernel;

  IGC_ASSERT_MESSAGE(Kernel, "Kernel initialization failed!");
  LLVM_DEBUG(dbgs() << "=== PROCESS KERNEL(" << KernelName << ") ===\n");

  addKernelAttrsFromMetadata(*Kernel, TheKernelMetadata, Subtarget);

  // Set CM target for all functions produced by VC.
  // See visa spec for CMTarget value (section 4, Kernel).
  const uint8_t CMTarget = 0;
  CISA_CALL(Kernel->AddKernelAttribute("Target", sizeof(CMTarget), &CMTarget));

  bool NeedRetIP = false; // Need special return IP variable for FC.
  // For a kernel, add an attribute for asm filename for the jitter.
  std::string AsmName = buildAsmName();
  StringRef AsmNameRef = AsmName;
  CISA_CALL(Kernel->AddKernelAttribute("OutputAsmPath", AsmNameRef.size(),
                                       AsmNameRef.begin()));
  // Populate variable attributes if any.
  // An argument whose I/O kind is Input, Output or InputOutput gets the
  // matching attribute on its register and makes the kernel "Composable".
  unsigned Idx = 0;
  bool IsComposable = false;
  for (auto &Arg : Func->args()) {
    const char *Kind = nullptr;
    switch (TheKernelMetadata.getArgInputOutputKind(Idx++)) {
    default:
      break;
    case KernelMetadata::ArgIOKind::Input:
      Kind = "Input";
      break;
    case KernelMetadata::ArgIOKind::Output:
      Kind = "Output";
      break;
    case KernelMetadata::ArgIOKind::InputOutput:
      Kind = "Input_Output";
      break;
    }
    if (Kind != nullptr) {
      auto R = getRegForValueUntypedAndSaveAlias(Func, &Arg);
      IGC_ASSERT(R);
      IGC_ASSERT(R->Category == RegCategory::GENERAL);
      R->addAttribute(addStringToPool(Kind), "");
      IsComposable = true;
    }
  }
  if (IsComposable)
    CISA_CALL(Kernel->AddKernelAttribute("Composable", 0, ""));
  // Both the caller and the callable side need the RetIP variable.
  if (HasCallable) {
    CISA_CALL(Kernel->AddKernelAttribute("Caller", 0, ""));
    NeedRetIP = true;
  }
  if (Func->hasFnAttribute("CMCallable")) {
    CISA_CALL(Kernel->AddKernelAttribute("Callable", 0, ""));
    NeedRetIP = true;
  }
  if (Func->hasFnAttribute("CMEntry")) {
    CISA_CALL(Kernel->AddKernelAttribute("Entry", 0, ""));
  }

  if (NeedRetIP) {
    // Ask RegAlloc to add a special variable RetIP.
    RegAlloc->addRetIPArgument();
    auto R = RegAlloc->getRetIPArgument();
    R->NameStr = "RetIP";
    R->addAttribute(addStringToPool("Input_Output"), "");
  }

  // Emit optimization hints if any.
  emitOptimizationHints();

  // Build variables
  buildVariables();

  // Build input variables
  buildInputs(Func, NeedRetIP);
}
1136 
runOnFunction()1137 void GenXKernelBuilder::runOnFunction() {
1138   VISAFunction *visaFunc = nullptr;
1139 
1140   std::string FuncName = Func->getName().str();
1141   CisaBuilder->AddFunction(visaFunc, FuncName.c_str());
1142   std::string AsmName = buildAsmName().append("_").append(FuncName);
1143   CISA_CALL(visaFunc->AddKernelAttribute("OutputAsmPath", AsmName.size(),
1144                                          AsmName.c_str()));
1145   IGC_ASSERT(visaFunc);
1146   Func2Kern[Func] = visaFunc;
1147   Kernel = visaFunc;
1148   buildVariables();
1149 }
1150 
run()1151 bool GenXKernelBuilder::run() {
1152   GrfByteSize = Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize;
1153   StackSurf = Subtarget ? Subtarget->stackSurface() : PREDEFINED_SURFACE_STACK;
1154 
1155   UseNewStackBuilder =
1156       BackendConfig->useNewStackBuilder() && Subtarget->isOCLRuntime();
1157 
1158   IGC_ASSERT(Subtarget);
1159 
1160   Func = FG->getHead();
1161   if (genx::fg::isGroupHead(*Func))
1162     runOnKernel();
1163   else if (genx::fg::isSubGroupHead(*Func))
1164     runOnFunction();
1165   else
1166     llvm_unreachable("unknown function group type");
1167 
1168   // Build instructions
1169   buildInstructions();
1170 
1171   // Reset Regalloc hook
1172   RegAlloc->SetRegPushHook(nullptr, nullptr);
1173 
1174   if (TheKernelMetadata.isKernel()) {
1175     // For a kernel with no barrier instruction, add a NoBarrier attribute.
1176     if (!HasBarrier)
1177       CISA_CALL(Kernel->AddKernelAttribute("NoBarrier", 0, nullptr));
1178   }
1179 
1180   NumVisaInsts += Kernel->getvIsaInstCount();
1181 
1182   return false;
1183 }
1184 
PatchImpArgOffset(Function * F,const GenXSubtarget * ST,const KernelMetadata & KM)1185 static bool PatchImpArgOffset(Function *F, const GenXSubtarget *ST,
1186                               const KernelMetadata &KM) {
1187   IGC_ASSERT(ST);
1188   if (ST->isOCLRuntime())
1189     return false;
1190   if (!ST->hasThreadPayloadInMemory())
1191     return false;
1192 
1193   unsigned Idx = 0;
1194   for (auto i = F->arg_begin(), e = F->arg_end(); i != e; ++i, ++Idx) {
1195     uint8_t Kind = (KM.getArgKind(Idx));
1196     if (Kind & 0xf8)
1197       return true;
1198   }
1199 
1200   return false;
1201 }
1202 
getStateVariableSizeInBytes(const Type * Ty,const unsigned ElemSize)1203 static unsigned getStateVariableSizeInBytes(const Type *Ty,
1204                                             const unsigned ElemSize) {
1205   auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
1206   if (!VTy)
1207     return ElemSize;
1208   return ElemSize * VTy->getNumElements();
1209 }
1210 
getInputSizeInBytes(const DataLayout & DL,const unsigned ArgCategory,Type * Ty)1211 static unsigned getInputSizeInBytes(const DataLayout &DL,
1212                                     const unsigned ArgCategory, Type *Ty) {
1213   switch (ArgCategory) {
1214   case RegCategory::GENERAL:
1215     return DL.getTypeSizeInBits(Ty) / genx::ByteBits;
1216   case RegCategory::SAMPLER:
1217     return getStateVariableSizeInBytes(Ty, genx::SamplerElementBytes);
1218   case RegCategory::SURFACE:
1219     return getStateVariableSizeInBytes(Ty, genx::SurfaceElementBytes);
1220   default:
1221     break;
1222   }
1223   IGC_ASSERT_EXIT_MESSAGE(0, "Unexpected register category for input");
1224 }
1225 
/// Declare the implicit input variables of the current vISA kernel.
///
/// Inputs are created in three steps: one per non-skipped argument of \p F,
/// one per module global that carries a positive byte-offset attribute, and,
/// when \p NeedRetIP is set, one for the RetIP register, which is added last.
void GenXKernelBuilder::buildInputs(Function *F, bool NeedRetIP) {

  IGC_ASSERT_MESSAGE(F->arg_size() == TheKernelMetadata.getNumArgs(),
    "Mismatch between metadata for kernel and number of args");

  // Globals to be bound statically: collect each global together with the
  // offset parsed from its byte-offset attribute; only positive offsets are
  // kept.
  std::vector<std::pair<GlobalVariable *, int32_t>> Bindings;
  Module *M = F->getParent();
  for (auto &GV : M->getGlobalList()) {
    int32_t Offset = 0;
    GV.getAttribute(genx::FunctionMD::GenXByteOffset)
        .getValueAsString()
        .getAsInteger(0, Offset);
    if (Offset > 0)
      Bindings.emplace_back(&GV, Offset);
  }
  // Each argument.
  unsigned Idx = 0;
  bool PatchImpArgOff = PatchImpArgOffset(F, Subtarget, TheKernelMetadata);
  for (auto i = F->arg_begin(), e = F->arg_end(); i != e; ++i, ++Idx) {
    if (TheKernelMetadata.shouldSkipArg(Idx))
      continue;
    Argument *Arg = &*i;
    Register *Reg = getRegForValueUntypedAndSaveAlias(F, Arg);
    IGC_ASSERT(Reg);
    uint8_t Kind = TheKernelMetadata.getArgKind(Idx);
    uint16_t Offset = 0;
    if (!PatchImpArgOff) {
      Offset = TheKernelMetadata.getArgOffset(Idx);
    }
    else {
      // Patched layout: an argument whose upper kind bits (Kind >> 3) equal 3
      // is placed at offset GrfByteSize; every other argument has its
      // metadata offset shifted by GrfByteSize.
      if ((Kind >> 3) == 3) {
        Offset = GrfByteSize;
      } else {
        Offset = (TheKernelMetadata.getArgOffset(Idx) + GrfByteSize);
      }
    }
    // Argument size in bytes.
    const unsigned NumBytes = getInputSizeInBytes(
        DL, TheKernelMetadata.getArgCategory(Idx), Arg->getType());

    // The low three bits of Kind select the input category; the remaining
    // bits are passed on as the last argument of the input declaration.
    switch (Kind & 0x7) {
    case visa::VISA_INPUT_GENERAL:
    case visa::VISA_INPUT_SAMPLER:
    case visa::VISA_INPUT_SURFACE:
      CISA_CALL(Kernel->CreateVISAImplicitInputVar(
          Reg->GetVar<VISA_GenVar>(Kernel), Offset, NumBytes, Kind >> 3));
      break;

    default:
      report_fatal_error("Unknown input category");
      break;
    }
  }
  // Add pseudo-input for global variables with offset attribute.
  for (auto &Item : Bindings) {
    // TODO: sanity check. No overlap with other inputs.
    GlobalVariable *GV = Item.first;
    // NOTE(review): the collected offset is an int32_t and is narrowed to
    // 16 bits here - confirm that larger offsets cannot occur.
    uint16_t Offset = Item.second;
    IGC_ASSERT(Offset > 0);
    uint16_t NumBytes = (GV->getValueType()->getPrimitiveSizeInBits() / 8U);
    uint8_t Kind = KernelMetadata::IMP_PSEUDO_INPUT;
    Register *Reg = getRegForValueUntypedAndSaveAlias(F, GV);
    CISA_CALL(Kernel->CreateVISAImplicitInputVar(Reg->GetVar<VISA_GenVar>(Kernel),
                                                 Offset, NumBytes, Kind >> 3));
  }
  // Add the special RetIP argument.
  // Current assumption in Finalizer is that RetIP should be the last argument,
  // so we add it after generation of all other arguments.
  if (NeedRetIP) {
    Register *Reg = RegAlloc->getRetIPArgument();
    uint16_t Offset = (127 * GrfByteSize + 6 * 4); // r127.6
    uint16_t NumBytes = (64 / 8);
    uint8_t Kind = KernelMetadata::IMP_PSEUDO_INPUT;
    CISA_CALL(Kernel->CreateVISAImplicitInputVar(Reg->GetVar<VISA_GenVar>(Kernel),
                                                 Offset, NumBytes, Kind >> 3));
  }
}
1304 
1305 // FIXME: We should use NM by default once code quality issues are addressed
1306 // in vISA compiler.
setNoMaskByDefault(Function * F,std::unordered_set<Function * > & Visited)1307 static bool setNoMaskByDefault(Function *F,
1308                                std::unordered_set<Function *> &Visited) {
1309   for (auto &BB : F->getBasicBlockList())
1310     if (GotoJoin::isGotoBlock(&BB))
1311       return true;
1312 
1313   // Check if this is subroutine call.
1314   for (auto U : F->users()) {
1315     if (auto CI = dyn_cast<CallInst>(U)) {
1316       Function *G = CI->getFunction();
1317       if (Visited.count(G))
1318         continue;
1319       Visited.insert(G);
1320       if (setNoMaskByDefault(G, Visited))
1321         return true;
1322     }
1323   }
1324 
1325   return false;
1326 }
1327 
/// Emit vISA for every function of the function group.
///
/// For each function this selects the vISA kernel/function that receives its
/// code, emits a subroutine label and the function prologue, applies the
/// float control attribute if present, and then walks all basic blocks
/// emitting block labels, lifetime-start markers and the instructions that
/// are not baled into another instruction.
void GenXKernelBuilder::buildInstructions() {
  for (auto It = FG->begin(), E = FG->end(); It != E; ++It) {
    Func = *It;
    LLVM_DEBUG(dbgs() << "Building IR for func " << Func->getName() << "\n");
    // NoMask is recomputed for each function from the function itself and,
    // transitively, the functions that contain calls using it.
    NoMask = [this]() {
      std::unordered_set<Function *> Visited;
      return setNoMaskByDefault(Func, Visited);
    }();

    LastUsedAliasMap.clear();

    // A function that is the CM main, requires a stack call or is referenced
    // indirectly owns its vISA object; any other function emits into the
    // object of the head of its function group.
    if (Func->hasFnAttribute(genx::FunctionMD::CMGenXMain) ||
        genx::requiresStackCall(Func) || genx::isReferencedIndirectly(Func)) {
      KernFunc = Func;
    } else {
      auto *FuncFG = FGA->getAnyGroup(Func);
      IGC_ASSERT_MESSAGE(FuncFG, "Cannot find the function group");
      KernFunc = FuncFG->getHead();
    }

    IGC_ASSERT(KernFunc);
    Kernel = Func2Kern.at(KernFunc);

    unsigned LabelID = getOrCreateLabel(Func, LABEL_SUBROUTINE);
    CISA_CALL(Kernel->AppendVISACFLabelInst(Labels[LabelID]));
    GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(),
                          "SubRoutine");

    if (UseNewStackBuilder)
      beginFunctionLight(Func);
    else
      beginFunction(Func);
    CurrentPadding = 0;

    // If a float control is specified, emit code to make that happen.
    // Float control contains rounding mode, denorm behaviour and single
    // precision float mode (ALT or IEEE) Relevant bits are already set as
    // defined for VISA control reg in header definition on enums
    if (Func->hasFnAttribute(genx::FunctionMD::CMFloatControl)) {
      uint32_t FloatControl = 0;
      Func->getFnAttribute(genx::FunctionMD::CMFloatControl)
          .getValueAsString()
          .getAsInteger(0, FloatControl);

      // Clear current float control bits to known zero state
      buildControlRegUpdate(CR_Mask, true);

      // Set rounding mode to required state if that isn't zero
      FloatControl &= CR_Mask;
      if (FloatControl) {
        // Only the group head's setting is remembered as the default.
        if (FG->getHead() == Func)
          DefaultFloatControl = FloatControl;
        buildControlRegUpdate(FloatControl, false);
      }
    }

    // Only output a label for the initial basic block if it is used from
    // somewhere else.
    bool NeedsLabel = !Func->front().use_empty();
    for (Function::iterator fi = Func->begin(), fe = Func->end(); fi != fe;
         ++fi) {
      BasicBlock *BB = &*fi;
      // A non-entry block also needs a label when it does not have a single
      // predecessor or when it is a join label.
      if (!NeedsLabel && BB != &Func->front()) {
        NeedsLabel = !BB->getSinglePredecessor();
        if (!NeedsLabel)
          NeedsLabel = GotoJoin::isJoinLabel(BB);
      }
      if (NeedsLabel) {
        unsigned LabelID = getOrCreateLabel(BB, LABEL_BLOCK);
        CISA_CALL(Kernel->AppendVISACFLabelInst(Labels[LabelID]));
      }
      // Assume the next block needs a label unless an instruction built below
      // reports otherwise through buildInstruction()'s return value.
      NeedsLabel = true;
      for (BasicBlock::iterator bi = BB->begin(), be = BB->end(); bi != be;
           ++bi) {
        Instruction *Inst = &*bi;
        if (Inst->isTerminator()) {
          // Before the terminator inst of a basic block, if there is a single
          // successor and it is the header of a loop, for any vector of at
          // least four GRFs with a phi node where our incoming value is
          // undef, insert a lifetime.start here.
          auto *TI = cast<IGCLLVM::TerminatorInst>(Inst);
          if (TI->getNumSuccessors() == 1) {
            auto Succ = TI->getSuccessor(0);
            if (LIs->getLoopInfo(Succ->getParent())->isLoopHeader(Succ)) {
              for (auto si = Succ->begin();; ++si) {
                auto Phi = dyn_cast<PHINode>(&*si);
                if (!Phi)
                  break;
                if (Phi->getType()->getPrimitiveSizeInBits() >=
                        (GrfByteSize * 8) * 4 &&
                    isa<UndefValue>(
                        Phi->getIncomingValue(Phi->getBasicBlockIndex(BB))))
                  addLifetimeStartInst(Phi);
              }
            }
          }
        }
        // Build the instruction.
        if (!Baling->isBaled(Inst)) {
          // With the old stack builder the function epilogue is emitted just
          // before the return instruction itself is built.
          if (isa<ReturnInst>(Inst) && !UseNewStackBuilder)
            endFunction(Func, cast<ReturnInst>(Inst));
          if (buildInstruction(Inst))
            NeedsLabel = false;
        } else {
          LLVM_DEBUG(dbgs() << "Skip baled inst: " << *Inst << "\n");
        }
      }
    }
  }
}
1438 
/// Emit vISA for the bale headed by \p Inst.
///
/// The pending source location is updated from the instruction's debug
/// location. The bale is then peeled from the outside in: global store,
/// wrregion family, saturate modifier and cmp destination, collecting the
/// destination description and modifier on the way, until the instruction
/// that is actually emitted is reached.
///
/// \returns the result of buildMainInst() when a main instruction is built,
/// and false on every other path. buildInstructions() clears its
/// "next block needs a label" flag when this returns true.
bool GenXKernelBuilder::buildInstruction(Instruction *Inst) {
  LLVM_DEBUG(dbgs() << "Build inst: " << *Inst << "\n");
  // Make the source location pending, so it is output as vISA FILE and LOC
  // instructions next time an opcode is written.
  const DebugLoc &DL = Inst->getDebugLoc();
  CurrentInst = Inst;
  if (DL) {
    StringRef Filename = DL->getFilename();
    // An empty filename keeps the previously pending file and directory.
    if (Filename != "") {
      PendingFilename = Filename;
      PendingDirectory = DL->getDirectory();
    }
    PendingLine = DL.getLine();
  }
  // Process the bale that this is the head instruction of.
  BaleInfo BI = Baling->getBaleInfo(Inst);
  LLVM_DEBUG(dbgs() << "Bale type " << BI.Type << "\n");

  DstOpndDesc DstDesc;
  if (BI.Type == BaleInfo::GSTORE) {
    // Inst is a global variable store. It should be baled into a wrr
    // instruction.
    Bale B;
    Baling->buildBale(Inst, &B);
    // This is an identity bale; no code will be emitted.
    if (isIdentityBale(B))
      return false;

    IGC_ASSERT(BI.isOperandBaled(0));
    DstDesc.GStore = Inst;
    Inst = cast<Instruction>(Inst->getOperand(0));
    BI = Baling->getBaleInfo(Inst);
  }
  // Bales of type REGINTR produce no code here.
  if (BI.Type == BaleInfo::REGINTR)
    return false;
  if (BI.Type == BaleInfo::WRREGION || BI.Type == BaleInfo::WRPREDREGION ||
      BI.Type == BaleInfo::WRPREDPREDREGION) {
    // Inst is a wrregion or wrpredregion or wrpredpredregion.
    DstDesc.WrRegion = Inst;
    DstDesc.WrRegionBI = BI;
    // Follow the chain of single-use wrregions that take the previous result
    // as their "old value" operand; if the end of the chain is used by a
    // write to a predefined register, remember that write.
    auto *CurInst = Inst;
    while (CurInst->hasOneUse() &&
           GenXIntrinsic::isWrRegion(CurInst->user_back()) &&
           CurInst->use_begin()->getOperandNo() ==
               GenXIntrinsic::GenXRegion::OldValueOperandNum)
      CurInst = CurInst->user_back();
    if (CurInst->hasOneUse() &&
        GenXIntrinsic::isWritePredefReg(CurInst->user_back()))
      DstDesc.WrPredefReg = CurInst->user_back();
    if (isa<UndefValue>(Inst->getOperand(0)) && !DstDesc.GStore) {
      // This is a wrregion, probably a partial write, to an undef value.
      // Write a lifetime start if appropriate to help the jitter's register
      // allocator.
      addWriteRegionLifetimeStartInst(DstDesc.WrRegion);
    }
    // See if it bales in the instruction
    // that generates the subregion/element.  That is always operand 1.
    enum { OperandNum = 1 };
    if (!BI.isOperandBaled(OperandNum)) {
      if (BI.Type == BaleInfo::WRPREDREGION) {
        buildLoneWrPredRegion(DstDesc.WrRegion, DstDesc.WrRegionBI);
      } else {
        buildLoneWrRegion(DstDesc);
      }
      return false;
    }
    // Yes, source of wrregion is baled in.
    Inst = cast<Instruction>(DstDesc.WrRegion->getOperand(OperandNum));
    BI = Baling->getBaleInfo(Inst);
  }
  if (BI.Type == BaleInfo::FADDR) {
    buildFunctionAddr(Inst, DstDesc);
    return false;
  }
  unsigned Mod = 0;
  if (BI.Type == BaleInfo::SATURATE) {
    // Inst is a fp saturate. See if it bales in the instruction that
    // generates the value to saturate. That is always operand 0. If
    // not, just treat the saturate as a normal intrinsic.
    if (BI.isOperandBaled(0)) {
      Mod = MODIFIER_SAT;
      Inst = cast<Instruction>(Inst->getOperand(0));
      BI = Baling->getBaleInfo(Inst);
    } else
      BI.Type = BaleInfo::MAININST;
  }
  if (BI.Type == BaleInfo::CMPDST) {
    // Dst of sel instruction is baled in.
    Inst = cast<Instruction>(Inst->getOperand(0));
    IGC_ASSERT_MESSAGE(isa<CmpInst>(Inst), "only bale sel into a cmp instr");
    BI = Baling->getBaleInfo(Inst);
  }
  switch (BI.Type) {
  case BaleInfo::RDREGION:
  case BaleInfo::ABSMOD:
  case BaleInfo::NEGMOD:
  case BaleInfo::NOTMOD:
    // This is a rdregion or modifier not baled in to a main instruction
    // (but possibly baled in to a wrregion or sat modifier).
    buildLoneOperand(Inst, BI, Mod, DstDesc);
    return false;
  }
  // Whatever remains must be a main instruction (or one of the bale types
  // that are built through the main instruction path).
  IGC_ASSERT(BI.Type == BaleInfo::MAININST || BI.Type == BaleInfo::NOTP ||
         BI.Type == BaleInfo::ZEXT || BI.Type == BaleInfo::SEXT);
  return buildMainInst(Inst, BI, Mod, DstDesc);
}
1545 
createPredicateDeclFromSelect(Instruction * SI,BaleInfo BI,VISA_PREDICATE_CONTROL & Control,VISA_PREDICATE_STATE & State,VISA_EMask_Ctrl * MaskCtrl)1546 VISA_PredVar *GenXKernelBuilder::createPredicateDeclFromSelect(
1547     Instruction *SI, BaleInfo BI, VISA_PREDICATE_CONTROL &Control,
1548     VISA_PREDICATE_STATE &State, VISA_EMask_Ctrl *MaskCtrl) {
1549   *MaskCtrl = vISA_EMASK_M1_NM;
1550   // Get the predicate (mask) operand, scanning through baled in
1551   // all/any/not/rdpredregion and setting State and MaskCtrl
1552   // appropriately.
1553   Value *Mask = getPredicateOperand(SI, 0 /*selector operand in select*/, BI,
1554                                     Control, State, MaskCtrl);
1555   IGC_ASSERT(!isa<Constant>(Mask));
1556   // Variable predicate. Derive the predication field from any baled in
1557   // all/any/not and the predicate register number.
1558   Register *Reg = getRegForValueAndSaveAlias(KernFunc, Mask);
1559   IGC_ASSERT(Reg);
1560   IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
1561   if (NoMask)
1562     *MaskCtrl |= vISA_EMASK_M1_NM;
1563   return getPredicateVar(Reg);
1564 }
1565 
1566 VISA_PredOpnd *
createPredFromWrRegion(const DstOpndDesc & DstDesc)1567 GenXKernelBuilder::createPredFromWrRegion(const DstOpndDesc &DstDesc) {
1568   VISA_PredOpnd *result = nullptr;
1569   Instruction *WrRegion = DstDesc.WrRegion;
1570   if (WrRegion) {
1571     // Get the predicate (mask) operand, scanning through baled in
1572     // all/any/not/rdpredregion and setting PredField and MaskCtrl
1573     // appropriately.
1574     VISA_EMask_Ctrl MaskCtrl;
1575     VISA_PREDICATE_CONTROL Control;
1576     VISA_PREDICATE_STATE State;
1577     Value *Mask =
1578         getPredicateOperand(WrRegion, 7 /*mask operand in wrregion*/,
1579                             DstDesc.WrRegionBI, Control, State, &MaskCtrl);
1580     if (auto C = dyn_cast<Constant>(Mask)) {
1581       (void)C;
1582       IGC_ASSERT_MESSAGE(C->isAllOnesValue(),
1583        "wrregion mask or predication operand must be const 1 or not constant");
1584     } else {
1585       // Variable predicate. Derive the predication field from any baled in
1586       // all/any/not and the predicate register number. If the predicate has
1587       // not has a register allocated, it must be EM.
1588       Register *Reg = getRegForValueOrNullAndSaveAlias(KernFunc, Mask);
1589       if (Reg) {
1590         IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
1591         result = createPredOperand(getPredicateVar(Reg), State, Control);
1592       }
1593     }
1594   }
1595   return result;
1596 }
1597 
1598 /***********************************************************************
1599  * createPred : create predication field from an instruction operand
1600  *
1601  * Enter:   Inst = the instruction (0 to write an "always true" pred field)
1602  *          BI = BaleInfo for the instruction, so we can see if there is a
1603  *                rdpredregion baled in to the mask
1604  *          OperandNum = operand number in the instruction
1605  *
1606  * If the operand is not constant 1, then it must be a predicate register.
1607  */
createPred(Instruction * Inst,BaleInfo BI,unsigned OperandNum)1608 VISA_PredOpnd *GenXKernelBuilder::createPred(Instruction *Inst, BaleInfo BI,
1609                                              unsigned OperandNum) {
1610   VISA_PredOpnd *ResultOperand = nullptr;
1611   VISA_PREDICATE_CONTROL PredControl;
1612   VISA_PREDICATE_STATE Inverse;
1613   VISA_EMask_Ctrl MaskCtrl;
1614   Value *Mask = getPredicateOperand(Inst, OperandNum, BI, PredControl, Inverse,
1615                                     &MaskCtrl);
1616   if (auto C = dyn_cast<Constant>(Mask)) {
1617     (void)C;
1618     IGC_ASSERT_MESSAGE(C->isAllOnesValue(),
1619       "wrregion mask or predication operand must be const 1 or not constant");
1620   } else {
1621     // Variable predicate. Derive the predication field from any baled in
1622     // all/any/not and the predicate register number. If the predicate has not
1623     // has a register allocated, it must be EM.
1624     Register *Reg = getRegForValueOrNullAndSaveAlias(KernFunc, Mask);
1625     VISA_PredVar *PredVar = nullptr;
1626     if (Reg) {
1627       IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
1628       PredVar = getPredicateVar(Reg);
1629     } else
1630       return nullptr;
1631     ResultOperand = createPredOperand(PredVar, Inverse, PredControl);
1632   }
1633   return ResultOperand;
1634 }
1635 
createState(Register * Reg,unsigned Offset,bool IsDst)1636 VISA_VectorOpnd *GenXKernelBuilder::createState(Register *Reg, unsigned Offset,
1637                                                 bool IsDst) {
1638   uint8_t Size = 0;
1639   VISA_VectorOpnd *Op = nullptr;
1640 
1641   switch (Reg->Category) {
1642   case RegCategory::SURFACE:
1643     CISA_CALL(Kernel->CreateVISAStateOperand(Op, Reg->GetVar<VISA_SurfaceVar>(Kernel),
1644                                              Size, Offset, IsDst));
1645     break;
1646   case RegCategory::SAMPLER:
1647     CISA_CALL(Kernel->CreateVISAStateOperand(Op, Reg->GetVar<VISA_SamplerVar>(Kernel),
1648                                              Size, Offset, IsDst));
1649     break;
1650   default:
1651     IGC_ASSERT_EXIT_MESSAGE(0, "unknown state operand");
1652   }
1653 
1654   return Op;
1655 }
1656 
createDestination(CisaVariable * Dest,genx::Signedness Signed,unsigned * Offset)1657 VISA_VectorOpnd *GenXKernelBuilder::createDestination(CisaVariable *Dest,
1658                                                       genx::Signedness Signed,
1659                                                       unsigned *Offset) {
1660   Region R(IGCLLVM::FixedVectorType::get(
1661       IntegerType::get(Ctx, CISATypeTable[Dest->getType()].typeSize * CHAR_BIT),
1662       Dest->getNumElements()));
1663   if (Offset)
1664     R.Offset = *Offset;
1665   return createRegionOperand(&R, Dest->getGenVar(), Signed, 0, true);
1666 }
1667 
createDestination(Value * Dest,genx::Signedness Signed,unsigned * Offset)1668 VISA_VectorOpnd *GenXKernelBuilder::createDestination(Value *Dest,
1669                                                       genx::Signedness Signed,
1670                                                       unsigned *Offset) {
1671   return createDestination(Dest, Signed, 0, DstOpndDesc(), nullptr, Offset);
1672 }
1673 
/***********************************************************************
 * createDestination : create the vISA destination operand for a value,
 *     taking account of any wrregion (or global store) it is baled into
 *
 * Enter:   Dest = value being written; must not be of aggregate type
 *          Signed = requested signedness; replaced by getISatDstSign() if
 *                   Dest->user_back() is an integer saturate
 *          Mod = modifier bits (checked against MODIFIER_SAT)
 *          DstDesc = wrregion / global store / predef reg the result is
 *                    baled into; WrRegion is null if there is none
 *          SignedRes = if non-null, receives Signed on entry and then the
 *                      result of RegAlloc->getSigned() for the register used
 *          Offset = if non-null, overrides the region offset; only used
 *                   when there is no wrregion
 *
 * Return:  region operand for a GENERAL register (or no register), or a
 *          state operand for a SURFACE/SAMPLER register
 */
VISA_VectorOpnd *
GenXKernelBuilder::createDestination(Value *Dest, genx::Signedness Signed,
                                     unsigned Mod, const DstOpndDesc &DstDesc,
                                     Signedness *SignedRes, unsigned *Offset) {
  LLVM_DEBUG(dbgs() << "createDest for value: " << *Dest << ", wrr: ");
  if (DstDesc.WrRegion)
    LLVM_DEBUG(dbgs() << *(DstDesc.WrRegion));
  else
    LLVM_DEBUG(dbgs() << "null");
  LLVM_DEBUG(dbgs() << "\n");
  IGC_ASSERT_MESSAGE(!Dest->getType()->isAggregateType(),
    "cannot create destination register of an aggregate type");
  if (SignedRes)
    *SignedRes = Signed;

  // For a bitcast of a non-constant i1 (or vector of i1) value to a fixed
  // vector of some other element type, look the register up as a single
  // integer as wide as the whole destination vector.
  Type *OverrideType = nullptr;
  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Dest)) {
    if (!(isa<Constant>(BCI->getOperand(0))) &&
        !(BCI->getType()->getScalarType()->isIntegerTy(1)) &&
        (BCI->getOperand(0)->getType()->getScalarType()->isIntegerTy(1))) {
      if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Dest->getType())) {
        unsigned int NumBits = VT->getNumElements() *
                               VT->getElementType()->getPrimitiveSizeInBits();
        OverrideType = IntegerType::get(BCI->getContext(), NumBits);
      }
    }
  }

  // Saturation can also change signedness.
  if (!Dest->user_empty() && GenXIntrinsic::isIntegerSat(Dest->user_back())) {
    Signed = getISatDstSign(Dest->user_back());
  }

  // Case 1: not baled into a wrregion. The destination is the register of
  // Dest itself (or of the saturate that uses it).
  if (!DstDesc.WrRegion) {
    if (Mod) {
      // There is a sat modifier. Either it is an fp saturate, which is
      // represented by its own intrinsic which this instruction is baled
      // into, or it is an int saturate which always comes from this
      // instruction's semantics. In the former case, use the value
      // that is the result of the saturate. But only if this instruction
      // itself is not the sat intrinsic.
      if (Dest->getType()->getScalarType()->isFloatingPointTy() &&
          GenXIntrinsic::getGenXIntrinsicID(Dest) != GenXIntrinsic::genx_sat)
        Dest = cast<Instruction>(Dest->use_begin()->getUser());
    }
    if ((Mod & MODIFIER_SAT) != 0) {
      // Similar for integer saturation.
      if (Dest->getType()->getScalarType()->isIntegerTy() &&
          !GenXIntrinsic::isIntegerSat(Dest) && GenXIntrinsic::isIntegerSat(Dest->user_back()))
        Dest = cast<Instruction>(Dest->user_back());
    }
    Register *Reg =
        getRegForValueAndSaveAlias(KernFunc, Dest, Signed, OverrideType);
    if (SignedRes)
      *SignedRes = RegAlloc->getSigned(Reg);
    // Write the vISA general operand:
    if (Reg->Category == RegCategory::GENERAL) {
      Region DestR(Dest);
      if (Offset)
        DestR.Offset = *Offset;
      return createRegionOperand(&DestR, Reg->GetVar<VISA_GenVar>(Kernel),
                                 DONTCARESIGNED, Mod, true /*isDest*/);
    } else {
      IGC_ASSERT(Reg->Category == RegCategory::SURFACE ||
             Reg->Category == RegCategory::SAMPLER);

      return createState(Reg, 0 /*Offset*/, true /*IsDst*/);
    }
  }
  // Case 2: baled into a wrregion.
  // We need to allow for the case that there is no register allocated if it
  // is an indirected arg, and that is OK because the region is indirect so
  // the vISA does not contain the base register.
  Register *Reg;

  Value *V = nullptr;
  if (DstDesc.GStore) {
    // The wrregion result is stored to a global: write the register of the
    // underlying global variable directly.
    auto GV = getUnderlyingGlobalVariable(DstDesc.GStore->getOperand(1));
    IGC_ASSERT_MESSAGE(GV, "out of sync");
    if (OverrideType == nullptr)
      OverrideType = DstDesc.GStore->getOperand(0)->getType();
    Reg = getRegForValueAndSaveAlias(KernFunc, GV, Signed, OverrideType);
    V = GV;
  } else {
    // Prefer the predefined-register write, if any, over the wrregion.
    V = DstDesc.WrPredefReg ? DstDesc.WrPredefReg : DstDesc.WrRegion;
    // if (!V->user_empty() && GenXIntrinsic::isWritePredefReg(V->user_back()))
    //   V = V->user_back();
    Reg = getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed, OverrideType);
  }

  // Write the vISA general operand with region:
  Region R = makeRegionFromBaleInfo(DstDesc.WrRegion, DstDesc.WrRegionBI);

  // NOTE(review): Reg may be null on this path (OrNull lookup above); this
  // relies on RegAlloc->getSigned() accepting a null register - confirm.
  if (SignedRes)
    *SignedRes = RegAlloc->getSigned(Reg);

  if (Reg && (Reg->Category == RegCategory::SAMPLER ||
              Reg->Category == RegCategory::SURFACE)) {
    // The state operand offset is the region offset divided by the element
    // size.
    IGC_ASSERT(R.ElementBytes);
    return createState(Reg, R.Offset / R.ElementBytes, true /*IsDest*/);
  } else {
    IGC_ASSERT(!Reg || Reg->Category == RegCategory::GENERAL);
    // A null declaration is passed when no register was allocated.
    auto Decl = Reg ? Reg->GetVar<VISA_GenVar>(Kernel) : nullptr;
    return createRegionOperand(&R, Decl, Signed, Mod, true /*IsDest*/);
  }
}
1779 
createSourceOperand(Instruction * Inst,Signedness Signed,unsigned OperandNum,genx::BaleInfo BI,unsigned Mod,Signedness * SignedRes,unsigned MaxWidth)1780 VISA_VectorOpnd *GenXKernelBuilder::createSourceOperand(
1781     Instruction *Inst, Signedness Signed, unsigned OperandNum,
1782     genx::BaleInfo BI, unsigned Mod, Signedness *SignedRes, unsigned MaxWidth) {
1783   Value *V = Inst->getOperand(OperandNum);
1784   return createSource(V, Signed, BI.isOperandBaled(OperandNum), Mod, SignedRes,
1785                       MaxWidth);
1786 }
1787 
1788 VISA_PredOpnd *
createPredOperand(VISA_PredVar * PredVar,VISA_PREDICATE_STATE State,VISA_PREDICATE_CONTROL Control)1789 GenXKernelBuilder::createPredOperand(VISA_PredVar *PredVar,
1790                                      VISA_PREDICATE_STATE State,
1791                                      VISA_PREDICATE_CONTROL Control) {
1792   VISA_PredOpnd *PredOperand = nullptr;
1793   CISA_CALL(
1794       Kernel->CreateVISAPredicateOperand(PredOperand, PredVar, State, Control));
1795 
1796   return PredOperand;
1797 }
1798 
createCisaSrcOperand(VISA_GenVar * Decl,VISA_Modifier Mod,unsigned VStride,unsigned Width,unsigned HStride,unsigned ROffset,unsigned COffset)1799 VISA_VectorOpnd *GenXKernelBuilder::createCisaSrcOperand(
1800     VISA_GenVar *Decl, VISA_Modifier Mod, unsigned VStride, unsigned Width,
1801     unsigned HStride, unsigned ROffset, unsigned COffset) {
1802   VISA_VectorOpnd *ResultOperand = nullptr;
1803   CISA_CALL(Kernel->CreateVISASrcOperand(ResultOperand, Decl, Mod, VStride,
1804                                          Width, HStride, ROffset, COffset));
1805   return ResultOperand;
1806 }
1807 
createCisaDstOperand(VISA_GenVar * Decl,unsigned HStride,unsigned ROffset,unsigned COffset)1808 VISA_VectorOpnd *GenXKernelBuilder::createCisaDstOperand(VISA_GenVar *Decl,
1809                                                          unsigned HStride,
1810                                                          unsigned ROffset,
1811                                                          unsigned COffset) {
1812   VISA_VectorOpnd *ResultOperand = nullptr;
1813   CISA_CALL(Kernel->CreateVISADstOperand(ResultOperand, Decl, HStride, ROffset,
1814                                          COffset));
1815   return ResultOperand;
1816 }
1817 
1818 /***********************************************************************
1819  * createAddressOperand : create an address register operand
1820  */
createAddressOperand(Value * V,bool IsDst)1821 VISA_VectorOpnd *GenXKernelBuilder::createAddressOperand(Value *V, bool IsDst) {
1822   VISA_VectorOpnd *ResultOperand = nullptr;
1823   Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, DONTCARESIGNED);
1824   IGC_ASSERT(Reg->Category == RegCategory::ADDRESS);
1825   unsigned Width = 1;
1826   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(V->getType()))
1827     Width = VT->getNumElements();
1828   if (IsDst) {
1829     CISA_CALL(Kernel->CreateVISAAddressDstOperand(
1830         ResultOperand, Reg->GetVar<VISA_AddrVar>(Kernel), 0));
1831   } else {
1832     CISA_CALL(Kernel->CreateVISAAddressSrcOperand(
1833         ResultOperand, Reg->GetVar<VISA_AddrVar>(Kernel), 0, Width));
1834   }
1835   return ResultOperand;
1836 }
1837 
getVISAImmTy(uint8_t ImmTy)1838 VISA_Type GenXKernelBuilder::getVISAImmTy(uint8_t ImmTy) {
1839   return static_cast<VISA_Type>(ImmTy & 0xf);
1840 }
1841 
/***********************************************************************
 * createImmediateOperand : create a vISA immediate operand from a constant
 *
 * Enter:   V = the constant; a constant derived from undef is emitted as
 *              zero
 *          Signed = signedness used to choose the integer immediate type
 *
 * Return:  the immediate operand (nullptr, after asserting, if V is a
 *          Function)
 *
 * A non-splat vector constant is encoded as a packed vector immediate
 * (ISA_TYPE_V / ISA_TYPE_UV for integer elements, ISA_TYPE_VF for float
 * elements). A splat vector is emitted as its scalar element.
 */
VISA_VectorOpnd *GenXKernelBuilder::createImmediateOperand(Constant *V,
                                                           Signedness Signed) {
  if (isDerivedFromUndef(V))
    V = Constant::getNullValue(V->getType());

  Type *T = V->getType();
  if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(T)) {
    // Vector constant.
    auto Splat = V->getSplatValue();
    if (!Splat) {
      // Non-splatted vector constant. Must be a packed vector.
      unsigned NumElements = VT->getNumElements();
      if (VT->getElementType()->isIntegerTy()) {
        // Packed int vector.
        IGC_ASSERT(NumElements <= ImmIntVec::Width);
        unsigned Packed = 0;
        for (unsigned i = 0; i != NumElements; ++i) {
          auto El = dyn_cast<ConstantInt>(V->getAggregateElement(i));
          if (!El)
            continue; // undef element
          int This = El->getSExtValue();
          // An element outside the range common to both encodings forces
          // the signedness of the whole immediate.
          if (This < ImmIntVec::MinUInt) {
            IGC_ASSERT_MESSAGE(This >= ImmIntVec::MinSInt,
              "too big imm, cannot encode as vector imm");
            Signed = SIGNED;
          } else if (This > ImmIntVec::MaxSInt) {
            IGC_ASSERT_MESSAGE(This <= ImmIntVec::MaxUInt,
              "too big imm, cannot encode as vector imm");
            Signed = UNSIGNED;
          }
          // Element i occupies ImmIntVec::ElemSize bits starting at bit
          // ImmIntVec::ElemSize * i.
          Packed |= (This & ImmIntVec::MaxUInt) << (ImmIntVec::ElemSize * i);
        }
        // For a 2- or 4-wide operand, we need to repeat the vector elements
        // as which ones are used depends on the position of the other
        // operand in its oword.
        // (Assuming ImmIntVec::ElemSize is 4 - TODO confirm: two elements
        // fill one byte, which 0x01010101 copies into all four bytes; four
        // elements fill 16 bits, which 0x00010001 copies into both halves.)
        switch (NumElements) {
        case 2:
          Packed = Packed * 0x01010101;
          break;
        case 4:
          Packed = Packed * 0x00010001;
          break;
        }
        auto ImmTy =
            static_cast<uint8_t>(Signed == UNSIGNED ? ISA_TYPE_UV : ISA_TYPE_V);
        auto VISAImmTy = getVISAImmTy(ImmTy);
        VISA_VectorOpnd *ImmOp = nullptr;
        CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Packed, VISAImmTy));
        return ImmOp;
      }
      // Packed float vector.
      IGC_ASSERT(VT->getElementType()->isFloatTy());
      IGC_ASSERT(NumElements == 1 || NumElements == 2 || NumElements == 4);
      unsigned Packed = 0;
      // Always fill four 8-bit lanes, cycling through the elements when
      // there are fewer than four of them.
      for (unsigned i = 0; i != 4; ++i) {
        auto CFP =
            dyn_cast<ConstantFP>(V->getAggregateElement(i % NumElements));
        if (!CFP) // Undef
          continue;
        const APFloat &FP = CFP->getValueAPF();
        Packed |= get8bitPackedFloat(FP.convertToFloat()) << (i * 8);
      }
      auto VISAImmTy = getVISAImmTy(ISA_TYPE_VF);
      VISA_VectorOpnd *ImmOp = nullptr;
      CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Packed, VISAImmTy));
      return ImmOp;
    }
    // Splatted (or single element) vector. Use the scalar value.
    T = VT->getElementType();
    V = Splat;
  }

  // The splat element may itself be undef-derived; a null pointer is
  // emitted as an integer zero of the data layout's pointer-sized type.
  if (isDerivedFromUndef(V))
    V = Constant::getNullValue(V->getType());
  else if (isa<ConstantPointerNull>(V)) {
    const DataLayout &DL = Func->getParent()->getDataLayout();
    T = DL.getIntPtrType(V->getType());
    V = Constant::getNullValue(T);
  }

  // We have a scalar constant.
  if (IntegerType *IT = dyn_cast<IntegerType>(T)) {
    ConstantInt *CI = cast<ConstantInt>(V);
    // I think we need to use the appropriate one of getZExtValue or
    // getSExtValue to avoid an assertion failure on very large 64 bit values...
    int64_t Val = Signed == UNSIGNED ? CI->getZExtValue() : CI->getSExtValue();
    visa::TypeDetails TD(Func->getParent()->getDataLayout(), IT, Signed);
    VISA_VectorOpnd *ImmOp = nullptr;
    CISA_CALL(
        Kernel->CreateVISAImmediate(ImmOp, &Val, getVISAImmTy(TD.VisaType)));
    return ImmOp;
  } if (isa<Function>(V)) {
    // A function address is expected to have been baled; reaching here is
    // an error.
    IGC_ASSERT_MESSAGE(0, "Not baled function address");
    return nullptr;
  } else {
    // Floating point scalar: float, half or double.
    VISA_VectorOpnd *ImmOp = nullptr;
    ConstantFP *CF = cast<ConstantFP>(V);
    if (T->isFloatTy()) {
      union {
        float f;
        uint32_t i;
      } Val;
      Val.f = CF->getValueAPF().convertToFloat();
      auto VISAImmTy = getVISAImmTy(ISA_TYPE_F);
      CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Val.i, VISAImmTy));
    } else if (T->isHalfTy()) {
      uint16_t Val(
          (uint16_t)(CF->getValueAPF().bitcastToAPInt().getZExtValue()));
      auto VISAImmTy = getVISAImmTy(ISA_TYPE_HF);
      // The 16-bit pattern is widened to 32 bits before being handed over.
      auto Val32 = static_cast<uint32_t>(Val);
      CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Val32, VISAImmTy));
    } else {
      IGC_ASSERT(T->isDoubleTy());
      union {
        double f;
        uint64_t i;
      } Val;
      Val.f = CF->getValueAPF().convertToDouble();
      auto VISAImmTy = getVISAImmTy(ISA_TYPE_DF);
      CISA_CALL(Kernel->CreateVISAImmediate(ImmOp, &Val.i, VISAImmTy));
    }
    return ImmOp;
  }
}
1966 
1967 /***********************************************************************
1968  * getOriginalInstructionForSource : trace a source operand back through
1969  *     its bale (if any), given a starting instruction.
1970  *
1971  * Enter:   Inst = The instruction to start tracing from.
1972  *          BI = BaleInfo for Inst
1973  */
1974 Instruction *
getOriginalInstructionForSource(Instruction * Inst,BaleInfo BI)1975 GenXKernelBuilder::getOriginalInstructionForSource(Instruction *Inst,
1976                                                    BaleInfo BI) {
1977   while (!isa<Constant>(Inst->getOperand(0)) && BI.isOperandBaled(0)) {
1978     Inst = cast<Instruction>(Inst->getOperand(0));
1979     BI = Baling->getBaleInfo(Inst);
1980   }
1981 
1982   return Inst;
1983 }
1984 
buildConvert(CallInst * CI,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)1985 void GenXKernelBuilder::buildConvert(CallInst *CI, BaleInfo BI, unsigned Mod,
1986                                      const DstOpndDesc &DstDesc) {
1987   Register *DstReg = getRegForValueAndSaveAlias(KernFunc, CI, UNSIGNED);
1988   if (!isa<Constant>(CI->getOperand(0))) {
1989     Instruction *OrigInst = getOriginalInstructionForSource(CI, BI);
1990     Register *SrcReg =
1991         getRegForValueAndSaveAlias(KernFunc, OrigInst->getOperand(0));
1992     const bool SrcCategory = (SrcReg->Category != RegCategory::GENERAL);
1993     const bool DstCategory = (DstReg->Category != RegCategory::GENERAL);
1994     const bool Categories = (SrcCategory || DstCategory);
1995     IGC_ASSERT_MESSAGE(Categories, "expected a category conversion");
1996     (void)Categories;
1997   }
1998 
1999   if (DstReg->Category != RegCategory::ADDRESS) {
2000     // State copy.
2001     int ExecSize = 1;
2002     if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType())) {
2003       ExecSize = VT->getNumElements();
2004     }
2005 
2006     auto ISAExecSize = static_cast<VISA_Exec_Size>(genx::log2(ExecSize));
2007     auto Dst = createDestination(CI, UNSIGNED, 0, DstDesc);
2008     auto Src = createSourceOperand(CI, UNSIGNED, 0, BI);
2009     addDebugInfo();
2010     CISA_CALL(Kernel->AppendVISADataMovementInst(
2011         ISA_MOVS, nullptr /*Pred*/, false /*Mod*/,
2012         NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1, ISAExecSize, Dst, Src));
2013     return;
2014   }
2015 
2016   // Destination is address register.
2017   int ExecSize = 1;
2018   if (VectorType *VT = dyn_cast<VectorType>(CI->getType())) {
2019     DiagnosticInfoCisaBuild Err{CI, "vector of addresses not implemented",
2020                                 DS_Error};
2021     getContext().diagnose(Err);
2022   }
2023 
2024   auto ISAExecSize = static_cast<VISA_Exec_Size>(genx::log2(ExecSize));
2025   Register *SrcReg = getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0));
2026   IGC_ASSERT(SrcReg->Category == RegCategory::ADDRESS);
2027 
2028   (void)SrcReg;
2029   // This is an address->address copy, inserted due to coalescing failure of
2030   // the address for an indirected arg in GenXArgIndirection.
2031   // (A conversion to address is handled in buildConvertAddr below.)
2032   // Write the addr_add instruction.
2033   Value *SrcOp0 = CI->getOperand(0);
2034   unsigned Src0Width = 1;
2035   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(SrcOp0->getType()))
2036     Src0Width = VT->getNumElements();
2037 
2038   Register *RegDst = getRegForValueAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
2039   Register *RegSrc0 =
2040       getRegForValueAndSaveAlias(KernFunc, SrcOp0, DONTCARESIGNED);
2041 
2042   VISA_VectorOpnd *Dst = nullptr, *Src0 = nullptr, *Src1 = nullptr;
2043 
2044   CISA_CALL(Kernel->CreateVISAAddressDstOperand(
2045       Dst, RegDst->GetVar<VISA_AddrVar>(Kernel), 0));
2046   CISA_CALL(Kernel->CreateVISAAddressSrcOperand(
2047       Src0, RegSrc0->GetVar<VISA_AddrVar>(Kernel), 0, Src0Width));
2048   Src1 =
2049       createImmediateOperand(Constant::getNullValue(CI->getType()), UNSIGNED);
2050 
2051   addDebugInfo();
2052   CISA_CALL(Kernel->AppendVISAAddrAddInst(vISA_EMASK_M1_NM, ISAExecSize, Dst,
2053                                           Src0, Src1));
2054 }
2055 
createSource(CisaVariable * V,Signedness Signed,unsigned MaxWidth,unsigned * Offset)2056 VISA_VectorOpnd *GenXKernelBuilder::createSource(CisaVariable *V,
2057                                                  Signedness Signed,
2058                                                  unsigned MaxWidth,
2059                                                  unsigned *Offset) {
2060   Region R(IGCLLVM::FixedVectorType::get(
2061       IntegerType::get(Ctx, CISATypeTable[V->getType()].typeSize * CHAR_BIT),
2062       V->getNumElements()));
2063   if (Offset)
2064     R.Offset = *Offset;
2065   return createRegionOperand(&R, V->getGenVar(), Signed, 0, false, MaxWidth);
2066 }
2067 
createSource(Value * V,Signedness Signed,unsigned MaxWidth,unsigned * Offset)2068 VISA_VectorOpnd *GenXKernelBuilder::createSource(Value *V, Signedness Signed,
2069                                                  unsigned MaxWidth,
2070                                                  unsigned *Offset) {
2071   return createSource(V, Signed, false, 0, nullptr, MaxWidth, Offset);
2072 }
2073 
/***********************************************************************
 * createSource : create a vISA source operand for a value
 *
 * Enter:   V = the source value
 *          Signed = requested signedness
 *          Baled = whether V is baled in to the instruction using it
 *          Mod = modifier bits accumulated so far
 *          SignedRes = if non-null, receives the signedness used
 *          MaxWidth = passed to createRegionOperand
 *          Offset = if non-null, overrides the region offset
 *
 * A constant becomes an immediate. A non-baled value is read directly from
 * its register. A baled value is either a rdregion, which terminates the
 * chain, or a modifier / extension, which updates Mod or Signed and then
 * recurses into its input operand.
 */
VISA_VectorOpnd *GenXKernelBuilder::createSource(Value *V, Signedness Signed,
                                                 bool Baled, unsigned Mod,
                                                 Signedness *SignedRes,
                                                 unsigned MaxWidth,
                                                 unsigned *Offset) {
  LLVM_DEBUG(dbgs() << "createSource for "
                    << (Baled ? "baled" : "non-baled") << " value: ");
  LLVM_DEBUG(V->dump());
  LLVM_DEBUG(dbgs() << "\n");
  if (SignedRes)
    *SignedRes = Signed;
  if (auto C = dyn_cast<Constant>(V)) {
    if (Mod) {
      // Need to negate constant.
      IGC_ASSERT_MESSAGE(Mod == MODIFIER_NEG, "unexpected modifier");
      if (C->getType()->isIntOrIntVectorTy())
        C = ConstantExpr::getNeg(C);
      else
        C = ConstantExpr::getFNeg(C);
    }
    return createImmediateOperand(C, Signed);
  }
  if (!Baled) {
    Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
    IGC_ASSERT(Reg->Category == RegCategory::GENERAL ||
           Reg->Category == RegCategory::SURFACE ||
           Reg->Category == RegCategory::SAMPLER);
    // Write the vISA general operand.
    Region R(V);
    if (Offset)
      R.Offset = *Offset;
    // A single element is read with both strides set to zero.
    if (R.NumElements == 1)
      R.VStride = R.Stride = 0;
    if (SignedRes)
      *SignedRes = RegAlloc->getSigned(Reg);
    if (Reg->Category == RegCategory::GENERAL) {
      return createRegionOperand(&R, Reg->GetVar<VISA_GenVar>(Kernel), Signed, Mod,
                                 false /*IsDst*/, MaxWidth);
    } else {
      // State operand: the offset passed is the region offset shifted
      // right by 2.
      return createState(Reg, R.Offset >> 2, false /*IsDst*/);
    };
  }

  // V is baled in, so it must be an instruction; dispatch on its bale type.
  Instruction *Inst = cast<Instruction>(V);
  BaleInfo BI(Baling->getBaleInfo(Inst));
  // Index of the operand of Inst to recurse into.
  unsigned Idx = 0;
  switch (BI.Type) {
  case BaleInfo::RDREGION: {
    // The source operand has a rdregion baled in. We need to allow for the
    // case that there is no register allocated if it is an indirected arg,
    // and that is OK because the region is indirect so the vISA does not
    // contain the base register.
    Value *V = Inst->getOperand(0);
    Register *Reg = getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed);

    // Ensure we pick a non-DONTCARESIGNED signedness here, as, for an
    // indirect region and DONTCARESIGNED, writeRegion arbitrarily picks a
    // signedness as it is attached to the operand, unlike a direct region
    // where it is attached to the vISA register.
    if (Reg)
      Signed = RegAlloc->getSigned(Reg);
    else if (Signed == DONTCARESIGNED)
      Signed = SIGNED;
    // Write the vISA general operand with region.
    Region R = makeRegionFromBaleInfo(Inst, Baling->getBaleInfo(Inst));
    if (Offset)
      R.Offset = *Offset;
    if (R.NumElements == 1)
      R.VStride = 0;
    if (R.Width == 1)
      R.Stride = 0;
    if (!Reg || Reg->Category == RegCategory::GENERAL || R.Indirect) {
      if (SignedRes)
        *SignedRes = Signed;
      return createRegionOperand(&R, Reg ? Reg->GetVar<VISA_GenVar>(Kernel) : nullptr,
                                 Signed, Mod, false, MaxWidth);
    } else {
      if (SignedRes)
        *SignedRes = Signed;
      return createState(Reg, R.Offset >> 2, false /*IsDst*/);
    }
  }
  case BaleInfo::ABSMOD:
    Signed = SIGNED;
    Mod |= MODIFIER_ABS;
    break;
  case BaleInfo::NEGMOD:
#if LLVM_VERSION_MAJOR > 8
    // A unary fneg always toggles the negate bit and takes its input from
    // operand 0.
    if (Inst->getOpcode() == Instruction::FNeg) {
      Mod ^= MODIFIER_NEG;
      break;
    }
#endif
    // Otherwise toggle the negate bit only when no abs modifier has been
    // accumulated yet.
    if (!(Mod & MODIFIER_ABS))
      Mod ^= MODIFIER_NEG;
    Idx = 1; // the input we want in "0-x" is x, not 0.
    break;
  case BaleInfo::NOTMOD:
    Mod ^= MODIFIER_NOT;
    break;
  case BaleInfo::ZEXT:
    Signed = UNSIGNED;
    break;
  case BaleInfo::SEXT:
    Signed = SIGNED;
    break;
  default:
    IGC_ASSERT_EXIT_MESSAGE(0, "unknown bale type");
    break;
  }
  // Recurse into the input of the modifier / extension.
  // NOTE(review): Offset is not forwarded to the recursive call - confirm
  // this is intended.
  return createSource(Inst->getOperand(Idx), Signed, BI.isOperandBaled(Idx),
                      Mod, SignedRes, MaxWidth);
}
2187 
createInlineAsmOperand(Register * Reg,genx::Region * R,bool IsDst,genx::Signedness Signed,genx::ConstraintType Ty,unsigned Mod)2188 std::string GenXKernelBuilder::createInlineAsmOperand(
2189     Register *Reg, genx::Region *R, bool IsDst, genx::Signedness Signed,
2190     genx::ConstraintType Ty, unsigned Mod) {
2191   deduceRegion(R, IsDst);
2192 
2193   VISA_VectorOpnd *ResultOperand = nullptr;
2194   switch (Ty) {
2195   default:
2196     IGC_ASSERT_EXIT_MESSAGE(0, "constraint unhandled");
2197   case ConstraintType::Constraint_cr: {
2198     IGC_ASSERT(Reg);
2199     IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
2200     VISA_PredVar *PredVar = getPredicateVar(Reg);
2201     VISA_PredOpnd *PredOperand =
2202         createPredOperand(PredVar, PredState_NO_INVERSE, PRED_CTRL_NON);
2203     return Kernel->getPredicateOperandName(PredOperand);
2204   }
2205   case ConstraintType::Constraint_rw:
2206     return Kernel->getVarName(Reg->GetVar<VISA_GenVar>(Kernel));
2207   case ConstraintType::Constraint_r:
2208     ResultOperand =
2209         createGeneralOperand(R, Reg->GetVar<VISA_GenVar>(Kernel), Signed, Mod, IsDst);
2210     break;
2211   case ConstraintType::Constraint_a:
2212     if (R->Indirect)
2213       ResultOperand = createIndirectOperand(R, Signed, Mod, IsDst);
2214     else
2215       ResultOperand = createGeneralOperand(R, Reg->GetVar<VISA_GenVar>(Kernel),
2216                                            Signed, Mod, IsDst);
2217     break;
2218   }
2219   return Kernel->getVectorOperandName(ResultOperand, true);
2220 }
2221 
createInlineAsmDestinationOperand(Value * Dest,genx::Signedness Signed,genx::ConstraintType Ty,unsigned Mod,const DstOpndDesc & DstDesc)2222 std::string GenXKernelBuilder::createInlineAsmDestinationOperand(
2223     Value *Dest, genx::Signedness Signed, genx::ConstraintType Ty, unsigned Mod,
2224     const DstOpndDesc &DstDesc) {
2225 
2226   Type *OverrideType = nullptr;
2227 
2228   // Saturation can also change signedness.
2229   if (!Dest->user_empty() && GenXIntrinsic::isIntegerSat(Dest->user_back())) {
2230     Signed = getISatDstSign(Dest->user_back());
2231   }
2232 
2233   if (!DstDesc.WrRegion) {
2234     Register *Reg =
2235         getRegForValueAndSaveAlias(KernFunc, Dest, Signed, OverrideType);
2236 
2237     Region DestR(Dest);
2238     return createInlineAsmOperand(Reg, &DestR, true /*IsDst*/, DONTCARESIGNED,
2239                                   Ty, Mod);
2240   }
2241   // We need to allow for the case that there is no register allocated if it is
2242   // an indirected arg, and that is OK because the region is indirect so the
2243   // vISA does not contain the base register.
2244   Register *Reg;
2245 
2246   Value *V = nullptr;
2247   if (DstDesc.GStore) {
2248     auto GV = getUnderlyingGlobalVariable(DstDesc.GStore->getOperand(1));
2249     IGC_ASSERT_MESSAGE(GV, "out of sync");
2250     if (OverrideType == nullptr)
2251       OverrideType = DstDesc.GStore->getOperand(0)->getType();
2252     Reg = getRegForValueAndSaveAlias(KernFunc, GV, Signed, OverrideType);
2253     V = GV;
2254   } else {
2255     V = DstDesc.WrRegion;
2256     Reg = getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed, OverrideType);
2257   }
2258 
2259   IGC_ASSERT(!Reg || Reg->Category == RegCategory::GENERAL);
2260 
2261   // Write the vISA general operand with region:
2262   Region R = makeRegionFromBaleInfo(DstDesc.WrRegion, DstDesc.WrRegionBI);
2263 
2264   return createInlineAsmOperand(Reg, &R, true /*IsDst*/, Signed, Ty, Mod);
2265 }
2266 
createInlineAsmSourceOperand(Value * V,genx::Signedness Signed,bool Baled,genx::ConstraintType Ty,unsigned Mod,unsigned MaxWidth)2267 std::string GenXKernelBuilder::createInlineAsmSourceOperand(
2268     Value *V, genx::Signedness Signed, bool Baled, genx::ConstraintType Ty,
2269     unsigned Mod, unsigned MaxWidth) {
2270 
2271   if (auto C = dyn_cast<Constant>(V)) {
2272     if (Ty != genx::ConstraintType::Constraint_n) {
2273       if (Mod) {
2274         // Need to negate constant.
2275         IGC_ASSERT_MESSAGE(Mod == MODIFIER_NEG, "unexpected modifier");
2276         if (C->getType()->isIntOrIntVectorTy())
2277           C = ConstantExpr::getNeg(C);
2278         else
2279           C = ConstantExpr::getFNeg(C);
2280       }
2281       VISA_VectorOpnd *ImmOp = createImmediateOperand(C, Signed);
2282       return Kernel->getVectorOperandName(ImmOp, false);
2283     } else {
2284       ConstantInt *CI = cast<ConstantInt>(C);
2285       return llvm::to_string(CI->getSExtValue());
2286     }
2287   }
2288 
2289   if (!Baled) {
2290     Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
2291     Region R(V);
2292     if (R.NumElements == 1)
2293       R.VStride = R.Stride = 0;
2294 
2295     return createInlineAsmOperand(Reg, &R, false /*IsDst*/, Signed, Ty, Mod);
2296   }
2297 
2298   Instruction *Inst = cast<Instruction>(V);
2299   BaleInfo BI(Baling->getBaleInfo(Inst));
2300   IGC_ASSERT(BI.Type == BaleInfo::RDREGION);
2301   // The source operand has a rdregion baled in. We need to allow for the
2302   // case that there is no register allocated if it is an indirected arg,
2303   // and that is OK because the region is indirect so the vISA does not
2304   // contain the base register.
2305   V = Inst->getOperand(0);
2306   Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
2307 
2308   // Ensure we pick a non-DONTCARESIGNED signedness here, as, for an
2309   // indirect region and DONTCARESIGNED, writeRegion arbitrarily picks a
2310   // signedness as it is attached to the operand, unlike a direct region
2311   // where it is attached to the vISA register.
2312   if (Signed == DONTCARESIGNED)
2313     Signed = SIGNED;
2314   // Write the vISA general operand with region.
2315   Region R = makeRegionFromBaleInfo(Inst, Baling->getBaleInfo(Inst));
2316   if (R.NumElements == 1)
2317     R.VStride = 0;
2318   if (R.Width == 1)
2319     R.Stride = 0;
2320 
2321   IGC_ASSERT(Reg->Category == RegCategory::GENERAL || R.Indirect);
2322 
2323   return createInlineAsmOperand(Reg, &R, false /*IsDst*/, Signed, Ty, Mod);
2324 }
2325 
2326 /***********************************************************************
2327  * getPredicateVar : get predicate var from value
2328  */
getPredicateVar(Value * V)2329 VISA_PredVar *GenXKernelBuilder::getPredicateVar(Value *V) {
2330   auto Reg = getRegForValueAndSaveAlias(KernFunc, V, DONTCARESIGNED);
2331   IGC_ASSERT(Reg);
2332   IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
2333   return getPredicateVar(Reg);
2334 }
2335 
2336 /***********************************************************************
2337  * getZeroedPredicateVar : get predicate var from value with zeroing it
2338  */
getZeroedPredicateVar(Value * V)2339 VISA_PredVar *GenXKernelBuilder::getZeroedPredicateVar(Value *V) {
2340   auto Reg = getRegForValueAndSaveAlias(KernFunc, V, DONTCARESIGNED);
2341   IGC_ASSERT(Reg);
2342   IGC_ASSERT(Reg->Category == RegCategory::PREDICATE);
2343   auto PredVar = getPredicateVar(Reg);
2344   unsigned Size = V->getType()->getPrimitiveSizeInBits();
2345   auto C = Constant::getNullValue(V->getType());
2346   CISA_CALL(Kernel->AppendVISASetP(
2347     vISA_EMASK_M1_NM, VISA_Exec_Size(genx::log2(Size)),
2348     PredVar, createImmediateOperand(C, DONTCARESIGNED)));
2349 
2350   return PredVar;
2351 }
2352 
2353 /***********************************************************************
2354  * getPredicateVar : get predicate var from register
2355  */
getPredicateVar(Register * R)2356 VISA_PredVar *GenXKernelBuilder::getPredicateVar(Register *R) {
2357   IGC_ASSERT(R);
2358   return R->Num >= visa::VISA_NUM_RESERVED_PREDICATES
2359              ? R->GetVar<VISA_PredVar>(Kernel)
2360              : nullptr;
2361 }
2362 
buildSelectInst(SelectInst * SI,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2363 void GenXKernelBuilder::buildSelectInst(SelectInst *SI, BaleInfo BI,
2364                                         unsigned Mod,
2365                                         const DstOpndDesc &DstDesc) {
2366   unsigned ExecSize = 1;
2367   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(SI->getType()))
2368     ExecSize = VT->getNumElements();
2369   // Get the predicate (mask) operand, scanning through baled in
2370   // all/any/not/rdpredregion and setting PredField and MaskCtrl
2371   // appropriately.
2372   VISA_EMask_Ctrl MaskCtrl;
2373   VISA_PREDICATE_CONTROL Control;
2374   VISA_PREDICATE_STATE State;
2375 
2376   VISA_PredVar *PredDecl =
2377       createPredicateDeclFromSelect(SI, BI, Control, State, &MaskCtrl);
2378   VISA_PredOpnd* PredOp = createPredOperand(PredDecl, State, Control);
2379 
2380   VISA_VectorOpnd *Dst = createDestination(SI, DONTCARESIGNED, Mod, DstDesc);
2381   VISA_VectorOpnd *Src0 = createSourceOperand(SI, DONTCARESIGNED, 1, BI);
2382   VISA_VectorOpnd *Src1 = createSourceOperand(SI, DONTCARESIGNED, 2, BI);
2383 
2384   addDebugInfo();
2385   CISA_CALL(Kernel->AppendVISADataMovementInst(
2386       ISA_SEL, PredOp, Mod & MODIFIER_SAT, MaskCtrl,
2387       getExecSizeFromValue(ExecSize), Dst, Src0, Src1));
2388 }
2389 
buildNoopCast(CastInst * CI,genx::BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2390 void GenXKernelBuilder::buildNoopCast(CastInst *CI, genx::BaleInfo BI,
2391                                       unsigned Mod, const DstOpndDesc &DstDesc) {
2392   IGC_ASSERT_MESSAGE(isMaskPacking(CI) || !BI.Bits,
2393     "non predicate bitcast should not be baled with anything");
2394   IGC_ASSERT_MESSAGE(isMaskPacking(CI) || !Mod,
2395     "non predicate bitcast should not be baled with anything");
2396   IGC_ASSERT_MESSAGE(isMaskPacking(CI) || !DstDesc.WrRegion,
2397     "non predicate bitcast should not be baled with anything");
2398 
2399   // ignore bitcasts of volatile globals
2400   // (they used to be a part of load/store as a constexpr)
2401   if ((isa<GlobalVariable>(CI->getOperand(0)) &&
2402        cast<GlobalVariable>(CI->getOperand(0))
2403            ->hasAttribute(VCModuleMD::VCVolatile)))
2404     return;
2405 
2406   if (CI->getType()->getScalarType()->isIntegerTy(1)) {
2407     if (CI->getOperand(0)->getType()->getScalarType()->isIntegerTy(1)) {
2408       if (auto C = dyn_cast<Constant>(CI->getOperand(0))) {
2409         auto Reg =
2410             getRegForValueOrNullAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
2411         if (!Reg)
2412           return; // write to EM/RM value, ignore
2413         // We can move a constant predicate to a predicate register
2414         // using setp, if we get the constant predicate as a single int.
2415         unsigned IntVal = getPredicateConstantAsInt(C);
2416         unsigned Size = C->getType()->getPrimitiveSizeInBits();
2417         C = ConstantInt::get(
2418             Type::getIntNTy(CI->getContext(), std::max(Size, 8U)), IntVal);
2419 
2420         addDebugInfo();
2421         CISA_CALL(Kernel->AppendVISASetP(
2422             vISA_EMASK_M1_NM, VISA_Exec_Size(genx::log2(Size)),
2423             getPredicateVar(Reg), createSourceOperand(CI, UNSIGNED, 0, BI)));
2424         return;
2425       }
2426       // There does not appear to be a vISA instruction to move predicate
2427       // to predicate. GenXCoalescing avoids this by moving in two steps
2428       // via a general register. So the only pred->pred bitcast that arrives
2429       // here should be one from GenXLowering, and it should have been copy
2430       // coalesced in GenXCoalescing.
2431       const Register *const Reg1 =
2432         getRegForValueAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
2433       const Register *const Reg2 =
2434         getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0), DONTCARESIGNED);
2435       IGC_ASSERT_MESSAGE(Reg1 == Reg2, "uncoalesced phi move of predicate");
2436       (void) Reg1;
2437       (void) Reg2;
2438       return;
2439     }
2440 
2441     VISA_PredVar *PredVar = getPredicateVar(CI);
2442 
2443     addDebugInfo();
2444     CISA_CALL(Kernel->AppendVISASetP(
2445         vISA_EMASK_M1_NM,
2446         VISA_Exec_Size(
2447             genx::log2(CI->getType()->getPrimitiveSizeInBits())),
2448         PredVar, createSourceOperand(CI, UNSIGNED, 0, BI)));
2449     return;
2450   }
2451   if (isa<Constant>(CI->getOperand(0))) {
2452     if (isa<UndefValue>(CI->getOperand(0)))
2453       return; // undef source, generate no code
2454     // Source is constant.
2455     int ExecSize = 1;
2456     if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType()))
2457       ExecSize = VT->getNumElements();
2458 
2459     VISA_EMask_Ctrl ctrlMask = getExecMaskFromWrRegion(DstDesc, true);
2460     VISA_Exec_Size execSize = getExecSizeFromValue(ExecSize);
2461     addDebugInfo();
2462     CISA_CALL(Kernel->AppendVISADataMovementInst(
2463         ISA_MOV, createPredFromWrRegion(DstDesc), Mod & MODIFIER_SAT, ctrlMask,
2464         execSize, createDestination(CI, DONTCARESIGNED, Mod, DstDesc),
2465         createSourceOperand(CI, DONTCARESIGNED, 0, BI)));
2466     return;
2467   }
2468   if (CI->getOperand(0)->getType()->getScalarType()->isIntegerTy(1)) {
2469     // Bitcast from predicate to scalar int
2470     Register *PredReg =
2471         getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0), DONTCARESIGNED);
2472     IGC_ASSERT(PredReg->Category == RegCategory::PREDICATE);
2473     addDebugInfo();
2474     CISA_CALL(Kernel->AppendVISAPredicateMove(
2475         createDestination(CI, UNSIGNED, 0, DstDesc),
2476         PredReg->GetVar<VISA_PredVar>(Kernel)));
2477 
2478     return;
2479   }
2480 
2481   if (Liveness->isNoopCastCoalesced(CI))
2482     return; // cast was coalesced away
2483 
2484   // Here we always choose minimal (in size) type in order to avoid issues
2485   // with alignment. We expect that execution size should still be valid
2486   Type *Ty = CI->getSrcTy();
2487   if (Ty->getScalarType()->getPrimitiveSizeInBits() >
2488       CI->getDestTy()->getScalarType()->getPrimitiveSizeInBits())
2489     Ty = CI->getDestTy();
2490 
2491   Register *DstReg =
2492       getRegForValueAndSaveAlias(KernFunc, CI, DONTCARESIGNED, Ty);
2493   // Give dest and source the same signedness for byte mov.
2494   auto Signed = RegAlloc->getSigned(DstReg);
2495   Register *SrcReg =
2496       getRegForValueAndSaveAlias(KernFunc, CI->getOperand(0), Signed, Ty);
2497   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
2498   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Ty))
2499     ExecSize = getExecSizeFromValue(VT->getNumElements());
2500   IGC_ASSERT_MESSAGE(ExecSize >= EXEC_SIZE_1,
2501     "illegal exec size in bitcast: should have been coalesced away");
2502   IGC_ASSERT_MESSAGE(ExecSize <= EXEC_SIZE_32,
2503     "illegal exec size in bitcast: should have been coalesced away");
2504   // destination
2505   Region DestR(CI);
2506   // source
2507   Region SourceR(CI->getOperand(0));
2508 
2509   VISA_EMask_Ctrl ctrlMask = NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
2510   addDebugInfo();
2511   CISA_CALL(Kernel->AppendVISADataMovementInst(
2512       ISA_MOV, nullptr, Mod, ctrlMask, ExecSize,
2513       createRegionOperand(&DestR, DstReg->GetVar<VISA_GenVar>(Kernel), DONTCARESIGNED,
2514                           0, true),
2515       createRegionOperand(&SourceR, SrcReg->GetVar<VISA_GenVar>(Kernel), Signed, 0,
2516                           false)));
2517 }
2518 
buildFunctionAddr(Instruction * Inst,const DstOpndDesc & DstDesc)2519 void GenXKernelBuilder::buildFunctionAddr(Instruction *Inst,
2520                                           const DstOpndDesc &DstDesc) {
2521   auto *CI = dyn_cast<CallInst>(Inst);
2522   IGC_ASSERT(CI);
2523   IGC_ASSERT_MESSAGE(GenXIntrinsic::getGenXIntrinsicID(CI) == GenXIntrinsic::genx_faddr,
2524     "genx.faddr expected in a FADDR bale");
2525   auto *Dst = createDestination(Inst, DONTCARESIGNED, MODIFIER_NONE, DstDesc);
2526   IGC_ASSERT(Dst);
2527   auto *F = cast<Function>(Inst->getOperand(0));
2528   CISA_CALL(Kernel->AppendVISACFSymbolInst(F->getName().str(), Dst));
2529 }
2530 
2531 /***********************************************************************
2532  * buildLoneWrRegion : build a lone wrregion
2533  */
buildLoneWrRegion(const DstOpndDesc & DstDesc)2534 void GenXKernelBuilder::buildLoneWrRegion(const DstOpndDesc &DstDesc) {
2535   enum { OperandNum = 1 };
2536   Value *Input = DstDesc.WrRegion->getOperand(OperandNum);
2537   if (isa<UndefValue>(Input))
2538     return; // No code if input is undef
2539   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
2540   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Input->getType()))
2541     ExecSize = getExecSizeFromValue(VT->getNumElements());
2542 
2543   VISA_EMask_Ctrl ExecMask = getExecMaskFromWrRegion(DstDesc, true);
2544 
2545   // TODO: fix signedness of the source
2546   addDebugInfo();
2547   auto *Src = createSource(Input, DONTCARESIGNED, false, 0);
2548   auto *Dst = createDestination(Input, DONTCARESIGNED, 0, DstDesc);
2549   CISA_CALL(Kernel->AppendVISADataMovementInst(
2550       ISA_MOV, createPredFromWrRegion(DstDesc), false, ExecMask, ExecSize,
2551       Dst, Src));
2552 }
2553 
2554 /***********************************************************************
2555  * buildLoneWrPredRegion : build a lone wrpredregion
2556  */
buildLoneWrPredRegion(Instruction * Inst,BaleInfo BI)2557 void GenXKernelBuilder::buildLoneWrPredRegion(Instruction *Inst, BaleInfo BI) {
2558   IGC_ASSERT_MESSAGE(isWrPredRegionLegalSetP(*cast<CallInst>(Inst)),
2559     "wrpredregion cannot be legally represented as SETP instruction");
2560   enum { OperandNum = 1 };
2561   Value *Input = Inst->getOperand(OperandNum);
2562   IGC_ASSERT_MESSAGE(isa<Constant>(Input), "only immediate case is supported");
2563   auto *C = cast<Constant>(Input);
2564   unsigned Size = C->getType()->getPrimitiveSizeInBits();
2565 
2566   VISA_EMask_Ctrl ctrlMask = getExecMaskFromWrPredRegion(Inst, true);
2567   VISA_Exec_Size execSize = getExecSizeFromValue(Size);
2568 
2569   unsigned IntVal = getPredicateConstantAsInt(C);
2570   C = ConstantInt::get(Type::getIntNTy(Inst->getContext(), std::max(Size, 8U)),
2571                        IntVal);
2572   addDebugInfo();
2573   CISA_CALL(Kernel->AppendVISASetP(ctrlMask, execSize, getPredicateVar(Inst),
2574                                    createImmediateOperand(C, UNSIGNED)));
2575 }
2576 
2577 /***********************************************************************
2578  * buildLoneOperand : build a rdregion or modifier that is not baled in to
2579  *                    a main instruction
2580  *
2581  * Enter:   Inst = the rdregion or modifier instruction
2582  *          BI = BaleInfo for Inst
2583  *          Mod = modifier for destination
2584  *          WrRegion = 0 else wrregion for destination
2585  *          WrRegionBI = BaleInfo for WrRegion (possibly baling in
2586  *              variable index add)
2587  */
buildLoneOperand(Instruction * Inst,genx::BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)2588 void GenXKernelBuilder::buildLoneOperand(Instruction *Inst, genx::BaleInfo BI,
2589                                          unsigned Mod,
2590                                          const DstOpndDesc &DstDesc) {
2591   Instruction *WrRegion = DstDesc.WrRegion;
2592   BaleInfo WrRegionBI = DstDesc.WrRegionBI;
2593 
2594   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
2595   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Inst->getType()))
2596     ExecSize = getExecSizeFromValue(VT->getNumElements());
2597   ISA_Opcode Opcode = ISA_MOV;
2598   bool Baled = true;
2599   VISA_EMask_Ctrl ExecMask = getExecMaskFromWrRegion(DstDesc);
2600   // Default source from Inst
2601   Value *Src = Inst;
2602 
2603   // Give dest and source the same signedness for byte mov.
2604   auto Signed = DONTCARESIGNED;
2605   // destination
2606   auto Dest = createDestination(Inst, Signed, Mod, DstDesc, &Signed);
2607 
2608   // source
2609   if ((Mod & MODIFIER_SAT) != 0 &&
2610       Inst->getType()->getScalarType()->isIntegerTy() &&
2611          GenXIntrinsic::isIntegerSat(Inst->user_back()))
2612     Signed = getISatSrcSign(Inst->user_back());
2613 
2614   if (BI.Type == BaleInfo::NOTMOD) {
2615     // A lone "not" is implemented as a not instruction, rather than a mov
2616     // with a not modifier. A mov only allows an arithmetic modifier.
2617     Opcode = ISA_NOT;
2618     Baled = BI.isOperandBaled(0);
2619     // In this case the src is actually operand 0 of the noti intrinsic
2620     Src = Inst->getOperand(0);
2621   } else if (BI.Type == BaleInfo::RDREGION && !Mod) {
2622     Register *DstReg;
2623     if (WrRegion) {
2624       DstReg =
2625           getRegForValueOrNullAndSaveAlias(KernFunc, WrRegion, DONTCARESIGNED);
2626     } else {
2627       DstReg = getRegForValueAndSaveAlias(KernFunc, Inst, DONTCARESIGNED);
2628     }
2629     if (DstReg && (DstReg->Category == RegCategory::SURFACE ||
2630                    DstReg->Category == RegCategory::SAMPLER)) {
2631       Opcode = ISA_MOVS;
2632     }
2633   }
2634   // TODO: mb need to get signed from dest for src and then modify that
2635   addDebugInfo();
2636   CISA_CALL(Kernel->AppendVISADataMovementInst(
2637       Opcode, (Opcode != ISA_MOVS ? createPredFromWrRegion(DstDesc) : nullptr),
2638       Mod & MODIFIER_SAT, ExecMask, ExecSize, Dest,
2639       createSource(Src, Signed, Baled, 0)));
2640 }
2641 
getResultedTypeSize(Type * Ty,const DataLayout & DL)2642 static unsigned getResultedTypeSize(Type *Ty, const DataLayout& DL) {
2643   unsigned TySz = 0;
2644   if (auto *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty))
2645     TySz =
2646         VTy->getNumElements() * getResultedTypeSize(VTy->getElementType(), DL);
2647   else if (Ty->isArrayTy())
2648     TySz = Ty->getArrayNumElements() *
2649            getResultedTypeSize(Ty->getArrayElementType(), DL);
2650   else if (Ty->isStructTy()) {
2651     StructType *STy = dyn_cast<StructType>(Ty);
2652     IGC_ASSERT(STy);
2653     for (Type *Ty : STy->elements())
2654       TySz += getResultedTypeSize(Ty, DL);
2655   } else if (Ty->isPointerTy())
2656     TySz = DL.getPointerSize();
2657   else {
2658     TySz = Ty->getPrimitiveSizeInBits() / CHAR_BIT;
2659     IGC_ASSERT_MESSAGE(TySz, "Ty is not primitive?");
2660   }
2661 
2662   return TySz;
2663 }
2664 
2665 // Check if we're trying to form return value of a structure type
2666 // TODO:  should check full insert/extract chain (for failed coalescing cases),
2667 //        e.g. after failed coalescing we may end up having a bunch of
2668 //        extractvalue, insertvalue and bitcasts inst where only the last one
2669 //        should be actually lowered
checkInsertToRetv(InsertValueInst * Inst)2670 static bool checkInsertToRetv(InsertValueInst *Inst) {
2671   if (auto IVI = dyn_cast<InsertValueInst>(Inst->use_begin()->getUser()))
2672     return checkInsertToRetv(IVI);
2673 
2674   if (auto RI = dyn_cast<ReturnInst>(Inst->use_begin()->getUser())) {
2675     const auto *F = RI->getFunction();
2676     return genx::requiresStackCall(F) || genx::isReferencedIndirectly(F);
2677   }
2678 
2679   return false;
2680 }
2681 
/***********************************************************************
 * buildMainInst : build a main instruction
 *
 * Enter:   Inst = the main instruction
 *          BI = BaleInfo for Inst
 *          Mod = modifier bits for destination
 *          DstDesc = destination description (DstDesc.WrRegion is null or
 *                    the wrregion for the destination)
 *
 * Dispatches on the kind of Inst to the matching build* routine. Several
 * kinds generate no code here (load, unreachable, most extractvalue and
 * insertvalue, a legal print-format-index GEP, ignored intrinsics). An
 * instruction kind that is not handled is reported as a DS_Error
 * diagnostic.
 *
 * Return:  for a branch, the result of buildBranch (true if terminator inst
 *          that falls through to following block); false in every other
 *          case
 */
bool GenXKernelBuilder::buildMainInst(Instruction *Inst, BaleInfo BI,
                                      unsigned Mod,
                                      const DstOpndDesc &DstDesc) {
  if (PHINode *Phi = dyn_cast<PHINode>(Inst))
    buildPhiNode(Phi);
  else if (ReturnInst *RI = dyn_cast<ReturnInst>(Inst)) {
    buildRet(RI);
  } else if (BranchInst *BR = dyn_cast<BranchInst>(Inst)) {
    // The only path that can return true.
    return buildBranch(BR);
  } else if (IndirectBrInst *IBR = dyn_cast<IndirectBrInst>(Inst)) {
    buildIndirectBr(IBR);
  } else if (CmpInst *Cmp = dyn_cast<CmpInst>(Inst)) {
    buildCmp(Cmp, BI, DstDesc);
  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Inst)) {
    if (!BO->getType()->getScalarType()->isIntegerTy(1)) {
      buildBinaryOperator(BO, BI, Mod, DstDesc);
    } else {
      // Binary operator on predicates (i1): nothing may be baled into it.
      IGC_ASSERT(!Mod);
      IGC_ASSERT(!DstDesc.WrRegion);
      IGC_ASSERT(!BI.isOperandBaled(0));
      IGC_ASSERT(!BI.isOperandBaled(1));
      buildBoolBinaryOperator(BO);
    }
  } else if (auto EVI = dyn_cast<ExtractValueInst>(Inst)) {
    if (auto *CI = dyn_cast<CallInst>(Inst->getOperand(0)))
      // translate extraction of structured type from retv
      if (!UseNewStackBuilder && !CI->isInlineAsm() &&
          (genx::requiresStackCall(CI->getCalledFunction()) ||
           IGCLLVM::isIndirectCall(*CI)))
        buildExtractRetv(EVI);
    // no code generated
  } else if (auto IVI = dyn_cast<InsertValueInst>(Inst)) {
    // Insertvalues that form a stack-call return value are only recorded in
    // RetvInserts here.
    if (!UseNewStackBuilder && checkInsertToRetv(IVI) &&
        // TODO: safely remove this tmp workaround for failed coalescing cases
        // and insert-extract-insert chains
        !isa<BitCastInst>(Inst->getOperand(1)))
      RetvInserts.push_back(IVI);
    // no code generated
  } else if (CastInst *CI = dyn_cast<CastInst>(Inst)) {
    if (genx::isNoopCast(CI))
      buildNoopCast(CI, BI, Mod, DstDesc);
    else
      buildCastInst(CI, BI, Mod, DstDesc);
  } else if (auto SI = dyn_cast<SelectInst>(Inst)) {
    buildSelectInst(SI, BI, Mod, DstDesc);
  } else if (auto LI = dyn_cast<LoadInst>(Inst)) {
    (void)LI; // no code generated
  } else if (auto GEPI = dyn_cast<GetElementPtrInst>(Inst)) {
    // Skip genx.print.format.index GEP here.
    IGC_ASSERT_MESSAGE(vc::isLegalPrintFormatIndexGEP(*GEPI),
                       "only genx.print.format.index src GEP can still be "
                       "present at this stage");
#if (LLVM_VERSION_MAJOR > 8)
  } else if (UnaryOperator *UO = dyn_cast<UnaryOperator>(Inst)) {
    buildUnaryOperator(UO, BI, Mod, DstDesc);
#endif
  } else if (auto *CI = dyn_cast<CallInst>(Inst)) {
    if (CI->isInlineAsm())
      buildInlineAsm(CI);
    else if (IGCLLVM::isIndirectCall(*CI)) {
      IGC_ASSERT_MESSAGE(!Mod,
        "cannot bale subroutine call into anything");
      IGC_ASSERT_MESSAGE(!DstDesc.WrRegion,
        "cannot bale subroutine call into anything");
      buildCall(CI, DstDesc);
    } else {
      // Direct call: either an intrinsic (LLVM or GenX) or a subroutine.
      Function *Callee = CI->getCalledFunction();
      unsigned IntrinID = GenXIntrinsic::getAnyIntrinsicID(Callee);
      switch (IntrinID) {
      case Intrinsic::dbg_value:
      case Intrinsic::dbg_declare:
        addDebugInfo();
        break;
      case GenXIntrinsic::genx_predefined_surface:
      case GenXIntrinsic::genx_output:
      case GenXIntrinsic::genx_output_1:
      case GenXIntrinsic::genx_jump_table:
        // ignore
        break;
      case GenXIntrinsic::genx_simdcf_goto:
        // A goto that is not baled into a branch (via an extractvalue)
        buildGoto(CI, nullptr);
        break;
      case GenXIntrinsic::genx_simdcf_join:
        // A join that is not baled into a branch (via an extractvalue)
        buildJoin(CI, nullptr);
        break;
      case GenXIntrinsic::genx_convert:
        buildConvert(CI, BI, Mod, DstDesc);
        break;
      case GenXIntrinsic::genx_print_format_index:
        buildPrintIndex(CI, IntrinID, Mod, DstDesc);
        break;
      case GenXIntrinsic::genx_convert_addr:
        buildConvertAddr(CI, BI, Mod, DstDesc);
        break;
      case GenXIntrinsic::genx_alloca:
        if (!UseNewStackBuilder)
          buildAlloca(CI, IntrinID, Mod, DstDesc);
        break;
      case GenXIntrinsic::genx_gaddr:
        buildSymbolInst(CI, Mod, DstDesc);
        break;
      case GenXIntrinsic::genx_write_predef_surface:
        buildWritePredefSurface(*CI);
        break;
      case GenXIntrinsic::genx_get_hwid:
        buildGetHWID(CI, DstDesc);
        break;
      case GenXIntrinsic::genx_constanti:
      case GenXIntrinsic::genx_constantf:
      case GenXIntrinsic::genx_constantpred:
        if (isa<UndefValue>(CI->getOperand(0)))
          return false; // Omit llvm.genx.constant with undef operand.
        if (!DstDesc.WrRegion &&
            !getRegForValueOrNullAndSaveAlias(KernFunc, CI))
          return false; // Omit llvm.genx.constantpred that is EM or RM and so
                        // does not have a register allocated.
                        // fall through...
      // Note: "default" deliberately sits before the not_any_intrinsic case
      // so that the constant intrinsics above can fall through into it.
      default:
        // Any other intrinsic goes to buildIntrinsic, except a genx.any call
        // without users, which generates nothing.
        if (!(CI->user_empty() &&
              GenXIntrinsic::getAnyIntrinsicID(CI->getCalledFunction()) ==
                  GenXIntrinsic::genx_any))
          buildIntrinsic(CI, IntrinID, BI, Mod, DstDesc);
        break;
      case GenXIntrinsic::not_any_intrinsic:
        // Not an intrinsic: an ordinary direct call.
        IGC_ASSERT_MESSAGE(!Mod,
          "cannot bale subroutine call into anything");
        IGC_ASSERT_MESSAGE(!DstDesc.WrRegion,
          "cannot bale subroutine call into anything");
        buildCall(CI, DstDesc);
        break;
      }
    }
  } else if (isa<UnreachableInst>(Inst))
    ; // no code generated
  else {
    // Unknown instruction kind: report an error diagnostic and carry on.
    DiagnosticInfoCisaBuild Err{Inst, "main inst not implemented", DS_Error};
    getContext().diagnose(Err);
  }

  return false;
}
2837 
/***********************************************************************
 * buildPhiNode : build code for a phi node
 *
 * Enter:   Phi = the phi node
 *
 * A phi node generates no code because coalescing has ensured that all
 * incomings and the result are in the same register. This function just
 * asserts that that is the case, by checking
 * testPhiNodeHasNoMismatchedRegs against the liveness information.
 */
void GenXKernelBuilder::buildPhiNode(PHINode *Phi) {
  IGC_ASSERT(testPhiNodeHasNoMismatchedRegs(Phi, Liveness));
}
2848 
2849 /***********************************************************************
2850  * buildGoto : translate a goto
2851  *
2852  * Enter:   Goto = goto instruction that is baled into an extractvalue of
2853  *                 field 2 (the !any(EM) value), that is baled into Branch
2854  *          Branch = branch instruction, 0 if this is a goto that is not
2855  *                   baled into a branch, which happens when the goto is
2856  *                   followed by a join point so the goto's JIP points there,
2857  *                   and LLVM changes the resulting conditional branch with
2858  *                   both successors the same into an unconditional branch
2859  */
buildGoto(CallInst * Goto,BranchInst * Branch)2860 void GenXKernelBuilder::buildGoto(CallInst *Goto, BranchInst *Branch) {
2861   // GenXSimdCFConformance and GenXTidyControlFlow ensure that we have either
2862   // 1. a forward goto, where the false successor is fallthrough; or
2863   // 2. a backward goto, where the UIP (the join whose RM the goto updates)
2864   //    and the true successor are both fallthrough, and the false successor
2865   //    is the top of the loop.
2866   // (1) generates a vISA forward goto, but the condition has the wrong sense
2867   // so we need to invert it.
2868   // (2) generates a vISA backward goto.
2869   Value *BranchTarget = nullptr;
2870   VISA_PREDICATE_STATE StateInvert = PredState_NO_INVERSE;
2871   if (!Branch ||
2872       Branch->getSuccessor(1) == Branch->getParent()->getNextNode()) {
2873     // Forward goto.  Find the join.
2874     auto Join = GotoJoin::findJoin(Goto);
2875     IGC_ASSERT_MESSAGE(Join, "join not found");
2876     BranchTarget = Join;
2877     StateInvert = PredState_INVERSE;
2878   } else {
2879     IGC_ASSERT_MESSAGE(Branch->getSuccessor(0) == Branch->getParent()->getNextNode(),
2880       "bad goto structure");
2881     // Backward branch.
2882     BranchTarget = Branch->getSuccessor(1);
2883   }
2884   // Get the condition.
2885   VISA_EMask_Ctrl Mask = vISA_EMASK_M1;
2886   VISA_PREDICATE_CONTROL Control = PRED_CTRL_NON;
2887   VISA_PREDICATE_STATE State = PredState_NO_INVERSE;
2888 
2889   Value *Pred = getPredicateOperand(
2890       Goto, 2 /*OperandNum*/, Baling->getBaleInfo(Goto), Control, State, &Mask);
2891   IGC_ASSERT_MESSAGE(!Mask, "cannot have rdpredregion baled into goto");
2892 
2893   Instruction *Not = dyn_cast<Instruction>(Pred);
2894   if (Not && isPredNot(Not)) {
2895     // Eliminate excess NOT
2896     // %P1 = ...
2897     // %P2 = not %P1
2898     // (!%P2) goto
2899     // Transforms into
2900     // (%P1) goto
2901     StateInvert = (StateInvert == PredState_NO_INVERSE) ? PredState_INVERSE
2902                                                         : PredState_NO_INVERSE;
2903     Pred = getPredicateOperand(Not, 0 /*OperandNum*/, Baling->getBaleInfo(Not),
2904                                Control, State, &Mask);
2905     IGC_ASSERT_MESSAGE(!Mask, "cannot have rdpredregion baled into goto");
2906   }
2907 
2908   Register *PredReg = nullptr;
2909   if (auto C = dyn_cast<Constant>(Pred)) {
2910     (void)C;
2911     if (StateInvert)
2912       IGC_ASSERT_MESSAGE(C->isNullValue(),
2913         "predication operand must be constant 0 or not constant");
2914     else
2915       IGC_ASSERT_MESSAGE(C->isAllOnesValue(),
2916         "predication operand must be constant 1 or not constant");
2917   } else {
2918     State ^= StateInvert;
2919     PredReg = getRegForValueOrNullAndSaveAlias(KernFunc, Pred);
2920     IGC_ASSERT(PredReg);
2921     IGC_ASSERT(PredReg->Category == RegCategory::PREDICATE);
2922   }
2923 
2924   uint8_t execSize = genx::log2(
2925       cast<IGCLLVM::FixedVectorType>(Pred->getType())->getNumElements());
2926 
2927   // Visa decoder part
2928   VISA_EMask_Ctrl emask =
2929       VISA_EMask_Ctrl((execSize >> 0x4) & 0xF);
2930   VISA_Exec_Size esize = (VISA_Exec_Size)((execSize)&0xF);
2931 
2932   VISA_PredOpnd *pred = nullptr;
2933   if (PredReg) {
2934     VISA_PredVar *Decl = getPredicateVar(PredReg);
2935     VISA_PredOpnd *opnd = createPredOperand(Decl, State, Control);
2936     pred = opnd;
2937   }
2938 
2939   unsigned LabelID = getOrCreateLabel(BranchTarget, LABEL_BLOCK);
2940 
2941   VISA_LabelOpnd *label = Labels[LabelID];
2942   addDebugInfo();
2943   CISA_CALL(Kernel->AppendVISACFGotoInst(pred, emask, esize, label));
2944 }
2945 
2946 // Convert predicate offset to EM offset according to
2947 // vISA spec 3.3.1 Execution Mask.
getVisaEMOffset(unsigned PredOffset)2948 static VISA_EMask_Ctrl getVisaEMOffset(unsigned PredOffset) {
2949   switch (PredOffset) {
2950   case 0:
2951     return vISA_EMASK_M1;
2952   case 4:
2953     return vISA_EMASK_M2;
2954   case 8:
2955     return vISA_EMASK_M3;
2956   case 12:
2957     return vISA_EMASK_M4;
2958   case 16:
2959     return vISA_EMASK_M5;
2960   case 20:
2961     return vISA_EMASK_M6;
2962   case 24:
2963     return vISA_EMASK_M7;
2964   case 28:
2965     return vISA_EMASK_M8;
2966   }
2967   IGC_ASSERT_EXIT_MESSAGE(0, "Unexpected EM offset");
2968 }
2969 
2970 /***********************************************************************
2971  * getPredicateOperand : get predicate operand, scanning through any baled
2972  *    in rdpredregion, all, any, not instructions to derive the mask control
2973  *    field and the predication field
2974  *
2975  * Enter:   Inst = instruction to get predicate operand from
2976  *          OperandNum = operand number in Inst
2977  *          BI = bale info for Inst
2978  *          *Control = where to write control information about predicate
2979  *          *State = where to write state information about predicate
2980  *          *MaskCtrl = where to write mask control field (bits 7..4)
2981  *
2982  * Return:  Value of mask after scanning through baled in instructions
2983  *          *PredField and *MaskCtrl set
2984  */
getPredicateOperand(Instruction * Inst,unsigned OperandNum,BaleInfo BI,VISA_PREDICATE_CONTROL & Control,VISA_PREDICATE_STATE & State,VISA_EMask_Ctrl * MaskCtrl)2985 Value *GenXKernelBuilder::getPredicateOperand(
2986     Instruction *Inst, unsigned OperandNum, BaleInfo BI,
2987     VISA_PREDICATE_CONTROL &Control, VISA_PREDICATE_STATE &State,
2988     VISA_EMask_Ctrl *MaskCtrl) {
2989   State = PredState_NO_INVERSE;
2990   *MaskCtrl = vISA_EMASK_M1;
2991   Control = PRED_CTRL_NON;
2992   Value *Mask = Inst->getOperand(OperandNum);
2993   // Check for baled in all/any/notp/rdpredregion.
2994   while (BI.isOperandBaled(OperandNum)) {
2995     Instruction *Inst = dyn_cast<Instruction>(Mask);
2996     if (isNot(Inst)) {
2997       if (Control != PRED_CTRL_NON) {
2998         // switch any<->all as well as invert bit
2999         Control ^= (VISA_PREDICATE_CONTROL)(PRED_CTRL_ANY | PRED_CTRL_ALL);
3000         State ^= PredState_INVERSE;
3001       } else {
3002         // all/any not set, just invert invert bit
3003         State ^= PredState_INVERSE;
3004       }
3005       OperandNum = 0;
3006       IGC_ASSERT(Inst);
3007       Mask = Inst->getOperand(OperandNum);
3008       BI = Baling->getBaleInfo(Inst);
3009       continue;
3010     }
3011     switch (GenXIntrinsic::getGenXIntrinsicID(Inst)) {
3012     case GenXIntrinsic::genx_all:
3013       Control |= PRED_CTRL_ALL; // predicate combine field = "all"
3014       OperandNum = 0;
3015       Mask = Inst->getOperand(OperandNum);
3016       BI = Baling->getBaleInfo(Inst);
3017       continue;
3018     case GenXIntrinsic::genx_any:
3019       Control |= PRED_CTRL_ANY; // predicate combine field = "any"
3020       OperandNum = 0;
3021       Mask = Inst->getOperand(OperandNum);
3022       BI = Baling->getBaleInfo(Inst);
3023       continue;
3024     case GenXIntrinsic::genx_rdpredregion: {
3025       // Baled in rdpredregion. Use its constant offset for the mask control
3026       // field.
3027       unsigned MaskOffset =
3028           cast<ConstantInt>(Inst->getOperand(1))->getSExtValue();
3029       *MaskCtrl = getVisaEMOffset(MaskOffset);
3030       Mask = Inst->getOperand(0);
3031       break;
3032     }
3033     default:
3034       break;
3035     }
3036     // Baled shufflepred. Mask offset is deduced from initial value of slice.
3037     if (auto *SVI = dyn_cast<ShuffleVectorInst>(Inst)) {
3038       unsigned MaskOffset =
3039           ShuffleVectorAnalyzer::getReplicatedSliceDescriptor(SVI)
3040               .InitialOffset;
3041       *MaskCtrl = getVisaEMOffset(MaskOffset);
3042       Mask = SVI->getOperand(0);
3043     }
3044     break;
3045   }
3046   return Mask;
3047 }
3048 
AddGenVar(Register & Reg)3049 void GenXKernelBuilder::AddGenVar(Register &Reg) {
3050   auto &DL = FG->getModule()->getDataLayout();
3051 
3052   VISA_GenVar *parentDecl = nullptr;
3053   VISA_GenVar *Decl = nullptr;
3054 
3055   if (!Reg.AliasTo) {
3056     LLVM_DEBUG(dbgs() << "GenXKernelBuilder::AddGenVar: "; Reg.print(dbgs()); dbgs() << "\n");
3057     // This is not an aliased register. Go through all the aliases and
3058     // determine the biggest alignment required. If the register is at least
3059     // as big as a GRF, make the alignment GRF.
3060     unsigned Alignment = getLogAlignment(
3061         VISA_Align::ALIGN_GRF, Subtarget ? Subtarget->getGRFByteSize()
3062                                          : defaultGRFByteSize); // GRF alignment
3063     Type *Ty = Reg.Ty;
3064     unsigned NBits = Ty->isPointerTy() ? DL.getPointerSizeInBits()
3065                                        : Ty->getPrimitiveSizeInBits();
3066     LLVM_DEBUG(dbgs() << "RegTy " << *Ty << ", nbits = " << NBits << "\n");
3067     if (NBits < GrfByteSize * 8 /* bits in GRF */) {
3068       Alignment = 0;
3069       for (Register *AliasReg = &Reg; AliasReg;
3070            AliasReg = AliasReg->NextAlias[KernFunc]) {
3071         LLVM_DEBUG(dbgs() << "Alias reg " << AliasReg->Num << ", ty "
3072                           << *(AliasReg->Ty) << "\n");
3073         Type *AliasTy = AliasReg->Ty->getScalarType();
3074         unsigned ThisElementBytes = AliasTy->isPointerTy()
3075                                         ? DL.getPointerTypeSize(AliasTy)
3076                                         : AliasTy->getPrimitiveSizeInBits() / 8;
3077         unsigned LogThisElementBytes = genx::log2(ThisElementBytes);
3078         if (LogThisElementBytes > Alignment)
3079           Alignment = LogThisElementBytes;
3080         if (AliasReg->Alignment > Alignment)
3081           Alignment = AliasReg->Alignment;
3082       }
3083     }
3084     LLVM_DEBUG(dbgs() << "Final alignment of " << Alignment << " for reg "
3085                       << Reg.Num << "\n");
3086     for (Register *AliasReg = &Reg; AliasReg; AliasReg = AliasReg->NextAlias[KernFunc]) {
3087       if (AliasReg->Alignment < Alignment) {
3088         AliasReg->Alignment = Alignment;
3089         LLVM_DEBUG(dbgs() << "Setting alignment of " << Alignment << " for reg "
3090                           << AliasReg->Num << "\n");
3091       }
3092     }
3093   } else {
3094     if (Reg.AliasTo->Num < visa::VISA_NUM_RESERVED_REGS) {
3095       LLVM_DEBUG(dbgs() << "GenXKernelBuilder::AddGenVar alias: " << Reg.AliasTo->Num << "\n");
3096       CISA_CALL(Kernel->GetPredefinedVar(parentDecl,
3097                                          (PreDefined_Vars)Reg.AliasTo->Num));
3098       IGC_ASSERT_MESSAGE(parentDecl, "Predefeined variable is null");
3099     } else {
3100       parentDecl = Reg.AliasTo->GetVar<VISA_GenVar>(Kernel);
3101       LLVM_DEBUG(dbgs() << "GenXKernelBuilder::AddGenVar decl: " << parentDecl << "\n");
3102       IGC_ASSERT_MESSAGE(parentDecl, "Refers to undefined var");
3103     }
3104   }
3105 
3106   visa::TypeDetails TD(DL, Reg.Ty, Reg.Signed);
3107   LLVM_DEBUG(dbgs() << "Resulting #of elements: " << TD.NumElements << "\n");
3108 
3109   VISA_Align VA = getVISA_Align(
3110       Reg.Alignment, Subtarget ? Subtarget->getGRFByteSize() : defaultGRFByteSize);
3111   CISA_CALL(Kernel->CreateVISAGenVar(Decl, Reg.NameStr.c_str(), TD.NumElements,
3112                                      static_cast<VISA_Type>(TD.VisaType), VA,
3113                                      parentDecl, 0));
3114 
3115   Reg.SetVar(Kernel, Decl);
3116   LLVM_DEBUG(dbgs() << "Resulting decl: " << Decl << "\n");
3117 
3118   for (auto &Attr : Reg.Attributes) {
3119     CISA_CALL(Kernel->AddAttributeToVar(
3120         Decl, getStringByIndex(Attr.first).begin(), Attr.second.size(),
3121         (void *)(Attr.second.c_str())));
3122   }
3123 }
3124 
allowI64Ops() const3125 bool GenXKernelBuilder::allowI64Ops() const {
3126   IGC_ASSERT(Subtarget);
3127   if (!Subtarget->hasLongLong())
3128     return false;
3129   return true;
3130 }
3131 /**************************************************************************************************
3132  * Scan ir to collect information about whether kernel has callable function or
3133  * barrier.
3134  */
collectKernelInfo()3135 void GenXKernelBuilder::collectKernelInfo() {
3136   for (auto It = FG->begin(), E = FG->end(); It != E; ++It) {
3137     auto Func = *It;
3138     HasStackcalls |=
3139         genx::requiresStackCall(Func) || genx::isReferencedIndirectly(Func);
3140     for (auto &BB : *Func) {
3141       for (auto &I : BB) {
3142         if (CallInst *CI = dyn_cast<CallInst>(&I)) {
3143           if (CI->isInlineAsm())
3144             continue;
3145           if (GenXIntrinsicInst *II = dyn_cast<GenXIntrinsicInst>(CI)) {
3146             auto IID = II->getIntrinsicID();
3147             if (IID == GenXIntrinsic::genx_barrier ||
3148                 IID == GenXIntrinsic::genx_sbarrier)
3149               HasBarrier = true;
3150             else if (IID == GenXIntrinsic::genx_alloca)
3151               HasAlloca = true;
3152           } else {
3153             Function *Callee = CI->getCalledFunction();
3154             if (Callee && Callee->hasFnAttribute("CMCallable"))
3155               HasCallable = true;
3156           }
3157         }
3158       }
3159     }
3160   }
3161 }
3162 /**************************************************************************************************
3163  * Build variables
3164  */
buildVariables()3165 void GenXKernelBuilder::buildVariables() {
3166   RegAlloc->SetRegPushHook(this, [](void *Object, GenXVisaRegAlloc::Reg &Reg) {
3167     static_cast<GenXKernelBuilder *>(Object)->AddGenVar(Reg);
3168   });
3169 
3170   for (auto &It : RegAlloc->getRegStorage()) {
3171     Register *Reg = &(It);
3172     switch (Reg->Category) {
3173     case RegCategory::GENERAL:
3174       if (Reg->Num >= visa::VISA_NUM_RESERVED_REGS)
3175         AddGenVar(*Reg);
3176       break;
3177 
3178     case RegCategory::ADDRESS: {
3179       VISA_AddrVar *Decl = nullptr;
3180       unsigned NumElements = 1;
3181       if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3182         NumElements = VT->getNumElements();
3183       CISA_CALL(
3184           Kernel->CreateVISAAddrVar(Decl, Reg->NameStr.c_str(), NumElements));
3185       Reg->SetVar(Kernel, Decl);
3186       for (auto &Attr : Reg->Attributes) {
3187         CISA_CALL(Kernel->AddAttributeToVar(
3188             Decl, getStringByIndex(Attr.first).begin(), Attr.second.size(),
3189             (void *)(Attr.second.c_str())));
3190       }
3191     } break;
3192 
3193     case RegCategory::PREDICATE: {
3194       VISA_PredVar *Decl = nullptr;
3195       unsigned NumElements = 1;
3196       if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3197         NumElements = VT->getNumElements();
3198       CISA_CALL(
3199           Kernel->CreateVISAPredVar(Decl, Reg->NameStr.c_str(), NumElements));
3200       Reg->SetVar(Kernel, Decl);
3201       for (auto &Attr : Reg->Attributes) {
3202         CISA_CALL(Kernel->AddAttributeToVar(
3203             Decl, getStringByIndex(Attr.first).begin(), Attr.second.size(),
3204             (void *)(Attr.second.c_str())));
3205       }
3206     } break;
3207 
3208     case RegCategory::SAMPLER: {
3209       unsigned NumElements = 1;
3210       if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3211         NumElements = VT->getNumElements();
3212       VISA_SamplerVar *Decl = nullptr;
3213       CISA_CALL(Kernel->CreateVISASamplerVar(Decl, Reg->NameStr.c_str(),
3214                                              NumElements));
3215       Reg->SetVar(Kernel, Decl);
3216     } break;
3217 
3218     case RegCategory::SURFACE: {
3219       VISA_SurfaceVar *Decl = nullptr;
3220       if (Reg->Num < visa::VISA_NUM_RESERVED_SURFACES) {
3221         Kernel->GetPredefinedSurface(Decl, (PreDefined_Surface)Reg->Num);
3222       } else {
3223         unsigned NumElements = 1;
3224         if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Reg->Ty))
3225           NumElements = VT->getNumElements();
3226 
3227         CISA_CALL(Kernel->CreateVISASurfaceVar(Decl, Reg->NameStr.c_str(),
3228                                                NumElements));
3229       }
3230       Reg->SetVar(Kernel, Decl);
3231     } break;
3232 
3233     default:
3234       report_fatal_error("Unknown category for register");
3235       break;
3236     }
3237   }
3238 
3239   VISA_GenVar *ArgDecl = nullptr, *RetDecl = nullptr;
3240   Kernel->GetPredefinedVar(ArgDecl, PREDEFINED_ARG);
3241   Kernel->GetPredefinedVar(RetDecl, PREDEFINED_RET);
3242   createCisaVariable(Kernel, "argv", ArgDecl,
3243                      visa::ArgRegSizeInGRFs * GrfByteSize);
3244   createCisaVariable(Kernel, "retv", RetDecl,
3245                      visa::RetRegSizeInGRFs * GrfByteSize);
3246 }
3247 
3248 /***********************************************************************
3249  * getExecMaskFromWrPredRegion : write exec size field from wrpredregion
3250  *        or wrpredpredregion instruction
3251  *
3252  * Enter:   ExecSize = execution size
3253  *          WrPredRegion = 0 else wrpredregion instruction
3254  *
3255  * The exec size byte includes the mask control field, which we need to set
3256  * up from the wrpredregion/wrpredpredregion.
3257  */
3258 VISA_EMask_Ctrl
getExecMaskFromWrPredRegion(Instruction * WrPredRegion,bool IsNoMask)3259 GenXKernelBuilder::getExecMaskFromWrPredRegion(Instruction *WrPredRegion,
3260                                                bool IsNoMask) {
3261   VISA_EMask_Ctrl MaskCtrl =
3262       (IsNoMask | NoMask) ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3263   if (WrPredRegion) {
3264     // Get the mask control field from the offset in the wrpredregion.
3265     unsigned MaskOffset =
3266         cast<ConstantInt>(WrPredRegion->getOperand(2))->getSExtValue();
3267     IGC_ASSERT_MESSAGE(MaskOffset < 32, "unexpected mask offset");
3268     MaskCtrl = static_cast<VISA_EMask_Ctrl>(MaskOffset >> 2);
3269   }
3270 
3271   // Set to NoMask if requested. Otherwise use the default NM mode
3272   // when WrPredRegion is null.
3273   if ((IsNoMask && MaskCtrl < vISA_EMASK_M1_NM) ||
3274       (!WrPredRegion && NoMask && MaskCtrl < vISA_EMASK_M1_NM))
3275     MaskCtrl = static_cast<VISA_EMask_Ctrl>(
3276         static_cast<unsigned>(MaskCtrl) + vISA_EMASK_M1_NM);
3277 
3278   return MaskCtrl;
3279 }
3280 
3281 /***********************************************************************
3282  * getExecMaskFromWrRegion : get exec size field from wrregion instruction
3283  *
3284  * Enter:   ExecSize = execution size
3285  *          WrRegion = 0 else wrregion instruction
3286  *          WrRegionBI = BaleInfo for wrregion, so we can see if there is a
3287  *                rdpredregion baled in to the mask
3288  *
3289  * If WrRegion != 0, and it has a mask that is not constant 1, then the
3290  * mask must be a predicate register.
3291  *
3292  * The exec size byte includes the mask control field, which we need to set
3293  * up from any rdpredregion baled in to a predicated wrregion.
3294  *
3295  * If the predicate has no register allocated, it must be EM, and we set the
3296  * instruction to be masked. Otherwise we set nomask.
3297  */
3298 VISA_EMask_Ctrl
getExecMaskFromWrRegion(const DstOpndDesc & DstDesc,bool IsNoMask)3299 GenXKernelBuilder::getExecMaskFromWrRegion(const DstOpndDesc &DstDesc,
3300                                            bool IsNoMask) {
3301   // Override mask control if requested.
3302   auto MaskCtrl = (IsNoMask | NoMask) ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3303 
3304   if (DstDesc.WrRegion) {
3305     // Get the predicate (mask) operand, scanning through baled in
3306     // all/any/not/rdpredregion and setting PredField and MaskCtrl
3307     // appropriately.
3308     VISA_PREDICATE_CONTROL Control = PRED_CTRL_NON;
3309     VISA_PREDICATE_STATE State = PredState_NO_INVERSE;
3310     Value *Mask =
3311         getPredicateOperand(DstDesc.WrRegion, 7 /*mask operand in wrregion*/,
3312                             DstDesc.WrRegionBI, Control, State, &MaskCtrl);
3313     if ((isa<Constant>(Mask) ||
3314          getRegForValueOrNullAndSaveAlias(KernFunc, Mask)) &&
3315         NoMask)
3316       MaskCtrl |= vISA_EMASK_M1_NM;
3317   }
3318   return MaskCtrl;
3319 }
3320 
3321 /***********************************************************************
3322  * buildIntrinsic : build code for an intrinsic
3323  *
3324  * Enter:   CI = the CallInst
3325  *          IntrinID = intrinsic ID
3326  *          BI = BaleInfo for the instruction
3327  *          Mod = modifier bits for destination
3328  *          WrRegion = 0 else wrregion for destination
3329  *          WrRegionBI = BaleInfo for WrRegion
3330  */
buildIntrinsic(CallInst * CI,unsigned IntrinID,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)3331 void GenXKernelBuilder::buildIntrinsic(CallInst *CI, unsigned IntrinID,
3332                                        BaleInfo BI, unsigned Mod,
3333                                        const DstOpndDesc &DstDesc) {
3334   using II = GenXIntrinsicInfo;
3335   LLVM_DEBUG(dbgs() << "buildIntrinsic: " << *CI << "\n");
3336 
3337   int MaxRawOperands = std::numeric_limits<int>::max();
3338 
3339   // TODO: replace lambdas by methods
3340 
3341   auto GetUnsignedValue = [&](II::ArgInfo AI) {
3342     ConstantInt *Const =
3343         dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3344     if (!Const) {
3345       DiagnosticInfoCisaBuild Err{CI, "Incorrect args to intrinsic call",
3346                                   DS_Error};
3347       getContext().diagnose(Err);
3348     }
3349     unsigned val = Const->getSExtValue();
3350     LLVM_DEBUG(dbgs() << "GetUnsignedValue from op #" << AI.getArgIdx()
3351                       << " yields: " << val << "\n");
3352     return val;
3353   };
3354 
3355   auto CreateSurfaceOperand = [&](II::ArgInfo AI) {
3356     LLVM_DEBUG(dbgs() << "CreateSurfaceOperand\n");
3357     llvm::Value *Arg = CI->getArgOperand(AI.getArgIdx());
3358     VISA_SurfaceVar *SurfDecl = nullptr;
3359     int Index = visa::convertToSurfaceIndex(Arg);
3360     if (visa::isReservedSurfaceIndex(Index)) {
3361       Kernel->GetPredefinedSurface(SurfDecl, visa::getReservedSurface(Index));
3362     } else {
3363       Register *Reg = getRegForValueAndSaveAlias(KernFunc, Arg);
3364       IGC_ASSERT_MESSAGE(Reg->Category == RegCategory::SURFACE,
3365         "Expected surface register");
3366       SurfDecl = Reg->GetVar<VISA_SurfaceVar>(Kernel);
3367     }
3368     VISA_StateOpndHandle *ResultOperand = nullptr;
3369     CISA_CALL(Kernel->CreateVISAStateOperandHandle(ResultOperand, SurfDecl));
3370     return ResultOperand;
3371   };
3372 
3373   auto CreatePredefSurfaceOperand = [&](II::ArgInfo AI) {
3374     LLVM_DEBUG(dbgs() << "CreatePredefinedSurfaceOperand\n");
3375     auto *Arg = cast<GlobalVariable>(CI->getArgOperand(AI.getArgIdx()));
3376     VISA_SurfaceVar *SurfVar = getPredefinedSurfaceVar(*Arg);
3377     VISA_StateOpndHandle *ResultOperand = nullptr;
3378     CISA_CALL(Kernel->CreateVISAStateOperandHandle(ResultOperand, SurfVar));
3379     return ResultOperand;
3380   };
3381 
3382   auto CreateSamplerOperand = [&](II::ArgInfo AI) {
3383     LLVM_DEBUG(dbgs() << "CreateSamplerOperand\n");
3384     Register *Reg =
3385         getRegForValueAndSaveAlias(KernFunc, CI->getArgOperand(AI.getArgIdx()));
3386     IGC_ASSERT_MESSAGE(Reg->Category == RegCategory::SAMPLER,
3387       "Expected sampler register");
3388     VISA_StateOpndHandle *ResultOperand = nullptr;
3389     CISA_CALL(Kernel->CreateVISAStateOperandHandle(
3390         ResultOperand, Reg->GetVar<VISA_SamplerVar>(Kernel)));
3391     return ResultOperand;
3392   };
3393 
3394   auto GetMediaHeght = [&](II::ArgInfo AI) {
3395     LLVM_DEBUG(dbgs() << "GetMediaHeght\n");
3396     // constant byte for media height that we need to infer from the
3397     // media width and the return type or final arg
3398     ConstantInt *Const =
3399         dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3400     IGC_ASSERT_MESSAGE(Const, "Incorrect args to intrinsic call");
3401     unsigned Width = Const->getZExtValue();
3402     IGC_ASSERT_MESSAGE(Width > 0 && Width <= 64, "Invalid media width");
3403     unsigned RoundedWidth = roundedVal(Width, 4u);
3404     Type *DataType = CI->getType();
3405     if (DataType->isVoidTy())
3406       DataType = CI->getOperand(CI->getNumArgOperands() - 1)->getType();
3407     unsigned DataSize;
3408     if (VectorType *VT = dyn_cast<VectorType>(DataType))
3409       DataSize = DL.getTypeSizeInBits(VT) / genx::ByteBits;
3410     else
3411       DataSize = DL.getTypeSizeInBits(DataType) / genx::ByteBits;
3412     if (DataSize <= RoundedWidth && DataSize >= Width)
3413       return static_cast<uint8_t>(1);
3414     IGC_ASSERT_MESSAGE(RoundedWidth && (DataSize % RoundedWidth == 0),
3415                        "Invalid media width");
3416     return static_cast<uint8_t>(DataSize / RoundedWidth);
3417   };
3418 
3419   auto ChooseSign = [&](ArrayRef<unsigned> SrcIdxs) {
3420     IGC_ASSERT_MESSAGE(!SrcIdxs.empty(), "Expected at least one source index");
3421 
3422     bool hasExt = std::any_of(SrcIdxs.begin(), SrcIdxs.end(),
3423                               [CI, B = Baling](unsigned Idx) {
3424                                 return isExtOperandBaled(CI, Idx, B);
3425                               });
3426 
3427     // Keep the old behavior.
3428     if (hasExt)
3429       return DONTCARESIGNED;
3430 
3431     SmallVector<Value *, 4> SrcValues;
3432     std::transform(SrcIdxs.begin(), SrcIdxs.end(),
3433                    std::back_inserter(SrcValues),
3434                    [CI](unsigned Idx) { return CI->getOperand(Idx); });
3435 
3436     return getCommonSignedness(SrcValues);
3437   };
3438 
3439   auto CreateOperand = [&](II::ArgInfo AI, Signedness Signed = DONTCARESIGNED) {
3440     LLVM_DEBUG(dbgs() << "CreateOperand from arg #" << AI.getArgIdx() << "\n");
3441     VISA_VectorOpnd *ResultOperand = nullptr;
3442     IGC_ASSERT_MESSAGE(Signed == DONTCARESIGNED ||
3443                            !(AI.needsSigned() || AI.needsUnsigned()),
3444                        "Signedness was set in two different ways.");
3445     if (AI.needsSigned())
3446       Signed = SIGNED;
3447     else if (AI.needsUnsigned())
3448       Signed = UNSIGNED;
3449     if (AI.isRet()) {
3450       if (AI.getSaturation() == II::SATURATION_SATURATE)
3451         Mod |= MODIFIER_SAT;
3452       ResultOperand = createDestination(CI, Signed, Mod, DstDesc);
3453     } else {
3454       unsigned MaxWidth = 16;
3455       if (AI.getRestriction() == II::TWICEWIDTH) {
3456         // For a TWICEWIDTH operand, do not allow width bigger than the
3457         // execution size.
3458         MaxWidth =
3459             cast<IGCLLVM::FixedVectorType>(CI->getType())->getNumElements();
3460       }
3461       if ((IntrinID == GenXIntrinsic::genx_dpas) ||
3462           (IntrinID == GenXIntrinsic::genx_dpas2) ||
3463           (IntrinID == GenXIntrinsic::genx_dpasw) ||
3464           (IntrinID == GenXIntrinsic::genx_dpas_nosrc0) ||
3465           (IntrinID == GenXIntrinsic::genx_dpasw_nosrc0)) {
3466         MaxWidth = Subtarget->dpasWidth();
3467       }
3468       ResultOperand = createSourceOperand(CI, Signed, AI.getArgIdx(), BI, 0,
3469                                           nullptr, MaxWidth);
3470     }
3471     return ResultOperand;
3472   };
3473 
3474   auto CreateRawOperand = [&](II::ArgInfo AI) {
3475     LLVM_DEBUG(dbgs() << "CreateRawOperand from "
3476                       << (AI.isRet() ? "Dest" : "Src")
3477                       << " op #" << AI.getArgIdx() << "\n");
3478     VISA_RawOpnd *ResultOperand = nullptr;
3479     auto Signed = DONTCARESIGNED;
3480     if (AI.needsSigned())
3481       Signed = SIGNED;
3482     else if (AI.needsUnsigned())
3483       Signed = UNSIGNED;
3484     if (AI.isRet()) {
3485       IGC_ASSERT(!Mod);
3486       ResultOperand = createRawDestination(CI, DstDesc, Signed);
3487     } else if (AI.getArgIdx() < MaxRawOperands)
3488       ResultOperand = createRawSourceOperand(CI, AI.getArgIdx(), BI, Signed);
3489     return ResultOperand;
3490   };
3491 
3492   auto CreateRawOperands = [&](II::ArgInfo AI, VISA_RawOpnd **Operands) {
3493     LLVM_DEBUG(dbgs() << "CreateRawOperands\n");
3494     IGC_ASSERT_MESSAGE(MaxRawOperands != std::numeric_limits<int>::max(),
3495       "MaxRawOperands must be defined");
3496     for (int i = 0; i < AI.getArgIdx() + MaxRawOperands; ++i) {
3497       Operands[i] = CreateRawOperand(II::ArgInfo(II::RAW | (AI.Info + i)));
3498     }
3499   };
3500 
3501   auto GetOwords = [&](II::ArgInfo AI) {
3502     LLVM_DEBUG(dbgs() << "GetOwords\n");
3503     // constant byte for log2 number of owords
3504     Value *Arg = CI;
3505     if (!AI.isRet())
3506       Arg = CI->getOperand(AI.getArgIdx());
3507     auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType());
3508     if (!VT)
3509       report_fatal_error("Invalid number of owords");
3510     int DataSize = VT->getNumElements() *
3511                    DL.getTypeSizeInBits(VT->getElementType()) / 8;
3512     DataSize = std::max(0, genx::exactLog2(DataSize) - 4);
3513     if (DataSize > 4)
3514       report_fatal_error("Invalid number of words");
3515     return static_cast<VISA_Oword_Num>(DataSize);
3516   };
3517 
3518   auto GetExecSize = [&](II::ArgInfo AI, VISA_EMask_Ctrl *Mask) {
3519     LLVM_DEBUG(dbgs() << "GetExecSize\n");
3520     int ExecSize = GenXIntrinsicInfo::getOverridedExecSize(CI, Subtarget);
3521     if (ExecSize == 0) {
3522       if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType())) {
3523         ExecSize = VT->getNumElements();
3524       } else {
3525         ExecSize = 1;
3526       }
3527     }
3528     bool IsNoMask = AI.getCategory() == II::EXECSIZE_NOMASK;
3529     *Mask = getExecMaskFromWrRegion(DstDesc, IsNoMask);
3530     return getExecSizeFromValue(ExecSize);
3531   };
3532 
3533   auto GetBitWidth = [&](II::ArgInfo AI) {
3534     LLVM_DEBUG(dbgs() << "GetBitWidth\n");
3535 #ifndef NDEBUG
3536     // Only SVM atomics have this field
3537     auto ID = GenXIntrinsic::getGenXIntrinsicID(CI);
3538     switch (ID)
3539     {
3540     case llvm::GenXIntrinsic::genx_svm_atomic_add:
3541     case llvm::GenXIntrinsic::genx_svm_atomic_and:
3542     case llvm::GenXIntrinsic::genx_svm_atomic_cmpxchg:
3543     case llvm::GenXIntrinsic::genx_svm_atomic_dec:
3544     case llvm::GenXIntrinsic::genx_svm_atomic_fcmpwr:
3545     case llvm::GenXIntrinsic::genx_svm_atomic_fmax:
3546     case llvm::GenXIntrinsic::genx_svm_atomic_fmin:
3547     case llvm::GenXIntrinsic::genx_svm_atomic_imax:
3548     case llvm::GenXIntrinsic::genx_svm_atomic_imin:
3549     case llvm::GenXIntrinsic::genx_svm_atomic_inc:
3550     case llvm::GenXIntrinsic::genx_svm_atomic_max:
3551     case llvm::GenXIntrinsic::genx_svm_atomic_min:
3552     case llvm::GenXIntrinsic::genx_svm_atomic_or:
3553     case llvm::GenXIntrinsic::genx_svm_atomic_sub:
3554     case llvm::GenXIntrinsic::genx_svm_atomic_xchg:
3555     case llvm::GenXIntrinsic::genx_svm_atomic_xor:
3556         break;
3557     default:
3558         IGC_ASSERT(false &&
3559             "Trying to get bit width for non-svm atomic inst");
3560         break;
3561     }
3562 #endif // !NDEBUG
3563     auto* T = AI.isRet() ? CI->getType() : CI->getArgOperand(AI.getArgIdx())->getType();
3564     unsigned short Width = T->getScalarType()->getPrimitiveSizeInBits();
3565     return Width;
3566   };
3567 
3568   auto GetExecSizeFromArg = [&](II::ArgInfo AI,
3569                                 VISA_EMask_Ctrl *ExecMask) {
3570     LLVM_DEBUG(dbgs() << "GetExecSizeFromArg\n");
3571     // exec_size inferred from width of predicate arg, defaulting to 16 if
3572     // it is scalar i1 (as can happen in raw send). Also get M3 etc flag
3573     // if the predicate has a baled in rdpredregion, and mark as nomask if
3574     // the predicate is not EM.
3575     int ExecSize;
3576     *ExecMask = NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3577     // Get the predicate (mask) operand, scanning through baled in
3578     // all/any/not/rdpredregion and setting PredField and MaskCtrl
3579     // appropriately.
3580     VISA_PREDICATE_CONTROL Control;
3581     VISA_PREDICATE_STATE State;
3582     Value *Mask =
3583         getPredicateOperand(CI, AI.getArgIdx(), BI, Control, State, ExecMask);
3584     if (isa<Constant>(Mask) || getRegForValueOrNullAndSaveAlias(KernFunc, Mask))
3585       *ExecMask |= NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
3586     if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(
3587             CI->getOperand(AI.getArgIdx())->getType()))
3588       ExecSize = VT->getNumElements();
3589     else
3590       ExecSize = GenXIntrinsicInfo::getOverridedExecSize(CI, Subtarget);
3591     return getExecSizeFromValue(ExecSize);
3592   };
3593 
3594   auto GetExecSizeFromByte = [&](II::ArgInfo AI, VISA_EMask_Ctrl *Mask) {
3595     LLVM_DEBUG(dbgs() << "GetExecSizeFromByte\n");
3596     ConstantInt *Const =
3597       dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3598     if (!Const) {
3599       DiagnosticInfoCisaBuild Err{CI, "Incorrect args to intrinsic call",
3600                                   DS_Error};
3601       getContext().diagnose(Err);
3602     }
3603     unsigned Byte = Const->getSExtValue() & 0xFF;
3604     *Mask = (VISA_EMask_Ctrl)(Byte >> 4);
3605     unsigned Res = Byte & 0xF;
3606     if (Res > 5) {
3607       DiagnosticInfoCisaBuild Err{
3608           CI, "illegal common ISA execsize (should be 1, 2, 4, 8, 16, 32)",
3609           DS_Error};
3610       getContext().diagnose(Err);
3611     }
3612     return (VISA_Exec_Size)Res;
3613   };
3614 
3615   auto CreateImplicitPredication = [&](II::ArgInfo AI) {
3616     LLVM_DEBUG(dbgs() << "CreateImplicitPredication\n");
3617     return createPredFromWrRegion(DstDesc);
3618   };
3619 
3620   auto CreatePredication = [&](II::ArgInfo AI) {
3621     LLVM_DEBUG(dbgs() << "CreatePredication\n");
3622     return createPred(CI, BI, AI.getArgIdx());
3623   };
3624 
3625   auto GetPredicateVar = [&](II::ArgInfo AI) {
3626     LLVM_DEBUG(dbgs() << "GetPredicateVar\n");
3627     if (AI.isRet())
3628       return getPredicateVar(CI);
3629     else
3630       return getPredicateVar(CI->getArgOperand(AI.getArgIdx()));
3631   };
3632 
3633   auto GetZeroedPredicateVar = [&](II::ArgInfo AI) {
3634     LLVM_DEBUG(dbgs() << "GetZeroedPredicateVar\n");
3635     if (AI.isRet())
3636       return getZeroedPredicateVar(CI);
3637     else
3638       return getZeroedPredicateVar(CI->getArgOperand(AI.getArgIdx()));
3639   };
3640 
3641   auto CreateNullRawOperand = [&](II::ArgInfo AI) {
3642     LLVM_DEBUG(dbgs() << "CreateNullRawOperand\n");
3643     VISA_RawOpnd *ResultOperand = nullptr;
3644     CISA_CALL(Kernel->CreateVISANullRawOperand(ResultOperand, false));
3645     return ResultOperand;
3646   };
3647 
3648   auto ProcessTwoAddr = [&](II::ArgInfo AI) {
3649     LLVM_DEBUG(dbgs() << "ProcessTwoAddr\n");
3650     if (AI.getCategory() != II::TWOADDR)
3651       return;
3652     auto Reg = getRegForValueOrNullAndSaveAlias(KernFunc, CI, DONTCARESIGNED);
3653     if (isa<UndefValue>(CI->getArgOperand(AI.getArgIdx())) && Reg &&
3654         isInLoop(CI->getParent()))
3655       addLifetimeStartInst(CI);
3656   };
3657 
3658   // Constant vector of i1 (or just scalar i1) as i32 (used in setp)
3659   auto ConstVi1Asi32 = [&](II::ArgInfo AI) {
3660     LLVM_DEBUG(dbgs() << "ConstVi1Asi32\n");
3661     VISA_VectorOpnd *ResultOperand = nullptr;
3662     auto C = cast<Constant>(CI->getArgOperand(AI.getArgIdx()));
3663     // Get the bit value of the vXi1 constant.
3664     unsigned IntVal = getPredicateConstantAsInt(C);
3665     // unsigned i32 constant source operand
3666     CISA_CALL(Kernel->CreateVISAImmediate(ResultOperand, &IntVal, ISA_TYPE_UD));
3667     return ResultOperand;
3668   };
3669 
3670   auto CreateAddressOperand = [&](II::ArgInfo AI) {
3671     LLVM_DEBUG(dbgs() << "CreateAddressOperand\n");
3672     if (AI.isRet())
3673       return createAddressOperand(CI, true);
3674     else
3675       return createAddressOperand(CI->getArgOperand(AI.getArgIdx()), false);
3676   };
3677 
3678   auto GetArgCount = [&](II::ArgInfo AI) {
3679     LLVM_DEBUG(dbgs() << "GetArgCount\n");
3680     auto BaseArg = AI.getArgIdx();
3681     MaxRawOperands = BaseArg;
3682 
3683     for (unsigned Idx = BaseArg; Idx < CI->getNumArgOperands(); ++Idx) {
3684       if (auto CA = dyn_cast<Constant>(CI->getArgOperand(Idx))) {
3685         if (CA->isNullValue())
3686           continue;
3687       }
3688       MaxRawOperands = Idx + 1;
3689     }
3690 
3691     if (MaxRawOperands < BaseArg + AI.getArgCountMin())
3692       MaxRawOperands = BaseArg + AI.getArgCountMin();
3693 
3694     return MaxRawOperands - AI.getArgIdx();
3695   };
3696 
3697   auto GetNumGrfs = [&](II::ArgInfo AI) {
3698     LLVM_DEBUG(dbgs() << "GetNumGrfs\n");
3699     // constant byte for number of GRFs
3700     Value *Arg = CI;
3701     if (!AI.isRet())
3702       Arg = CI->getOperand(AI.getArgIdx());
3703     auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Arg->getType());
3704     if (!VT) {
3705       DiagnosticInfoCisaBuild Err{CI, "Invalid number of GRFs", DS_Error};
3706       getContext().diagnose(Err);
3707     }
3708     int DataSize = VT->getNumElements() *
3709                    VT->getElementType()->getPrimitiveSizeInBits() / 8;
3710     return (uint8_t)((DataSize + (GrfByteSize - 1)) / GrfByteSize);
3711   };
3712 
3713   auto GetSampleChMask = [&](II::ArgInfo AI) {
3714     LLVM_DEBUG(dbgs() << "GetSampleChMask\n");
3715     ConstantInt *Const =
3716         dyn_cast<ConstantInt>(CI->getArgOperand(AI.getArgIdx()));
3717     if (!Const) {
3718       DiagnosticInfoCisaBuild Err{CI, "Incorrect args to intrinsic call",
3719                                   DS_Error};
3720       getContext().diagnose(Err);
3721     }
3722     unsigned Byte = Const->getSExtValue() & 15;
3723     // Find the U_offset arg. It is the first vector arg after this one.
3724     IGCLLVM::FixedVectorType *VT;
3725     for (unsigned Idx = AI.getArgIdx() + 1;
3726          !(VT = dyn_cast<IGCLLVM::FixedVectorType>(
3727                CI->getOperand(Idx)->getType()));
3728          ++Idx)
3729       ;
3730     unsigned Width = VT->getNumElements();
3731     if (Width != 8 && Width != 16) {
3732       DiagnosticInfoCisaBuild Err{CI, "Invalid execution size for load/sample",
3733                                   DS_Error};
3734       getContext().diagnose(Err);
3735     }
3736     Byte |= Width & 16;
3737     return Byte;
3738   };
3739 
3740   auto GetSvmBlockSizeNum = [&](II::ArgInfo Sz, II::ArgInfo Num) {
3741     LLVM_DEBUG(dbgs() << "SVM gather/scatter element size and num blocks\n");
3742     // svm gather/scatter "block size" field, set to reflect the element
3743     // type of the data
3744     Value *V = CI;
3745     if (!Sz.isRet())
3746       V = CI->getArgOperand(Sz.getArgIdx());
3747     auto *EltType = V->getType()->getScalarType();
3748     if (auto *MDType = CI->getMetadata(InstMD::SVMBlockType))
3749       EltType = cast<ValueAsMetadata>(MDType->getOperand(0).get())->getType();
3750     ConstantInt *LogOp = cast<ConstantInt>(CI->getArgOperand(Num.getArgIdx()));
3751     unsigned LogNum = LogOp->getZExtValue();
3752     unsigned ElBytes = getResultedTypeSize(EltType, DL);
3753     switch (ElBytes) {
3754       // For N = 2 byte data type, use block size 1 and block count x2
3755       // Otherwise, use block size N and original block count.
3756     case 2:
3757       ElBytes = 0;
3758       IGC_ASSERT(LogNum < 4);
3759       // This is correct but I can not merge this in while ISPC not fixed
3760       // LogNum += 1;
3761 
3762       // this is incorrect temporary solution
3763       LogNum = 1;
3764       break;
3765     case 1:
3766       ElBytes = 0;
3767       break;
3768     case 4:
3769       ElBytes = 1;
3770       break;
3771     case 8:
3772       ElBytes = 2;
3773       break;
3774     default:
3775       DiagnosticInfoCisaBuild Err{CI, "Bad element type for SVM scatter/gather",
3776                                   DS_Error};
3777       getContext().diagnose(Err);
3778     }
3779     return std::make_pair(ElBytes, LogNum);
3780   };
3781 
3782   auto CreateOpndPredefinedSrc = [&](PreDefined_Vars RegId, unsigned ROffset,
3783                                      unsigned COffset, unsigned VStride,
3784                                      unsigned Width, unsigned HStride) {
3785     LLVM_DEBUG(dbgs() << "CreateOpndPredefinedSrc\n");
3786     VISA_GenVar *Decl = nullptr;
3787     CISA_CALL(Kernel->GetPredefinedVar(Decl, RegId));
3788     VISA_VectorOpnd *ResultOperand = nullptr;
3789     CISA_CALL(Kernel->CreateVISASrcOperand(ResultOperand, Decl,
3790                                            (VISA_Modifier)Mod, VStride, Width,
3791                                            HStride, ROffset, COffset));
3792     return ResultOperand;
3793   };
3794 
3795   auto CreateOpndPredefinedDst = [&](PreDefined_Vars RegId, unsigned ROffset,
3796                                      unsigned COffset, unsigned HStride) {
3797     LLVM_DEBUG(dbgs() << "CreateOpndPredefinedDst\n");
3798     VISA_GenVar *Decl = nullptr;
3799     CISA_CALL(Kernel->GetPredefinedVar(Decl, RegId));
3800     VISA_VectorOpnd *ResultOperand = nullptr;
3801     CISA_CALL(Kernel->CreateVISADstOperand(ResultOperand, Decl, HStride,
3802                                            ROffset, COffset));
3803     return ResultOperand;
3804   };
3805 
3806   auto CreateImmOpndFromUInt = [&](VISA_Type ImmType, unsigned Val) {
3807     LLVM_DEBUG(dbgs() << "CreateImmOpndFromUInt\n");
3808     VISA_VectorOpnd *src = nullptr;
3809     CISA_CALL(Kernel->CreateVISAImmediate(src, &Val, ImmType));
3810 
3811     return src;
3812   };
3813 
3814   auto MakeSubbAddcDestination =
3815       [&](GenXIntrinsic::GenXResult::ResultIndexes MemberIdx) {
3816         LLVM_DEBUG(dbgs() << "MakeSubbAddcDestination\n");
3817         IGC_ASSERT(GenXIntrinsic::getGenXIntrinsicID(CI) ==
3818                        llvm::GenXIntrinsic::genx_addc ||
3819                    GenXIntrinsic::getGenXIntrinsicID(CI) ==
3820                        llvm::GenXIntrinsic::genx_subb);
3821         IGC_ASSERT(IndexFlattener::getNumElements(CI->getType()) == 2);
3822 
3823         auto SV = SimpleValue(CI, MemberIdx);
3824         auto *DstType = SV.getType();
3825 
3826         IGC_ASSERT(DstType->getScalarType()->isIntegerTy(genx::DWordBits));
3827 
3828         auto *Reg = getRegForValueAndSaveAlias(KernFunc, SV, UNSIGNED);
3829 
3830         const auto TypeSize = CISATypeTable[ISA_TYPE_UD].typeSize;
3831         auto Elements = 1;
3832         if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(DstType))
3833           Elements = VT->getNumElements();
3834 
3835         Region R(IGCLLVM::FixedVectorType::get(
3836             IntegerType::get(Ctx, TypeSize * genx::ByteBits), Elements));
3837         return createRegionOperand(&R, Reg->GetVar<VISA_GenVar>(Kernel),
3838                                    UNSIGNED, Mod, true /* Dst */);
3839       };
3840 
3841 
3842   VISA_EMask_Ctrl exec_mask;
3843   addDebugInfo();
3844 #include "GenXIntrinsicsBuildMap.inc"
3845 }
3846 
3847 /**************************************************************************************************
3848  * buildControlRegUpdate : generate an instruction to apply a mask to
3849  *                         the control register (V14).
3850  *
3851  * Enter:   Mask = the mask to apply
3852  *          Clear = false if bits set in Mask should be set in V14,
3853  *                  true if bits set in Mask should be cleared in V14.
3854  */
buildControlRegUpdate(unsigned Mask,bool Clear)3855 void GenXKernelBuilder::buildControlRegUpdate(unsigned Mask, bool Clear) {
3856   ISA_Opcode Opcode;
3857   // write opcode
3858   if (Clear) {
3859     Opcode = ISA_AND;
3860     Mask = ~Mask;
3861   } else
3862     Opcode = ISA_OR;
3863 
3864   Region Single = Region(1, 4);
3865 
3866   VISA_GenVar *Decl = nullptr;
3867   CISA_CALL(Kernel->GetPredefinedVar(Decl, PREDEFINED_CR0));
3868   VISA_VectorOpnd *dst =
3869       createRegionOperand(&Single, Decl, DONTCARESIGNED, 0, true);
3870   VISA_VectorOpnd *src0 =
3871       createRegionOperand(&Single, Decl, DONTCARESIGNED, 0, false);
3872 
3873   VISA_VectorOpnd *src1 = nullptr;
3874   CISA_CALL(Kernel->CreateVISAImmediate(src1, &Mask, ISA_TYPE_UD));
3875 
3876   addDebugInfo();
3877   CISA_CALL(Kernel->AppendVISALogicOrShiftInst(Opcode, nullptr, false,
3878                                                vISA_EMASK_M1, EXEC_SIZE_1, dst,
3879                                                src0, src1, nullptr, nullptr));
3880 }
3881 
3882 /***********************************************************************
3883  * buildBranch : build a conditional or unconditional branch
3884  *
3885  * Return:  true if fell through to successor
3886  */
buildBranch(BranchInst * Branch)3887 bool GenXKernelBuilder::buildBranch(BranchInst *Branch) {
3888   BasicBlock *Next = Branch->getParent()->getNextNode();
3889   if (Branch->isUnconditional()) {
3890     // Unconditional branch
3891     if (Branch->getOperand(0) == Next)
3892       return true; // fall through to successor
3893     auto labelId = getOrCreateLabel(Branch->getSuccessor(0), LABEL_BLOCK);
3894     addDebugInfo();
3895     CISA_CALL(Kernel->AppendVISACFJmpInst(nullptr, Labels[labelId]));
3896     return false;
3897   }
3898   // Conditional branch.
3899   // First check if it is a baled in goto/join, via an extractvalue.
3900   auto BI = Baling->getBaleInfo(Branch);
3901   if (BI.isOperandBaled(0 /*condition*/)) {
3902     if (auto Extract = dyn_cast<ExtractValueInst>(Branch->getCondition())) {
3903       auto GotoJoin = cast<CallInst>(Extract->getAggregateOperand());
3904       if (GenXIntrinsic::getGenXIntrinsicID(GotoJoin) == GenXIntrinsic::genx_simdcf_goto) {
3905         buildGoto(GotoJoin, Branch);
3906       } else {
3907         IGC_ASSERT_MESSAGE(GotoJoin::isValidJoin(GotoJoin),
3908           "extra unexpected code in join block");
3909         buildJoin(GotoJoin, Branch);
3910       }
3911       return true;
3912     }
3913   }
3914   // Normal conditional branch.
3915   VISA_EMask_Ctrl MaskCtrl;
3916   VISA_PREDICATE_CONTROL Control = PRED_CTRL_NON;
3917   VISA_PREDICATE_STATE State = PredState_NO_INVERSE;
3918   Value *Pred = getPredicateOperand(Branch, 0, BI, Control, State, &MaskCtrl);
3919   IGC_ASSERT_MESSAGE(!isa<VectorType>(Branch->getCondition()->getType()),
3920     "branch must have scalar condition");
3921   BasicBlock *True = Branch->getSuccessor(0);
3922   BasicBlock *False = Branch->getSuccessor(1);
3923   if (True == Next) {
3924     State ^= PredState_INVERSE; // invert bit in predicate field
3925     True = False;
3926     False = Next;
3927   }
3928   // Write the conditional branch.
3929   VISA_PredVar *PredVar = getPredicateVar(Pred);
3930   VISA_PredOpnd *PredOperand = createPredOperand(PredVar, State, Control);
3931   addDebugInfo();
3932   CISA_CALL(Kernel->AppendVISACFJmpInst(
3933       PredOperand, Labels[getOrCreateLabel(True, LABEL_BLOCK)]));
3934   // If the other successor is not the next block, write an unconditional
3935   // jmp to that.
3936   if (False == Next)
3937     return true; // fall through to successor
3938   addDebugInfo();
3939   CISA_CALL(Kernel->AppendVISACFJmpInst(
3940       nullptr, Labels[getOrCreateLabel(False, LABEL_BLOCK)]));
3941   return false;
3942 }
3943 
3944 /***********************************************************************
3945  * buildIndirectBr : build an indirect branch
3946  *
3947  * Indirectbr instructions are used only for jump tables.
3948  *
3949  * Enter:   Br = indirect branch inst
3950  */
buildIndirectBr(IndirectBrInst * Br)3951 void GenXKernelBuilder::buildIndirectBr(IndirectBrInst *Br) {
3952   IGC_ASSERT(Subtarget->hasSwitchjmp());
3953   Value *Addr = Br->getAddress();
3954   auto JumpTable = cast<IntrinsicInst>(Addr);
3955   unsigned IID = GenXIntrinsic::getAnyIntrinsicID(JumpTable);
3956   IGC_ASSERT(IID == GenXIntrinsic::genx_jump_table);
3957   Value *Idx = JumpTable->getArgOperand(0);
3958 
3959   VISA_VectorOpnd *JMPIdx = createSource(Idx, UNSIGNED);
3960   unsigned NumDest = Br->getNumDestinations();
3961   std::vector<VISA_LabelOpnd *> JMPLabels(NumDest, nullptr);
3962   for (unsigned I = 0; I < NumDest; ++I)
3963     JMPLabels[I] = Labels[getOrCreateLabel(Br->getDestination(I), LABEL_BLOCK)];
3964 
3965   addDebugInfo();
3966   CISA_CALL(
3967       Kernel->AppendVISACFSwitchJMPInst(JMPIdx, NumDest, JMPLabels.data()));
3968 }
3969 
3970 /***********************************************************************
3971  * buildJoin : build a join
3972  *
3973  * Enter:   Join = join instruction that is baled into an extractvalue of
3974  *                 field 1 (the !any(EM) value), that is baled into Branch,
3975  *                 if Branch is non-zero
3976  *          Branch = branch instruction, or 0 for a join that is not baled
3977  *                   in to a branch because it always ends up with at least
3978  *                   one channel enabled
3979  */
buildJoin(CallInst * Join,BranchInst * Branch)3980 void GenXKernelBuilder::buildJoin(CallInst *Join, BranchInst *Branch) {
3981   // A join needs a label. (If the join is at the start of its block, then
3982   // this gets merged into the block label.)
3983   addLabelInst(Join);
3984   // There is no join instruction in vISA -- the finalizer derives it by
3985   // looking for gotos targeting the basic block's label.
3986 }
3987 
3988 #if (LLVM_VERSION_MAJOR > 8)
3989 /***********************************************************************
3990  * buildUnaryOperator : build code for an unary operator
3991  *
3992  * Enter:   UO = the UnaryOperator
3993  *          BI = BaleInfo for UO
3994  *          Mod = modifier bits for destination
 *          DstDesc = destination operand description (wrregion info, if any)
3997  */
buildUnaryOperator(UnaryOperator * UO,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)3998 void GenXKernelBuilder::buildUnaryOperator(UnaryOperator *UO, BaleInfo BI,
3999                                            unsigned Mod,
4000                                            const DstOpndDesc &DstDesc) {
4001   ISA_Opcode Opcode = ISA_RESERVED_0;
4002   Signedness DstSigned = SIGNED;
4003   Signedness SrcSigned = SIGNED;
4004   unsigned Mod1 = 0;
4005   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4006   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(UO->getType()))
4007     ExecSize = getExecSizeFromValue(VT->getNumElements());
4008 
4009   switch (UO->getOpcode()) {
4010     case Instruction::FNeg:
4011       Opcode = ISA_MOV;
4012       Mod1 ^= MODIFIER_NEG;
4013       break;
4014     default:
4015       report_fatal_error("buildUnaryOperator: unimplemented unary operator");
4016   }
4017 
4018   VISA_VectorOpnd *Dst = createDestination(UO, DstSigned, Mod, DstDesc);
4019 
4020   VISA_VectorOpnd *Src0 = nullptr;
4021   VISA_PredOpnd *Pred = createPredFromWrRegion(DstDesc);
4022 
4023   Src0 = createSourceOperand(UO, SrcSigned, 0, BI, Mod1);
4024 
4025   auto ExecMask = getExecMaskFromWrRegion(DstDesc);
4026 
4027   addDebugInfo();
4028 
4029   if (Opcode == ISA_MOV) {
4030     CISA_CALL(Kernel->AppendVISADataMovementInst(
4031         ISA_MOV, Pred, Mod1 & MODIFIER_SAT, ExecMask, ExecSize, Dst, Src0, NULL));
4032     return;
4033   }
4034   report_fatal_error("buildUnaryOperator: unimplemented opcode");
4035 }
4036 #endif
4037 
4038 /***********************************************************************
 * getCommonSignedness : predict the most suitable signedness of an instruction
 *                       based on incoming values.
4041  *
4042  * Enter:   Vs = incoming values to use for signedness prediction
4043  */
getCommonSignedness(ArrayRef<Value * > Vs) const4044 Signedness GenXKernelBuilder::getCommonSignedness(ArrayRef<Value *> Vs) const {
4045   // Expect the first value is always set.
4046   IGC_ASSERT(!Vs.empty());
4047   std::vector<Register *> Regs;
4048   std::transform(
4049       Vs.begin(), Vs.end(), std::back_inserter(Regs), [this](Value *V) { return getLastUsedAlias(V); });
4050   // If there is no register allocated for Value, getLastUsedAlias returns
4051   // nullptr. Remove such nodes.
4052   Regs.erase(std::remove(Regs.begin(), Regs.end(), nullptr), Regs.end());
4053 
4054   if (Regs.empty())
4055     // Use SIGNED by default if there are no registers for the values.
4056     return SIGNED;
4057 
4058   bool hasSigned = std::any_of(Regs.begin(), Regs.end(),
4059                                [](Register *R) { return R->Signed == SIGNED; });
4060   bool hasUnsigned = std::any_of(Regs.begin(), Regs.end(), [](Register *R) {
4061     return R->Signed == UNSIGNED;
4062   });
4063   // If there is at least one UNSIGNED and others are UNSIGNED or DONTCARESIGNED
4064   // (absence of a register also means DONTCARESIGNED), UNSIGNED must be used.
4065   // Otherwise, SIGNED.
4066   if (hasUnsigned && !hasSigned)
4067     return UNSIGNED;
4068   return SIGNED;
4069 }
4070 
4071 /***********************************************************************
4072  * getLastUsedAlias : get the last used alias of a vISA virtual register
4073  *                    for a value. Nullptr if none.
4074  */
getLastUsedAlias(Value * V) const4075 GenXKernelBuilder::Register *GenXKernelBuilder::getLastUsedAlias(Value *V) const {
4076   auto Res = LastUsedAliasMap.find(V);
4077   if (Res == LastUsedAliasMap.end())
4078     return nullptr;
4079   return Res->second;
4080 }
4081 
4082 /***********************************************************************
4083  * getRegForValueUntypedAndSaveAlias : a wrapper for
4084  * GenXVisaRegAlloc::getRegForValueUntyped which also saves the register alias
4085  * in a special map.
4086  *
4087  * Enter:   args = the wrapped function parameters.
4088  */
4089 template <typename... Args>
4090 GenXKernelBuilder::Register *
getRegForValueUntypedAndSaveAlias(Args &&...args)4091 GenXKernelBuilder::getRegForValueUntypedAndSaveAlias(Args &&... args) {
4092   Register *R = RegAlloc->getRegForValueUntyped(std::forward<Args>(args)...);
4093   SimpleValue SV = std::get<1>(std::make_tuple(args...));
4094   if (R)
4095     LastUsedAliasMap[SV.getValue()] = R;
4096   return R;
4097 }
4098 
4099 /***********************************************************************
4100  * getRegForValueOrNullAndSaveAlias : a wrapper for
4101  * GenXVisaRegAlloc::getRegForValueOrNull which also saves the register alias in
4102  * a special map.
4103  *
4104  * Enter:   args = the wrapped function parameters.
4105  */
4106 template <typename... Args>
4107 GenXKernelBuilder::Register *
getRegForValueOrNullAndSaveAlias(Args &&...args)4108 GenXKernelBuilder::getRegForValueOrNullAndSaveAlias(Args &&... args) {
4109   Register *R = RegAlloc->getRegForValueOrNull(std::forward<Args>(args)...);
4110   SimpleValue SV = std::get<1>(std::make_tuple(args...));
4111   if (R)
4112     LastUsedAliasMap[SV.getValue()] = R;
4113   return R;
4114 }
4115 
4116 /***********************************************************************
4117  * getRegForValueAndSaveAlias : a wrapper for GenXVisaRegAlloc::getRegForValue
4118  * which also saves the register alias in a special map.
4119  *
4120  * Enter:   args = the wrapped function parameters.
4121  */
4122 template <typename... Args>
4123 GenXKernelBuilder::Register *
getRegForValueAndSaveAlias(Args &&...args)4124 GenXKernelBuilder::getRegForValueAndSaveAlias(Args &&... args) {
4125   Register *R = RegAlloc->getRegForValue(std::forward<Args>(args)...);
4126   SimpleValue SV = std::get<1>(std::make_tuple(args...));
4127   IGC_ASSERT_MESSAGE(R, "getRegForValue must return non-nullptr register");
4128   LastUsedAliasMap[SV.getValue()] = R;
4129   return R;
4130 }
4131 
4132 /***********************************************************************
4133  * buildBinaryOperator : build code for a binary operator
4134  *
4135  * Enter:   BO = the BinaryOperator
4136  *          BI = BaleInfo for BO
4137  *          Mod = modifier bits for destination
 *          DstDesc = destination operand description (wrregion info, if any)
4140  */
buildBinaryOperator(BinaryOperator * BO,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)4141 void GenXKernelBuilder::buildBinaryOperator(BinaryOperator *BO, BaleInfo BI,
4142                                             unsigned Mod,
4143                                             const DstOpndDesc &DstDesc) {
4144   bool IsLogic = false;
4145   ISA_Opcode Opcode = ISA_RESERVED_0;
4146 
4147   Signedness SrcSigned = DONTCARESIGNED;
4148   Signedness DstSigned = DONTCARESIGNED;
4149   unsigned Mod1 = 0;
4150   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4151   auto hasConstantIntFitsInWord = [BO]() {
4152     return std::any_of(BO->op_begin(), BO->op_end(), [](Value *V) {
4153       auto C = dyn_cast<ConstantInt>(V);
4154       if (!C)
4155         return false;
4156       return C->getValue().getMinSignedBits() <= genx::WordBits;
4157     });
4158   };
4159   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(BO->getType()))
4160     ExecSize = getExecSizeFromValue(VT->getNumElements());
4161   switch (BO->getOpcode()) {
4162   case Instruction::Add:
4163   case Instruction::FAdd:
4164     Opcode = ISA_ADD;
4165     break;
4166   case Instruction::Sub:
4167   case Instruction::FSub:
4168     Opcode = ISA_ADD;
4169     Mod1 ^= MODIFIER_NEG;
4170     break;
4171   case Instruction::Mul:
4172   case Instruction::FMul:
4173     Opcode = ISA_MUL;
4174     // Check if there is a possibility to truncate the integer constant further
4175     // that will help to generate better code. In this case SIGNED type must be
4176     // used.
4177     if (hasConstantIntFitsInWord())
4178       DstSigned = SrcSigned = SIGNED;
4179     break;
4180   case Instruction::Shl:
4181     Opcode = ISA_SHL;
4182     IsLogic = true;
4183     break;
4184   case Instruction::AShr:
4185     Opcode = ISA_ASR;
4186     DstSigned = SrcSigned = SIGNED;
4187     IsLogic = true;
4188     break;
4189   case Instruction::LShr:
4190     Opcode = ISA_SHR;
4191     DstSigned = SrcSigned = UNSIGNED;
4192     IsLogic = true;
4193     break;
4194   case Instruction::UDiv:
4195     Opcode = ISA_DIV;
4196     DstSigned = SrcSigned = UNSIGNED;
4197     break;
4198   case Instruction::SDiv:
4199     Opcode = ISA_DIV;
4200     DstSigned = SrcSigned = SIGNED;
4201     break;
4202   case Instruction::FDiv: {
4203     Opcode = ISA_DIV;
4204     if (Constant *Op0 = dyn_cast<Constant>(BO->getOperand(0))) {
4205       if (Op0->getType()->isVectorTy())
4206         Op0 = Op0->getSplatValue();
4207       ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(Op0);
4208       if (CFP && CFP->isExactlyValue(1.0))
4209         Opcode = ISA_INV;
4210     }
4211   } break;
4212   case Instruction::URem:
4213     Opcode = ISA_MOD;
4214     DstSigned = SrcSigned = UNSIGNED;
4215     break;
4216   case Instruction::SRem:
4217     DstSigned = SrcSigned = SIGNED;
4218     Opcode = ISA_MOD;
4219     break;
4220   case Instruction::FRem:
4221     Opcode = ISA_MOD;
4222     break;
4223   case Instruction::And:
4224     Opcode = ISA_AND;
4225     IsLogic = true;
4226     break;
4227   case Instruction::Or:
4228     Opcode = ISA_OR;
4229     IsLogic = true;
4230     break;
4231   case Instruction::Xor:
4232     Opcode = ISA_XOR;
4233     IsLogic = true;
4234     break;
4235   default:
4236     report_fatal_error("buildBinaryOperator: unimplemented binary operator");
4237     break;
4238   }
4239 
4240   // If signedness wasn't set explicitly earlier and destination modifier isn't
4241   // set.
4242   if (SrcSigned == DONTCARESIGNED && DstSigned == DONTCARESIGNED) {
4243 
4244     bool hasExt =
4245         std::any_of(BO->use_begin(), BO->use_end(),
4246                     [B = Baling](Use &U) { return isExtOperandBaled(U, B); });
4247 
4248     if (Mod == MODIFIER_NONE && !hasExt) {
4249       Value *Op0 = BO->getOperand(0);
4250       Value *Op1 = BO->getOperand(1);
4251       if (Opcode == ISA_INV)
4252         SrcSigned = DstSigned = getCommonSignedness({Op1});
4253       else
4254         SrcSigned = DstSigned = getCommonSignedness({Op0, Op1});
4255     } else
4256       // If the modifier is set or SEXT, ZEXT is baled, use old behavior.
4257       SrcSigned = DstSigned = SIGNED;
4258   }
4259 
4260   VISA_VectorOpnd *Dst = createDestination(BO, DstSigned, Mod, DstDesc);
4261 
4262   VISA_VectorOpnd *Src0 = nullptr;
4263   VISA_VectorOpnd *Src1 = nullptr;
4264   VISA_PredOpnd *Pred = createPredFromWrRegion(DstDesc);
4265 
4266   if (Opcode == ISA_INV) {
4267     Src0 = createSourceOperand(BO, SrcSigned, 1, BI, Mod1); // source 0
4268   } else {
4269     Src0 = createSourceOperand(BO, SrcSigned, 0, BI);       // source 0
4270     Src1 = createSourceOperand(BO, SrcSigned, 1, BI, Mod1); // source 1
4271   }
4272 
4273   auto ExecMask = getExecMaskFromWrRegion(DstDesc);
4274 
4275   addDebugInfo();
4276   if (IsLogic) {
4277     CISA_CALL(Kernel->AppendVISALogicOrShiftInst(
4278         Opcode, Pred, Mod, ExecMask, ExecSize, Dst, Src0, Src1, NULL, NULL));
4279   } else {
4280     if (Opcode == ISA_ADDC || Opcode == ISA_SUBB) {
4281         IGC_ASSERT(0);
4282     } else {
4283       CISA_CALL(Kernel->AppendVISAArithmeticInst(
4284           Opcode, Pred, Mod, ExecMask, ExecSize, Dst, Src0, Src1, NULL));
4285     }
4286   }
4287 }
4288 
4289 /***********************************************************************
4290  * buildBoolBinaryOperator : build code for a binary operator acting on
4291  *                           i1 or vector of i1
4292  *
4293  * Enter:   BO = the BinaryOperator
4294  */
buildBoolBinaryOperator(BinaryOperator * BO)4295 void GenXKernelBuilder::buildBoolBinaryOperator(BinaryOperator *BO) {
4296   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4297   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(BO->getType()))
4298     ExecSize = getExecSizeFromValue(VT->getNumElements());
4299   ISA_Opcode Opcode = ISA_RESERVED_0;
4300   switch (BO->getOpcode()) {
4301   case Instruction::And:
4302     Opcode = ISA_AND;
4303     break;
4304   case Instruction::Or:
4305     Opcode = ISA_OR;
4306     break;
4307   case Instruction::Xor:
4308     Opcode = ISA_XOR;
4309     if (isNot(BO))
4310       Opcode = ISA_NOT;
4311     break;
4312   default:
4313     report_fatal_error(
4314         "buildBoolBinaryOperator: unimplemented binary operator");
4315     break;
4316   }
4317 
4318   if (isPredNot(BO) && BO->hasOneUse()) {
4319     // If this NOT predicate is a goto operand and it has only one use, then we
4320     // won't emit it. %P1 = ... %P2 = not %P1
4321     // (!%P2) goto
4322     // Transforms into
4323     // (%P1) goto
4324 
4325     auto Goto = dyn_cast<CallInst>(*BO->user_begin());
4326     if (Goto && GenXIntrinsic::getGenXIntrinsicID(Goto) ==
4327                     GenXIntrinsic::genx_simdcf_goto)
4328       return;
4329   }
4330 
4331   VISA_PredVar *Dst = getPredicateVar(BO);
4332   VISA_PredVar *Src0 = getPredicateVar(BO->getOperand(0));
4333   VISA_PredVar *Src1 =
4334       Opcode != ISA_NOT ? getPredicateVar(BO->getOperand(1)) : nullptr;
4335 
4336   addDebugInfo();
4337   CISA_CALL(Kernel->AppendVISALogicOrShiftInst(
4338       Opcode, NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1, ExecSize, Dst, Src0,
4339       Src1));
4340 }
4341 
buildSymbolInst(CallInst * GAddrInst,unsigned Mod,const DstOpndDesc & DstDesc)4342 void GenXKernelBuilder::buildSymbolInst(CallInst *GAddrInst, unsigned Mod,
4343                                         const DstOpndDesc &DstDesc) {
4344   IGC_ASSERT_MESSAGE(GAddrInst, "wrong argument: nullptr is unallowed");
4345   IGC_ASSERT_MESSAGE(GenXIntrinsic::getGenXIntrinsicID(GAddrInst) ==
4346                          GenXIntrinsic::genx_gaddr,
4347                      "wrong argument: genx.addr intrinsic is expected");
4348   auto *GV = cast<GlobalValue>(GAddrInst->getOperand(0));
4349   VISA_VectorOpnd *Dst = createDestination(GAddrInst, UNSIGNED, Mod, DstDesc);
4350   CISA_CALL(Kernel->AppendVISACFSymbolInst(GV->getName().str(), Dst));
4351 }
4352 
4353 /***********************************************************************
 * getPredefinedSurfaceVar : get predefined visa surface variable
4355  *
4356  * Enter:   GV = global that denotes predefined variable
4357  *
4358  * Return:  visa surface variable, non-null
4359  *
4360  */
4361 VISA_SurfaceVar *
getPredefinedSurfaceVar(GlobalVariable & GV)4362 GenXKernelBuilder::getPredefinedSurfaceVar(GlobalVariable &GV) {
4363   StringRef SurfName = GV.getName();
4364   PreDefined_Surface VisaSurfName =
4365       StringSwitch<PreDefined_Surface>(SurfName)
4366           .Case(genx::BSSVariableName, PREDEFINED_SURFACE_T252)
4367           .Default(PREDEFINED_SURFACE_LAST);
4368   IGC_ASSERT_MESSAGE(VisaSurfName != PREDEFINED_SURFACE_LAST,
4369                      "Unexpected predefined surface");
4370   VISA_SurfaceVar *SurfVar = nullptr;
4371   CISA_CALL(Kernel->GetPredefinedSurface(SurfVar, VisaSurfName));
4372   return SurfVar;
4373 }
4374 
4375 /***********************************************************************
4376  * buildWritePredefSurface : build code to write to predefined surface
4377  *
4378  * Enter:   CI = write_predef_surface intrinsic
4379  *
4380  */
buildWritePredefSurface(CallInst & CI)4381 void GenXKernelBuilder::buildWritePredefSurface(CallInst &CI) {
4382   IGC_ASSERT_MESSAGE(GenXIntrinsic::getGenXIntrinsicID(&CI) ==
4383                          GenXIntrinsic::genx_write_predef_surface,
4384                      "Expected predefined surface write intrinsic");
4385   auto *PredefSurf = cast<GlobalVariable>(CI.getArgOperand(0));
4386   VISA_SurfaceVar *SurfVar = getPredefinedSurfaceVar(*PredefSurf);
4387   VISA_VectorOpnd *SurfOpnd = nullptr;
4388   CISA_CALL(Kernel->CreateVISAStateOperand(SurfOpnd, SurfVar, /*offset=*/0,
4389                                            /*useAsDst=*/true));
4390   VISA_VectorOpnd *SrcOpnd = createSource(CI.getArgOperand(1), genx::UNSIGNED);
4391   CISA_CALL(Kernel->AppendVISADataMovementInst(
4392       ISA_MOVS, /*pred=*/nullptr, /*satMod=*/false, vISA_EMASK_M1_NM,
4393       EXEC_SIZE_1, SurfOpnd, SrcOpnd));
4394 }
4395 
4396 /***********************************************************************
4397  * buildCastInst : build code for a cast (other than a no-op cast)
4398  *
4399  * Enter:   CI = the CastInst
4400  *          BI = BaleInfo for CI
4401  *          Mod = modifier bits for destination
4402  *          WrRegion = 0 else wrregion for destination
4403  *          WrRegionBI = BaleInfo for WrRegion
4404  */
buildCastInst(CastInst * CI,BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)4405 void GenXKernelBuilder::buildCastInst(CastInst *CI, BaleInfo BI, unsigned Mod,
4406                                       const DstOpndDesc &DstDesc) {
4407   Signedness InSigned = DONTCARESIGNED;
4408   Signedness OutSigned = DONTCARESIGNED;
4409   switch (CI->getOpcode()) {
4410   case Instruction::UIToFP:
4411     InSigned = UNSIGNED;
4412     break;
4413   case Instruction::SIToFP:
4414     InSigned = SIGNED;
4415     break;
4416   case Instruction::FPToUI:
4417     OutSigned = UNSIGNED;
4418     break;
4419   case Instruction::FPToSI:
4420     OutSigned = SIGNED;
4421     break;
4422   case Instruction::ZExt:
4423     InSigned = UNSIGNED;
4424     break;
4425   case Instruction::SExt:
4426     InSigned = SIGNED;
4427     break;
4428   case Instruction::FPTrunc:
4429   case Instruction::FPExt:
4430     break;
4431   case Instruction::PtrToInt:
4432   case Instruction::IntToPtr:
4433     break;
4434   case Instruction::AddrSpaceCast:
4435     break;
4436   case Instruction::Trunc:
4437     break;
4438   default:
4439     DiagnosticInfoCisaBuild Err{CI, "buildCastInst: unimplemented cast",
4440                                 DS_Error};
4441     getContext().diagnose(Err);
4442   }
4443 
4444   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4445   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType()))
4446     ExecSize = getExecSizeFromValue(VT->getNumElements());
4447 
4448   auto ExecMask = getExecMaskFromWrRegion(DstDesc);
4449 
4450   VISA_PredOpnd *Pred = createPredFromWrRegion(DstDesc);
4451   // Give dest and source the same signedness for byte mov.
4452   VISA_VectorOpnd *Dst = createDestination(CI, OutSigned, Mod, DstDesc);
4453 
4454   if (InSigned == DONTCARESIGNED)
4455     InSigned = OutSigned;
4456   VISA_VectorOpnd *Src0 = createSourceOperand(CI, InSigned, 0, BI);
4457 
4458   addDebugInfo();
4459   CISA_CALL(Kernel->AppendVISADataMovementInst(
4460       ISA_MOV, Pred, Mod & MODIFIER_SAT, ExecMask, ExecSize, Dst, Src0, NULL));
4461 }
4462 
4463 /***********************************************************************
4464  * buildCmp : build code for a compare
4465  *
4466  * Enter:   Cmp = the compare instruction
4467  *          BI = BaleInfo for Cmp
4468  *          WrRegion = 0 else wrpredregion, wrpredpredregion, or wrregion for
4469  *          destination
4470  */
buildCmp(CmpInst * Cmp,BaleInfo BI,const DstOpndDesc & DstDesc)4471 void GenXKernelBuilder::buildCmp(CmpInst *Cmp, BaleInfo BI,
4472                                  const DstOpndDesc &DstDesc) {
4473   IGC_ASSERT_MESSAGE(testPredicate(Cmp, DstDesc),
4474     "write predicate size 4 only allowed for double/longlong type");
4475   Signedness Signed = DONTCARESIGNED;
4476   VISA_Cond_Mod opSpec;
4477   switch (Cmp->getPredicate()) {
4478   case CmpInst::FCMP_ONE:
4479   case CmpInst::FCMP_ORD:
4480   case CmpInst::FCMP_UEQ:
4481   case CmpInst::FCMP_UGT:
4482   case CmpInst::FCMP_UGE:
4483   case CmpInst::FCMP_ULT:
4484   case CmpInst::FCMP_ULE:
4485   case CmpInst::FCMP_UNO:
4486     IGC_ASSERT_MESSAGE(0, "unsupported fcmp predicate");
4487     break;
4488   case CmpInst::FCMP_OEQ:
4489   case CmpInst::ICMP_EQ:
4490     opSpec = ISA_CMP_E;
4491     break;
4492   case CmpInst::FCMP_UNE:
4493   case CmpInst::ICMP_NE:
4494     opSpec = ISA_CMP_NE;
4495     break;
4496   case CmpInst::FCMP_OGT:
4497     opSpec = ISA_CMP_G;
4498     break;
4499   case CmpInst::ICMP_UGT:
4500     opSpec = ISA_CMP_G;
4501     Signed = UNSIGNED;
4502     break;
4503   case CmpInst::ICMP_SGT:
4504     opSpec = ISA_CMP_G;
4505     Signed = SIGNED;
4506     break;
4507   case CmpInst::FCMP_OGE:
4508     opSpec = ISA_CMP_GE;
4509     break;
4510   case CmpInst::ICMP_UGE:
4511     opSpec = ISA_CMP_GE;
4512     Signed = UNSIGNED;
4513     break;
4514   case CmpInst::ICMP_SGE:
4515     opSpec = ISA_CMP_GE;
4516     Signed = SIGNED;
4517     break;
4518   case CmpInst::FCMP_OLT:
4519     opSpec = ISA_CMP_L;
4520     break;
4521   case CmpInst::ICMP_ULT:
4522     opSpec = ISA_CMP_L;
4523     Signed = UNSIGNED;
4524     break;
4525   case CmpInst::ICMP_SLT:
4526     opSpec = ISA_CMP_L;
4527     Signed = SIGNED;
4528     break;
4529   case CmpInst::FCMP_OLE:
4530     opSpec = ISA_CMP_LE;
4531     break;
4532   case CmpInst::ICMP_ULE:
4533     opSpec = ISA_CMP_LE;
4534     Signed = UNSIGNED;
4535     break;
4536   case CmpInst::ICMP_SLE:
4537     opSpec = ISA_CMP_LE;
4538     Signed = SIGNED;
4539     break;
4540   default:
4541     DiagnosticInfoCisaBuild Err{Cmp, "unknown predicate", DS_Error};
4542     getContext().diagnose(Err);
4543   }
4544 
4545   // Check if this is to write to a predicate desination or a GRF desination.
4546   bool WriteToPred = true;
4547   if (Cmp->hasOneUse()) {
4548     Instruction *UI = Cmp->user_back();
4549     BaleInfo UserBI = Baling->getBaleInfo(UI);
4550     if (UserBI.Type == BaleInfo::CMPDST)
4551       WriteToPred = false;
4552   }
4553 
4554   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4555   VISA_EMask_Ctrl ctrlMask = vISA_EMASK_M1;
4556   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(Cmp->getType()))
4557     ExecSize = getExecSizeFromValue(VT->getNumElements());
4558 
4559   VISA_VectorOpnd *Dst = nullptr;
4560   genx::Signedness SignedSrc0;
4561   VISA_VectorOpnd *Src0 =
4562       createSourceOperand(Cmp, Signed, 0, BI, 0, &SignedSrc0);
4563   VISA_VectorOpnd *Src1 = createSourceOperand(Cmp, SignedSrc0, 1, BI);
4564 
4565   if (WriteToPred) {
4566     ctrlMask = getExecMaskFromWrPredRegion(DstDesc.WrRegion, false);
4567     VISA_PredVar *PredVar =
4568         getPredicateVar(DstDesc.WrRegion ? DstDesc.WrRegion : Cmp);
4569     addDebugInfo();
4570     CISA_CALL(Kernel->AppendVISAComparisonInst(opSpec, ctrlMask, ExecSize,
4571                                                PredVar, Src0, Src1));
4572   } else {
4573     ctrlMask = getExecMaskFromWrRegion(DstDesc);
4574     Value *Val = DstDesc.WrRegion ? DstDesc.WrRegion : Cmp->user_back();
4575     Dst = createDestination(Val, Signed, 0, DstDesc);
4576     addDebugInfo();
4577     CISA_CALL(Kernel->AppendVISAComparisonInst(opSpec, ctrlMask, ExecSize, Dst,
4578                                                Src0, Src1));
4579   }
4580 }
4581 
4582 /***********************************************************************
4583  * buildConvertAddr : build code for conversion to address
4584  *
4585  * Enter:   CI = the CallInst
4586  *          BI = BaleInfo for CI
4587  *          Mod = modifier bits for destination
4588  *          WrRegion = 0 else wrregion for destination
4589  *          WrRegionBI = BaleInfo for WrRegion
4590  */
buildConvertAddr(CallInst * CI,genx::BaleInfo BI,unsigned Mod,const DstOpndDesc & DstDesc)4591 void GenXKernelBuilder::buildConvertAddr(CallInst *CI, genx::BaleInfo BI,
4592                                          unsigned Mod,
4593                                          const DstOpndDesc &DstDesc) {
4594   IGC_ASSERT(!DstDesc.WrRegion);
4595   Value *Base = Liveness->getAddressBase(CI);
4596   VISA_Exec_Size ExecSize = EXEC_SIZE_1;
4597   VISA_EMask_Ctrl MaskCtrl = NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1;
4598 
4599   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(CI->getType()))
4600     ExecSize = getExecSizeFromValue(VT->getNumElements());
4601   // If the offset is less aligned than the base register element type, then
4602   // we need a different type.
4603   Type *OverrideTy = nullptr;
4604   Type *BaseTy = Base->getType();
4605   if (BaseTy->isPointerTy())
4606     BaseTy = BaseTy->getPointerElementType();
4607   unsigned ElementBytes =
4608       BaseTy->getScalarType()->getPrimitiveSizeInBits() >> 3;
4609   int Offset = cast<ConstantInt>(CI->getArgOperand(1))->getSExtValue();
4610   if ((ElementBytes - 1) & Offset) {
4611     OverrideTy = IGCLLVM::FixedVectorType::get(
4612         Type::getInt8Ty(CI->getContext()),
4613         cast<IGCLLVM::FixedVectorType>(BaseTy)->getNumElements() *
4614             ElementBytes);
4615     ElementBytes = 1;
4616   }
4617   Register *BaseReg =
4618       getRegForValueAndSaveAlias(KernFunc, Base, DONTCARESIGNED, OverrideTy);
4619 
4620   VISA_VectorOpnd *Dst = createAddressOperand(CI, true);
4621   VISA_VectorOpnd *Src1 = nullptr;
4622 
4623   if (BaseReg->Category == RegCategory::SURFACE ||
4624       BaseReg->Category == RegCategory::SAMPLER) {
4625     uint8_t offset = Offset >> 2;
4626     switch (BaseReg->Category) {
4627     case RegCategory::SURFACE: {
4628       VISA_SurfaceVar *Decl = BaseReg->GetVar<VISA_SurfaceVar>(Kernel);
4629       unsigned int offsetB = offset * 2; // 2 is bytes size of UW
4630       CISA_CALL(Kernel->CreateVISAAddressOfOperand(Src1, Decl, offsetB));
4631       break;
4632     }
4633     case RegCategory::SAMPLER: {
4634       VISA_SurfaceVar *Decl = BaseReg->GetVar<VISA_SurfaceVar>(Kernel);
4635       unsigned int offsetB = offset * 2; // 2 is bytes size of UW
4636       CISA_CALL(Kernel->CreateVISAAddressOfOperand(Src1, Decl, offsetB));
4637       break;
4638     }
4639     default:
4640       DiagnosticInfoCisaBuild Err{
4641           CI,
4642           "Invalid state operand class: only surface, vme, and "
4643           "sampler are supported.",
4644           DS_Error};
4645       getContext().diagnose(Err);
4646     }
4647   } else {
4648     uint8_t rowOffset = Offset >> genx::log2(GrfByteSize);
4649     uint8_t colOffset = (Offset & (GrfByteSize - 1)) >> Log2_32(ElementBytes);
4650     auto TypeSize = BaseReg->Ty->getScalarType()->getPrimitiveSizeInBits() >> 3;
4651     unsigned int offset = colOffset * TypeSize + rowOffset * GrfByteSize;
4652 
4653     if (BaseReg->Category == RegCategory::ADDRESS) {
4654       VISA_AddrVar *Decl = BaseReg->GetVar<VISA_AddrVar>(Kernel);
4655       unsigned Width = 1;
4656       CISA_CALL(Kernel->CreateVISAAddressSrcOperand(Src1, Decl, offset, Width));
4657     } else {
4658       VISA_GenVar *Decl = BaseReg->GetVar<VISA_GenVar>(Kernel);
4659       CISA_CALL(Kernel->CreateVISAAddressOfOperand(Src1, Decl, offset));
4660     }
4661   }
4662   VISA_VectorOpnd *Src2 = createSourceOperand(CI, UNSIGNED, 0, BI);
4663   addDebugInfo();
4664   CISA_CALL(Kernel->AppendVISAAddrAddInst(MaskCtrl, ExecSize, Dst, Src1, Src2));
4665 }
4666 
4667 /***********************************************************************
4668  * buildAlloca : build code for allocating in thread-private memory
4669  *
4670  * Enter:   CI = the CallInst
4671  *
4672  */
buildAlloca(CallInst * CI,unsigned IntrinID,unsigned Mod,const DstOpndDesc & DstDesc)4673 void GenXKernelBuilder::buildAlloca(CallInst *CI, unsigned IntrinID,
4674                                     unsigned Mod, const DstOpndDesc &DstDesc) {
4675   LLVM_DEBUG(dbgs() << "Building alloca " << *CI << "\n");
4676   VISA_GenVar *Sp = nullptr;
4677   CISA_CALL(Kernel->GetPredefinedVar(Sp, PreDefined_Vars::PREDEFINED_FE_SP));
4678   if (!allowI64Ops())
4679     CISA_CALL(Kernel->CreateVISAGenVar(Sp, "Sp", 1, ISA_TYPE_UD, ALIGN_DWORD, Sp));
4680 
4681   Value *AllocaOff = CI->getOperand(0);
4682   Type *AllocaOffTy = AllocaOff->getType();
4683 
4684   if (CurrentPadding) {
4685     // padd the current alloca the comply with gather/scatter alignment rules
4686     // unsigned LastOff = getResultedTypeSize(LastAlloca->getOperand(0)->getType(), DL);
4687     auto *AllocaEltTy = AllocaOffTy->getScalarType();
4688     if (AllocaOffTy->isArrayTy())
4689       AllocaEltTy = AllocaOffTy->getArrayElementType();
4690     unsigned Padding = DL.getTypeSizeInBits(AllocaEltTy) / genx::ByteBits;
4691     Padding = (Padding - CurrentPadding) % Padding;
4692     if (Padding) {
4693       VISA_VectorOpnd *SpSrc = nullptr;
4694       CISA_CALL(Kernel->CreateVISASrcOperand(SpSrc, Sp, MODIFIER_NONE, 0, 1, 0,
4695                                              0, 0));
4696       VISA_VectorOpnd *PaddImm = nullptr;
4697       CISA_CALL(Kernel->CreateVISAImmediate(PaddImm, &Padding, ISA_TYPE_D));
4698       VISA_VectorOpnd *DstSp = nullptr;
4699       CISA_CALL(Kernel->CreateVISADstOperand(
4700           DstSp, static_cast<VISA_GenVar *>(Sp), 1, 0, 0));
4701 
4702       CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
4703                                                  vISA_EMASK_M1, EXEC_SIZE_1,
4704                                                  DstSp, SpSrc, PaddImm));
4705       CurrentPadding += Padding;
4706     }
4707   }
4708 
4709   VISA_VectorOpnd *SpSrc = nullptr;
4710   CISA_CALL(
4711       Kernel->CreateVISASrcOperand(SpSrc, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
4712 
4713   unsigned OffVal = getResultedTypeSize(AllocaOffTy, DL);
4714   CurrentPadding = (CurrentPadding + OffVal) %
4715                    (DL.getLargestLegalIntTypeSizeInBits() / genx::ByteBits);
4716 
4717   VISA_VectorOpnd *Imm = nullptr;
4718   CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_D));
4719 
4720   if (IntrinID == llvm::GenXIntrinsic::genx_alloca) {
4721     VISA_VectorOpnd *Src = nullptr;
4722     CISA_CALL(Kernel->CreateVISASrcOperand(Src, static_cast<VISA_GenVar *>(Sp),
4723                                            MODIFIER_NONE, 0, 1, 0, 0, 0));
4724     VISA_VectorOpnd *Dst = createDestination(CI, DONTCARESIGNED, Mod, DstDesc);
4725     CISA_CALL(Kernel->AppendVISADataMovementInst(
4726         ISA_MOV, nullptr, false, vISA_EMASK_M1, EXEC_SIZE_1, Dst, Src));
4727   }
4728 
4729   VISA_VectorOpnd *DstSp = nullptr;
4730   CISA_CALL(Kernel->CreateVISADstOperand(DstSp, static_cast<VISA_GenVar *>(Sp),
4731                                          1, 0, 0));
4732 
4733   CISA_CALL(Kernel->AppendVISAArithmeticInst(
4734       ISA_ADD, nullptr, false, vISA_EMASK_M1, EXEC_SIZE_1, DstSp, SpSrc, Imm));
4735 }
4736 
4737 /***********************************************************************
4738  * buildPrintIndex : build code for storing constant format strins as metadata
4739  *                   and returning idx for that string
4740  *
4741  * Enter:   CI = the CallInst
4742  *
4743  */
buildPrintIndex(CallInst * CI,unsigned IntrinID,unsigned Mod,const DstOpndDesc & DstDesc)4744 void GenXKernelBuilder::buildPrintIndex(CallInst *CI, unsigned IntrinID,
4745                                         unsigned Mod,
4746                                         const DstOpndDesc &DstDesc) {
4747   // create move with constant
4748   VISA_VectorOpnd *Imm = nullptr;
4749   Module* M = CI->getModule();
4750   NamedMDNode *NMD = M->getOrInsertNamedMetadata("cm_print_strings");
4751   unsigned NumOp  = NMD->getNumOperands();
4752   CISA_CALL(Kernel->CreateVISAImmediate(Imm, &NumOp, ISA_TYPE_UD));
4753   VISA_VectorOpnd *Dst = createDestination(CI, DONTCARESIGNED, Mod, DstDesc);
4754   CISA_CALL(Kernel->AppendVISADataMovementInst(
4755             ISA_MOV, nullptr, false, vISA_EMASK_M1_NM,
4756             EXEC_SIZE_1, Dst, Imm));
4757 
4758   // access string
4759   StringRef UnderlyingCStr =
4760       vc::getConstStringFromOperand(*CI->getArgOperand(0));
4761 
4762   // store metadata
4763   LLVMContext &Context = CI->getContext();
4764   MDNode* N = MDNode::get(Context, MDString::get(Context, UnderlyingCStr));
4765   NMD->addOperand(N);
4766 }
4767 
deduceRegion(Region * R,bool IsDest,unsigned MaxWidth)4768 void GenXKernelBuilder::deduceRegion(Region *R, bool IsDest,
4769                                      unsigned MaxWidth) {
4770   IGC_ASSERT(Subtarget);
4771   if (!IsDest && !R->is2D() && R->Indirect &&
4772       Subtarget->hasIndirectGRFCrossing()) {
4773     // For a source 1D indirect region that might possibly cross a GRF
4774     // (because we are on SKL+ so a single GRF crossing is allowed), make it
4775     // Nx1 instead of 1xN to avoid crossing a GRF within a row.
4776     R->VStride = R->Stride;
4777     R->Width = 1;
4778     R->Stride = 0;
4779   }
4780   // another case of converting to <N;1,0> region format
4781   if (!IsDest &&
4782       (R->VStride == (int)R->Width * R->Stride || R->Width == R->NumElements)) {
4783     R->Width = 1;
4784     R->VStride = R->Stride;
4785     R->Stride = 0;
4786   } else if (R->Width > MaxWidth) {
4787     // A Width of more than 16 (or whatever MaxWidth is) is not allowed. If it
4788     // is more than 16, then legalization has ensured that either there is one
4789     // row or the rows are contiguous (VStride == Width * Stride) and we can
4790     // increase the number of rows.  (Note that Width and VStride are ignored
4791     // in a destination operand; legalization ensures that there is only one
4792     // row.)
4793     R->Width = MaxWidth;
4794     R->VStride = R->Width * R->Stride;
4795   }
4796 
4797   if (R->Width == R->NumElements) {
4798     // Use VStride 0 on a 1D region. This is necessary for src0 in line or
4799     // pln, so we may as well do it for everything.
4800     R->VStride = 0;
4801   }
4802 
4803   if (R->Indirect) {
4804     R->IndirectAddrOffset = 0;
4805     if (GenXIntrinsic::isRdRegion(R->Indirect)) {
4806       auto AddrRdR = cast<Instruction>(R->Indirect);
4807       Region AddrR = makeRegionFromBaleInfo(AddrRdR, BaleInfo());
4808       IGC_ASSERT_MESSAGE(!AddrR.Indirect,
4809         "cannot have address rdregion that is indirect");
4810       R->IndirectAddrOffset =
4811           AddrR.Offset / 2; // address element is always 2 byte
4812     }
4813   }
4814 }
4815 
4816 VISA_VectorOpnd *
createGeneralOperand(Region * R,VISA_GenVar * Decl,Signedness Signed,unsigned Mod,bool IsDest,unsigned MaxWidth)4817 GenXKernelBuilder::createGeneralOperand(Region *R, VISA_GenVar *Decl,
4818                                         Signedness Signed, unsigned Mod,
4819                                         bool IsDest, unsigned MaxWidth) {
4820   VISA_VectorOpnd *ResultOperand = nullptr;
4821   // Write the vISA general operand, canonicalizing the
4822   // region parameters where applicable.
4823   IGC_ASSERT_MESSAGE(Decl, "no register allocated for this value");
4824   if (!IsDest) {
4825     ResultOperand = createCisaSrcOperand(
4826         Decl, static_cast<VISA_Modifier>(Mod), R->VStride, R->Width, R->Stride,
4827         R->Offset >> genx::log2(GrfByteSize),
4828         (R->Offset & (GrfByteSize - 1)) / R->ElementBytes);
4829   } else {
4830     ResultOperand = createCisaDstOperand(
4831         Decl, R->Stride, R->Offset >> genx::log2(GrfByteSize),
4832         (R->Offset & (GrfByteSize - 1)) / R->ElementBytes);
4833   }
4834   return ResultOperand;
4835 }
4836 
createIndirectOperand(Region * R,Signedness Signed,unsigned Mod,bool IsDest,unsigned MaxWidth)4837 VISA_VectorOpnd *GenXKernelBuilder::createIndirectOperand(Region *R,
4838                                                           Signedness Signed,
4839                                                           unsigned Mod,
4840                                                           bool IsDest,
4841                                                           unsigned MaxWidth) {
4842   VISA_VectorOpnd *ResultOperand = nullptr;
4843   // Check if the indirect operand is a baled in rdregion.
4844   Value *Indirect = R->Indirect;
4845   if (GenXIntrinsic::isRdRegion(Indirect)) {
4846     auto AddrRdR = cast<Instruction>(Indirect);
4847     Indirect = AddrRdR->getOperand(0);
4848   }
4849   // Write the vISA indirect operand.
4850   Register *IdxReg =
4851       getRegForValueAndSaveAlias(KernFunc, Indirect, DONTCARESIGNED);
4852   IGC_ASSERT(IdxReg->Category == RegCategory::ADDRESS);
4853 
4854   bool NotCrossGrf = !(R->Offset & (GrfByteSize - 1));
4855   if (!NotCrossGrf) {
4856     // Determine the NotCrossGrf bit setting (whether we can guarantee
4857     // that adding an indirect region's constant offset does not cause
4858     // a carry out of bit 4)
4859     // by looking at the partial constant for the index
4860     // before the constant is added on.
4861     // This only works for a scalar index.
4862     if (auto IndirInst = dyn_cast<Instruction>(R->Indirect)) {
4863       auto A = AI.get(IndirInst);
4864       unsigned Mask = (1U << std::min(5U, A.getLogAlign())) - 1;
4865       if (Mask) {
4866         if ((A.getExtraBits() & Mask) + (R->Offset & Mask) <= Mask &&
4867             (unsigned)(R->Offset & (GrfByteSize - 1)) <= Mask) {
4868           // The alignment and extrabits are such that adding R->Offset
4869           // cannot cause a carry from bit 4 to bit 5.
4870           NotCrossGrf = true;
4871         }
4872       }
4873     }
4874   }
4875   visa::TypeDetails TD(Func->getParent()->getDataLayout(), R->ElementTy,
4876                        Signed);
4877   unsigned VStride = R->VStride;
4878   if (isa<VectorType>(R->Indirect->getType()))
4879     // multi indirect (vector index), set vstride
4880     VStride = 0x8000; // field to null
4881   VISA_AddrVar *AddrDecl = IdxReg->GetVar<VISA_AddrVar>(Kernel);
4882   if (IsDest) {
4883     CISA_CALL(Kernel->CreateVISAIndirectDstOperand(
4884         ResultOperand, AddrDecl, R->IndirectAddrOffset, R->Offset, R->Stride,
4885         (VISA_Type)TD.VisaType));
4886   } else {
4887     CISA_CALL(Kernel->CreateVISAIndirectSrcOperand(
4888         ResultOperand, AddrDecl, static_cast<VISA_Modifier>(Mod),
4889         R->IndirectAddrOffset, R->Offset, VStride, R->Width, R->Stride,
4890         (VISA_Type)TD.VisaType));
4891   }
4892   return ResultOperand;
4893 }
4894 
4895 
4896 /***********************************************************************
4897  * createRegionOperand : create a vISA region operand
4898  *
4899  * Enter:   R = Region
4900  *          RegNum = vISA register number (ignored if region is indirect)
4901  *          Signed = whether signed or unsigned required (only used for
4902  *                   indirect operand)
4903  *          Mod = modifiers
4904  *          IsDest = true if destination operand
4905  *          MaxWidth = maximum width (used to stop TWICEWIDTH operand
4906  *                     getting a width bigger than the execution size, but
4907  *                     for other uses defaults to 16)
4908  */
4909 VISA_VectorOpnd *
createRegionOperand(Region * R,VISA_GenVar * Decl,Signedness Signed,unsigned Mod,bool IsDest,unsigned MaxWidth)4910 GenXKernelBuilder::createRegionOperand(Region *R, VISA_GenVar *Decl,
4911                                        Signedness Signed, unsigned Mod,
4912                                        bool IsDest, unsigned MaxWidth) {
4913   deduceRegion(R, IsDest, MaxWidth);
4914 
4915   if (R->Indirect)
4916     return createIndirectOperand(R, Signed, Mod, IsDest, MaxWidth);
4917   else
4918     return createGeneralOperand(R, Decl, Signed, Mod, IsDest, MaxWidth);
4919 }
4920 
4921 
isInLoop(BasicBlock * BB)4922 bool GenXKernelBuilder::isInLoop(BasicBlock *BB) {
4923   Function *BBFunc = BB->getParent();
4924   // Cannot predict for stack calls and indirectly called functions.
4925   // Let's assume the function is in a loop.
4926   if (genx::requiresStackCall(BBFunc) || genx::isReferencedIndirectly(BBFunc))
4927     return true;
4928 
4929   IGC_ASSERT(LIs->getLoopInfo(BBFunc));
4930   if (LIs->getLoopInfo(BBFunc)->getLoopFor(BB))
4931     return true; // inside loop in this function
4932   // Now we need to see if this function is called from inside a loop.
4933   // First check the cache.
4934   auto i = IsInLoopCache.find(BBFunc);
4935   if (i != IsInLoopCache.end())
4936     return i->second;
4937   // Now check all call sites. This recurses as deep as the depth of the call
4938   // graph, which must be acyclic as GenX does not allow recursion.
4939   bool InLoop = false;
4940   for (auto ui = BBFunc->use_begin(), ue = BBFunc->use_end(); ui != ue; ++ui) {
4941     auto CI = dyn_cast<CallInst>(ui->getUser());
4942     if (!checkFunctionCall(CI, BBFunc))
4943       continue;
4944     IGC_ASSERT(ui->getOperandNo() == CI->getNumArgOperands());
4945     if (CI->getFunction() == BBFunc)
4946       continue;
4947     if (isInLoop(CI->getParent())) {
4948       InLoop = true;
4949       break;
4950     }
4951   }
4952   IsInLoopCache[BBFunc] = InLoop;
4953   return InLoop;
4954 }
4955 
addWriteRegionLifetimeStartInst(Instruction * WrRegion)4956 void GenXKernelBuilder::addWriteRegionLifetimeStartInst(Instruction *WrRegion) {
4957   if (!GenXIntrinsic::isWrRegion(WrRegion))
4958     return; // No lifetime start for wrpredregion.
4959   // See if the wrregion is in a loop.
4960   auto BB = WrRegion->getParent();
4961   if (!isInLoop(BB))
4962     return; // not in loop
4963   // See if the wrregion is the first of a sequence in the same basic block
4964   // that together write the whole register. We assume that each region is
4965   // contiguous, and the regions are written in ascending offset order, as
4966   // that is what legalization does if the original write was to the whole
4967   // register.
4968   unsigned NumElementsSoFar = 0;
4969   unsigned TotalNumElements = 1;
4970   if (auto *VT = dyn_cast<IGCLLVM::FixedVectorType>(WrRegion->getType()))
4971     TotalNumElements = VT->getNumElements();
4972   Instruction *ThisWr = WrRegion;
4973   for (;;) {
4974     Region R = makeRegionFromBaleInfo(ThisWr, BaleInfo());
4975     if (R.Indirect)
4976       break;
4977     if ((unsigned)R.Offset != NumElementsSoFar * R.ElementBytes)
4978       break;
4979     if (R.Stride != 1 && R.Width != 1)
4980       break;
4981     if (R.Width != R.NumElements)
4982       break;
4983     NumElementsSoFar += R.NumElements;
4984     if (NumElementsSoFar == TotalNumElements)
4985       return; // whole register is written
4986     // Go on to next wrregion in the same basic block if any.
4987     if (!ThisWr->hasOneUse())
4988       break;
4989     ThisWr = cast<Instruction>(ThisWr->use_begin()->getUser());
4990     if (!GenXIntrinsic::isWrRegion(ThisWr))
4991       break;
4992     if (ThisWr->getParent() != BB)
4993       break;
4994   }
4995   // The wrregion is in a loop and is not the first in a sequence in the same
4996   // basic block that writes the whole register. Write a lifetime start.
4997   addLifetimeStartInst(WrRegion);
4998 }
4999 
5000 /**************************************************************************************************
5001  * addLifetimeStartInst : add a lifetime.start instruction
5002  *
5003  * Enter:   Inst = value to use in lifetime.start
5004  */
addLifetimeStartInst(Instruction * Inst)5005 void GenXKernelBuilder::addLifetimeStartInst(Instruction *Inst) {
5006   VISA_VectorOpnd *opnd = nullptr;
5007   auto Reg = getRegForValueOrNullAndSaveAlias(KernFunc, Inst);
5008   if (!Reg)
5009     return; // no register allocated such as being indirected.
5010 
5011   switch (Reg->Category) {
5012   case RegCategory::GENERAL:
5013     opnd = createCisaDstOperand(Reg->GetVar<VISA_GenVar>(Kernel), 1, 0, 0);
5014     break;
5015   case RegCategory::ADDRESS:
5016     CISA_CALL(Kernel->CreateVISAAddressDstOperand(
5017         opnd, Reg->GetVar<VISA_AddrVar>(Kernel), 0));
5018     break;
5019 #if 0  // Not currently used.
5020     case RegCategory::PREDICATE:
5021       break;
5022 #endif // 0
5023   default:
5024     report_fatal_error("createLifetimeStartInst: Invalid register category");
5025     break;
5026   }
5027   addDebugInfo();
5028   CISA_CALL(Kernel->AppendVISALifetime(LIFETIME_START, opnd));
5029 }
5030 
5031 /***********************************************************************
5032  * addDebugInfo : add debug infromation
5033  */
addDebugInfo()5034 void GenXKernelBuilder::addDebugInfo() {
5035   // Check if we have a pending debug location.
5036   if (PendingLine) {
5037     // Do the source location debug info with vISA FILE and LOC instructions.
5038     if (PendingFilename != "" && (PendingFilename != LastFilename ||
5039                                   PendingDirectory != LastDirectory)) {
5040       SmallString<256> Filename;
5041       // Bodge here to detect Windows absolute path even when built on cygwin.
5042       if (sys::path::is_absolute(PendingFilename) ||
5043           (PendingFilename.size() > 2 && PendingFilename[1] == ':'))
5044         Filename = PendingFilename;
5045       else {
5046         Filename = PendingDirectory;
5047         sys::path::append(Filename, PendingFilename);
5048       }
5049       CISA_CALL(Kernel->AppendVISAMiscFileInst(Filename.c_str()));
5050       GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(),
5051                             "FILE");
5052       LastDirectory = PendingDirectory;
5053       LastFilename = PendingFilename;
5054     }
5055     if (PendingLine != LastLine) {
5056       LLVM_DEBUG(dbgs() << "LOC instruction appended:" << PendingLine << "\n");
5057       CISA_CALL(Kernel->AppendVISAMiscLOC(PendingLine));
5058       GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(),
5059                             "LOC");
5060       LastLine = PendingLine;
5061       PendingLine = 0;
5062     }
5063   }
5064   // +1 since we update debug info BEFORE appending the instruction
5065   GM->updateVisaMapping(KernFunc, CurrentInst, Kernel->getvIsaInstCount() + 1,
5066                         CurrentInst ? CurrentInst->getName() : "Init_Special");
5067 }
5068 
emitOptimizationHints()5069 void GenXKernelBuilder::emitOptimizationHints() {
5070   if (skipOptWithLargeBlock(*FG))
5071     return;
5072 
5073   const auto &DL = FG->getModule()->getDataLayout();
5074   // Track rp considering byte variable widening.
5075   PressureTracker RP(DL, *FG, Liveness, /*ByteWidening*/ true);
5076   const std::vector<genx::LiveRange *> &WidenLRs = RP.getWidenVariables();
5077 
5078   if (!SkipNoWiden) {
5079     for (auto LR : WidenLRs) {
5080       SimpleValue SV = *LR->value_begin();
5081       auto *R = getRegForValueOrNullAndSaveAlias(FG->getHead(), SV);
5082       // This variable is being used in or crossing a high register pressure
5083       // region. Set an optimization hint not to widen it.
5084       if (R && RP.intersectWithRedRegion(LR)) {
5085         R->addAttribute(addStringToPool("NoWidening"), "");
5086         RP.decreasePressure(LR);
5087       }
5088     }
5089   }
5090 }
5091 
5092 /***********************************************************************
5093  * addLabelInst : add a label instruction for a basic block or join
5094  */
addLabelInst(const Value * BB)5095 void GenXKernelBuilder::addLabelInst(const Value *BB) {
5096   GM->updateVisaMapping(KernFunc, nullptr, Kernel->getvIsaInstCount(), "LBL");
5097   auto LabelID = getOrCreateLabel(BB, LABEL_BLOCK);
5098   IGC_ASSERT(LabelID < Labels.size());
5099   CISA_CALL(Kernel->AppendVISACFLabelInst(Labels[LabelID]));
5100 }
5101 
5102 /***********************************************************************
5103  * getOrCreateLabel : get/create label number for a Function or BasicBlock
5104  */
getOrCreateLabel(const Value * V,int Kind)5105 unsigned GenXKernelBuilder::getOrCreateLabel(const Value *V, int Kind) {
5106   int Num = getLabel(V);
5107   if (Num >= 0)
5108     return Num;
5109   Num = Labels.size();
5110   setLabel(V, Num);
5111   VISA_LabelOpnd *Decl = nullptr;
5112 
5113   // Replicate the functionality of the old compiler and make the first label
5114   // for a function contain the name (makes sure the function label is unique)
5115   // It's not clear this is strictly necessary any more (but doesn't do any
5116   // harm and may even make reading the intermediate forms easier)
5117   if (Kind == LABEL_SUBROUTINE) {
5118     StringRef N = TheKernelMetadata.getName();
5119     std::string NameBuf;
5120     if (V != FG->getHead()) {
5121       // This is a subroutine, not the kernel/function at the head of the
5122       // FunctionGroup. Use the name of the subroutine.
5123       N = V->getName();
5124     } else {
5125       // For a kernel/function name, fix illegal characters. The jitter uses
5126       // the same name for the label in the .asm file, and aubload does not
5127       // like the illegal characters.
5128       NameBuf = legalizeName(N.str());
5129       N = NameBuf;
5130     }
5131     auto SubroutineLabel =
5132         cutString(Twine(N) + Twine("_BB_") + Twine(Labels.size()));
5133     LLVM_DEBUG(dbgs() << "creating SubroutineLabel: " << SubroutineLabel
5134                       << "\n");
5135     CISA_CALL(Kernel->CreateVISALabelVar(Decl, SubroutineLabel.c_str(),
5136                                          VISA_Label_Kind(Kind)));
5137   } else if (Kind == LABEL_BLOCK) {
5138     auto BlockLabel = cutString(Twine("BB_") + Twine(Labels.size()));
5139     LLVM_DEBUG(dbgs() << "creating BlockLabel: " << BlockLabel << "\n");
5140     CISA_CALL(Kernel->CreateVISALabelVar(Decl, BlockLabel.c_str(),
5141                                          VISA_Label_Kind(Kind)));
5142   } else if (Kind == LABEL_FC) {
5143     const auto *F = cast<Function>(V);
5144     IGC_ASSERT(F->hasFnAttribute("CMCallable"));
5145     StringRef N(F->getName());
5146     auto FCLabel = cutString(Twine(N));
5147     LLVM_DEBUG(dbgs() << "creating FCLabel: " << FCLabel << "\n");
5148     CISA_CALL(Kernel->CreateVISALabelVar(Decl, FCLabel.c_str(),
5149                                          VISA_Label_Kind(Kind)));
5150   } else {
5151     StringRef N = V->getName();
5152     auto Label =
5153         cutString(Twine("_") + Twine(N) + Twine("_") + Twine(Labels.size()));
5154     LLVM_DEBUG(dbgs() << "creating Label: " << Label << "\n");
5155     CISA_CALL(
5156         Kernel->CreateVISALabelVar(Decl, Label.c_str(), VISA_Label_Kind(Kind)));
5157   }
5158   IGC_ASSERT(Decl);
5159   Labels.push_back(Decl);
5160   return Num;
5161 }
5162 
buildInlineAsm(CallInst * CI)5163 void GenXKernelBuilder::buildInlineAsm(CallInst *CI) {
5164   IGC_ASSERT_MESSAGE(CI->isInlineAsm(), "Inline asm expected");
5165   InlineAsm *IA = dyn_cast<InlineAsm>(IGCLLVM::getCalledValue(CI));
5166   std::string AsmStr(IA->getAsmString());
5167   std::stringstream &AsmTextStream = CisaBuilder->GetAsmTextStream();
5168 
5169   // Nothing to substitute if no constraints provided
5170   if (IA->getConstraintString().empty()) {
5171     AsmTextStream << AsmStr << std::endl;
5172     return;
5173   }
5174 
5175   unsigned NumOutputs = genx::getInlineAsmNumOutputs(CI);
5176   auto ConstraintsInfo = genx::getGenXInlineAsmInfo(CI);
5177 
5178   // Scan asm string in reverse direction to match larger numbers first
5179   for (int ArgNo = ConstraintsInfo.size() - 1; ArgNo >= 0; ArgNo--) {
5180     // Regexp to match number of operand
5181     Regex R("\\$+" + llvm::to_string(ArgNo));
5182     if (!R.match(AsmStr))
5183       continue;
5184     // Operand that must be substituded into inline assembly string
5185     Value *InlasmOp = nullptr;
5186     std::string InlasmOpAsString;
5187     // For output collect destination descriptor with
5188     // baling info and WrRegion instruction
5189     DstOpndDesc DstDesc;
5190     auto Info = ConstraintsInfo[ArgNo];
5191     if (Info.isOutput()) {
5192       // If result is a struct than inline assembly
5193       // instruction has multiple outputs
5194       if (isa<StructType>(CI->getType())) {
5195         // Go through all users of a result and find extractelement with
5196         // ArgNo indice: ArgNo is a number of a constraint in constraint
5197         // list
5198         for (auto ui = CI->use_begin(), ue = CI->use_end(); ui != ue; ++ui) {
5199           auto EV = dyn_cast<ExtractValueInst>(ui->getUser());
5200           if (EV && (EV->getIndices()[0] == ArgNo)) {
5201             InlasmOp = EV;
5202             break;
5203           }
5204         }
5205       } else
5206         // Single output
5207         InlasmOp = CI;
5208 
5209       if (InlasmOp) {
5210         Instruction *Inst = cast<Instruction>(InlasmOp);
5211         Instruction *Head = Baling->getBaleHead(Inst);
5212         BaleInfo BI = Baling->getBaleInfo(Head);
5213         // If head is g_store than change head to store's
5214         //  operand and check if it's baled wrr
5215         if (BI.Type == BaleInfo::GSTORE) {
5216           DstDesc.GStore = Head;
5217           Head = cast<Instruction>(Head->getOperand(0));
5218           BI = Baling->getBaleInfo(Head);
5219         }
5220         if (BI.Type == BaleInfo::WRREGION) {
5221           DstDesc.WrRegion = Head;
5222           DstDesc.WrRegionBI = BI;
5223         }
5224         InlasmOpAsString = createInlineAsmDestinationOperand(
5225             InlasmOp, DONTCARESIGNED, Info.getConstraintType(), 0, DstDesc);
5226       } else {
5227         // Can't deduce output operand because there are no users
5228         // but we have register allocated. If region is needed we can use
5229         // default one based one type.
5230         SimpleValue SV(CI, ArgNo);
5231         Register *Reg =
5232             getRegForValueAndSaveAlias(KernFunc, SV, DONTCARESIGNED);
5233         Region R(SV.getType());
5234         InlasmOpAsString =
5235             createInlineAsmOperand(Reg, &R, true /*IsDst*/, DONTCARESIGNED,
5236                                    Info.getConstraintType(), 0);
5237       }
5238     } else {
5239       // Input of inline assembly
5240       InlasmOp = CI->getArgOperand(ArgNo - NumOutputs);
5241       bool IsBaled = false;
5242       if (GenXIntrinsic::isRdRegion(InlasmOp)) {
5243         Instruction *RdR = cast<Instruction>(InlasmOp);
5244         IsBaled = Baling->isBaled(RdR);
5245       }
5246       InlasmOpAsString = createInlineAsmSourceOperand(
5247           InlasmOp, DONTCARESIGNED, IsBaled, Info.getConstraintType());
5248     }
5249     // Substitute string name of the variable until
5250     // there are no possible sustitutions. Do-while
5251     // since first match was checked in the beginning
5252     // of the loop.
5253     do {
5254       AsmStr = R.sub(InlasmOpAsString, AsmStr);
5255     } while (R.match(AsmStr));
5256   }
5257 
5258   AsmTextStream << "\n// INLASM BEGIN\n"
5259                 << AsmStr << "\n// INLASM END\n"
5260                 << std::endl;
5261 }
5262 
buildCall(CallInst * CI,const DstOpndDesc & DstDesc)5263 void GenXKernelBuilder::buildCall(CallInst *CI, const DstOpndDesc &DstDesc) {
5264   LLVM_DEBUG(dbgs() << CI << "\n");
5265   Function *Callee = CI->getCalledFunction();
5266   IGC_ASSERT_MESSAGE(
5267       !Callee || !Callee->isDeclaration(),
5268       "Currently VC backend does not support modules with external functions");
5269 
5270   if (!Callee || genx::requiresStackCall(Callee)) {
5271     if (UseNewStackBuilder)
5272       buildStackCallLight(CI, DstDesc);
5273     else
5274       buildStackCall(CI, DstDesc);
5275     return;
5276   }
5277 
5278   unsigned LabelKind = LABEL_SUBROUTINE;
5279   if (Callee->hasFnAttribute("CMCallable"))
5280     LabelKind = LABEL_FC;
5281   else
5282     IGC_ASSERT_MESSAGE(FG == FG->getParent()->getAnyGroup(Callee),
5283                        "unexpected call to outside FunctionGroup");
5284 
5285   // Check whether the called function has a predicate arg that is EM.
5286   int EMOperandNum = -1;
5287   for (auto ai = Callee->arg_begin(), ae = Callee->arg_end(); ai != ae; ++ai) {
5288     auto Arg = &*ai;
5289     if (!Arg->getType()->getScalarType()->isIntegerTy(1))
5290       continue;
5291     if (Liveness->getLiveRange(Arg)->getCategory() == RegCategory::EM) {
5292       EMOperandNum = Arg->getArgNo();
5293       break;
5294     }
5295   }
5296 
5297   if (EMOperandNum < 0) {
5298     addDebugInfo();
5299     // Scalar calls must be marked with NoMask
5300     CISA_CALL(Kernel->AppendVISACFCallInst(
5301         nullptr, vISA_EMASK_M1_NM, EXEC_SIZE_1,
5302         Labels[getOrCreateLabel(Callee, LabelKind)]));
5303   } else {
5304     auto PredicateOpnd = NoMask ? nullptr : createPred(CI, BaleInfo(), EMOperandNum);
5305     addDebugInfo();
5306     auto *VTy = cast<IGCLLVM::FixedVectorType>(
5307         CI->getArgOperand(EMOperandNum)->getType());
5308     VISA_Exec_Size ExecSize = getExecSizeFromValue(VTy->getNumElements());
5309     CISA_CALL(Kernel->AppendVISACFCallInst(
5310         PredicateOpnd, vISA_EMASK_M1, ExecSize,
5311         Labels[getOrCreateLabel(Callee, LabelKind)]));
5312   }
5313 }
5314 
buildRet(ReturnInst * RI)5315 void GenXKernelBuilder::buildRet(ReturnInst *RI) {
5316   uint32_t FloatControl = 0;
5317   auto F = RI->getFunction();
5318   F->getFnAttribute(genx::FunctionMD::CMFloatControl)
5319       .getValueAsString()
5320       .getAsInteger(0, FloatControl);
5321   FloatControl &= CR_Mask;
5322   if (FloatControl != DefaultFloatControl) {
5323     buildControlRegUpdate(CR_Mask, true);
5324     if (DefaultFloatControl)
5325       buildControlRegUpdate(DefaultFloatControl, false);
5326   }
5327   addDebugInfo();
5328   if (!genx::isKernel(F) &&
5329       (genx::requiresStackCall(Func) || genx::isReferencedIndirectly(F))) {
5330     CISA_CALL(Kernel->AppendVISACFFunctionRetInst(nullptr, vISA_EMASK_M1,
5331                                                   EXEC_SIZE_16));
5332   } else {
5333     CISA_CALL(Kernel->AppendVISACFRetInst(nullptr, vISA_EMASK_M1, EXEC_SIZE_1));
5334   }
5335 }
5336 
/***********************************************************************
 * buildGetHWID : build code that writes the hardware thread id to the
 * destination of CI
 *
 * Enter:   CI = the call instruction whose result receives the id
 *          DstDesc = destination descriptor used to create the destination
 *
 * If the subtarget reports that HWTID comes from a predefined variable, a
 * single scalar mov from PREDEFINED_HW_TID is emitted. Otherwise the id is
 * assembled from the low 14 bits of sr0 with the reserved bits squeezed out.
 * All emitted instructions are scalar and NoMask.
 */
void GenXKernelBuilder::buildGetHWID(CallInst *CI, const DstOpndDesc &DstDesc) {
  IGC_ASSERT(Subtarget);
  if (Subtarget->getsHWTIDFromPredef()) {
    // Use predefined variable
    VISA_GenVar *hwid = nullptr;
    CISA_CALL(Kernel->GetPredefinedVar(hwid, PREDEFINED_HW_TID));

    VISA_VectorOpnd *dst = createDestination(CI, DONTCARESIGNED, 0, DstDesc);
    VISA_VectorOpnd *src = nullptr;
    CISA_CALL(
        Kernel->CreateVISASrcOperand(src, hwid, MODIFIER_NONE, 0, 1, 0, 0, 0));
    CISA_CALL(Kernel->AppendVISADataMovementInst(
        ISA_MOV, nullptr /*Pred*/, false /*Mod*/, vISA_EMASK_M1_NM, EXEC_SIZE_1,
        dst, src));

    return;
  }

  // Build HWTID from sr0

  // Initialize temporary regs
  // (three single-element UD variables: two scratch values and the running
  // sr0-derived result)
  VISA_GenVar *HwtidTmp0 = nullptr, *HwtidTmp1 = nullptr, *HwtidSR0 = nullptr;
  CISA_CALL(Kernel->CreateVISAGenVar(HwtidTmp0, "hwtid_tmp0", 1, ISA_TYPE_UD,
                                     ALIGN_DWORD));
  CISA_CALL(Kernel->CreateVISAGenVar(HwtidTmp1, "hwtid_tmp1", 1, ISA_TYPE_UD,
                                     ALIGN_DWORD));
  CISA_CALL(Kernel->CreateVISAGenVar(HwtidSR0, "hwtid_sr0", 1, ISA_TYPE_UD,
                                     ALIGN_DWORD));

  // Local helper for instruction generation
  // Emits "Dst = Left <Opcode> rhs", where rhs is the variable Right when it
  // is given and the UD immediate RightImm otherwise.
  auto generateLogicOrShift = [this](ISA_Opcode Opcode, VISA_GenVar *Dst,
                                     VISA_GenVar *Left, uint32_t RightImm,
                                     VISA_GenVar *Right = nullptr) -> void {
    VISA_VectorOpnd *LeftOp = nullptr, *RightOp = nullptr, *DstOp = nullptr;
    CISA_CALL(Kernel->CreateVISASrcOperand(LeftOp, Left, MODIFIER_NONE, 0, 1, 0,
                                           0, 0));
    if (Right) {
      CISA_CALL(Kernel->CreateVISASrcOperand(RightOp, Right, MODIFIER_NONE, 0,
                                             1, 0, 0, 0));
    } else {
      CISA_CALL(Kernel->CreateVISAImmediate(RightOp, &RightImm,
                                            getVISAImmTy(ISA_TYPE_UD)));
    }
    CISA_CALL(Kernel->CreateVISADstOperand(DstOp, Dst, 1, 0, 0));
    CISA_CALL(Kernel->AppendVISALogicOrShiftInst(
        Opcode, nullptr /*Pred*/, false /*Mod*/, vISA_EMASK_M1_NM, EXEC_SIZE_1,
        DstOp, LeftOp, RightOp));
  };

  // Local helper for masked sr0 value load
  // HwtidSR0 = sr0 & (low MaskBits bits set)
  auto loadMaskedSR0 = [this, generateLogicOrShift,
                        HwtidSR0](unsigned MaskBits) -> void {
    auto SR0Mask = maskTrailingOnes<uint32_t>(MaskBits);

    VISA_GenVar *sr0 = nullptr;
    CISA_CALL(Kernel->GetPredefinedVar(sr0, PREDEFINED_SR0));
    generateLogicOrShift(ISA_AND, HwtidSR0, sr0, SR0Mask);
  };

  // Local helper for reserved bits elimination
  // Keeps the low RemoveBit bits of HwtidSR0 in place and moves everything
  // above them down by Range bits.
  auto removeBitRange = [this, generateLogicOrShift, HwtidTmp0, HwtidTmp1,
                         HwtidSR0](unsigned RemoveBit, unsigned Range) -> void {
    // src = (src & mask) | ((src >> range) & ~mask)
    auto TmpMask = maskTrailingOnes<uint32_t>(RemoveBit);
    // tmp0 = (src & mask)
    generateLogicOrShift(ISA_AND, HwtidTmp0, HwtidSR0, TmpMask);
    // tmp1 = (src >> range)
    generateLogicOrShift(ISA_SHR, HwtidTmp1, HwtidSR0, Range);
    // tmp1 = (tmp1 & ~mask)
    generateLogicOrShift(ISA_AND, HwtidTmp1, HwtidTmp1, ~TmpMask);
    // src = (tmp0 | tmp1)
    generateLogicOrShift(ISA_OR, HwtidSR0, HwtidTmp0, 0 /*RightImm*/,
                         HwtidTmp1);
  };

  // Local helper for passing final hwtid to the dst
  auto writeHwtidToDst = [this, &DstDesc, HwtidSR0, CI](void) -> void {
    VISA_VectorOpnd *src = nullptr, *dst = nullptr;
    CISA_CALL(Kernel->CreateVISASrcOperand(src, HwtidSR0, MODIFIER_NONE, 0, 1,
                                           0, 0, 0));
    dst = createDestination(CI, DONTCARESIGNED, 0, DstDesc);
    CISA_CALL(Kernel->AppendVISADataMovementInst(
        ISA_MOV, nullptr, false, vISA_EMASK_M1_NM, EXEC_SIZE_1, dst, src));
  };

  // XeHP_SDV
  // [13:11] Slice ID.
  // [10:9] Dual - SubSlice ID
  // [8] SubSlice ID.
  // [7] : EUID[2]
  // [6] : Reserved
  // [5:4] EUID[1:0]
  // [3] : Reserved MBZ
  // [2:0] : TID
  //
  // HWTID is calculated using a concatenation of TID:EUID:SubSliceID:SliceID

  // Load sr0 with [13:0] mask
  loadMaskedSR0(14);

  // Remove reserved bits
  // (the higher bit is removed first, so the position of the lower reserved
  // bit is still its original one when it is removed)
  removeBitRange(6, 1);
  removeBitRange(3, 1);

  // Store final value
  writeHwtidToDst();
}
5444 
5445 /***********************************************************************
5446  * createRawSourceOperand : create raw source operand of instruction
5447  *
5448  * Enter:   Inst = instruction to get source operand from
5449  *          OperandNum = operand number
5450  *          BI = BaleInfo for Inst (so we can tell whether a rdregion
5451  *                  or modifier is bundled in)
5452  */
createRawSourceOperand(const Instruction * Inst,unsigned OperandNum,BaleInfo BI,Signedness Signed)5453 VISA_RawOpnd *GenXKernelBuilder::createRawSourceOperand(const Instruction *Inst,
5454                                                         unsigned OperandNum,
5455                                                         BaleInfo BI,
5456                                                         Signedness Signed) {
5457   VISA_RawOpnd *ResultOperand = nullptr;
5458   Value *V = Inst->getOperand(OperandNum);
5459   if (isa<UndefValue>(V)) {
5460     CISA_CALL(Kernel->CreateVISANullRawOperand(ResultOperand, false));
5461   } else {
5462     unsigned ByteOffset = 0;
5463     bool Baled = Baling->getBaleInfo(Inst).isOperandBaled(OperandNum);
5464     if (Baled) {
5465       Instruction *RdRegion = cast<Instruction>(V);
5466       Region R = makeRegionFromBaleInfo(RdRegion, BaleInfo());
5467       ByteOffset = R.Offset;
5468       V = RdRegion->getOperand(0);
5469     }
5470     LLVM_DEBUG(dbgs() << "createRawSourceOperand for "
5471                       << (Baled ? "baled" : "non-baled") << " value: ");
5472     LLVM_DEBUG(V->dump());
5473     LLVM_DEBUG(dbgs() << "\n");
5474     Register *Reg = getRegForValueAndSaveAlias(KernFunc, V, Signed);
5475     IGC_ASSERT(Reg->Category == RegCategory::GENERAL);
5476     LLVM_DEBUG(dbgs() << "CreateVISARawOperand: "; Reg->print(dbgs()); dbgs() << "\n");
5477     CISA_CALL(Kernel->CreateVISARawOperand(
5478         ResultOperand, Reg->GetVar<VISA_GenVar>(Kernel), ByteOffset));
5479   }
5480   return ResultOperand;
5481 }
5482 
5483 /***********************************************************************
5484  * createRawDestination : create raw destination operand
5485  *
5486  * Enter:   Inst = destination value
5487  *          WrRegion = 0 else wrregion that destination is baled into
5488  *
5489  * A raw destination can be baled into a wrregion, but only if the region
5490  * is direct and its start index is GRF aligned.
5491  */
5492 VISA_RawOpnd *
createRawDestination(Value * V,const DstOpndDesc & DstDesc,Signedness Signed)5493 GenXKernelBuilder::createRawDestination(Value *V, const DstOpndDesc &DstDesc,
5494                                         Signedness Signed) {
5495   VISA_RawOpnd *ResultOperand = nullptr;
5496   unsigned ByteOffset = 0;
5497   if (DstDesc.WrRegion) {
5498     V = DstDesc.WrRegion;
5499     Region R = makeRegionFromBaleInfo(DstDesc.WrRegion, BaleInfo());
5500     ByteOffset = R.Offset;
5501   }
5502   Type *OverrideType = nullptr;
5503   if (DstDesc.GStore) {
5504     V = getUnderlyingGlobalVariable(DstDesc.GStore->getOperand(1));
5505     IGC_ASSERT_MESSAGE(V, "out of sync");
5506     OverrideType = DstDesc.GStore->getOperand(0)->getType();
5507   }
5508   LLVM_DEBUG(dbgs() << "createRawDestination for "
5509                     << (DstDesc.GStore ? "global" : "non-global") << " value: ");
5510   LLVM_DEBUG(V->dump());
5511   LLVM_DEBUG(dbgs() << "\n");
5512   if (DstDesc.WrPredefReg)
5513     V = DstDesc.WrPredefReg;
5514   Register *Reg =
5515       getRegForValueOrNullAndSaveAlias(KernFunc, V, Signed, OverrideType);
5516   if (!Reg) {
5517     // No register assigned. This happens to an unused raw result where the
5518     // result is marked as RAW_NULLALLOWED in GenXIntrinsics.
5519     CISA_CALL(Kernel->CreateVISANullRawOperand(ResultOperand, true));
5520   } else {
5521     IGC_ASSERT(Reg->Category == RegCategory::GENERAL);
5522     LLVM_DEBUG(dbgs() << "CreateVISARawOperand: "; Reg->print(dbgs()); dbgs() << "\n");
5523     CISA_CALL(Kernel->CreateVISARawOperand(
5524         ResultOperand, Reg->GetVar<VISA_GenVar>(Kernel), ByteOffset));
5525   }
5526   return ResultOperand;
5527 }
5528 
5529 /***********************************************************************
5530  * getLabel : get label number for a Function or BasicBlock
5531  *
5532  * Return:  label number, -1 if none found
5533  */
getLabel(const Value * V) const5534 int GenXKernelBuilder::getLabel(const Value *V) const {
5535   auto It = LabelMap.find(V);
5536   if (It != LabelMap.end())
5537     return It->second;
5538   return -1;
5539 }
5540 
5541 /***********************************************************************
5542  * setLabel : set the label number for a Function or BasicBlock
5543  */
setLabel(const Value * V,unsigned Num)5544 void GenXKernelBuilder::setLabel(const Value *V, unsigned Num) {
5545   LabelMap[V] = Num;
5546 }
5547 
addStringToPool(StringRef Str)5548 unsigned GenXKernelBuilder::addStringToPool(StringRef Str) {
5549   auto val = std::pair<std::string, unsigned>(Str.begin(), StringPool.size());
5550   auto Res = StringPool.insert(val);
5551   return Res.first->second;
5552 }
5553 
getStringByIndex(unsigned Val)5554 StringRef GenXKernelBuilder::getStringByIndex(unsigned Val) {
5555   for (const auto &it : StringPool) {
5556     if (it.second == Val)
5557       return it.first;
5558   }
5559   IGC_ASSERT_EXIT_MESSAGE(0, "Can't find string by index.");
5560 }
5561 
5562 /***********************************************************************
5563  * Get size of the argument of type 'type' in bytes considering layout of
5564  * subtypes of aggregate type in units of size 'mod'
5565  * mod is typically 32 (GRF) or 16 (oword)
5566  */
getValueSize(Type * T,unsigned Mod) const5567 unsigned GenXKernelBuilder::getValueSize(Type *T, unsigned Mod) const {
5568   unsigned Result = 0;
5569   if (T->isAggregateType()) {
5570     for (unsigned i = 0; i < T->getStructNumElements(); i++) {
5571       Result += getValueSize(T->getContainedType(i)) / Mod +
5572                 (getValueSize(T->getContainedType(i)) % Mod ? 1 : 0);
5573     }
5574     Result *= Mod;
5575   } else
5576     Result = FG->getModule()->getDataLayout().getTypeSizeInBits(T) / 8;
5577   return Result;
5578 }
5579 
getFuncArgsSize(llvm::Function * F)5580 unsigned GenXKernelBuilder::getFuncArgsSize(llvm::Function *F) {
5581   unsigned Result = 0;
5582   for (auto &Arg : F->args())
5583     Result += getValueSize(&Arg);
5584   return Result;
5585 }
5586 
5587 GenericCisaVariable *
createCisaVariable(VISAKernel * Kernel,const char * Name,VISA_GenVar * AliasVar,unsigned ByteSize)5588 GenXKernelBuilder::createCisaVariable(VISAKernel *Kernel, const char *Name,
5589                                       VISA_GenVar *AliasVar,
5590                                       unsigned ByteSize) {
5591   auto it = CisaVars[Kernel].find(Name);
5592   if (it != CisaVars[Kernel].end())
5593     it->second = GenericCisaVariable(Name, AliasVar, ByteSize);
5594   else
5595     CisaVars[Kernel].insert(
5596         std::make_pair(Name, GenericCisaVariable(Name, AliasVar, ByteSize)));
5597   return &(CisaVars[Kernel].at(Name));
5598 }
5599 
deduceByteSize(Value * V,const DataLayout & DL)5600 static unsigned deduceByteSize(Value *V, const DataLayout &DL) {
5601   return DL.getTypeSizeInBits(V->getType()->getScalarType()) / 8;
5602 }
5603 
deduceByteSize(CisaVariable * V,const DataLayout & DL)5604 static unsigned deduceByteSize(CisaVariable *V, const DataLayout &DL) {
5605   IGC_ASSERT(V->getType() < ISA_TYPE_NUM);
5606   return CISATypeTable[V->getType()].typeSize;
5607 }
5608 
/**************************************************************************************************
 * emitVectorCopy : emit vISA that performs copying from Src to Dst
 *
 * Emit sufficient amount of MOVs from Src to Dst picking size in a greedy
 * manner: execution sizes 16, 8, 4, 2 and 1 are tried in that order.
 *
 * T1 and T2 should be llvm::Value and CisaVariable or vice-versa,
 * CisaVariable=>CisaVariable or Value=>Value copying is not supported here
 *
 * Enter:   Dst, Src = destination and source of the copy
 *          RowOff, ColOff = destination offset (GRF row, byte within the
 *                           row); advanced past the copied bytes
 *          SrcRowOff, SrcColOff = same for the source
 *          TotalSize = number of bytes to copy
 *          DoCopy = if false no MOV is appended, but the operands are still
 *                   created and the offsets are still advanced
 */
template <typename T1, typename T2>
void GenXKernelBuilder::emitVectorCopy(T1 *Dst, T2 *Src, unsigned &RowOff,
                                       unsigned &ColOff, unsigned &SrcRowOff,
                                       unsigned &SrcColOff, int TotalSize,
                                       bool DoCopy) {
  IGC_ASSERT(Subtarget);
  // Copies as many Sz-element chunks as still fit into TotalSize.
  auto partCopy = [&, GRFWidth = Subtarget->getGRFByteSize()](int Sz) {
    // The element size is deduced from the destination.
    int ByteSz = Sz * deduceByteSize(Dst, DL);
    IGC_ASSERT(ByteSz);

    // First source GRF row touched by a chunk and the row its end falls in.
    // NOTE(review): this check uses the captured GRFWidth while the loop
    // below uses GrfByteSize - presumably the same value, confirm.
    unsigned Start = SrcRowOff;
    unsigned End = (SrcRowOff * GRFWidth + SrcColOff + ByteSz) / GRFWidth;

    // mov is prohibited to span across >2 GRF
    // (checked once, against the source offsets on entry; this chunk size is
    // then skipped and left to the smaller sizes)
    if (End - Start >= 2) {
      IGC_ASSERT(Sz > 1);
      return;
    }

    while (TotalSize >= ByteSz) {
      VISA_VectorOpnd *ArgSrc = nullptr, *ArgDst = nullptr;
      // Source operand at the current source offset, then advance it.
      unsigned Offset = SrcRowOff * GrfByteSize + SrcColOff;
      ArgSrc = createSource(Src, UNSIGNED, Sz, &Offset);
      SrcRowOff += (SrcColOff + ByteSz) / GrfByteSize;
      SrcColOff = (SrcColOff + ByteSz) % GrfByteSize;

      // Destination operand at the current destination offset, then advance.
      Offset = RowOff * GrfByteSize + ColOff;
      ArgDst = createDestination(Dst, UNSIGNED, &Offset);
      RowOff += (ColOff + ByteSz) / GrfByteSize;
      ColOff = (ColOff + ByteSz) % GrfByteSize;

      if (DoCopy)
        CISA_CALL(Kernel->AppendVISADataMovementInst(
            ISA_MOV, nullptr, false,
            (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
            getExecSizeFromValue(Sz), ArgDst, ArgSrc));
      TotalSize -= ByteSz;
    }
  };
  partCopy(16);
  partCopy(8);
  partCopy(4);
  partCopy(2);
  partCopy(1);
}
5663 
pushStackArg(VISA_StateOpndHandle * Dst,Value * Src,int TotalSz,unsigned & RowOff,unsigned & ColOff,unsigned & SrcRowOff,unsigned & SrcColOff,bool DoCopy)5664 void GenXKernelBuilder::pushStackArg(VISA_StateOpndHandle *Dst, Value *Src,
5665                                      int TotalSz, unsigned &RowOff,
5666                                      unsigned &ColOff, unsigned &SrcRowOff,
5667                                      unsigned &SrcColOff, bool DoCopy) {
5668   VISA_GenVar *StackOff = nullptr, *Sp = nullptr;
5669 
5670   auto StackTmp = createCisaVariable(Kernel, "stackTmp", nullptr, TotalSz);
5671 
5672   auto TmpType = llvmToVisaType(Src->getType());
5673   auto TmpVar = StackTmp->getAlias(TmpType, Kernel);
5674 
5675   CISA_CALL(Kernel->CreateVISAGenVar(StackOff, "stackOff", 1, ISA_TYPE_UQ,
5676                                      ALIGN_OWORD));
5677   unsigned RawOff = 0;
5678   auto partCopy = [&](int Sz) {
5679     // TODO: mb we have some constant for oword size
5680     int ByteSz = Sz * visa::BytesPerOword;
5681     int CopySz = std::min(ByteSz, TotalSz);
5682 
5683     while (TotalSz - ByteSz >= 0 || (TotalSz > 0 && Sz == 1)) {
5684       CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
5685       VISA_VectorOpnd *SpOpSrc1 = nullptr;
5686       VISA_VectorOpnd *SpOpSrc2 = nullptr;
5687       VISA_VectorOpnd *SpOpDst = nullptr;
5688       CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
5689       CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc1, Sp, MODIFIER_NONE, 0, 1,
5690                                              0, 0, 0));
5691       CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc2, Sp, MODIFIER_NONE, 0, 1,
5692                                              0, 0, 0));
5693 
5694       VISA_VectorOpnd *TmpOffDst = nullptr, *TmpOffSrc = nullptr;
5695       CISA_CALL(Kernel->CreateVISADstOperand(TmpOffDst, StackOff, 1, 0, 0));
5696       CISA_CALL(Kernel->CreateVISASrcOperand(TmpOffSrc, StackOff, MODIFIER_NONE,
5697                                              0, 1, 0, 0, 0));
5698 
5699       emitVectorCopy(TmpVar, Src, RowOff, ColOff, SrcRowOff, SrcColOff, CopySz,
5700                      DoCopy);
5701       VISA_VectorOpnd *Imm = nullptr;
5702       unsigned OffVal = Sz;
5703       CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
5704       VISA_RawOpnd *RawSrc = nullptr;
5705       CISA_CALL(
5706           Kernel->CreateVISARawOperand(RawSrc, TmpVar->getGenVar(), RawOff));
5707       RawOff += Sz * visa::BytesPerOword;
5708 
5709       if (DoCopy) {
5710         CISA_CALL(Kernel->AppendVISADataMovementInst(ISA_MOV, nullptr, false,
5711                                                      vISA_EMASK_M1, EXEC_SIZE_1,
5712                                                      TmpOffDst, SpOpSrc1));
5713         CISA_CALL(Kernel->AppendVISASurfAccessOwordLoadStoreInst(
5714             ISA_OWORD_ST, vISA_EMASK_M1, Dst, getCisaOwordNumFromNumber(Sz),
5715             TmpOffSrc, RawSrc));
5716       }
5717       CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
5718                                                  vISA_EMASK_M1, EXEC_SIZE_1,
5719                                                  SpOpDst, SpOpSrc2, Imm));
5720       TotalSz -= ByteSz;
5721     }
5722   };
5723 
5724   partCopy(8);
5725   partCopy(4);
5726   partCopy(2);
5727   partCopy(1);
5728 }
5729 
/**************************************************************************************************
 * popStackArg : emit vISA that reads a value back from the stack
 *
 * Enter:   Dst = value to fill
 *          Src = surface the oword loads read from
 *          TotalSz = number of bytes to read
 *          RowOff, ColOff = destination offsets handed to emitVectorCopy and
 *                           advanced by it
 *          SrcRowOff, SrcColOff = source offsets; reset to 0 before every
 *                                 copy and after every chunk size
 *          PrevStackOff = offset added to Sp for the next load; advanced by
 *                         the number of owords loaded
 *
 * The value is read in chunks of 8, 4, 2 and 1 owords, picked greedily; a
 * tail shorter than one oword is still loaded as one oword. Each chunk is
 * loaded into the start of the "stackTmp" variable and copied from there to
 * Dst.
 */
void GenXKernelBuilder::popStackArg(llvm::Value *Dst, VISA_StateOpndHandle *Src,
                                    int TotalSz, unsigned &RowOff,
                                    unsigned &ColOff, unsigned &SrcRowOff,
                                    unsigned &SrcColOff, int &PrevStackOff) {
  VISA_GenVar *StackOff = nullptr, *Sp = nullptr;

  auto StackTmp = createCisaVariable(Kernel, "stackTmp", nullptr, TotalSz);

  // View the temporary with the element type of the destination value.
  auto TmpType = llvmToVisaType(Dst->getType());
  auto TmpVar = StackTmp->getAlias(TmpType, Kernel);

  CISA_CALL(Kernel->CreateVISAGenVar(StackOff, "stackOff", 1, ISA_TYPE_UQ,
                                     ALIGN_OWORD));
  auto partCopy = [&](int Sz) {
    // TODO: mb we have some constant for oword size
    int ByteSz = Sz * visa::BytesPerOword;
    while (TotalSz - ByteSz >= 0 || (TotalSz > 0 && Sz == 1)) {
      CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
      VISA_VectorOpnd *SpOpSrc = nullptr;
      CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1,
                                             0, 0, 0));

      VISA_VectorOpnd *TmpOffDst = nullptr;
      VISA_VectorOpnd *TmpOffSrc = nullptr;
      CISA_CALL(Kernel->CreateVISADstOperand(TmpOffDst, StackOff, 1, 0, 0));
      CISA_CALL(Kernel->CreateVISASrcOperand(TmpOffSrc, StackOff, MODIFIER_NONE,
                                             0, 1, 0, 0, 0));

      VISA_VectorOpnd *Imm = nullptr;
      int OffVal = PrevStackOff;
      CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
      PrevStackOff += Sz;
      VISA_RawOpnd *RawSrc = nullptr;
      CISA_CALL(Kernel->CreateVISARawOperand(RawSrc, TmpVar->getGenVar(), 0));

      // StackOff = Sp + PrevStackOff (the value before it was advanced)
      CISA_CALL(Kernel->AppendVISAArithmeticInst(ISA_ADD, nullptr, false,
                                                 vISA_EMASK_M1, EXEC_SIZE_1,
                                                 TmpOffDst, SpOpSrc, Imm));
      // Load Sz owords from Src at StackOff into the start of the temporary.
      CISA_CALL(Kernel->AppendVISASurfAccessOwordLoadStoreInst(
          ISA_OWORD_LD, vISA_EMASK_M1, Src, getCisaOwordNumFromNumber(Sz),
          TmpOffSrc, RawSrc));
      // Copy no more than what is still left; the temporary is always read
      // from its start.
      int CopySz = std::min(ByteSz, TotalSz);
      SrcRowOff = SrcColOff = 0;
      emitVectorCopy(Dst, TmpVar, RowOff, ColOff, SrcRowOff, SrcColOff, CopySz);
      TotalSz -= ByteSz;
    }
    SrcRowOff = SrcColOff = 0;
  };

  partCopy(8);
  partCopy(4);
  partCopy(2);
  partCopy(1);
}
5784 
5785 /**************************************************************************************************
5786  * beginFunction : emit function prologue and arguments passing code
5787  *
5788  * Emit stack-related function prologue if Func is a kernel and there're
5789  * stackcalls or Func is a stack function.
5790  *
5791  * Prologue performs Sp and Fp initialization (both for kernel and stack
5792  * function). For stack functions arguments passing code is generated as well,
5793  * %arg and stackmem passing is supported.
5794  */
beginFunction(Function * Func)5795 void GenXKernelBuilder::beginFunction(Function *Func) {
5796   VISA_GenVar *Sp = nullptr, *Fp = nullptr, *Hwtid = nullptr;
5797   CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
5798   CISA_CALL(Kernel->GetPredefinedVar(Fp, PREDEFINED_FE_FP));
5799   // TODO: consider removing the if for local stack
5800   if (!allowI64Ops()) {
5801     CISA_CALL(Kernel->CreateVISAGenVar(Sp, "Sp", 1, ISA_TYPE_UD, ALIGN_DWORD, Sp));
5802     CISA_CALL(Kernel->CreateVISAGenVar(Fp, "Fp", 1, ISA_TYPE_UD, ALIGN_DWORD, Fp));
5803   }
5804   CISA_CALL(Kernel->GetPredefinedVar(Hwtid, PREDEFINED_HW_TID));
5805 
5806   VISA_VectorOpnd *SpOpSrc = nullptr;
5807   VISA_VectorOpnd *SpOpSrc1 = nullptr;
5808   VISA_VectorOpnd *SpOpDst = nullptr;
5809   VISA_VectorOpnd *SpOpDst1 = nullptr;
5810   VISA_VectorOpnd *FpOpDst = nullptr;
5811   VISA_VectorOpnd *FpOpSrc = nullptr;
5812   VISA_VectorOpnd *Imm = nullptr;
5813 
5814   CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
5815   CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst1, Sp, 1, 0, 0));
5816   CISA_CALL(Kernel->CreateVISADstOperand(FpOpDst, Fp, 1, 0, 0));
5817 
5818   CISA_CALL(
5819       Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
5820   CISA_CALL(
5821       Kernel->CreateVISASrcOperand(SpOpSrc1, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
5822 
5823   CISA_CALL(
5824       Kernel->CreateVISASrcOperand(FpOpSrc, Fp, MODIFIER_NONE, 0, 1, 0, 0, 0));
5825 
5826   if (genx::isKernel(Func) && (HasStackcalls || HasAlloca)) {
5827     // init kernel stack
5828     VISA_GenVar *Hwtid = nullptr;
5829     CISA_CALL(Kernel->GetPredefinedVar(Hwtid, PREDEFINED_HW_TID));
5830 
5831     VISA_VectorOpnd *HwtidOp = nullptr;
5832 
5833     // probably here would be better calculate exact stack size required
5834     // by the kernel, but legacy stack builder is to be dropped away soon
5835     uint32_t Val = visa::StackPerThreadScratch;
5836 
5837     CISA_CALL(Kernel->CreateVISAImmediate(Imm, &Val, ISA_TYPE_UD));
5838     CISA_CALL(Kernel->CreateVISASrcOperand(HwtidOp, Hwtid, MODIFIER_NONE, 0, 1,
5839                                            0, 0, 0));
5840 
5841     if (StackSurf == PREDEFINED_SURFACE_STACK) {
5842       CISA_CALL(Kernel->AppendVISAArithmeticInst(
5843           ISA_MUL, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5844           EXEC_SIZE_1, SpOpDst, HwtidOp, Imm));
5845     } else {
5846       VISA_GenVar *Tmp = nullptr;
5847 
5848       CISA_CALL(Kernel->CreateVISAGenVar(
5849         Tmp, "SpOff", 1, allowI64Ops() ? ISA_TYPE_UQ : ISA_TYPE_UD, ALIGN_DWORD));
5850 
5851       VISA_VectorOpnd *OffOpDst = nullptr;
5852       VISA_VectorOpnd *OffOpSrc = nullptr;
5853       CISA_CALL(Kernel->CreateVISADstOperand(OffOpDst, Tmp, 1, 0, 0));
5854       CISA_CALL(Kernel->CreateVISASrcOperand(OffOpSrc, Tmp, MODIFIER_NONE, 0, 1,
5855                                              0, 0, 0));
5856       CISA_CALL(Kernel->AppendVISAArithmeticInst(
5857           ISA_MUL, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5858           EXEC_SIZE_1, OffOpDst, HwtidOp, Imm));
5859 
5860       VISA_VectorOpnd *OpSrc = nullptr;
5861       VISA_GenVar *R0 = nullptr;
5862       CISA_CALL(Kernel->GetPredefinedVar(R0, PREDEFINED_R0));
5863       CISA_CALL(Kernel->CreateVISASrcOperand(OpSrc, R0, MODIFIER_NONE, 0, 1, 0,
5864                                              0, 5));
5865       if (OptStrictI64Check)
5866         report_fatal_error("CisaBuilder should not produce 64-bit instructions"
5867                            " add64", false);
5868       CISA_CALL(Kernel->AppendVISADataMovementInst(
5869           ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5870           EXEC_SIZE_1, SpOpDst, OpSrc));
5871       Kernel->AppendVISAArithmeticInst(
5872           ISA_ADD, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5873           EXEC_SIZE_1, SpOpDst1, SpOpSrc1, OffOpSrc);
5874     }
5875     CISA_CALL(Kernel->AppendVISADataMovementInst(
5876         ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5877         EXEC_SIZE_1, FpOpDst, SpOpSrc));
5878     unsigned SMO = BackendConfig->getStackSurfaceMaxSize();
5879     Kernel->AddKernelAttribute("SpillMemOffset", 4, &SMO);
5880   } else if (genx::requiresStackCall(Func) ||
5881              genx::isReferencedIndirectly(Func)) {
5882     if (genx::isReferencedIndirectly(Func)) {
5883       int ExtVal = 1;
5884       Kernel->AddKernelAttribute("Extern", 4, &ExtVal);
5885     }
5886     // stack function prologue
5887     VISA_GenVar *FpTmp = nullptr;
5888 
5889     auto *ArgVar = &CisaVars[Kernel].at("argv");
5890     auto *RetVar = &CisaVars[Kernel].at("retv");
5891 
5892     if (FPMap.count(Func) == 0) {
5893       CISA_CALL(
5894           Kernel->CreateVISAGenVar(FpTmp, "tmp", 1, ISA_TYPE_UQ, ALIGN_DWORD));
5895       FPMap.insert(std::pair<Function *, VISA_GenVar *>(Func, FpTmp));
5896     } else
5897       FpTmp = FPMap[Func];
5898 
5899     // init func stack pointers
5900     VISA_VectorOpnd *TmpOp = nullptr;
5901     CISA_CALL(Kernel->CreateVISADstOperand(TmpOp, FpTmp, 1, 0, 0));
5902 
5903     Kernel->AppendVISADataMovementInst(
5904         ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5905         EXEC_SIZE_1, TmpOp, FpOpSrc);
5906     Kernel->AppendVISADataMovementInst(
5907         ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
5908         EXEC_SIZE_1, FpOpDst, SpOpSrc);
5909 
5910     // unpack args
5911     int Sz = 0, StackOff = 0;
5912     unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
5913     bool StackStarted = false;
5914     unsigned NoStackSize = 0;
5915     // NOTE: using reverse iterators for args would be much better we don't have
5916     // any though
5917     for (auto &FArg : Func->args()) {
5918       if (Liveness->getLiveRange(&FArg) &&
5919           Liveness->getLiveRange(&FArg)->getCategory() == RegCategory::EM)
5920         continue;
5921 
5922       RowOff = 0, ColOff = 0;
5923       unsigned ArgSize = getValueSize(FArg.getType());
5924       if (SrcColOff &&
5925           (FArg.getType()->isVectorTy() || ArgSize > (GrfByteSize - ColOff))) {
5926         SrcRowOff++;
5927         SrcColOff = 0;
5928         NoStackSize++;
5929       }
5930       if (Liveness->getLiveRange(&FArg)->getCategory() ==
5931           RegCategory::PREDICATE) {
5932         VISA_VectorOpnd *argSrc = nullptr;
5933         Kernel->CreateVISASrcOperand(
5934             argSrc,
5935             ArgVar->getAlias(llvmToVisaType(FArg.getType()), Kernel)
5936                 ->getGenVar(),
5937             MODIFIER_NONE, 0, 1, 0, SrcRowOff, SrcColOff);
5938         auto *PReg =
5939             getRegForValueOrNullAndSaveAlias(KernFunc, SimpleValue(&FArg));
5940         IGC_ASSERT(PReg);
5941         Kernel->AppendVISASetP(vISA_EMASK_M1_NM, EXEC_SIZE_1,
5942                                PReg->GetVar<VISA_PredVar>(Kernel), argSrc);
5943       } else {
5944         if ((int)ArgVar->getByteSize() - SrcRowOff * GrfByteSize >= ArgSize &&
5945             !StackStarted) {
5946           emitVectorCopy(&FArg, ArgVar->getAlias(&FArg, Kernel), RowOff, ColOff,
5947                          SrcRowOff, SrcColOff, getValueSize(&FArg));
5948           NoStackSize = RowOff;
5949         } else {
5950           StackStarted = true;
5951           VISA_StateOpndHandle *stackSurf = nullptr;
5952           VISA_SurfaceVar *stackSurfVar = nullptr;
5953           CISA_CALL(Kernel->GetPredefinedSurface(stackSurfVar, StackSurf));
5954           CISA_CALL(
5955               Kernel->CreateVISAStateOperandHandle(stackSurf, stackSurfVar));
5956           popStackArg(&FArg, stackSurf, ArgSize, RowOff, ColOff, SrcRowOff,
5957                       SrcColOff, StackOff);
5958         }
5959       }
5960       Sz += ArgSize;
5961     }
5962     if (!StackStarted && ColOff)
5963       NoStackSize++;
5964     auto *StackCallee = Func2Kern[Func];
5965     auto *FuncTy = Func->getFunctionType();
5966     int RetSize =
5967         (FuncTy->getReturnType()->isVoidTy() ||
5968          getValueSize(FuncTy->getReturnType()) > RetVar->getByteSize())
5969             ? 0
5970             : (getValueSize(FuncTy->getReturnType()) + GrfByteSize - 1) /
5971                   GrfByteSize;
5972 
5973     StackCallee->SetFunctionInputSize(NoStackSize);
5974     StackCallee->SetFunctionReturnSize(RetSize);
5975     StackCallee->AddKernelAttribute("ArgSize", 1, &NoStackSize);
5976     StackCallee->AddKernelAttribute("RetValSize", 1, &RetSize);
5977   }
5978 }
5979 
beginFunctionLight(Function * Func)5980 void GenXKernelBuilder::beginFunctionLight(Function *Func) {
5981   if (genx::isKernel(Func))
5982     return;
5983   if (!genx::requiresStackCall(Func) && !genx::isReferencedIndirectly(Func))
5984     return;
5985   if (genx::isReferencedIndirectly(Func)) {
5986     int ExtVal = 1;
5987     Kernel->AddKernelAttribute("Extern", 4, &ExtVal);
5988   }
5989   // stack function prologue
5990   auto *MDArg = Func->getMetadata(InstMD::FuncArgSize);
5991   auto *MDRet = Func->getMetadata(InstMD::FuncRetSize);
5992   IGC_ASSERT(MDArg && MDRet);
5993   auto ArgSize =
5994       cast<ConstantInt>(
5995           cast<ConstantAsMetadata>(MDArg->getOperand(0).get())->getValue())
5996           ->getZExtValue();
5997   auto RetSize =
5998       cast<ConstantInt>(
5999           cast<ConstantAsMetadata>(MDRet->getOperand(0).get())->getValue())
6000           ->getZExtValue();
6001 
6002   auto *StackCallee = Func2Kern[Func];
6003   StackCallee->SetFunctionInputSize(ArgSize);
6004   StackCallee->SetFunctionReturnSize(RetSize);
6005   StackCallee->AddKernelAttribute("ArgSize", 1, &ArgSize);
6006   StackCallee->AddKernelAttribute("RetValSize", 1, &RetSize);
6007 }
6008 
6009 /**************************************************************************************************
6010  * endFunction : emit function epilogue and return value passing code
6011  *
6012  * Emit stack-related function epilogue if Func is a stack function.
6013  *
6014  * Epilogue restores Sp and Fp. Return value may be passed either visa %retval
6015  * arg or stackmem, both scalar/vector and aggregate types are supported (please
6016  * also see build[Extract|Insert]Value).
6017  */
endFunction(Function * Func,ReturnInst * RI)6018 void GenXKernelBuilder::endFunction(Function *Func, ReturnInst *RI) {
6019   if (!genx::isKernel(Func) &&
6020       (genx::requiresStackCall(Func) || genx::isReferencedIndirectly(Func))) {
6021     VISA_GenVar *Sp = nullptr, *Fp = nullptr;
6022     CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
6023     CISA_CALL(Kernel->GetPredefinedVar(Fp, PREDEFINED_FE_FP));
6024 
6025     VISA_VectorOpnd *SpOpSrc = nullptr;
6026     VISA_VectorOpnd *SpOpDst = nullptr;
6027     VISA_VectorOpnd *FpOpDst = nullptr;
6028     VISA_VectorOpnd *FpOpSrc = nullptr;
6029 
6030     CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
6031     CISA_CALL(Kernel->CreateVISADstOperand(FpOpDst, Fp, 1, 0, 0));
6032     CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1,
6033                                            0, 0, 0));
6034     CISA_CALL(Kernel->CreateVISASrcOperand(FpOpSrc, Fp, MODIFIER_NONE, 0, 1,
6035                                            0, 0, 0));
6036 
6037     VISA_VectorOpnd *TmpOp = nullptr;
6038     CISA_CALL(Kernel->CreateVISASrcOperand(TmpOp, FPMap[Func], MODIFIER_NONE,
6039                                            0, 1, 0, 0, 0));
6040 
6041     Kernel->AppendVISADataMovementInst(
6042         ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6043         EXEC_SIZE_1, SpOpDst, FpOpSrc);
6044     Kernel->AppendVISADataMovementInst(
6045         ISA_MOV, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6046         EXEC_SIZE_1, FpOpDst, TmpOp);
6047 
6048     VISA_GenVar *Ret = nullptr;
6049     CISA_CALL(Kernel->GetPredefinedVar(Ret, PREDEFINED_RET));
6050 
6051     if (!Func->getReturnType()->isVoidTy() &&
6052         !Func->getReturnType()->isAggregateType() &&
6053         Liveness->getLiveRangeOrNull(RI->getReturnValue()) &&
6054         (Liveness->getLiveRange(RI->getReturnValue())->getCategory() !=
6055              RegCategory::EM &&
6056          Liveness->getLiveRange(RI->getReturnValue())->getCategory() !=
6057              RegCategory::PREDICATE)) {
6058       GenericCisaVariable *RetVar = &CisaVars[Kernel].at("retv");
6059       IGC_ASSERT(!Func->getReturnType()->isAggregateType());
6060 
6061       // pack retval
6062       unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
6063       if (getValueSize(Func->getReturnType()) <=
6064           RetVar->getByteSize()) {
6065         unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
6066         emitVectorCopy(RetVar->getAlias(RI->getReturnValue(), Kernel), RI->getReturnValue(),
6067           RowOff, ColOff, SrcRowOff,
6068                        SrcColOff, getValueSize(RI->getReturnValue()));
6069       } else {
6070         VISA_StateOpndHandle *StackSurfOp = nullptr;
6071         VISA_SurfaceVar *StackSurfVar = nullptr;
6072         CISA_CALL(Kernel->GetPredefinedSurface(StackSurfVar,
6073                                                StackSurf));
6074         CISA_CALL(
6075             Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6076         pushStackArg(StackSurfOp, RI->getReturnValue(),
6077                      getValueSize(Func->getReturnType()), RowOff, ColOff,
6078                      SrcRowOff, SrcColOff);
6079       }
6080     }
6081     for (auto II : RetvInserts)
6082       buildInsertRetv(II);
6083     RetvInserts.clear();
6084   }
6085 }
6086 
buildExtractRetv(ExtractValueInst * Inst)6087 void GenXKernelBuilder::buildExtractRetv(ExtractValueInst *Inst) {
6088   auto T = Inst->getOperand(0)->getType();
6089   auto *RetVar = &CisaVars[Kernel].at("retv");
6090 
6091   bool UseStack = getValueSize(T) > RetVar->getByteSize();
6092 
6093   auto Index = Inst->getIndices().front();
6094   if (T->getContainedType(Index)->isVectorTy() &&
6095       cast<VectorType>(T->getContainedType(Index))
6096           ->getElementType()
6097           ->isIntegerTy(1))
6098     // elements of <N x i1> type should be ignored
6099     return;
6100 
6101   unsigned RowOff = 0, ColOff = 0;
6102   unsigned SrcRowOff = 0, SrcColOff = 0;
6103   for (unsigned i = 0; i < Index; i++) {
6104     int Mod = UseStack ? visa::BytesPerOword : GrfByteSize;
6105     SrcRowOff += (getValueSize(T->getContainedType(i)) + Mod - 1) / Mod;
6106   }
6107 
6108   if (UseStack) {
6109     int Prev = SrcRowOff;
6110     VISA_StateOpndHandle *StackSurfOp = nullptr;
6111     VISA_SurfaceVar *StackSurfVar = nullptr;
6112     CISA_CALL(
6113         Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6114     CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6115     popStackArg(Inst, StackSurfOp, getValueSize(T->getContainedType(Index)),
6116                 RowOff, ColOff, SrcRowOff, SrcColOff, Prev);
6117   } else
6118     emitVectorCopy(Inst, RetVar->getAlias(Inst, Kernel), RowOff, ColOff,
6119                    SrcRowOff, SrcColOff, getValueSize(Inst));
6120 }
6121 
buildInsertRetv(InsertValueInst * Inst)6122 void GenXKernelBuilder::buildInsertRetv(InsertValueInst *Inst) {
6123   auto T = Inst->getOperand(0)->getType();
6124   auto *RetVar = &CisaVars[Kernel].at("retv");
6125 
6126   bool UseStack = getValueSize(T) > RetVar->getByteSize();
6127 
6128   auto Index = Inst->getIndices().front();
6129   if (T->getContainedType(Index)->isVectorTy() &&
6130       cast<VectorType>(T->getContainedType(Index))
6131           ->getElementType()
6132           ->isIntegerTy(1)) {
6133     // elements of <N x i1> type should be ignored
6134     return;
6135   }
6136 
6137   unsigned RowOff = 0, ColOff = 0;
6138   unsigned SrcRowOff = 0, SrcColOff = 0;
6139 
6140   if (!UseStack)
6141     for (unsigned i = 0; i < Index; i++)
6142       RowOff += (getValueSize(T->getContainedType(i)) + GrfByteSize - 1) /
6143                 GrfByteSize;
6144 
6145   if (UseStack) {
6146     VISA_StateOpndHandle *StackSurfOp = nullptr;
6147     VISA_SurfaceVar *StackSurfVar = nullptr;
6148     CISA_CALL(
6149         Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6150     CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6151     pushStackArg(StackSurfOp, Inst->getOperand(1),
6152                  getValueSize(T->getContainedType(Index)), RowOff, ColOff,
6153                  SrcRowOff, SrcColOff);
6154   } else
6155     emitVectorCopy(RetVar->getAlias(Inst->getOperand(1), Kernel),
6156                    Inst->getOperand(1), RowOff, ColOff, SrcRowOff, SrcColOff,
6157                    getValueSize(Inst->getOperand(1)));
6158 }
6159 
buildStackCallLight(CallInst * CI,const DstOpndDesc & DstDesc)6160 void GenXKernelBuilder::buildStackCallLight(CallInst *CI,
6161                                             const DstOpndDesc &DstDesc) {
6162   LLVM_DEBUG(dbgs() << "Build stack call " << *CI << "\n");
6163   Function *Callee = CI->getCalledFunction();
6164 
6165   // Check whether the called function has a predicate arg that is EM.
6166   auto *EMArg = std::find_if(CI->arg_begin(), CI->arg_end(), [this](Use &Arg) {
6167     return Arg->getType()->getScalarType()->isIntegerTy(1) &&
6168            Liveness->getLiveRange(Arg)->getCategory() == RegCategory::EM;
6169   });
6170   VISA_PredOpnd *Pred = nullptr;
6171   VISA_Exec_Size Esz = EXEC_SIZE_16;
6172   if (EMArg != CI->arg_end()) {
6173     auto EMOperandNum = EMArg->getOperandNo();
6174     Pred = createPred(CI, BaleInfo(), EMOperandNum);
6175     auto *VTy = cast<IGCLLVM::FixedVectorType>(
6176         CI->getArgOperand(EMOperandNum)->getType());
6177     Esz = getExecSizeFromValue(VTy->getNumElements());
6178   }
6179   addDebugInfo();
6180   auto *MDArg = CI->getMetadata(InstMD::FuncArgSize);
6181   auto *MDRet = CI->getMetadata(InstMD::FuncRetSize);
6182   IGC_ASSERT(MDArg && MDRet);
6183   auto ArgSize =
6184       cast<ConstantInt>(
6185           cast<ConstantAsMetadata>(MDArg->getOperand(0).get())->getValue())
6186           ->getZExtValue();
6187   auto RetSize =
6188       cast<ConstantInt>(
6189           cast<ConstantAsMetadata>(MDRet->getOperand(0).get())->getValue())
6190           ->getZExtValue();
6191   if (Callee) {
6192     CISA_CALL(Kernel->AppendVISACFFunctionCallInst(
6193         Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6194         Callee->getName().str(), ArgSize, RetSize));
6195   } else {
6196     auto *FuncAddr = createSource(IGCLLVM::getCalledValue(CI), DONTCARESIGNED);
6197     IGC_ASSERT(FuncAddr);
6198     CISA_CALL(Kernel->AppendVISACFIndirectFuncCallInst(
6199         Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6200         FuncAddr, ArgSize, RetSize));
6201   }
6202 }
6203 
buildStackCall(CallInst * CI,const DstOpndDesc & DstDesc)6204 void GenXKernelBuilder::buildStackCall(CallInst *CI,
6205                                        const DstOpndDesc &DstDesc) {
6206   LLVM_DEBUG(dbgs() << "Build stack call " << *CI << "\n");
6207   Function *Callee = CI->getCalledFunction();
6208   auto *FuncTy = CI->getFunctionType();
6209 
6210   // Check whether the called function has a predicate arg that is EM.
6211   int EMOperandNum = -1, EMIdx = -1;
6212   for (auto &Arg : CI->arg_operands()) {
6213     ++EMIdx;
6214     if (!Arg->getType()->getScalarType()->isIntegerTy(1))
6215       continue;
6216     if (Liveness->getLiveRange(Arg)->getCategory() == RegCategory::EM) {
6217       EMOperandNum = EMIdx;
6218       break;
6219     }
6220   }
6221 
6222   int TotalArgSize = 0;
6223   for (auto &CallArg : CI->arg_operands())
6224     TotalArgSize += getValueSize(CallArg->getType());
6225 
6226   VISA_GenVar *Sp = nullptr, *Arg = nullptr, *Ret = nullptr;
6227   CISA_CALL(Kernel->GetPredefinedVar(Sp, PREDEFINED_FE_SP));
6228   CISA_CALL(Kernel->GetPredefinedVar(Arg, PREDEFINED_ARG));
6229   CISA_CALL(Kernel->GetPredefinedVar(Ret, PREDEFINED_RET));
6230 
6231   unsigned ColOff = 0, RowOff = 0, SrcRowOff = 0, SrcColOff = 0;
6232 
6233   int Sz = 0, NoStackSize = 0, StackArgSz = 0;
6234   uint64_t StackOff = 0;
6235   bool StackStarted = false;
6236   // pack arguments
6237   for (auto &CallArg : CI->arg_operands()) {
6238     auto *CallArgLR = Liveness->getLiveRangeOrNull(CallArg.get());
6239     if (CallArgLR && CallArgLR->getCategory() == RegCategory::EM)
6240       continue;
6241 
6242     IGC_ASSERT(!CallArg->getType()->isAggregateType());
6243     SrcRowOff = 0, SrcColOff = 0;
6244     unsigned ArgSize = getValueSize(CallArg->getType());
6245 
6246     if (ColOff && (CallArg->getType()->isVectorTy() ||
6247                    ArgSize > (GrfByteSize - ColOff))) {
6248       RowOff++;
6249       ColOff = 0;
6250       // adjust size if we use only a part the last used GRF
6251       NoStackSize++;
6252     }
6253 
6254     bool IsUndef = isa<UndefValue>(CallArg);
6255     auto *ArgVar = &CisaVars[Kernel].at("argv");
6256     if ((int)ArgVar->getByteSize() - RowOff * GrfByteSize >= ArgSize &&
6257         !StackStarted) {
6258       IGC_ASSERT_MESSAGE(ArgSize <= Sz - ArgVar->getByteSize(),
6259         "cannot pass arg via stack and %arg as well");
6260 
6261       SrcRowOff = 0, SrcColOff = 0;
6262       if (!IsUndef && CallArgLR->getCategory() == RegCategory::PREDICATE) {
6263         VISA_VectorOpnd *PredDst = nullptr;
6264         Kernel->CreateVISADstOperand(
6265             PredDst,
6266             ArgVar->getAlias(llvmToVisaType(CallArg->getType()), Kernel)
6267                 ->getGenVar(),
6268             1, RowOff, ColOff);
6269         auto PReg =
6270             getRegForValueOrNullAndSaveAlias(KernFunc, SimpleValue(CallArg));
6271         IGC_ASSERT(PReg);
6272         Kernel->AppendVISAPredicateMove(PredDst,
6273                                         PReg->GetVar<VISA_PredVar>(Kernel));
6274         ColOff += ArgSize;
6275       } else
6276         emitVectorCopy<CisaVariable, Value>(
6277             ArgVar->getAlias(CallArg, Kernel), CallArg, RowOff, ColOff,
6278             SrcRowOff, SrcColOff, getValueSize(CallArg), !IsUndef);
6279       Sz += ArgSize;
6280       NoStackSize = RowOff;
6281     } else {
6282       StackStarted = true;
6283       RowOff = ColOff = 0;
6284       SrcRowOff = SrcColOff = 0;
6285       VISA_StateOpndHandle *StackSurfOp = nullptr;
6286       VISA_SurfaceVar *StackSurfVar = nullptr;
6287       CISA_CALL(
6288           Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6289       CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6290       pushStackArg(StackSurfOp, CallArg, ArgSize, RowOff, ColOff, SrcRowOff,
6291                    SrcColOff, !IsUndef);
6292 
6293       StackArgSz += (ArgSize / visa::BytesPerOword) +
6294                     (ArgSize % visa::BytesPerOword ? 1 : 0);
6295       StackOff = -StackArgSz;
6296     }
6297   }
6298   if (!StackStarted && ColOff)
6299     NoStackSize++;
6300 
6301   VISA_VectorOpnd *SpOpSrc = nullptr, *SpOpDst = nullptr, *Imm = nullptr;
6302   if (StackOff) {
6303     CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
6304     CISA_CALL(Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0,
6305                                            0, 0));
6306     CISA_CALL(Kernel->CreateVISAImmediate(Imm, &StackOff, ISA_TYPE_UQ));
6307     CISA_CALL(Kernel->AppendVISAArithmeticInst(
6308         ISA_ADD, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6309         EXEC_SIZE_1, SpOpDst, SpOpSrc, Imm));
6310   }
6311 
6312   VISA_PredOpnd *Pred = nullptr;
6313   VISA_Exec_Size Esz = EXEC_SIZE_16;
6314   if (EMOperandNum >= 0) {
6315     Pred = createPred(CI, BaleInfo(), EMOperandNum);
6316     auto *VTy = cast<IGCLLVM::FixedVectorType>(
6317         CI->getArgOperand(EMOperandNum)->getType());
6318     Esz = getExecSizeFromValue(VTy->getNumElements());
6319   }
6320   addDebugInfo();
6321 
6322   auto *RetVar = &CisaVars[Kernel].at("retv");
6323   bool ProcessRet = !FuncTy->getReturnType()->isVoidTy() &&
6324                     !FuncTy->getReturnType()->isAggregateType() &&
6325                     !(FuncTy->getReturnType()->isVectorTy() &&
6326                       cast<VectorType>(FuncTy->getReturnType())
6327                           ->getElementType()
6328                           ->isIntegerTy(1));
6329 
6330   // cannot use processRet here since aggr/em args should be co
6331   int RetSize =
6332       (FuncTy->getReturnType()->isVoidTy() ||
6333        getValueSize(FuncTy->getReturnType()) > RetVar->getByteSize())
6334           ? 0
6335           : (getValueSize(FuncTy->getReturnType()) + GrfByteSize - 1) /
6336                 GrfByteSize;
6337   if (Callee) {
6338     CISA_CALL(Kernel->AppendVISACFFunctionCallInst(
6339         Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6340         Callee->getName().str(), NoStackSize, RetSize));
6341   } else {
6342     auto *FuncAddr = createSource(IGCLLVM::getCalledValue(CI), DONTCARESIGNED);
6343     IGC_ASSERT(FuncAddr);
6344     CISA_CALL(Kernel->AppendVISACFIndirectFuncCallInst(
6345         Pred, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1), EXEC_SIZE_16,
6346         FuncAddr, NoStackSize, RetSize));
6347   }
6348 
6349   unsigned StackRetSz = 0;
6350   if (!FuncTy->getReturnType()->isVoidTy() &&
6351       getValueSize(FuncTy->getReturnType()) > RetVar->getByteSize())
6352     StackRetSz = (getValueSize(FuncTy->getReturnType(), visa::BytesPerOword) /
6353                       visa::BytesPerOword +
6354                   ((getValueSize(FuncTy->getReturnType(), visa::BytesPerOword) %
6355                     visa::BytesPerOword)
6356                        ? 1
6357                        : 0));
6358   // unpack retval
6359   if (ProcessRet && Liveness->getLiveRange(CI) &&
6360       Liveness->getLiveRange(CI)->getCategory() != RegCategory::EM) {
6361     unsigned RowOff = 0, ColOff = 0, SrcRowOff = 0, SrcColOff = 0;
6362     if (getValueSize(FuncTy->getReturnType()) <= RetVar->getByteSize()) {
6363       emitVectorCopy(CI, RetVar->getAlias(CI, Kernel), RowOff, ColOff,
6364                      SrcRowOff, SrcColOff, getValueSize(CI));
6365     } else {
6366       int StackOffVal = -StackRetSz;
6367       VISA_StateOpndHandle *StackSurfOp = nullptr;
6368       VISA_SurfaceVar *StackSurfVar = nullptr;
6369       CISA_CALL(
6370           Kernel->GetPredefinedSurface(StackSurfVar, StackSurf));
6371       CISA_CALL(Kernel->CreateVISAStateOperandHandle(StackSurfOp, StackSurfVar));
6372       popStackArg(CI, StackSurfOp, getValueSize(Callee->getReturnType()), RowOff,
6373                   ColOff, SrcRowOff, SrcColOff, StackOffVal);
6374     }
6375   }
6376   // restore Sp
6377   CISA_CALL(
6378       Kernel->CreateVISASrcOperand(SpOpSrc, Sp, MODIFIER_NONE, 0, 1, 0, 0, 0));
6379   CISA_CALL(Kernel->CreateVISADstOperand(SpOpDst, Sp, 1, 0, 0));
6380   uint64_t OffVal = -StackRetSz;
6381   IGC_ASSERT(OffVal <= std::numeric_limits<uint32_t>::max());
6382 
6383   if (OffVal) {
6384     CISA_CALL(Kernel->CreateVISAImmediate(Imm, &OffVal, ISA_TYPE_UD));
6385     CISA_CALL(Kernel->AppendVISAArithmeticInst(
6386       ISA_ADD, nullptr, false, (NoMask ? vISA_EMASK_M1_NM : vISA_EMASK_M1),
6387       EXEC_SIZE_1, SpOpDst, SpOpSrc, Imm));
6388   }
6389 }
6390 
6391 namespace {
6392 
6393 class GenXFinalizer : public ModulePass {
6394   raw_pwrite_stream &Out;
6395   LLVMContext *Ctx = nullptr;
6396 
6397 public:
6398   static char ID;
GenXFinalizer(raw_pwrite_stream & o)6399   explicit GenXFinalizer(raw_pwrite_stream &o) : ModulePass(ID), Out(o) {}
6400 
getPassName() const6401   StringRef getPassName() const override { return "GenX Finalizer"; }
6402 
getContext()6403   LLVMContext &getContext() {
6404     IGC_ASSERT(Ctx);
6405     return *Ctx;
6406   }
6407 
getAnalysisUsage(AnalysisUsage & AU) const6408   void getAnalysisUsage(AnalysisUsage &AU) const override {
6409     AU.addRequired<GenXModule>();
6410     AU.addRequired<FunctionGroupAnalysis>();
6411     AU.addRequired<TargetPassConfig>();
6412     AU.addRequired<GenXBackendConfig>();
6413     AU.setPreservesAll();
6414   }
6415 
runOnModule(Module & M)6416   bool runOnModule(Module &M) override {
6417     Ctx = &M.getContext();
6418 
6419     auto BC = &getAnalysis<GenXBackendConfig>();
6420     auto &FGA = getAnalysis<FunctionGroupAnalysis>();
6421     auto &GM = getAnalysis<GenXModule>();
6422     std::stringstream ss;
6423     VISABuilder *CisaBuilder = GM.GetCisaBuilder();
6424     if (GM.HasInlineAsm())
6425       CisaBuilder = GM.GetVISAAsmReader();
6426     CISA_CALL(CisaBuilder->Compile("genxir", &ss, EmitVisa));
6427 
6428     if (!BC->isDisableFinalizerMsg())
6429       dbgs() << CisaBuilder->GetCriticalMsg();
6430 
6431     Out << ss.str();
6432 
6433     // Collect some useful statistics
6434     for (auto *FG: FGA) {
6435       VISAKernel *Kernel = CisaBuilder->GetVISAKernel(FG->getName().str());
6436       IGC_ASSERT(Kernel);
6437       FINALIZER_INFO *jitInfo = nullptr;
6438       CISA_CALL(Kernel->GetJitInfo(jitInfo));
6439       IGC_ASSERT(jitInfo);
6440       NumAsmInsts += jitInfo->numAsmCount;
6441       SpillMemUsed += jitInfo->spillMemUsed;
6442     }
6443     return false;
6444   }
6445 };
6446 } // end anonymous namespace.
6447 
6448 char GenXFinalizer::ID = 0;
6449 
createGenXFinalizerPass(raw_pwrite_stream & o)6450 ModulePass *llvm::createGenXFinalizerPass(raw_pwrite_stream &o) {
6451   return new GenXFinalizer(o);
6452 }
6453 
6454 static SmallVector<const char *, 8>
collectFinalizerArgs(StringSaver & Saver,const GenXSubtarget & ST,bool EmitDebugInformation,const GenXBackendConfig & BC)6455 collectFinalizerArgs(StringSaver &Saver, const GenXSubtarget &ST,
6456                      bool EmitDebugInformation, const GenXBackendConfig &BC) {
6457   SmallVector<const char *, 8> Argv;
6458   auto addArgument = [&Argv, &Saver](StringRef Arg) {
6459     // String saver guarantees that string is null-terminated.
6460     Argv.push_back(Saver.save(Arg).data());
6461   };
6462 
6463   // enable preemption if we have SKL+ and option switched on
6464   if (BC.enablePreemption() && ST.hasPreemption())
6465     addArgument("-enablePreemption");
6466 
6467   addArgument("-dumpvisa");
6468   for (const auto &Fos : FinalizerOpts)
6469     cl::TokenizeGNUCommandLine(Fos, Saver, Argv);
6470 
6471   if (EmitDebugInformation)
6472     addArgument("-generateDebugInfo");
6473   if (BC.passDebugToFinalizer())
6474     addArgument("-debug");
6475   if (BC.emitDebuggableKernels()) {
6476     addArgument("-addKernelID");
6477     addArgument("-setstartbp");
6478   }
6479   if (BC.asmDumpsEnabled()) {
6480     addArgument("-dumpcommonisa");
6481     addArgument("-output");
6482     addArgument("-binary");
6483   }
6484   if (ST.needsWANoMaskFusedEU() && !DisableNoMaskWA) {
6485     addArgument("-noMaskWA");
6486     addArgument("2");
6487   }
6488   if (BC.isLargeGRFMode()) {
6489     addArgument("-TotalGRFNum");
6490     addArgument("256");
6491   }
6492   return Argv;
6493 }
6494 
dumpFinalizerArgs(const SmallVectorImpl<const char * > & Argv,StringRef CPU)6495 static void dumpFinalizerArgs(const SmallVectorImpl<const char *> &Argv,
6496                               StringRef CPU) {
6497   // NOTE: CPU is not the Platform used by finalizer
6498   // The mapping is described by getVisaPlatform from GenXSubtarget.h
6499   outs() << "GenXCpu: " << CPU << "\n";
6500   outs() << "Finalizer Parameters:\n\t";
6501   std::for_each(Argv.begin(), Argv.end(),
6502                 [](const char *Arg) { outs() << " " << Arg; });
6503   outs() << "\n";
6504 }
6505 
getContext()6506 LLVMContext &GenXModule::getContext() {
6507   IGC_ASSERT(Ctx);
6508   return *Ctx;
6509 }
6510 
createVISABuilder(const GenXSubtarget & ST,const GenXBackendConfig & BC,bool EmitDebugInformation,vISABuilderMode Mode,LLVMContext & Ctx,BumpPtrAllocator & Alloc)6511 static VISABuilder *createVISABuilder(const GenXSubtarget &ST,
6512                                       const GenXBackendConfig &BC,
6513                                       bool EmitDebugInformation,
6514                                       vISABuilderMode Mode, LLVMContext &Ctx,
6515                                       BumpPtrAllocator &Alloc) {
6516   auto Platform = ST.getVisaPlatform();
6517   // Use SKL for unknown platforms
6518   if (Platform == TARGET_PLATFORM::GENX_NONE)
6519     Platform = TARGET_PLATFORM::GENX_SKL;
6520 
6521   // Prepare array of arguments for Builder API.
6522   StringSaver Saver{Alloc};
6523   SmallVector<const char *, 8> Argv =
6524       collectFinalizerArgs(Saver, ST, EmitDebugInformation, BC);
6525 
6526   if (PrintFinalizerOptions)
6527     dumpFinalizerArgs(Argv, ST.getCPU());
6528 
6529   // Special error processing here related to strange case where on Windows
6530   // machines only we had failures, reproducible only when shader dumps are
6531   // off. This code is to diagnose such cases simpler.
6532   VISABuilder *VB = nullptr;
6533   int Result = CreateVISABuilder(
6534       VB, Mode, EmitVisa ? VISA_BUILDER_VISA : VISA_BUILDER_BOTH, Platform,
6535       Argv.size(), Argv.data(), BC.getWATable());
6536   if (Result != 0 || VB == nullptr) {
6537     std::string Str;
6538     llvm::raw_string_ostream Os(Str);
6539     Os << "VISA builder creation failed\n";
6540     Os << "Mode: " << Mode << "\n";
6541     Os << "Args:\n";
6542     for (const char *Arg : Argv)
6543       Os << Arg << " ";
6544     Os << "Visa only: " << (EmitVisa ? "yes" : "no") << "\n";
6545     Os << "Platform: " << ST.getVisaPlatform() << "\n";
6546     DiagnosticInfoCisaBuild Err(Os.str(), DS_Error);
6547     Ctx.diagnose(Err);
6548   }
6549   return VB;
6550 }
6551 
InitCISABuilder()6552 void GenXModule::InitCISABuilder() {
6553   IGC_ASSERT(ST);
6554   const vISABuilderMode Mode = HasInlineAsm() ? vISA_ASM_WRITER : vISA_DEFAULT;
6555   CisaBuilder = createVISABuilder(*ST, *BC, EmitDebugInformation, Mode,
6556                                   getContext(), ArgStorage);
6557 }
6558 
GetCisaBuilder()6559 VISABuilder *GenXModule::GetCisaBuilder() {
6560   if (!CisaBuilder)
6561     InitCISABuilder();
6562   return CisaBuilder;
6563 }
6564 
DestroyCISABuilder()6565 void GenXModule::DestroyCISABuilder() {
6566   if (CisaBuilder) {
6567     CISA_CALL(DestroyVISABuilder(CisaBuilder));
6568     CisaBuilder = nullptr;
6569   }
6570 }
6571 
InitVISAAsmReader()6572 void GenXModule::InitVISAAsmReader() {
6573   IGC_ASSERT(ST);
6574   VISAAsmTextReader =
6575       createVISABuilder(*ST, *BC, EmitDebugInformation, vISA_ASM_READER,
6576                         getContext(), ArgStorage);
6577 }
6578 
GetVISAAsmReader()6579 VISABuilder *GenXModule::GetVISAAsmReader() {
6580   if (!VISAAsmTextReader)
6581     InitVISAAsmReader();
6582   return VISAAsmTextReader;
6583 }
6584 
DestroyVISAAsmReader()6585 void GenXModule::DestroyVISAAsmReader() {
6586   if (VISAAsmTextReader) {
6587     CISA_CALL(DestroyVISABuilder(VISAAsmTextReader));
6588     VISAAsmTextReader = nullptr;
6589   }
6590 }
6591