1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "Compiler/CISACodeGen/CISACodeGen.h"
12 #include "Compiler/CISACodeGen/CVariable.hpp"
13 #include "Compiler/CISACodeGen/PatternMatchPass.hpp"
14 #include "Compiler/CISACodeGen/helper.h"
15 #include "visa_wa.h"
16 #include "inc/common/sku_wa.h"
17 
18 namespace IGC
19 {
20     class CShader;
21 
22     struct SFlag
23     {
24         CVariable* var;
25         e_predMode mode;
26         bool invertFlag;
initIGC::SFlag27         void init()
28         {
29             var = NULL;
30             mode = EPRED_NORMAL;
31             invertFlag = false;
32         }
33     };
34 
35     struct SModifier
36     {
37         uint16_t subReg;
38         uint8_t subVar;
39         uint8_t region[3];
40         e_modifier mod;
41         e_instance instance;
42         bool specialRegion;
initIGC::SModifier43         void init()
44         {
45             mod = EMOD_NONE;
46             subVar = 0;
47             subReg = 0;
48             instance = EINSTANCE_UNSPECIFIED;
49             specialRegion = false;
50         }
51     };
52 
53     struct SAlias
54     {
55         CVariable* m_rootVar;
56         VISA_Type  m_type;
SAliasIGC::SAlias57         SAlias(CVariable* var, VISA_Type type) :
58             m_rootVar(var), m_type(type)
59         { }
60     };
61 
62     struct SAliasMapInfo {
getEmptyKeyIGC::SAliasMapInfo63         static inline SAlias getEmptyKey() { return SAlias(nullptr, ISA_TYPE_UD); }
getTombstoneKeyIGC::SAliasMapInfo64         static inline SAlias getTombstoneKey() { return SAlias(nullptr, ISA_TYPE_D); }
getHashValueIGC::SAliasMapInfo65         static unsigned getHashValue(const SAlias& Val) {
66             return llvm::DenseMapInfo<CVariable*>::getHashValue(Val.m_rootVar) ^ Val.m_type;
67         }
isEqualIGC::SAliasMapInfo68         static bool isEqual(const SAlias& LHS, const SAlias& RHS) {
69             return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type;
70         }
71     };
72 
73     /// Helps representing URB write channel masks in a way that provides type safety and adapts to
74     /// the channel mask format required by V-ISA interface.
75     class URBChannelMask
76     {
77     public:
URBChannelMask(unsigned int bitmask)78         explicit URBChannelMask(unsigned int bitmask) : m_bitmask(bitmask) {}
79 
80         /// Returns the size of bitmask,
81         /// defined as the position of the most significant bit with value 1.
82         /// E.g. size(10001) == 5, size(1) == 1 , size(1111) = 4
83         size_t size() const;
84 
85         /// Returns channel mask in the format expected by V-ISA.
86         /// If the mask is full (i.e. consists of all 1) the return value must be 0xFF
87         /// that means 'no channel mask'. In other cases it is the actual stored mask
88         /// E.g. 1010 asVISAMask --> 1010, 111 asVISAMask --> 11111111 (full mask case)
89         unsigned int asVISAMask() const;
90 
91         // returns true if all channels are set (i.e., we can skip the channel mask)
isAllSet() const92         bool isAllSet() const
93         {
94             return ((m_bitmask + 1) & m_bitmask) == 0;
95         }
96     private:
97         unsigned int m_bitmask;
98     };
99 
100     struct SEncoderState
101     {
102         SModifier m_srcOperand[4];
103         SModifier m_dstOperand;
104         SFlag     m_flag;
105         SIMDMode  m_simdSize;
106         SIMDMode  m_uniformSIMDSize;
107         e_mask    m_mask;
108         bool      m_noMask;
109         bool      m_SubSpanDestination;
110         bool      m_secondHalf;
111         bool      m_secondNibble = false;
112     };
113 
114     class CEncoder
115     {
116     public:
117         void InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall, VISAKernel* prevKernel);
118         void InitBuildParams(llvm::SmallVector<std::unique_ptr< char, std::function<void(char*)>>, 10> & params);
119         void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall, bool enableVISA_IR);
120         SEncoderState CopyEncoderState();
121         void SetEncoderState(SEncoderState& newState);
122         VISA_Align GetVISAAlign(CVariable* var);
123 
124         void SetDispatchSimdSize();
125         void SetSpillMemOffset();
126         void SetStackFunctionArgSize(uint size);  // size in GRFs
127         void SetStackFunctionRetSize(uint size);  // size in GRFs
128         void SetExternFunctionFlag();
129 
130         void GetVISAPredefinedVar(CVariable* pVar, PreDefined_Vars var);
131         void CreateVISAVar(CVariable* var);
132         void DeclareInput(CVariable* var, uint offset, uint instance);
133         void MarkAsOutput(CVariable* var);
134         void MarkAsPayloadLiveOut(CVariable* var);
135         void Compile(bool hasSymbolTable = false);
136         std::string GetShaderName();
137         void ReportCompilerStatistics(VISAKernel* pMainKernel, SProgramOutput* pOutput);
138         int GetThreadCount(SIMDMode simdMode);
139 
140         CEncoder();
141         ~CEncoder();
142         void SetProgram(CShader* program);
143         void Jump(CVariable* flag, uint label);
144         void Label(uint label);
145         uint GetNewLabelID(const CName &name);
146         void DwordAtomicRaw(AtomicOp atomic_op,
147             const ResourceDescriptor& bindingTableIndex,
148             CVariable* dst, CVariable* elem_offset, CVariable* src0,
149             CVariable* src1, bool is16Bit = false);
150         void AtomicRawA64(AtomicOp atomic_op, const ResourceDescriptor& resource, CVariable* dst,
151             CVariable* elem_offset, CVariable* src0, CVariable* src1,
152             unsigned short bitwidth);
153         void Cmp(e_predicate p, CVariable* dst, CVariable* src0, CVariable* src1);
154         void Select(CVariable* flag, CVariable* dst, CVariable* src0, CVariable* src1);
155         void GenericAlu(e_opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2 = nullptr);
156         void URBWrite(CVariable* src, const int payloadElementOffset, CVariable* offset, CVariable* urbHandle, CVariable* dynamicMask);
157         void Send(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor, bool isSendc = false);
158         void Send(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor, bool isSendc = false);
159         void Sends(CVariable* dst, CVariable* src0, CVariable* src1, uint ffid, CVariable* exDesc, CVariable* messDescriptor, bool isSendc = false, bool hasEOT = false);
160         void RenderTargetWrite(CVariable* var[],
161             bool isUndefined[],
162             bool lastRenderTarget,
163             bool isNullRT,
164             bool perSample,
165             bool coarseMode,
166             bool headerMaskFromCe0,
167             CVariable* bindingTableIndex,
168             CVariable* RTIndex,
169             CVariable* source0Alpha,
170             CVariable* oMask,
171             CVariable* depth,
172             CVariable* stencil,
173             CVariable* CPSCounter,
174             CVariable* sampleIndex,
175             CVariable* r1Reg);
176         void Sample(
177             EOPCODE subOpcode,
178             uint writeMask,
179             CVariable* offset,
180             const ResourceDescriptor& bindingTableIndex,
181             const SamplerDescriptor& SamplerIdx,
182             uint numSources,
183             CVariable* dst,
184             llvm::SmallVector<CVariable*, 4> & payload,
185             bool zeroLOD,
186             bool cpsEnable,
187             bool feedbackEnable,
188             bool nonUniformState = false);
189         void Load(
190             EOPCODE subOpcode,
191             uint writeMask,
192             CVariable* offset,
193             const ResourceDescriptor& resource,
194             uint numSources,
195             CVariable* dst,
196             llvm::SmallVector<CVariable*, 4> & payload,
197             bool zeroLOD,
198             bool feedbackEnable);
199 
200         void Info(EOPCODE subOpcode, uint writeMask, const ResourceDescriptor& resource, CVariable* lod, CVariable* dst);
201 
202         void Gather4Inst(
203             EOPCODE subOpcode,
204             CVariable* offset,
205             const ResourceDescriptor& resource,
206             const SamplerDescriptor& sampler,
207             uint numSources,
208             CVariable* dst,
209             llvm::SmallVector<CVariable*, 4> & payload,
210             uint channel,
211             bool feedbackEnable);
212 
213         void OWLoad(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, bool owordAligned, uint dstSize, uint dstOffset = 0);
214         void OWStore(CVariable* dst, e_predefSurface surfaceType, CVariable* bufidx, CVariable* offset, uint dstSize, uint srcOffset);
215 
216         void AddrAdd(CVariable* dst, CVariable* src0, CVariable* src1);
217         void Barrier(e_barrierKind BarrierKind);
218         void Fence(bool CommitEnable,
219             bool L3_Flush_RW_Data,
220             bool L3_Flush_Constant_Data,
221             bool L3_Flush_Texture_Data,
222             bool L3_Flush_Instructions,
223             bool Global_Mem_Fence,
224             bool L1_Flush,
225             bool SWFence);
226         void FlushSamplerCache();
227         void EOT();
228         void OWLoadA64(CVariable* dst, CVariable* offset, uint dstSize, uint dstOffset = 0);
229         void OWStoreA64(CVariable* dst, CVariable* offset, uint dstSize, uint srcOffset);
230         void MediaBlockMessage(ISA_Opcode subOpcode,
231             CVariable* dst,
232             e_predefSurface surfaceType,
233             CVariable* bufId,
234             CVariable* xOffset,
235             CVariable* yOffset,
236             uint modifier,
237             unsigned char blockWidth,
238             unsigned char blockHeight,
239             uint plane);
240         void GatherA64(CVariable* dst, CVariable* offset, unsigned elementSize, unsigned numElems);
241         void ScatterA64(CVariable* val, CVariable* offset, unsigned elementSize, unsigned numElems);
242         void ByteGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
243         void ByteScatter(CVariable* src, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
244         void Gather4Scaled(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset);
245         void Gather4ScaledNd(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned nd);
246         void Scatter4Scaled(CVariable* src, const ResourceDescriptor& resource, CVariable* offset);
247         void Gather4A64(CVariable* dst, CVariable* offset);
248         void Scatter4A64(CVariable* src, CVariable* offset);
249         void BoolToInt(CVariable* dst, CVariable* src);
250         void Copy(CVariable* dst, CVariable* src);
251         void SubroutineCall(CVariable* flag, llvm::Function* F);
252         void SubroutineRet(CVariable* flag, llvm::Function* F);
253         void StackCall(CVariable* flag, llvm::Function* F, unsigned char argSize, unsigned char retSize);
254         void IndirectStackCall(CVariable* flag, CVariable* funcPtr, unsigned char argSize, unsigned char retSize);
255         void StackRet(CVariable* flag);
256         void Loc(unsigned int line);
257         void File(std::string& s);
258         void PredAdd(CVariable* flag, CVariable* dst, CVariable* src0, CVariable* src1);
259         void DebugLinePlaceholder();
260 
261         inline void Jump(uint label);
262         inline void Cast(CVariable* dst, CVariable* src);
263         inline void Add(CVariable* dst, CVariable* src0, CVariable* src1);
264         inline void Bfi(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2, CVariable* src3);
265         inline void Bfe(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
266         inline void Bfrev(CVariable* dst, CVariable* src0);
267         inline void CBit(CVariable* dst, CVariable* src0);
268         inline void Fbh(CVariable* dst, CVariable* src0);
269         inline void Fbl(CVariable* dst, CVariable* src0);
270         inline void Mul(CVariable* dst, CVariable* src0, CVariable* src1);
271         inline void Pow(CVariable* dst, CVariable* src0, CVariable* src1);
272         inline void Div(CVariable* dst, CVariable* src0, CVariable* src1);
273         inline void Shl(CVariable* dst, CVariable* src0, CVariable* src1);
274         inline void Shr(CVariable* dst, CVariable* src0, CVariable* src1);
275         inline void MulH(CVariable* dst, CVariable* src0, CVariable* src1);
276         inline void Cos(CVariable* dst, CVariable* src0);
277         inline void Sin(CVariable* dst, CVariable* src0);
278         inline void Log(CVariable* dst, CVariable* src0);
279         inline void Exp(CVariable* dst, CVariable* src0);
280         inline void Frc(CVariable* dst, CVariable* src0);
281         inline void Sqrt(CVariable* dst, CVariable* src0);
282         inline void Floor(CVariable* dst, CVariable* src0);
283         inline void Ceil(CVariable* dst, CVariable* src0);
284         inline void Ctlz(CVariable* dst, CVariable* src0);
285         inline void Truncate(CVariable* dst, CVariable* src0);
286         inline void RoundNE(CVariable* dst, CVariable* src0);
287         inline void Mod(CVariable* dst, CVariable* src0, CVariable* src1);
288         inline void Rsqrt(CVariable* dst, CVariable* src0);
289         inline void Inv(CVariable* dst, CVariable* src0);
290         inline void Not(CVariable* dst, CVariable* src0);
291         // src0 * src1 + src2
292         inline void Madw(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
293         inline void Mad(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
294         inline void Lrp(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
295         inline void Xor(CVariable* dst, CVariable* src0, CVariable* src1);
296         inline void Or(CVariable* dst, CVariable* src0, CVariable* src1);
297         inline void And(CVariable* dst, CVariable* src0, CVariable* src1);
298         inline void Pln(CVariable* dst, CVariable* src0, CVariable* src1);
299         inline void SendC(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor);
300         inline void SendC(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor);
301         inline void LoadMS(EOPCODE subOpcode, uint writeMask, CVariable* offset, const ResourceDescriptor& resource, uint numSources, CVariable* dst, llvm::SmallVector<CVariable*, 4> & payload, bool feedbackEnable);
302         inline void SetP(CVariable* dst, CVariable* src);
303         inline void Gather(CVariable* dst, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize);
304         inline void TypedRead4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD, CVariable* pDst, uint writeMask);
305         inline void TypedWrite4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD, CVariable* pSrc, uint writeMask);
306         inline void Scatter(CVariable* val, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize);
307         inline void IShr(CVariable* dst, CVariable* src0, CVariable* src1);
308         inline void Min(CVariable* dst, CVariable* src0, CVariable* src1);
309         inline void Max(CVariable* dst, CVariable* src0, CVariable* src1);
310         inline void UAddC(CVariable* dst, CVariable* src0, CVariable* src1);
311         inline void USubB(CVariable* dst, CVariable* src0, CVariable* src1);
312         inline void IEEESqrt(CVariable* dst, CVariable* src0);
313         inline void IEEEDivide(CVariable* dst, CVariable* src0, CVariable* src1);
314         void AddPair(CVariable* Lo, CVariable* Hi, CVariable* L0, CVariable* H0, CVariable* L1, CVariable* H1 = nullptr);
315         void SubPair(CVariable* Lo, CVariable* Hi, CVariable* L0, CVariable* H0, CVariable* L1, CVariable* H1);
316         inline void dp4a(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
317         void Lifetime(VISAVarLifetime StartOrEnd, CVariable* dst);
318         void dpas(CVariable* dst, CVariable* input, CVariable* weight, PrecisionType weight_precision,
319             CVariable* actication, PrecisionType activation_precision, uint8_t systolicDepth,
320             uint8_t repeatCount, bool IsDpasw);
321         void fcvt(CVariable* dst, CVariable* src);
322         void Bfn(uint8_t booleanFuncCtrl, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2);
323         void QWGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
324         void QWScatter(CVariable* src, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems);
325         // VME
326         void SendVmeIme(
327             CVariable* bindingTableIndex,
328             unsigned char streamMode,
329             unsigned char searchControlMode,
330             CVariable* uniInputVar,
331             CVariable* imeInputVar,
332             CVariable* ref0Var,
333             CVariable* ref1Var,
334             CVariable* costCenterVar,
335             CVariable* outputVar);
336 
337         void SendVmeFbr(
338             CVariable* bindingTableIndex,
339             CVariable* uniInputVar,
340             CVariable* fbrInputVar,
341             CVariable* FBRMbModeVar,
342             CVariable* FBRSubMbShapeVar,
343             CVariable* FBRSubPredModeVar,
344             CVariable* outputVar);
345 
346         void SendVmeSic(
347             CVariable* bindingTableIndex,
348             CVariable* uniInputVar,
349             CVariable* sicInputVar,
350             CVariable* outputVar);
351 
352         // VA
353         void SendVideoAnalytic(
354             llvm::GenIntrinsicInst* inst,
355             CVariable* vaResult,
356             CVariable* coords,
357             CVariable* size,
358             CVariable* srcImg,
359             CVariable* sampler);
360 
361         void SetDstSubVar(uint subVar);
362         void SetDstSubReg(uint subReg);
363         void SetSrcSubVar(uint srcNum, uint subVar);
364         void SetSrcSubReg(uint srcNum, uint subReg);
365         void SetDstModifier(e_modifier mod);
366         void SetDstModifier(const DstModifier& modifier);
367         void SetSrcModifier(uint srcNum, e_modifier mod);
368         void SetPredicate(CVariable* flag);
369         void SetInversePredicate(bool inv);
370         void SetPredicateMode(e_predMode mode);
371         void SetSrcRegion(uint srcNum, uint vStride, uint width, uint hStride, e_instance instance = EINSTANCE_UNSPECIFIED);
372         void SetDstRegion(uint hStride);
373         inline void SetNoMask();
374         inline void SetMask(e_mask mask);
375         inline void SetSimdSize(SIMDMode size);
376         inline SIMDMode GetSimdSize();
377         inline void SetUniformSIMDSize(SIMDMode size);
378         inline void SetSubSpanDestination(bool subspan);
379         inline bool IsSubSpanDestination();
380         inline void SetSecondHalf(bool secondHalf);
381         inline bool IsSecondHalf();
382         inline void SetSecondNibble(bool secondNibble);
383         inline bool IsSecondNibble();
384 
385         inline void SetIsCodePatchCandidate(bool v);
386         inline bool IsCodePatchCandidate();
387         inline unsigned int GetPayloadEnd();
388         inline void SetPayloadEnd(unsigned int payloadEnd);
389         inline void SetHasPrevKernel(bool v);
390         inline bool HasPrevKernel();
391         inline void BeginForcedNoMaskRegion();
392         inline void EndForcedNoMaskRegion();
393 
394         void Wait();
395 
GetVISAKernel() const396         VISAKernel* GetVISAKernel() const { return vKernel; }
GetVISABuilder() const397         VISABuilder* GetVISABuilder() const { return vbuilder; }
398         void Init();
399         void Push();
400 
401         void initCR(VISAKernel* vKernel);
402         void SetVectorMask(bool vMask);
403 
404         // Switches from actualRM to newRM
405         void SetRoundingMode_FP(ERoundingMode actualRM, ERoundingMode newRM);
406         void SetRoundingMode_FPCvtInt(ERoundingMode actualRM, ERoundingMode newRM);
407 
GetCISADataTypeSize(VISA_Type type)408         static uint GetCISADataTypeSize(VISA_Type type) {return CVariable::GetCISADataTypeSize(type);}
GetCISADataTypeAlignment(VISA_Type type)409         static e_alignment GetCISADataTypeAlignment(VISA_Type type) {return CVariable::GetCISADataTypeAlignment(type);}
410 
411         static VISASampler3DSubOpCode ConvertSubOpcode(EOPCODE subOpcode, bool zeroLOD);
412 
413         // Wrappers for (potentially) common queries on types
414         static bool IsIntegerType(VISA_Type type);
415         static bool IsFloatType(VISA_Type type);
416 
417         void SetVISAWaTable(WA_TABLE const& waTable);
418 
419         /// \brief Initialize per function states and starts vISA emission
420         /// as a vISA subroutine
421         void BeginSubroutine(llvm::Function* F);
422         /// \brief Initialize per function states and starts vISA emission
423         /// as a vISA stack-call function
424         void BeginStackFunction(llvm::Function* F);
425         /// \brief Initialize interpolation section for vISA emission
426         void BeginPayloadSection();
427 
428         void DestroyVISABuilder();
429 
430         void AddVISASymbol(std::string& symName, CVariable* cvar);
431 
432         std::string GetVariableName(CVariable* var);
433         std::string GetDumpFileName(std::string extension);
434 
IsPayloadSectionAsPrimary()435         bool IsPayloadSectionAsPrimary()    {return vKernel == vPayloadSection;}
SetPayloadSectionAsPrimary()436         void SetPayloadSectionAsPrimary()   {vKernelTmp = vKernel; vKernel = vPayloadSection;}
SetPayloadSectionAsSecondary()437         void SetPayloadSectionAsSecondary() {vKernel = vKernelTmp;}
438 
439         std::string GetUniqueInlineAsmLabel();
440 
441     private:
442         // helper functions
443         VISA_VectorOpnd* GetSourceOperand(CVariable* var, const SModifier& mod);
444         VISA_VectorOpnd* GetSourceOperandNoModifier(CVariable* var);
445         VISA_VectorOpnd* GetDestinationOperand(CVariable* var, const SModifier& mod);
446         VISA_RawOpnd* GetRawSource(CVariable* var, uint offset = 0);
447         VISA_RawOpnd* GetRawDestination(CVariable* var, unsigned offset = 0);
448         VISA_PredOpnd* GetFlagOperand(const SFlag& flag);
449         VISA_StateOpndHandle* GetVISASurfaceOpnd(e_predefSurface surfaceType, CVariable* bti);
450         VISA_StateOpndHandle* GetVISASurfaceOpnd(const ResourceDescriptor& resource);
451         VISA_LabelOpnd* GetLabel(uint label);
452         VISA_LabelOpnd* GetFuncLabel(llvm::Function* F);
453         void InitLabelMap(const llvm::Function* F);
454         CName CreateVisaLabelName(const llvm::StringRef &L = "");
455         std::string CreateShortLabel(unsigned labelIndex) const;
456         // Compiler labels must start with something a user won't use in inline
457         // assembly.
GetCompilerLabelPrefix()458         static const char *GetCompilerLabelPrefix() {return "_";}
459 
460         VISAFunction* GetStackFunction(llvm::Function* F);
461 
462         VISA_VectorOpnd* GetUniformSource(CVariable* var);
463         VISA_StateOpndHandle* GetBTIOperand(uint bindingTableIndex);
464         VISA_StateOpndHandle* GetSamplerOperand(CVariable* sampleIdx);
465         VISA_StateOpndHandle* GetSamplerOperand(const SamplerDescriptor& sampler, bool& isIdxLT16);
466         void GetRowAndColOffset(CVariable* var, unsigned int subVar, unsigned int subreg, unsigned char& rowOff, unsigned char& colOff);
467 
468         VISA_GenVar* GetVISAVariable(CVariable* var);
469         VISA_GenVar* GetVISAVariable(CVariable* var, e_instance instance);
470         VISA_EMask_Ctrl ConvertMaskToVisaType(e_mask mask, bool noMask);
471 
472         // Generic encoding functions
473         void MinMax(CISA_MIN_MAX_SUB_OPCODE subopcode, CVariable* dst, CVariable* src0, CVariable* src1);
474         void DataMov(ISA_Opcode opcode, CVariable* dst, CVariable* src);
475         void LogicOp(
476             ISA_Opcode opcode,
477             CVariable* dst,
478             CVariable* src0,
479             CVariable* src1 = nullptr,
480             CVariable* src2 = nullptr,
481             CVariable* src3 = nullptr);
482         void Arithmetic(
483             ISA_Opcode opcode,
484             CVariable* dst,
485             CVariable* src0 = nullptr,
486             CVariable* src1 = nullptr,
487             CVariable* src2 = nullptr);
488         void CarryBorrowArith(ISA_Opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1);
489         void ScatterGather(
490             ISA_Opcode opcode,
491             CVariable* srcdst,
492             CVariable* bufId,
493             CVariable* offset,
494             CVariable* gOffset,
495             e_predefSurface surface,
496             int elementSize);
497         void TypedReadWrite(
498             ISA_Opcode opcode,
499             const ResourceDescriptor& resource,
500             CVariable* pU,
501             CVariable* pV,
502             CVariable* pR,
503             CVariable* pLOD,
504             CVariable* pSrcDst,
505             uint writeMask);
506 
507         VISA_Exec_Size  GetAluExecSize(CVariable* dst) const;
508         VISA_EMask_Ctrl GetAluEMask(CVariable* dst);
509         bool IsSat();
510 
511         // Variable splitting facilities (if crosses 2 GRF boundary).
512         bool NeedSplitting(CVariable* var, const SModifier& mod,
513             unsigned& numParts, bool isSource = false) const;
514         SModifier SplitVariable(VISA_Exec_Size fromExecSize,
515             VISA_Exec_Size toExecSize,
516             unsigned thePart,
517             CVariable* var, const SModifier& mod,
518             bool isSource = false) const;
519         VISA_Exec_Size SplitExecSize(VISA_Exec_Size fromExecSize,
520             unsigned numParts) const;
521         VISA_EMask_Ctrl SplitEMask(VISA_Exec_Size fromExecSize,
522             VISA_Exec_Size toExecSize,
523             unsigned thePart,
524             VISA_EMask_Ctrl execMask) const;
525 
526         // Split SIMD16 message data payload(MDP) for scattered/untyped write
527         // messages into two SIMD8 MDPs : V0 and V1.
528         void SplitPayloadToLowerSIMD(CVariable* MDP, uint32_t MDPOfst, uint32_t NumBlks, CVariable* V0, CVariable* V1, uint32_t fromSize = 16);
529         // Merge two SIMD8 MDPs (V0 & V1) for scattered/untyped read messages into one SIMD16 message : MDP
530         void MergePayloadToHigherSIMD(CVariable* V0, CVariable* V1, uint32_t NumBlks, CVariable* MDP, uint32_t MDPOfst, uint32_t toSize = 16);
531 
532         // save compile time by avoiding retry if the amount of spill is (very) small
533         bool AvoidRetryOnSmallSpill() const;
534 
        // CreateSymbolTable, CreateRelocationTable and CreateFuncAttributeTable create symbols, relocations and FuncAttributes in
        // two formats: one is written into the given buffer and later parsed as the patch-token-based format, the other is a struct
        // type that is later parsed as the ZE binary format.
538 
539         // CreateSymbolTable
540         // Note that this function should be called only once even if there are multiple kernels in a program. Current IGC
541         // flow will create all symbols in the first kernel and all the other kernels won't contain symbols
542         typedef std::vector<std::pair<llvm::Value*, vISA::GenSymEntry>> ValueToSymbolList;
543         void CreateSymbolTable(ValueToSymbolList& symbolTableList);
544         // input/output: buffer, bufferSize, tableEntries: for patch-token-based format.
545         void CreateSymbolTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
        // input/output: symbols: for ZEBinary format
547         void CreateSymbolTable(SProgramOutput::ZEBinFuncSymbolTable& funcSyms, SOpenCLProgramInfo::ZEBinProgramSymbolTable& programSyms);
        // Create function symbols for kernels. This is for the ZEBinary format only.
549         void CreateKernelSymbol(const std::string& kernelName, unsigned offset, unsigned size,
550             SProgramOutput::ZEBinFuncSymbolTable& symbols);
551 
552         // CreateRelocationTable
553         // input/output: buffer, bufferSize, tableEntries: for patch-token-based format.
554         void CreateRelocationTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
        // input/output: relocations: for ZEBinary format
556         void CreateRelocationTable(SProgramOutput::RelocListTy& relocations);
557 
558         // CreateFuncAttributeTable
559         void CreateFuncAttributeTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries, SProgramOutput::FuncAttrListTy& attrs);
560 
561         uint32_t getGRFSize() const;
562 
needsSplitting(VISA_Exec_Size ExecSize) const563         bool needsSplitting(VISA_Exec_Size ExecSize) const
564         {
565             return ExecSize == EXEC_SIZE_16;
566         }
567 
568         // Note that GEN can set both fpCvtInt_rtz and any of FP rounding modes
569         // at the same time. If fpCvtInt uses a rounding mode other than rtz,
570         // they both uses FP rounding bits.
571         //
572         // RM bits in CR0.0.
573         //    float RM bits: [5:4];
574         //    int RM (float -> int): Bit 12: 0 -> rtz; 1 -> using Float RM
575         enum RMEncoding {
576             // float rounding mode (fp operations, cvt to fp)
577             RoundToNearestEven = 0x00,
578             RoundToPositive = 0x10,
579             RoundToNegative = 0x20,
580             RoundToZero = 0x30,
581             // int rounding mode (fp cvt int only), use FP RM for all rounding modes but rtz.
582             RoundToNearestEven_int = 0x1000,
583             RoundToPositive_int = 0x1010,
584             RoundToNegative_int = 0x1020,
585             RoundToZero_int_unused = 0x1030,
586             RoundToZero_int = 0x0000,    // use this for rtz, bit 12 = 0
587 
588             IntAndFPRoundingModeMask = 0x1030
589         };
590         void SetRoundingMode(RMEncoding actualRM, RMEncoding newRM);
591         // Get Encoding bit values for rounding mode
592         RMEncoding getEncoderRoundingMode_FP(ERoundingMode FP_RM);
593         RMEncoding getEncoderRoundingMode_FPCvtInt(ERoundingMode FCvtI_RM);
594 
595         unsigned GetRawOpndSplitOffset(VISA_Exec_Size fromExecSize,
596             VISA_Exec_Size toExecSize,
597             unsigned thePart, CVariable* var) const;
598 
599         std::tuple<CVariable*, uint32_t> splitRawOperand(CVariable* var, bool isFirstHalf, VISA_EMask_Ctrl execMask);
600 
601         uint32_t getNumChannels(CVariable* var) const;
602 
603         void SaveOption(vISAOptions option, bool val);
604         void SaveOption(vISAOptions option, uint32_t val);
605         void SaveOption(vISAOptions option, const char* val);
606         void SetBuilderOptions(VISABuilder* pbuilder);
607 
608     protected:
        // encoder states
        // The inline setters/getters at the end of this header read and write
        // fields of this object (m_noMask, m_mask, m_simdSize, ...).
        SEncoderState m_encoderState;

        // Maps an alias key (root variable + vISA type) to a CVariable.
        // NOTE(review): presumably a cache of already-created alias variables — confirm in the .cpp.
        llvm::DenseMap<SAlias, CVariable*, SAliasMapInfo> m_aliasesMap;

        // vISA needs its own Wa-table as some of the W/A are applicable
        // only to certain APIs/shader types/reg key settings/etc.
        WA_TABLE m_vISAWaTable;
617 
        // Kind tag stored in OptionValue::type.
        // NOTE(review): the tags presumably correspond to the vBool / vInt32 / vCstr
        // fields of OptionValue, in that order — confirm against SaveOption.
        enum OpType
        {
            ET_BOOL,
            ET_INT32,
            ET_CSTR
        };
        // Value of a saved vISA option: a kind tag plus one field per supported
        // value kind (bool, 32-bit integer, C string).
        // NOTE(review): presumably only the field matching `type` is meaningful — confirm.
        struct OptionValue
        {
            OpType type;
            bool vBool;
            uint32_t vInt32;
            // Non-owning as far as this declaration shows; lifetime is managed elsewhere.
            const char* vCstr;
        };
        // List of vISA user options, kept as (option, value) pairs in insertion order.
        std::vector<std::pair<vISAOptions, OptionValue>> m_visaUserOptions;
633 
        // Typically IGC uses just one vKernel for every vISA::compile call;
        // in those cases, vKernel and vMainKernel should be the same.
        // Only when using stack-call does the vKernel pointer change every time
        // IGC adds a vISA kernel or function object, while vMainKernel
        // always points to the first kernel added during InitEncoder.
        VISAKernel* vKernel;
        VISAKernel* vMainKernel;
        // vISA builder objects.
        // NOTE(review): vAsmTextBuilder is presumably the builder used for textual
        // vISA asm input — confirm in the .cpp.
        VISABuilder* vbuilder;
        VISABuilder* vAsmTextBuilder;
643 
        // This is for CodePatch to split payload interpolation from a shader
        VISAKernel* vPayloadSection;
        // NOTE(review): presumably a scratch kernel pointer used while the payload
        // section is being emitted — confirm in the .cpp.
        VISAKernel* vKernelTmp;
        // Accessed through SetHasPrevKernel() / HasPrevKernel().
        bool m_hasPrevKernel = false;
        // Accessed through SetPayloadEnd() / GetPayloadEnd().
        unsigned int m_payloadEnd = 0;

        // Accessed through SetIsCodePatchCandidate() / IsCodePatchCandidate().
        bool m_isCodePatchCandidate = false;
651 
        // Nesting depth of forced-NoMask regions; incremented by
        // BeginForcedNoMaskRegion() and decremented by EndForcedNoMaskRegion().
        int m_nestLevelForcedNoMaskRegion = 0;

        // No in-class initializers here; these are set outside this header.
        bool m_enableVISAdump;
        bool m_hasInlineAsm;
656 
        // Label operands and, index for index, their names.
        std::vector<VISA_LabelOpnd*> labelMap;
        std::vector<CName> labelNameMap; // parallel to labelMap

        /// Per kernel label counter
        unsigned labelCounter = 0;
        /// Per kernel label counter for each inline asm block
        unsigned labelInlineAsmCounter = 0;
        /// Each kernel might emit several functions;
        /// we pre-increment this for each new function we process (InitLabelMap)
        /// The first function will see 0, ...
        /// (hence the initial value of (unsigned)-1, which wraps to 0 on the first increment)
        unsigned labelFunctionIndex = (unsigned)-1;
        ///
        /// The name of the current function; set if we are emitting labels
        CName currFunctionName;
671 
        /// Keep a map between a function and its label, per kernel state.
        llvm::SmallDenseMap<llvm::Function*, VISA_LabelOpnd*> funcLabelMap;
        /// Keep a map between a stack-called function and the corresponding vISA function
        /// object.
        llvm::SmallDenseMap<llvm::Function*, VISAFunction*> stackFuncMap;
676 
        // dummy variables
        // NOTE(review): presumably placeholder surface/sampler operands for
        // instructions that require one — confirm in the .cpp.
        VISA_SurfaceVar* dummySurface;
        VISA_SamplerVar* samplervar;

        // The shader this encoder belongs to (CShader is only forward-declared in this header).
        CShader* m_program;
682 
        // Keep a map between a function and its per-function attributes needed for function pointer support
        struct FuncAttrib
        {
            // Set by InitFuncAttribute().
            bool isKernel = false;
            // Set by SetFunctionHasBarrier().
            bool hasBarrier = false;
            // Running maximum maintained by SetFunctionMaxArgumentStackSize().
            unsigned argumentStackSize = 0;
            // Overwritten by SetFunctionAllocaStackSize().
            unsigned allocaStackSize = 0;
        };
        // Entries are created by InitFuncAttribute(); the other setters only
        // update functions that already have an entry.
        llvm::SmallDenseMap<llvm::Function*, FuncAttrib> funcAttributeMap;
692 
693     public:
694         // Used by EmitVISAPass to set function attributes
InitFuncAttribute(llvm::Function * F,bool isKernel=false)695         void InitFuncAttribute(llvm::Function* F, bool isKernel = false) {
696             funcAttributeMap[F].isKernel = isKernel;
697         }
SetFunctionHasBarrier(llvm::Function * F)698         void SetFunctionHasBarrier(llvm::Function* F) {
699             if (funcAttributeMap.find(F) != funcAttributeMap.end())
700                 funcAttributeMap[F].hasBarrier = true;
701         }
SetFunctionMaxArgumentStackSize(llvm::Function * F,unsigned size)702         void SetFunctionMaxArgumentStackSize(llvm::Function* F, unsigned size) {
703             if (funcAttributeMap.find(F) != funcAttributeMap.end())
704                 funcAttributeMap[F].argumentStackSize = MAX(funcAttributeMap[F].argumentStackSize, size);
705         }
SetFunctionAllocaStackSize(llvm::Function * F,unsigned size)706         void SetFunctionAllocaStackSize(llvm::Function* F, unsigned size) {
707             if (funcAttributeMap.find(F) != funcAttributeMap.end())
708                 funcAttributeMap[F].allocaStackSize = size;
709         }
710     };
711 
Jump(uint label)712     inline void CEncoder::Jump(uint label)
713     {
714         Jump(NULL, label);
715     }
716 
Bfi(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2,CVariable * src3)717     inline void CEncoder::Bfi(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2, CVariable* src3)
718     {
719         LogicOp(ISA_BFI, dst, src0, src1, src2, src3);
720     }
721 
Bfe(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)722     inline void CEncoder::Bfe(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
723     {
724         LogicOp(ISA_BFE, dst, src0, src1, src2);
725     }
726 
Bfrev(CVariable * dst,CVariable * src0)727     inline void CEncoder::Bfrev(CVariable* dst, CVariable* src0)
728     {
729         LogicOp(ISA_BFREV, dst, src0);
730     }
731 
CBit(CVariable * dst,CVariable * src)732     inline void CEncoder::CBit(CVariable* dst, CVariable* src)
733     {
734         LogicOp(ISA_CBIT, dst, src);
735     }
736 
Fbh(CVariable * dst,CVariable * src)737     inline void CEncoder::Fbh(CVariable* dst, CVariable* src)
738     {
739         LogicOp(ISA_FBH, dst, src);
740     }
741 
Fbl(CVariable * dst,CVariable * src)742     inline void CEncoder::Fbl(CVariable* dst, CVariable* src)
743     {
744         LogicOp(ISA_FBL, dst, src);
745     }
746 
Mul(CVariable * dst,CVariable * src0,CVariable * src1)747     inline void CEncoder::Mul(CVariable* dst, CVariable* src0, CVariable* src1)
748     {
749         Arithmetic(ISA_MUL, dst, src0, src1);
750     }
751 
Pow(CVariable * dst,CVariable * src0,CVariable * src1)752     inline void CEncoder::Pow(CVariable* dst, CVariable* src0, CVariable* src1)
753     {
754         Arithmetic(ISA_POW, dst, src0, src1);
755     }
756 
Div(CVariable * dst,CVariable * src0,CVariable * src1)757     inline void CEncoder::Div(CVariable* dst, CVariable* src0, CVariable* src1)
758     {
759         Arithmetic(ISA_DIV, dst, src0, src1);
760     }
761 
Add(CVariable * dst,CVariable * src0,CVariable * src1)762     inline void CEncoder::Add(CVariable* dst, CVariable* src0, CVariable* src1)
763     {
764         Arithmetic(ISA_ADD, dst, src0, src1);
765     }
766 
Shl(CVariable * dst,CVariable * src0,CVariable * src1)767     inline void CEncoder::Shl(CVariable* dst, CVariable* src0, CVariable* src1)
768     {
769         LogicOp(ISA_SHL, dst, src0, src1);
770     }
771 
IShr(CVariable * dst,CVariable * src0,CVariable * src1)772     inline void CEncoder::IShr(CVariable* dst, CVariable* src0, CVariable* src1)
773     {
774         LogicOp(ISA_ASR, dst, src0, src1);
775     }
776 
Shr(CVariable * dst,CVariable * src0,CVariable * src1)777     inline void CEncoder::Shr(CVariable* dst, CVariable* src0, CVariable* src1)
778     {
779         LogicOp(ISA_SHR, dst, src0, src1);
780     }
781 
MulH(CVariable * dst,CVariable * src0,CVariable * src1)782     inline void CEncoder::MulH(CVariable* dst, CVariable* src0, CVariable* src1)
783     {
784         Arithmetic(ISA_MULH, dst, src0, src1);
785     }
786 
Cos(CVariable * dst,CVariable * src0)787     inline void CEncoder::Cos(CVariable* dst, CVariable* src0)
788     {
789         Arithmetic(ISA_COS, dst, src0);
790     }
791 
Sin(CVariable * dst,CVariable * src0)792     inline void CEncoder::Sin(CVariable* dst, CVariable* src0)
793     {
794         Arithmetic(ISA_SIN, dst, src0);
795     }
796 
Log(CVariable * dst,CVariable * src0)797     inline void CEncoder::Log(CVariable* dst, CVariable* src0)
798     {
799         Arithmetic(ISA_LOG, dst, src0);
800     }
801 
Exp(CVariable * dst,CVariable * src0)802     inline void CEncoder::Exp(CVariable* dst, CVariable* src0)
803     {
804         Arithmetic(ISA_EXP, dst, src0);
805     }
806 
Sqrt(CVariable * dst,CVariable * src0)807     inline void CEncoder::Sqrt(CVariable* dst, CVariable* src0)
808     {
809         Arithmetic(ISA_SQRT, dst, src0);
810     }
811 
Floor(CVariable * dst,CVariable * src0)812     inline void CEncoder::Floor(CVariable* dst, CVariable* src0)
813     {
814         Arithmetic(ISA_RNDD, dst, src0);
815     }
816 
Ceil(CVariable * dst,CVariable * src0)817     inline void CEncoder::Ceil(CVariable* dst, CVariable* src0)
818     {
819         Arithmetic(ISA_RNDU, dst, src0);
820     }
821 
Ctlz(CVariable * dst,CVariable * src0)822     inline void CEncoder::Ctlz(CVariable* dst, CVariable* src0)
823     {
824         Arithmetic(ISA_LZD, dst, src0);
825     }
826 
Truncate(CVariable * dst,CVariable * src0)827     inline void CEncoder::Truncate(CVariable* dst, CVariable* src0)
828     {
829         Arithmetic(ISA_RNDZ, dst, src0);
830     }
831 
RoundNE(CVariable * dst,CVariable * src0)832     inline void CEncoder::RoundNE(CVariable* dst, CVariable* src0)
833     {
834         Arithmetic(ISA_RNDE, dst, src0);
835     }
836 
Mod(CVariable * dst,CVariable * src0,CVariable * src1)837     inline void CEncoder::Mod(CVariable* dst, CVariable* src0, CVariable* src1)
838     {
839         Arithmetic(ISA_MOD, dst, src0, src1);
840     }
841 
Rsqrt(CVariable * dst,CVariable * src0)842     inline void CEncoder::Rsqrt(CVariable* dst, CVariable* src0)
843     {
844         Arithmetic(ISA_RSQRT, dst, src0);
845     }
846 
Inv(CVariable * dst,CVariable * src0)847     inline void CEncoder::Inv(CVariable* dst, CVariable* src0)
848     {
849         Arithmetic(ISA_INV, dst, src0);
850     }
851 
Not(CVariable * dst,CVariable * src0)852     inline void CEncoder::Not(CVariable* dst, CVariable* src0)
853     {
854         Arithmetic(ISA_NOT, dst, src0);
855     }
856 
Frc(CVariable * dst,CVariable * src0)857     inline void CEncoder::Frc(CVariable* dst, CVariable* src0)
858     {
859         Arithmetic(ISA_FRC, dst, src0);
860     }
861 
Pln(CVariable * dst,CVariable * src0,CVariable * src1)862     inline void CEncoder::Pln(CVariable* dst, CVariable* src0, CVariable* src1)
863     {
864         Arithmetic(ISA_PLANE, dst, src0, src1);
865     }
866 
Cast(CVariable * dst,CVariable * src)867     inline void CEncoder::Cast(CVariable* dst, CVariable* src)
868     {
869         DataMov(ISA_MOV, dst, src);
870     }
871 
872     // src0 * src1 + src2
Madw(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)873     inline void CEncoder::Madw(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
874     {
875         Arithmetic(ISA_MADW, dst, src0, src1, src2);
876     }
877 
878     // src0 * src1 + src2
Mad(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)879     inline void CEncoder::Mad(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
880     {
881         Arithmetic(ISA_MAD, dst, src0, src1, src2);
882     }
883 
Lrp(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)884     inline void CEncoder::Lrp(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2)
885     {
886         Arithmetic(ISA_LRP, dst, src0, src1, src2);
887     }
888 
Xor(CVariable * dst,CVariable * src0,CVariable * src1)889     inline void CEncoder::Xor(CVariable* dst, CVariable* src0, CVariable* src1)
890     {
891         LogicOp(ISA_XOR, dst, src0, src1);
892     }
893 
Or(CVariable * dst,CVariable * src0,CVariable * src1)894     inline void CEncoder::Or(CVariable* dst, CVariable* src0, CVariable* src1)
895     {
896         LogicOp(ISA_OR, dst, src0, src1);
897     }
898 
And(CVariable * dst,CVariable * src0,CVariable * src1)899     inline void CEncoder::And(CVariable* dst, CVariable* src0, CVariable* src1)
900     {
901         LogicOp(ISA_AND, dst, src0, src1);
902     }
903 
SetP(CVariable * dst,CVariable * src0)904     inline void CEncoder::SetP(CVariable* dst, CVariable* src0)
905     {
906         // We always need no mask when doing a set predicate
907         m_encoderState.m_noMask = true;
908         DataMov(ISA_SETP, dst, src0);
909     }
910 
Min(CVariable * dst,CVariable * src0,CVariable * src1)911     inline void CEncoder::Min(CVariable* dst, CVariable* src0, CVariable* src1)
912     {
913         MinMax(CISA_DM_FMIN, dst, src0, src1);
914     }
915 
Max(CVariable * dst,CVariable * src0,CVariable * src1)916     inline void CEncoder::Max(CVariable* dst, CVariable* src0, CVariable* src1)
917     {
918         MinMax(CISA_DM_FMAX, dst, src0, src1);
919     }
920 
UAddC(CVariable * dst,CVariable * src0,CVariable * src1)921     inline void CEncoder::UAddC(CVariable* dst, CVariable* src0, CVariable* src1)
922     {
923         CarryBorrowArith(ISA_ADDC, dst, src0, src1);
924     }
925 
USubB(CVariable * dst,CVariable * src0,CVariable * src1)926     inline void CEncoder::USubB(CVariable* dst, CVariable* src0, CVariable* src1)
927     {
928         CarryBorrowArith(ISA_SUBB, dst, src0, src1);
929     }
930 
LoadMS(EOPCODE subOpcode,uint writeMask,CVariable * offset,const ResourceDescriptor & resource,uint numSources,CVariable * dst,llvm::SmallVector<CVariable *,4> & payload,bool feedbackEnable)931     inline void CEncoder::LoadMS(EOPCODE subOpcode, uint writeMask, CVariable* offset,
932         const ResourceDescriptor& resource, uint numSources, CVariable* dst,
933         llvm::SmallVector<CVariable*, 4> & payload, bool feedbackEnable)
934     {
935         Load(
936             subOpcode,
937             writeMask,
938             offset,
939             resource,
940             numSources,
941             dst,
942             payload,
943             false,
944             feedbackEnable);
945     }
946 
Gather(CVariable * dst,CVariable * bufId,CVariable * offset,CVariable * gOffset,e_predefSurface surface,int elementSize)947     inline void CEncoder::Gather(CVariable* dst, CVariable* bufId, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize)
948     {
949         ScatterGather(ISA_GATHER, dst, bufId, offset, gOffset, surface, elementSize);
950     }
951 
TypedRead4(const ResourceDescriptor & resource,CVariable * pU,CVariable * pV,CVariable * pR,CVariable * pLOD,CVariable * pDst,uint writeMask)952     inline void CEncoder::TypedRead4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV,
953         CVariable* pR, CVariable* pLOD, CVariable* pDst, uint writeMask)
954     {
955         TypedReadWrite(ISA_GATHER4_TYPED, resource, pU, pV, pR, pLOD, pDst, writeMask);
956     }
957 
TypedWrite4(const ResourceDescriptor & resource,CVariable * pU,CVariable * pV,CVariable * pR,CVariable * pLOD,CVariable * pSrc,uint writeMask)958     inline void CEncoder::TypedWrite4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV,
959         CVariable* pR, CVariable* pLOD, CVariable* pSrc, uint writeMask)
960     {
961         TypedReadWrite(ISA_SCATTER4_TYPED, resource, pU, pV, pR, pLOD, pSrc, writeMask);
962     }
963 
Scatter(CVariable * val,CVariable * bufidx,CVariable * offset,CVariable * gOffset,e_predefSurface surface,int elementSize)964     inline void CEncoder::Scatter(CVariable* val, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize)
965     {
966         ScatterGather(ISA_SCATTER, val, bufidx, offset, gOffset, surface, elementSize);
967     }
968 
SendC(CVariable * dst,CVariable * src,uint exDesc,CVariable * messDescriptor)969     inline void CEncoder::SendC(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor)
970     {
971         Send(dst, src, exDesc, messDescriptor, true);
972     }
973 
SendC(CVariable * dst,CVariable * src,uint ffid,CVariable * exDesc,CVariable * messDescriptor)974     inline void CEncoder::SendC(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor)
975     {
976         Send(dst, src, ffid, exDesc, messDescriptor, true);
977     }
978 
IEEESqrt(CVariable * dst,CVariable * src0)979     inline void CEncoder::IEEESqrt(CVariable* dst, CVariable* src0)
980     {
981         Arithmetic(ISA_SQRTM, dst, src0);
982     }
983 
IEEEDivide(CVariable * dst,CVariable * src0,CVariable * src1)984     inline void CEncoder::IEEEDivide(CVariable* dst, CVariable* src0, CVariable* src1)
985     {
986         Arithmetic(ISA_DIVM, dst, src0, src1);
987     }
988 
dp4a(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)989     inline void CEncoder::dp4a(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) {
990         Arithmetic(ISA_DP4A, dst, src0, src1, src2);
991     }
992 
SetIsCodePatchCandidate(bool v)993     inline void CEncoder::SetIsCodePatchCandidate(bool v)
994     {
995         m_isCodePatchCandidate = v;
996     }
997 
IsCodePatchCandidate()998     inline bool CEncoder::IsCodePatchCandidate()
999     {
1000         return m_isCodePatchCandidate;
1001     }
1002 
SetPayloadEnd(unsigned int payloadEnd)1003     inline void CEncoder::SetPayloadEnd(unsigned int payloadEnd)
1004     {
1005         m_payloadEnd = payloadEnd;
1006     }
1007 
GetPayloadEnd()1008     inline unsigned int CEncoder::GetPayloadEnd()
1009     {
1010         return m_payloadEnd;
1011     }
1012 
SetHasPrevKernel(bool v)1013     inline void CEncoder::SetHasPrevKernel(bool v)
1014     {
1015         m_hasPrevKernel = v;
1016     }
1017 
HasPrevKernel()1018     inline bool CEncoder::HasPrevKernel()
1019     {
1020         return m_hasPrevKernel;
1021     }
1022 
BeginForcedNoMaskRegion()1023     inline void CEncoder::BeginForcedNoMaskRegion()
1024     {
1025         ++m_nestLevelForcedNoMaskRegion;
1026         // Start submitting insts with NoMask control
1027         m_encoderState.m_noMask = true;
1028     }
1029 
EndForcedNoMaskRegion()1030     inline void CEncoder::EndForcedNoMaskRegion()
1031     {
1032         --m_nestLevelForcedNoMaskRegion;
1033         IGC_ASSERT_MESSAGE(m_nestLevelForcedNoMaskRegion >= 0, "Invalid nesting of Unmasked regions");
1034         // Out of unmasked region, return to submitting insts
1035         // with Mask control
1036         if (m_nestLevelForcedNoMaskRegion == 0)
1037             m_encoderState.m_noMask = false;
1038     }
1039 
SetNoMask()1040     inline void CEncoder::SetNoMask()
1041     {
1042         m_encoderState.m_noMask = true;
1043     }
1044 
SetMask(e_mask mask)1045     inline void CEncoder::SetMask(e_mask mask)
1046     {
1047         m_encoderState.m_mask = mask;
1048     }
1049 
SetSimdSize(SIMDMode size)1050     inline void CEncoder::SetSimdSize(SIMDMode size)
1051     {
1052         m_encoderState.m_simdSize = size;
1053     }
1054 
GetSimdSize()1055     inline SIMDMode CEncoder::GetSimdSize()
1056     {
1057         return m_encoderState.m_simdSize;
1058     }
1059 
SetUniformSIMDSize(SIMDMode size)1060     inline void CEncoder::SetUniformSIMDSize(SIMDMode size)
1061     {
1062         m_encoderState.m_uniformSIMDSize = size;
1063     }
1064 
SetSubSpanDestination(bool subspan)1065     inline void CEncoder::SetSubSpanDestination(bool subspan)
1066     {
1067         m_encoderState.m_SubSpanDestination = subspan;
1068     }
1069 
SetSecondHalf(bool secondHalf)1070     inline void CEncoder::SetSecondHalf(bool secondHalf)
1071     {
1072         m_encoderState.m_secondHalf = secondHalf;
1073     }
1074 
IsSecondHalf()1075     inline bool CEncoder::IsSecondHalf()
1076     {
1077         return m_encoderState.m_secondHalf;
1078     }
1079 
SetSecondNibble(bool secondNibble)1080     inline void CEncoder::SetSecondNibble(bool secondNibble)
1081     {
1082         m_encoderState.m_secondNibble = secondNibble;
1083     }
1084 
IsSecondNibble()1085     inline bool CEncoder::IsSecondNibble()
1086     {
1087         return m_encoderState.m_secondNibble;
1088     }
1089 
IsSubSpanDestination()1090     inline bool CEncoder::IsSubSpanDestination()
1091     {
1092         return m_encoderState.m_SubSpanDestination;
1093     }
1094 
    // Free helper functions that translate IGC-side enums/values into the
    // corresponding vISA types. Only declared here; defined outside this header.
    VISA_Modifier ConvertModifierToVisaType(e_modifier modifier);
    VISA_Cond_Mod ConvertCondModToVisaType(e_predicate condMod);
    // NOTE(review): the unit of `size` is not visible here — confirm in the definition.
    VISA_Oword_Num  ConvertSizeToVisaType(uint size);
    VISAChannelMask ConvertChannelMaskToVisaType(uint mask);
    VISASourceSingleChannel ConvertSingleSourceChannel(uint srcChannel);


    GenPrecision ConvertPrecisionToVisaType(PrecisionType P);
1103 }
1104