1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #pragma once 10 11 #include "Compiler/CISACodeGen/CISACodeGen.h" 12 #include "Compiler/CISACodeGen/CVariable.hpp" 13 #include "Compiler/CISACodeGen/PatternMatchPass.hpp" 14 #include "Compiler/CISACodeGen/helper.h" 15 #include "visa_wa.h" 16 #include "inc/common/sku_wa.h" 17 18 namespace IGC 19 { 20 class CShader; 21 22 struct SFlag 23 { 24 CVariable* var; 25 e_predMode mode; 26 bool invertFlag; initIGC::SFlag27 void init() 28 { 29 var = NULL; 30 mode = EPRED_NORMAL; 31 invertFlag = false; 32 } 33 }; 34 35 struct SModifier 36 { 37 uint16_t subReg; 38 uint8_t subVar; 39 uint8_t region[3]; 40 e_modifier mod; 41 e_instance instance; 42 bool specialRegion; initIGC::SModifier43 void init() 44 { 45 mod = EMOD_NONE; 46 subVar = 0; 47 subReg = 0; 48 instance = EINSTANCE_UNSPECIFIED; 49 specialRegion = false; 50 } 51 }; 52 53 struct SAlias 54 { 55 CVariable* m_rootVar; 56 VISA_Type m_type; SAliasIGC::SAlias57 SAlias(CVariable* var, VISA_Type type) : 58 m_rootVar(var), m_type(type) 59 { } 60 }; 61 62 struct SAliasMapInfo { getEmptyKeyIGC::SAliasMapInfo63 static inline SAlias getEmptyKey() { return SAlias(nullptr, ISA_TYPE_UD); } getTombstoneKeyIGC::SAliasMapInfo64 static inline SAlias getTombstoneKey() { return SAlias(nullptr, ISA_TYPE_D); } getHashValueIGC::SAliasMapInfo65 static unsigned getHashValue(const SAlias& Val) { 66 return llvm::DenseMapInfo<CVariable*>::getHashValue(Val.m_rootVar) ^ Val.m_type; 67 } isEqualIGC::SAliasMapInfo68 static bool isEqual(const SAlias& LHS, const SAlias& RHS) { 69 return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type; 70 } 71 }; 72 73 /// Helps representing URB write channel masks in a way that provides type safety and adapts to 74 /// the channel mask format required by V-ISA interface. 75 class URBChannelMask 76 { 77 public: URBChannelMask(unsigned int bitmask)78 explicit URBChannelMask(unsigned int bitmask) : m_bitmask(bitmask) {} 79 80 /// Returns the size of bitmask, 81 /// defined as the position of the most significant bit with value 1. 82 /// E.g. size(10001) == 5, size(1) == 1 , size(1111) = 4 83 size_t size() const; 84 85 /// Returns channel mask in the format expected by V-ISA. 86 /// If the mask is full (i.e. consists of all 1) the return value must be 0xFF 87 /// that means 'no channel mask'. In other cases it is the actual stored mask 88 /// E.g. 1010 asVISAMask --> 1010, 111 asVISAMask --> 11111111 (full mask case) 89 unsigned int asVISAMask() const; 90 91 // returns true if all channels are set (i.e., we can skip the channel mask) isAllSet() const92 bool isAllSet() const 93 { 94 return ((m_bitmask + 1) & m_bitmask) == 0; 95 } 96 private: 97 unsigned int m_bitmask; 98 }; 99 100 struct SEncoderState 101 { 102 SModifier m_srcOperand[4]; 103 SModifier m_dstOperand; 104 SFlag m_flag; 105 SIMDMode m_simdSize; 106 SIMDMode m_uniformSIMDSize; 107 e_mask m_mask; 108 bool m_noMask; 109 bool m_SubSpanDestination; 110 bool m_secondHalf; 111 bool m_secondNibble = false; 112 }; 113 114 class CEncoder 115 { 116 public: 117 void InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall, VISAKernel* prevKernel); 118 void InitBuildParams(llvm::SmallVector<std::unique_ptr< char, std::function<void(char*)>>, 10> & params); 119 void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall, bool enableVISA_IR); 120 SEncoderState CopyEncoderState(); 121 void SetEncoderState(SEncoderState& newState); 122 VISA_Align GetVISAAlign(CVariable* var); 123 124 void SetDispatchSimdSize(); 125 void SetSpillMemOffset(); 126 void SetStackFunctionArgSize(uint size); // size in GRFs 127 void SetStackFunctionRetSize(uint size); // size in GRFs 128 void SetExternFunctionFlag(); 129 130 void GetVISAPredefinedVar(CVariable* pVar, PreDefined_Vars var); 131 void CreateVISAVar(CVariable* var); 132 void DeclareInput(CVariable* var, uint offset, uint instance); 133 void MarkAsOutput(CVariable* var); 134 void MarkAsPayloadLiveOut(CVariable* var); 135 void Compile(bool hasSymbolTable = false); 136 std::string GetShaderName(); 137 void ReportCompilerStatistics(VISAKernel* pMainKernel, SProgramOutput* pOutput); 138 int GetThreadCount(SIMDMode simdMode); 139 140 CEncoder(); 141 ~CEncoder(); 142 void SetProgram(CShader* program); 143 void Jump(CVariable* flag, uint label); 144 void Label(uint label); 145 uint GetNewLabelID(const CName &name); 146 void DwordAtomicRaw(AtomicOp atomic_op, 147 const ResourceDescriptor& bindingTableIndex, 148 CVariable* dst, CVariable* elem_offset, CVariable* src0, 149 CVariable* src1, bool is16Bit = false); 150 void AtomicRawA64(AtomicOp atomic_op, const ResourceDescriptor& resource, CVariable* dst, 151 CVariable* elem_offset, CVariable* src0, CVariable* src1, 152 unsigned short bitwidth); 153 void Cmp(e_predicate p, CVariable* dst, CVariable* src0, CVariable* src1); 154 void Select(CVariable* flag, CVariable* dst, CVariable* src0, CVariable* src1); 155 void GenericAlu(e_opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2 = nullptr); 156 void URBWrite(CVariable* src, const int payloadElementOffset, CVariable* offset, CVariable* urbHandle, CVariable* dynamicMask); 157 void Send(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor, bool isSendc = false); 158 void Send(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor, bool isSendc = false); 159 void Sends(CVariable* dst, CVariable* src0, CVariable* src1, uint ffid, CVariable* exDesc, CVariable* messDescriptor, bool isSendc = false, bool hasEOT = false); 160 void RenderTargetWrite(CVariable* var[], 161 bool isUndefined[], 162 bool lastRenderTarget, 163 bool isNullRT, 164 bool perSample, 165 bool coarseMode, 166 bool headerMaskFromCe0, 167 CVariable* bindingTableIndex, 168 CVariable* RTIndex, 169 CVariable* source0Alpha, 170 CVariable* oMask, 171 CVariable* depth, 172 CVariable* stencil, 173 CVariable* CPSCounter, 174 CVariable* sampleIndex, 175 CVariable* r1Reg); 176 void Sample( 177 EOPCODE subOpcode, 178 uint writeMask, 179 CVariable* offset, 180 const ResourceDescriptor& bindingTableIndex, 181 const SamplerDescriptor& SamplerIdx, 182 uint numSources, 183 CVariable* dst, 184 llvm::SmallVector<CVariable*, 4> & payload, 185 bool zeroLOD, 186 bool cpsEnable, 187 bool feedbackEnable, 188 bool nonUniformState = false); 189 void Load( 190 EOPCODE subOpcode, 191 uint writeMask, 192 CVariable* offset, 193 const ResourceDescriptor& resource, 194 uint numSources, 195 CVariable* dst, 196 llvm::SmallVector<CVariable*, 4> & payload, 197 bool zeroLOD, 198 bool feedbackEnable); 199 200 void Info(EOPCODE subOpcode, uint writeMask, const ResourceDescriptor& resource, CVariable* lod, CVariable* dst); 201 202 void Gather4Inst( 203 EOPCODE subOpcode, 204 CVariable* offset, 205 const ResourceDescriptor& resource, 206 const SamplerDescriptor& sampler, 207 uint numSources, 208 CVariable* dst, 209 llvm::SmallVector<CVariable*, 4> & payload, 210 uint channel, 211 bool feedbackEnable); 212 213 void OWLoad(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, bool owordAligned, uint dstSize, uint dstOffset = 0); 214 void OWStore(CVariable* dst, e_predefSurface surfaceType, CVariable* bufidx, CVariable* offset, uint dstSize, uint srcOffset); 215 216 void AddrAdd(CVariable* dst, CVariable* src0, CVariable* src1); 217 void Barrier(e_barrierKind BarrierKind); 218 void Fence(bool CommitEnable, 219 bool L3_Flush_RW_Data, 220 bool L3_Flush_Constant_Data, 221 bool L3_Flush_Texture_Data, 222 bool L3_Flush_Instructions, 223 bool Global_Mem_Fence, 224 bool L1_Flush, 225 bool SWFence); 226 void FlushSamplerCache(); 227 void EOT(); 228 void OWLoadA64(CVariable* dst, CVariable* offset, uint dstSize, uint dstOffset = 0); 229 void OWStoreA64(CVariable* dst, CVariable* offset, uint dstSize, uint srcOffset); 230 void MediaBlockMessage(ISA_Opcode subOpcode, 231 CVariable* dst, 232 e_predefSurface surfaceType, 233 CVariable* bufId, 234 CVariable* xOffset, 235 CVariable* yOffset, 236 uint modifier, 237 unsigned char blockWidth, 238 unsigned char blockHeight, 239 uint plane); 240 void GatherA64(CVariable* dst, CVariable* offset, unsigned elementSize, unsigned numElems); 241 void ScatterA64(CVariable* val, CVariable* offset, unsigned elementSize, unsigned numElems); 242 void ByteGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems); 243 void ByteScatter(CVariable* src, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems); 244 void Gather4Scaled(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset); 245 void Gather4ScaledNd(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned nd); 246 void Scatter4Scaled(CVariable* src, const ResourceDescriptor& resource, CVariable* offset); 247 void Gather4A64(CVariable* dst, CVariable* offset); 248 void Scatter4A64(CVariable* src, CVariable* offset); 249 void BoolToInt(CVariable* dst, CVariable* src); 250 void Copy(CVariable* dst, CVariable* src); 251 void SubroutineCall(CVariable* flag, llvm::Function* F); 252 void SubroutineRet(CVariable* flag, llvm::Function* F); 253 void StackCall(CVariable* flag, llvm::Function* F, unsigned char argSize, unsigned char retSize); 254 void IndirectStackCall(CVariable* flag, CVariable* funcPtr, unsigned char argSize, unsigned char retSize); 255 void StackRet(CVariable* flag); 256 void Loc(unsigned int line); 257 void File(std::string& s); 258 void PredAdd(CVariable* flag, CVariable* dst, CVariable* src0, CVariable* src1); 259 void DebugLinePlaceholder(); 260 261 inline void Jump(uint label); 262 inline void Cast(CVariable* dst, CVariable* src); 263 inline void Add(CVariable* dst, CVariable* src0, CVariable* src1); 264 inline void Bfi(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2, CVariable* src3); 265 inline void Bfe(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2); 266 inline void Bfrev(CVariable* dst, CVariable* src0); 267 inline void CBit(CVariable* dst, CVariable* src0); 268 inline void Fbh(CVariable* dst, CVariable* src0); 269 inline void Fbl(CVariable* dst, CVariable* src0); 270 inline void Mul(CVariable* dst, CVariable* src0, CVariable* src1); 271 inline void Pow(CVariable* dst, CVariable* src0, CVariable* src1); 272 inline void Div(CVariable* dst, CVariable* src0, CVariable* src1); 273 inline void Shl(CVariable* dst, CVariable* src0, CVariable* src1); 274 inline void Shr(CVariable* dst, CVariable* src0, CVariable* src1); 275 inline void MulH(CVariable* dst, CVariable* src0, CVariable* src1); 276 inline void Cos(CVariable* dst, CVariable* src0); 277 inline void Sin(CVariable* dst, CVariable* src0); 278 inline void Log(CVariable* dst, CVariable* src0); 279 inline void Exp(CVariable* dst, CVariable* src0); 280 inline void Frc(CVariable* dst, CVariable* src0); 281 inline void Sqrt(CVariable* dst, CVariable* src0); 282 inline void Floor(CVariable* dst, CVariable* src0); 283 inline void Ceil(CVariable* dst, CVariable* src0); 284 inline void Ctlz(CVariable* dst, CVariable* src0); 285 inline void Truncate(CVariable* dst, CVariable* src0); 286 inline void RoundNE(CVariable* dst, CVariable* src0); 287 inline void Mod(CVariable* dst, CVariable* src0, CVariable* src1); 288 inline void Rsqrt(CVariable* dst, CVariable* src0); 289 inline void Inv(CVariable* dst, CVariable* src0); 290 inline void Not(CVariable* dst, CVariable* src0); 291 // src0 * src1 + src2 292 inline void Madw(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2); 293 inline void Mad(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2); 294 inline void Lrp(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2); 295 inline void Xor(CVariable* dst, CVariable* src0, CVariable* src1); 296 inline void Or(CVariable* dst, CVariable* src0, CVariable* src1); 297 inline void And(CVariable* dst, CVariable* src0, CVariable* src1); 298 inline void Pln(CVariable* dst, CVariable* src0, CVariable* src1); 299 inline void SendC(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor); 300 inline void SendC(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor); 301 inline void LoadMS(EOPCODE subOpcode, uint writeMask, CVariable* offset, const ResourceDescriptor& resource, uint numSources, CVariable* dst, llvm::SmallVector<CVariable*, 4> & payload, bool feedbackEnable); 302 inline void SetP(CVariable* dst, CVariable* src); 303 inline void Gather(CVariable* dst, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize); 304 inline void TypedRead4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD, CVariable* pDst, uint writeMask); 305 inline void TypedWrite4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, CVariable* pR, CVariable* pLOD, CVariable* pSrc, uint writeMask); 306 inline void Scatter(CVariable* val, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize); 307 inline void IShr(CVariable* dst, CVariable* src0, CVariable* src1); 308 inline void Min(CVariable* dst, CVariable* src0, CVariable* src1); 309 inline void Max(CVariable* dst, CVariable* src0, CVariable* src1); 310 inline void UAddC(CVariable* dst, CVariable* src0, CVariable* src1); 311 inline void USubB(CVariable* dst, CVariable* src0, CVariable* src1); 312 inline void IEEESqrt(CVariable* dst, CVariable* src0); 313 inline void IEEEDivide(CVariable* dst, CVariable* src0, CVariable* src1); 314 void AddPair(CVariable* Lo, CVariable* Hi, CVariable* L0, CVariable* H0, CVariable* L1, CVariable* H1 = nullptr); 315 void SubPair(CVariable* Lo, CVariable* Hi, CVariable* L0, CVariable* H0, CVariable* L1, CVariable* H1); 316 inline void dp4a(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2); 317 void Lifetime(VISAVarLifetime StartOrEnd, CVariable* dst); 318 void dpas(CVariable* dst, CVariable* input, CVariable* weight, PrecisionType weight_precision, 319 CVariable* actication, PrecisionType activation_precision, uint8_t systolicDepth, 320 uint8_t repeatCount, bool IsDpasw); 321 void fcvt(CVariable* dst, CVariable* src); 322 void Bfn(uint8_t booleanFuncCtrl, CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2); 323 void QWGather(CVariable* dst, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems); 324 void QWScatter(CVariable* src, const ResourceDescriptor& resource, CVariable* offset, unsigned elementSize, unsigned numElems); 325 // VME 326 void SendVmeIme( 327 CVariable* bindingTableIndex, 328 unsigned char streamMode, 329 unsigned char searchControlMode, 330 CVariable* uniInputVar, 331 CVariable* imeInputVar, 332 CVariable* ref0Var, 333 CVariable* ref1Var, 334 CVariable* costCenterVar, 335 CVariable* outputVar); 336 337 void SendVmeFbr( 338 CVariable* bindingTableIndex, 339 CVariable* uniInputVar, 340 CVariable* fbrInputVar, 341 CVariable* FBRMbModeVar, 342 CVariable* FBRSubMbShapeVar, 343 CVariable* FBRSubPredModeVar, 344 CVariable* outputVar); 345 346 void SendVmeSic( 347 CVariable* bindingTableIndex, 348 CVariable* uniInputVar, 349 CVariable* sicInputVar, 350 CVariable* outputVar); 351 352 // VA 353 void SendVideoAnalytic( 354 llvm::GenIntrinsicInst* inst, 355 CVariable* vaResult, 356 CVariable* coords, 357 CVariable* size, 358 CVariable* srcImg, 359 CVariable* sampler); 360 361 void SetDstSubVar(uint subVar); 362 void SetDstSubReg(uint subReg); 363 void SetSrcSubVar(uint srcNum, uint subVar); 364 void SetSrcSubReg(uint srcNum, uint subReg); 365 void SetDstModifier(e_modifier mod); 366 void SetDstModifier(const DstModifier& modifier); 367 void SetSrcModifier(uint srcNum, e_modifier mod); 368 void SetPredicate(CVariable* flag); 369 void SetInversePredicate(bool inv); 370 void SetPredicateMode(e_predMode mode); 371 void SetSrcRegion(uint srcNum, uint vStride, uint width, uint hStride, e_instance instance = EINSTANCE_UNSPECIFIED); 372 void SetDstRegion(uint hStride); 373 inline void SetNoMask(); 374 inline void SetMask(e_mask mask); 375 inline void SetSimdSize(SIMDMode size); 376 inline SIMDMode GetSimdSize(); 377 inline void SetUniformSIMDSize(SIMDMode size); 378 inline void SetSubSpanDestination(bool subspan); 379 inline bool IsSubSpanDestination(); 380 inline void SetSecondHalf(bool secondHalf); 381 inline bool IsSecondHalf(); 382 inline void SetSecondNibble(bool secondNibble); 383 inline bool IsSecondNibble(); 384 385 inline void SetIsCodePatchCandidate(bool v); 386 inline bool IsCodePatchCandidate(); 387 inline unsigned int GetPayloadEnd(); 388 inline void SetPayloadEnd(unsigned int payloadEnd); 389 inline void SetHasPrevKernel(bool v); 390 inline bool HasPrevKernel(); 391 inline void BeginForcedNoMaskRegion(); 392 inline void EndForcedNoMaskRegion(); 393 394 void Wait(); 395 GetVISAKernel() const396 VISAKernel* GetVISAKernel() const { return vKernel; } GetVISABuilder() const397 VISABuilder* GetVISABuilder() const { return vbuilder; } 398 void Init(); 399 void Push(); 400 401 void initCR(VISAKernel* vKernel); 402 void SetVectorMask(bool vMask); 403 404 // Switches from actualRM to newRM 405 void SetRoundingMode_FP(ERoundingMode actualRM, ERoundingMode newRM); 406 void SetRoundingMode_FPCvtInt(ERoundingMode actualRM, ERoundingMode newRM); 407 GetCISADataTypeSize(VISA_Type type)408 static uint GetCISADataTypeSize(VISA_Type type) {return CVariable::GetCISADataTypeSize(type);} GetCISADataTypeAlignment(VISA_Type type)409 static e_alignment GetCISADataTypeAlignment(VISA_Type type) {return CVariable::GetCISADataTypeAlignment(type);} 410 411 static VISASampler3DSubOpCode ConvertSubOpcode(EOPCODE subOpcode, bool zeroLOD); 412 413 // Wrappers for (potentially) common queries on types 414 static bool IsIntegerType(VISA_Type type); 415 static bool IsFloatType(VISA_Type type); 416 417 void SetVISAWaTable(WA_TABLE const& waTable); 418 419 /// \brief Initialize per function states and starts vISA emission 420 /// as a vISA subroutine 421 void BeginSubroutine(llvm::Function* F); 422 /// \brief Initialize per function states and starts vISA emission 423 /// as a vISA stack-call function 424 void BeginStackFunction(llvm::Function* F); 425 /// \brief Initialize interpolation section for vISA emission 426 void BeginPayloadSection(); 427 428 void DestroyVISABuilder(); 429 430 void AddVISASymbol(std::string& symName, CVariable* cvar); 431 432 std::string GetVariableName(CVariable* var); 433 std::string GetDumpFileName(std::string extension); 434 IsPayloadSectionAsPrimary()435 bool IsPayloadSectionAsPrimary() {return vKernel == vPayloadSection;} SetPayloadSectionAsPrimary()436 void SetPayloadSectionAsPrimary() {vKernelTmp = vKernel; vKernel = vPayloadSection;} SetPayloadSectionAsSecondary()437 void SetPayloadSectionAsSecondary() {vKernel = vKernelTmp;} 438 439 std::string GetUniqueInlineAsmLabel(); 440 441 private: 442 // helper functions 443 VISA_VectorOpnd* GetSourceOperand(CVariable* var, const SModifier& mod); 444 VISA_VectorOpnd* GetSourceOperandNoModifier(CVariable* var); 445 VISA_VectorOpnd* GetDestinationOperand(CVariable* var, const SModifier& mod); 446 VISA_RawOpnd* GetRawSource(CVariable* var, uint offset = 0); 447 VISA_RawOpnd* GetRawDestination(CVariable* var, unsigned offset = 0); 448 VISA_PredOpnd* GetFlagOperand(const SFlag& flag); 449 VISA_StateOpndHandle* GetVISASurfaceOpnd(e_predefSurface surfaceType, CVariable* bti); 450 VISA_StateOpndHandle* GetVISASurfaceOpnd(const ResourceDescriptor& resource); 451 VISA_LabelOpnd* GetLabel(uint label); 452 VISA_LabelOpnd* GetFuncLabel(llvm::Function* F); 453 void InitLabelMap(const llvm::Function* F); 454 CName CreateVisaLabelName(const llvm::StringRef &L = ""); 455 std::string CreateShortLabel(unsigned labelIndex) const; 456 // Compiler labels must start with something a user won't use in inline 457 // assembly. GetCompilerLabelPrefix()458 static const char *GetCompilerLabelPrefix() {return "_";} 459 460 VISAFunction* GetStackFunction(llvm::Function* F); 461 462 VISA_VectorOpnd* GetUniformSource(CVariable* var); 463 VISA_StateOpndHandle* GetBTIOperand(uint bindingTableIndex); 464 VISA_StateOpndHandle* GetSamplerOperand(CVariable* sampleIdx); 465 VISA_StateOpndHandle* GetSamplerOperand(const SamplerDescriptor& sampler, bool& isIdxLT16); 466 void GetRowAndColOffset(CVariable* var, unsigned int subVar, unsigned int subreg, unsigned char& rowOff, unsigned char& colOff); 467 468 VISA_GenVar* GetVISAVariable(CVariable* var); 469 VISA_GenVar* GetVISAVariable(CVariable* var, e_instance instance); 470 VISA_EMask_Ctrl ConvertMaskToVisaType(e_mask mask, bool noMask); 471 472 // Generic encoding functions 473 void MinMax(CISA_MIN_MAX_SUB_OPCODE subopcode, CVariable* dst, CVariable* src0, CVariable* src1); 474 void DataMov(ISA_Opcode opcode, CVariable* dst, CVariable* src); 475 void LogicOp( 476 ISA_Opcode opcode, 477 CVariable* dst, 478 CVariable* src0, 479 CVariable* src1 = nullptr, 480 CVariable* src2 = nullptr, 481 CVariable* src3 = nullptr); 482 void Arithmetic( 483 ISA_Opcode opcode, 484 CVariable* dst, 485 CVariable* src0 = nullptr, 486 CVariable* src1 = nullptr, 487 CVariable* src2 = nullptr); 488 void CarryBorrowArith(ISA_Opcode opcode, CVariable* dst, CVariable* src0, CVariable* src1); 489 void ScatterGather( 490 ISA_Opcode opcode, 491 CVariable* srcdst, 492 CVariable* bufId, 493 CVariable* offset, 494 CVariable* gOffset, 495 e_predefSurface surface, 496 int elementSize); 497 void TypedReadWrite( 498 ISA_Opcode opcode, 499 const ResourceDescriptor& resource, 500 CVariable* pU, 501 CVariable* pV, 502 CVariable* pR, 503 CVariable* pLOD, 504 CVariable* pSrcDst, 505 uint writeMask); 506 507 VISA_Exec_Size GetAluExecSize(CVariable* dst) const; 508 VISA_EMask_Ctrl GetAluEMask(CVariable* dst); 509 bool IsSat(); 510 511 // Variable splitting facilities (if crosses 2 GRF boundary). 512 bool NeedSplitting(CVariable* var, const SModifier& mod, 513 unsigned& numParts, bool isSource = false) const; 514 SModifier SplitVariable(VISA_Exec_Size fromExecSize, 515 VISA_Exec_Size toExecSize, 516 unsigned thePart, 517 CVariable* var, const SModifier& mod, 518 bool isSource = false) const; 519 VISA_Exec_Size SplitExecSize(VISA_Exec_Size fromExecSize, 520 unsigned numParts) const; 521 VISA_EMask_Ctrl SplitEMask(VISA_Exec_Size fromExecSize, 522 VISA_Exec_Size toExecSize, 523 unsigned thePart, 524 VISA_EMask_Ctrl execMask) const; 525 526 // Split SIMD16 message data payload(MDP) for scattered/untyped write 527 // messages into two SIMD8 MDPs : V0 and V1. 528 void SplitPayloadToLowerSIMD(CVariable* MDP, uint32_t MDPOfst, uint32_t NumBlks, CVariable* V0, CVariable* V1, uint32_t fromSize = 16); 529 // Merge two SIMD8 MDPs (V0 & V1) for scattered/untyped read messages into one SIMD16 message : MDP 530 void MergePayloadToHigherSIMD(CVariable* V0, CVariable* V1, uint32_t NumBlks, CVariable* MDP, uint32_t MDPOfst, uint32_t toSize = 16); 531 532 // save compile time by avoiding retry if the amount of spill is (very) small 533 bool AvoidRetryOnSmallSpill() const; 534 535 // CreateSymbolTable, CreateRelocationTable and CreateFuncAttributeTable will create symbols, relococations and FuncAttributes in 536 // two formats. One in given buffer that will be later parsed as patch token based format, another as struct type that will be parsed 537 // as ZE binary format 538 539 // CreateSymbolTable 540 // Note that this function should be called only once even if there are multiple kernels in a program. Current IGC 541 // flow will create all symbols in the first kernel and all the other kernels won't contain symbols 542 typedef std::vector<std::pair<llvm::Value*, vISA::GenSymEntry>> ValueToSymbolList; 543 void CreateSymbolTable(ValueToSymbolList& symbolTableList); 544 // input/output: buffer, bufferSize, tableEntries: for patch-token-based format. 545 void CreateSymbolTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries); 546 // input/output: symbols: for ZEBinary foramt 547 void CreateSymbolTable(SProgramOutput::ZEBinFuncSymbolTable& funcSyms, SOpenCLProgramInfo::ZEBinProgramSymbolTable& programSyms); 548 // Create function symbols for kernels. This is ZEBinary foramt only. 549 void CreateKernelSymbol(const std::string& kernelName, unsigned offset, unsigned size, 550 SProgramOutput::ZEBinFuncSymbolTable& symbols); 551 552 // CreateRelocationTable 553 // input/output: buffer, bufferSize, tableEntries: for patch-token-based format. 554 void CreateRelocationTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries); 555 // input/output: relocations: for ZEBinary foramt 556 void CreateRelocationTable(SProgramOutput::RelocListTy& relocations); 557 558 // CreateFuncAttributeTable 559 void CreateFuncAttributeTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries, SProgramOutput::FuncAttrListTy& attrs); 560 561 uint32_t getGRFSize() const; 562 needsSplitting(VISA_Exec_Size ExecSize) const563 bool needsSplitting(VISA_Exec_Size ExecSize) const 564 { 565 return ExecSize == EXEC_SIZE_16; 566 } 567 568 // Note that GEN can set both fpCvtInt_rtz and any of FP rounding modes 569 // at the same time. If fpCvtInt uses a rounding mode other than rtz, 570 // they both uses FP rounding bits. 571 // 572 // RM bits in CR0.0. 573 // float RM bits: [5:4]; 574 // int RM (float -> int): Bit 12: 0 -> rtz; 1 -> using Float RM 575 enum RMEncoding { 576 // float rounding mode (fp operations, cvt to fp) 577 RoundToNearestEven = 0x00, 578 RoundToPositive = 0x10, 579 RoundToNegative = 0x20, 580 RoundToZero = 0x30, 581 // int rounding mode (fp cvt int only), use FP RM for all rounding modes but rtz. 582 RoundToNearestEven_int = 0x1000, 583 RoundToPositive_int = 0x1010, 584 RoundToNegative_int = 0x1020, 585 RoundToZero_int_unused = 0x1030, 586 RoundToZero_int = 0x0000, // use this for rtz, bit 12 = 0 587 588 IntAndFPRoundingModeMask = 0x1030 589 }; 590 void SetRoundingMode(RMEncoding actualRM, RMEncoding newRM); 591 // Get Encoding bit values for rounding mode 592 RMEncoding getEncoderRoundingMode_FP(ERoundingMode FP_RM); 593 RMEncoding getEncoderRoundingMode_FPCvtInt(ERoundingMode FCvtI_RM); 594 595 unsigned GetRawOpndSplitOffset(VISA_Exec_Size fromExecSize, 596 VISA_Exec_Size toExecSize, 597 unsigned thePart, CVariable* var) const; 598 599 std::tuple<CVariable*, uint32_t> splitRawOperand(CVariable* var, bool isFirstHalf, VISA_EMask_Ctrl execMask); 600 601 uint32_t getNumChannels(CVariable* var) const; 602 603 void SaveOption(vISAOptions option, bool val); 604 void SaveOption(vISAOptions option, uint32_t val); 605 void SaveOption(vISAOptions option, const char* val); 606 void SetBuilderOptions(VISABuilder* pbuilder); 607 608 protected: 609 // encoder states 610 SEncoderState m_encoderState; 611 612 llvm::DenseMap<SAlias, CVariable*, SAliasMapInfo> m_aliasesMap; 613 614 // vISA needs its own Wa-table as some of the W/A are applicable 615 // only to certain APIs/shader types/reg key settings/etc. 616 WA_TABLE m_vISAWaTable; 617 618 enum OpType 619 { 620 ET_BOOL, 621 ET_INT32, 622 ET_CSTR 623 }; 624 struct OptionValue 625 { 626 OpType type; 627 bool vBool; 628 uint32_t vInt32; 629 const char* vCstr; 630 }; 631 // List of vISA user options 632 std::vector<std::pair<vISAOptions, OptionValue>> m_visaUserOptions; 633 634 // Typically IGC just use ones vKernel for every vISA::compile call, 635 // in those cases, vKernel and vMainKernel should be the same. 636 // Only when using stack-call, vKernel pointer changes every time 637 // IGC addes a vISA kernel or function object, but the vMainKernel 638 // always pointing to the first kernel added during InitEncoder. 639 VISAKernel* vKernel; 640 VISAKernel* vMainKernel; 641 VISABuilder* vbuilder; 642 VISABuilder* vAsmTextBuilder; 643 644 // This is for CodePatch to split payload interpolation from a shader 645 VISAKernel* vPayloadSection; 646 VISAKernel* vKernelTmp; 647 bool m_hasPrevKernel = false; 648 unsigned int m_payloadEnd = 0; 649 650 bool m_isCodePatchCandidate = false; 651 652 int m_nestLevelForcedNoMaskRegion = 0; 653 654 bool m_enableVISAdump; 655 bool m_hasInlineAsm; 656 657 std::vector<VISA_LabelOpnd*> labelMap; 658 std::vector<CName> labelNameMap; // parallel to labelMap 659 660 /// Per kernel label counter 661 unsigned labelCounter = 0; 662 /// Per kernel label counter for each inline asm block 663 unsigned labelInlineAsmCounter = 0; 664 /// Each kernel might emit several functions; 665 /// we pre-increment this for each new function we process (InitLabelMap) 666 /// The first function will see 0, ... 667 unsigned labelFunctionIndex = (unsigned)-1; 668 /// 669 /// The name of the current function; set if we are emitting labels 670 CName currFunctionName; 671 672 /// Keep a map between a function and its label, per kernel state. 673 llvm::SmallDenseMap<llvm::Function*, VISA_LabelOpnd*> funcLabelMap; 674 /// Keep a map between a stack-called function and the corresponding vISA function 675 llvm::SmallDenseMap<llvm::Function*, VISAFunction*> stackFuncMap; 676 677 // dummy variables 678 VISA_SurfaceVar* dummySurface; 679 VISA_SamplerVar* samplervar; 680 681 CShader* m_program; 682 683 // Keep a map between a function and its per-function attributes needed for function pointer support 684 struct FuncAttrib 685 { 686 bool isKernel = false; 687 bool hasBarrier = false; 688 unsigned argumentStackSize = 0; 689 unsigned allocaStackSize = 0; 690 }; 691 llvm::SmallDenseMap<llvm::Function*, FuncAttrib> funcAttributeMap; 692 693 public: 694 // Used by EmitVISAPass to set function attributes InitFuncAttribute(llvm::Function * F,bool isKernel=false)695 void InitFuncAttribute(llvm::Function* F, bool isKernel = false) { 696 funcAttributeMap[F].isKernel = isKernel; 697 } SetFunctionHasBarrier(llvm::Function * F)698 void SetFunctionHasBarrier(llvm::Function* F) { 699 if (funcAttributeMap.find(F) != funcAttributeMap.end()) 700 funcAttributeMap[F].hasBarrier = true; 701 } SetFunctionMaxArgumentStackSize(llvm::Function * F,unsigned size)702 void SetFunctionMaxArgumentStackSize(llvm::Function* F, unsigned size) { 703 if (funcAttributeMap.find(F) != funcAttributeMap.end()) 704 funcAttributeMap[F].argumentStackSize = MAX(funcAttributeMap[F].argumentStackSize, size); 705 } SetFunctionAllocaStackSize(llvm::Function * F,unsigned size)706 void SetFunctionAllocaStackSize(llvm::Function* F, unsigned size) { 707 if (funcAttributeMap.find(F) != funcAttributeMap.end()) 708 funcAttributeMap[F].allocaStackSize = size; 709 } 710 }; 711 Jump(uint label)712 inline void CEncoder::Jump(uint label) 713 { 714 Jump(NULL, label); 715 } 716 Bfi(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2,CVariable * src3)717 inline void CEncoder::Bfi(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2, CVariable* src3) 718 { 719 LogicOp(ISA_BFI, dst, src0, src1, src2, src3); 720 } 721 Bfe(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)722 inline void CEncoder::Bfe(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) 723 { 724 LogicOp(ISA_BFE, dst, src0, src1, src2); 725 } 726 Bfrev(CVariable * dst,CVariable * src0)727 inline void CEncoder::Bfrev(CVariable* dst, CVariable* src0) 728 { 729 LogicOp(ISA_BFREV, dst, src0); 730 } 731 CBit(CVariable * dst,CVariable * src)732 inline void CEncoder::CBit(CVariable* dst, CVariable* src) 733 { 734 LogicOp(ISA_CBIT, dst, src); 735 } 736 Fbh(CVariable * dst,CVariable * src)737 inline void CEncoder::Fbh(CVariable* dst, CVariable* src) 738 { 739 LogicOp(ISA_FBH, dst, src); 740 } 741 Fbl(CVariable * dst,CVariable * src)742 inline void CEncoder::Fbl(CVariable* dst, CVariable* src) 743 { 744 LogicOp(ISA_FBL, dst, src); 745 } 746 Mul(CVariable * dst,CVariable * src0,CVariable * src1)747 inline void CEncoder::Mul(CVariable* dst, CVariable* src0, CVariable* src1) 748 { 749 Arithmetic(ISA_MUL, dst, src0, src1); 750 } 751 Pow(CVariable * dst,CVariable * src0,CVariable * src1)752 inline void CEncoder::Pow(CVariable* dst, CVariable* src0, CVariable* src1) 753 { 754 Arithmetic(ISA_POW, dst, src0, src1); 755 } 756 Div(CVariable * dst,CVariable * src0,CVariable * src1)757 inline void CEncoder::Div(CVariable* dst, CVariable* src0, CVariable* src1) 758 { 759 Arithmetic(ISA_DIV, dst, src0, src1); 760 } 761 Add(CVariable * dst,CVariable * src0,CVariable * src1)762 inline void CEncoder::Add(CVariable* dst, CVariable* src0, CVariable* src1) 763 { 764 Arithmetic(ISA_ADD, dst, src0, src1); 765 } 766 Shl(CVariable * dst,CVariable * src0,CVariable * src1)767 inline void CEncoder::Shl(CVariable* dst, CVariable* src0, CVariable* src1) 768 { 769 LogicOp(ISA_SHL, dst, src0, src1); 770 } 771 IShr(CVariable * dst,CVariable * src0,CVariable * src1)772 inline void CEncoder::IShr(CVariable* dst, CVariable* src0, CVariable* src1) 773 { 774 LogicOp(ISA_ASR, dst, src0, src1); 775 } 776 Shr(CVariable * dst,CVariable * src0,CVariable * src1)777 inline void CEncoder::Shr(CVariable* dst, CVariable* src0, CVariable* src1) 778 { 779 LogicOp(ISA_SHR, dst, src0, src1); 780 } 781 MulH(CVariable * dst,CVariable * src0,CVariable * src1)782 inline void CEncoder::MulH(CVariable* dst, CVariable* src0, CVariable* src1) 783 { 784 Arithmetic(ISA_MULH, dst, src0, src1); 785 } 786 Cos(CVariable * dst,CVariable * src0)787 inline void CEncoder::Cos(CVariable* dst, CVariable* src0) 788 { 789 Arithmetic(ISA_COS, dst, src0); 790 } 791 Sin(CVariable * dst,CVariable * src0)792 inline void CEncoder::Sin(CVariable* dst, CVariable* src0) 793 { 794 Arithmetic(ISA_SIN, dst, src0); 795 } 796 Log(CVariable * dst,CVariable * src0)797 inline void CEncoder::Log(CVariable* dst, CVariable* src0) 798 { 799 Arithmetic(ISA_LOG, dst, src0); 800 } 801 Exp(CVariable * dst,CVariable * src0)802 inline void CEncoder::Exp(CVariable* dst, CVariable* src0) 803 { 804 Arithmetic(ISA_EXP, dst, src0); 805 } 806 Sqrt(CVariable * dst,CVariable * src0)807 inline void CEncoder::Sqrt(CVariable* dst, CVariable* src0) 808 { 809 Arithmetic(ISA_SQRT, dst, src0); 810 } 811 Floor(CVariable * dst,CVariable * src0)812 inline void CEncoder::Floor(CVariable* dst, CVariable* src0) 813 { 814 Arithmetic(ISA_RNDD, dst, src0); 815 } 816 Ceil(CVariable * dst,CVariable * src0)817 inline void CEncoder::Ceil(CVariable* dst, CVariable* src0) 818 { 819 Arithmetic(ISA_RNDU, dst, src0); 820 } 821 Ctlz(CVariable * dst,CVariable * src0)822 inline void CEncoder::Ctlz(CVariable* dst, CVariable* src0) 823 { 824 Arithmetic(ISA_LZD, dst, src0); 825 } 826 Truncate(CVariable * dst,CVariable * src0)827 inline void CEncoder::Truncate(CVariable* dst, CVariable* src0) 828 { 829 Arithmetic(ISA_RNDZ, dst, src0); 830 } 831 RoundNE(CVariable * dst,CVariable * src0)832 inline void CEncoder::RoundNE(CVariable* dst, CVariable* src0) 833 { 834 Arithmetic(ISA_RNDE, dst, src0); 835 } 836 Mod(CVariable * dst,CVariable * src0,CVariable * src1)837 inline void CEncoder::Mod(CVariable* dst, CVariable* src0, CVariable* src1) 838 { 839 Arithmetic(ISA_MOD, dst, src0, src1); 840 } 841 Rsqrt(CVariable * dst,CVariable * src0)842 inline void CEncoder::Rsqrt(CVariable* dst, CVariable* src0) 843 { 844 Arithmetic(ISA_RSQRT, dst, src0); 845 } 846 Inv(CVariable * dst,CVariable * src0)847 inline void CEncoder::Inv(CVariable* dst, CVariable* src0) 848 { 849 Arithmetic(ISA_INV, dst, src0); 850 } 851 Not(CVariable * dst,CVariable * src0)852 inline void CEncoder::Not(CVariable* dst, CVariable* src0) 853 { 854 Arithmetic(ISA_NOT, dst, src0); 855 } 856 Frc(CVariable * dst,CVariable * src0)857 inline void CEncoder::Frc(CVariable* dst, CVariable* src0) 858 { 859 Arithmetic(ISA_FRC, dst, src0); 860 } 861 Pln(CVariable * dst,CVariable * src0,CVariable * src1)862 inline void CEncoder::Pln(CVariable* dst, CVariable* src0, CVariable* src1) 863 { 864 Arithmetic(ISA_PLANE, dst, src0, src1); 865 } 866 Cast(CVariable * dst,CVariable * src)867 inline void CEncoder::Cast(CVariable* dst, CVariable* src) 868 { 869 DataMov(ISA_MOV, dst, src); 870 } 871 872 // src0 * src1 + src2 Madw(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)873 inline void CEncoder::Madw(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) 874 { 875 Arithmetic(ISA_MADW, dst, src0, src1, src2); 876 } 877 878 // src0 * src1 + src2 Mad(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)879 inline void CEncoder::Mad(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) 880 { 881 Arithmetic(ISA_MAD, dst, src0, src1, src2); 882 } 883 Lrp(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)884 inline void CEncoder::Lrp(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) 885 { 886 Arithmetic(ISA_LRP, dst, src0, src1, src2); 887 } 888 Xor(CVariable * dst,CVariable * src0,CVariable * src1)889 inline void CEncoder::Xor(CVariable* dst, CVariable* src0, CVariable* src1) 890 { 891 LogicOp(ISA_XOR, dst, src0, src1); 892 } 893 Or(CVariable * dst,CVariable * src0,CVariable * src1)894 inline void CEncoder::Or(CVariable* dst, CVariable* src0, CVariable* src1) 895 { 896 LogicOp(ISA_OR, dst, src0, src1); 897 } 898 And(CVariable * dst,CVariable * src0,CVariable * src1)899 inline void CEncoder::And(CVariable* dst, CVariable* src0, CVariable* src1) 900 { 901 LogicOp(ISA_AND, dst, src0, src1); 902 } 903 SetP(CVariable * dst,CVariable * src0)904 inline void CEncoder::SetP(CVariable* dst, CVariable* src0) 905 { 906 // We always need no mask when doing a set predicate 907 m_encoderState.m_noMask = true; 908 DataMov(ISA_SETP, dst, src0); 909 } 910 Min(CVariable * dst,CVariable * src0,CVariable * src1)911 inline void CEncoder::Min(CVariable* dst, CVariable* src0, CVariable* src1) 912 { 913 MinMax(CISA_DM_FMIN, dst, src0, src1); 914 } 915 Max(CVariable * dst,CVariable * src0,CVariable * src1)916 inline void CEncoder::Max(CVariable* dst, CVariable* src0, CVariable* src1) 917 { 918 MinMax(CISA_DM_FMAX, dst, src0, src1); 919 } 920 UAddC(CVariable * dst,CVariable * src0,CVariable * src1)921 inline void CEncoder::UAddC(CVariable* dst, CVariable* src0, CVariable* src1) 922 { 923 CarryBorrowArith(ISA_ADDC, dst, src0, src1); 924 } 925 USubB(CVariable * dst,CVariable * src0,CVariable * src1)926 inline void CEncoder::USubB(CVariable* dst, CVariable* src0, CVariable* src1) 927 { 928 CarryBorrowArith(ISA_SUBB, dst, src0, src1); 929 } 930 LoadMS(EOPCODE subOpcode,uint writeMask,CVariable * offset,const ResourceDescriptor & resource,uint numSources,CVariable * dst,llvm::SmallVector<CVariable *,4> & payload,bool feedbackEnable)931 inline void CEncoder::LoadMS(EOPCODE subOpcode, uint writeMask, CVariable* offset, 932 const ResourceDescriptor& resource, uint numSources, CVariable* dst, 933 llvm::SmallVector<CVariable*, 4> & payload, bool feedbackEnable) 934 { 935 Load( 936 subOpcode, 937 writeMask, 938 offset, 939 resource, 940 numSources, 941 dst, 942 payload, 943 false, 944 feedbackEnable); 945 } 946 Gather(CVariable * dst,CVariable * bufId,CVariable * offset,CVariable * gOffset,e_predefSurface surface,int elementSize)947 inline void CEncoder::Gather(CVariable* dst, CVariable* bufId, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize) 948 { 949 ScatterGather(ISA_GATHER, dst, bufId, offset, gOffset, surface, elementSize); 950 } 951 TypedRead4(const ResourceDescriptor & resource,CVariable * pU,CVariable * pV,CVariable * pR,CVariable * pLOD,CVariable * pDst,uint writeMask)952 inline void CEncoder::TypedRead4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, 953 CVariable* pR, CVariable* pLOD, CVariable* pDst, uint writeMask) 954 { 955 TypedReadWrite(ISA_GATHER4_TYPED, resource, pU, pV, pR, pLOD, pDst, writeMask); 956 } 957 TypedWrite4(const ResourceDescriptor & resource,CVariable * pU,CVariable * pV,CVariable * pR,CVariable * pLOD,CVariable * pSrc,uint writeMask)958 inline void CEncoder::TypedWrite4(const ResourceDescriptor& resource, CVariable* pU, CVariable* pV, 959 CVariable* pR, CVariable* pLOD, CVariable* pSrc, uint writeMask) 960 { 961 TypedReadWrite(ISA_SCATTER4_TYPED, resource, pU, pV, pR, pLOD, pSrc, writeMask); 962 } 963 Scatter(CVariable * val,CVariable * bufidx,CVariable * offset,CVariable * gOffset,e_predefSurface surface,int elementSize)964 inline void CEncoder::Scatter(CVariable* val, CVariable* bufidx, CVariable* offset, CVariable* gOffset, e_predefSurface surface, int elementSize) 965 { 966 ScatterGather(ISA_SCATTER, val, bufidx, offset, gOffset, surface, elementSize); 967 } 968 SendC(CVariable * dst,CVariable * src,uint exDesc,CVariable * messDescriptor)969 inline void CEncoder::SendC(CVariable* dst, CVariable* src, uint exDesc, CVariable* messDescriptor) 970 { 971 Send(dst, src, exDesc, messDescriptor, true); 972 } 973 SendC(CVariable * dst,CVariable * src,uint ffid,CVariable * exDesc,CVariable * messDescriptor)974 inline void CEncoder::SendC(CVariable* dst, CVariable* src, uint ffid, CVariable* exDesc, CVariable* messDescriptor) 975 { 976 Send(dst, src, ffid, exDesc, messDescriptor, true); 977 } 978 IEEESqrt(CVariable * dst,CVariable * src0)979 inline void CEncoder::IEEESqrt(CVariable* dst, CVariable* src0) 980 { 981 Arithmetic(ISA_SQRTM, dst, src0); 982 } 983 IEEEDivide(CVariable * dst,CVariable * src0,CVariable * src1)984 inline void CEncoder::IEEEDivide(CVariable* dst, CVariable* src0, CVariable* src1) 985 { 986 Arithmetic(ISA_DIVM, dst, src0, src1); 987 } 988 dp4a(CVariable * dst,CVariable * src0,CVariable * src1,CVariable * src2)989 inline void CEncoder::dp4a(CVariable* dst, CVariable* src0, CVariable* src1, CVariable* src2) { 990 Arithmetic(ISA_DP4A, dst, src0, src1, src2); 991 } 992 SetIsCodePatchCandidate(bool v)993 inline void CEncoder::SetIsCodePatchCandidate(bool v) 994 { 995 m_isCodePatchCandidate = v; 996 } 997 IsCodePatchCandidate()998 inline bool CEncoder::IsCodePatchCandidate() 999 { 1000 return m_isCodePatchCandidate; 1001 } 1002 SetPayloadEnd(unsigned int payloadEnd)1003 inline void CEncoder::SetPayloadEnd(unsigned int payloadEnd) 1004 { 1005 m_payloadEnd = payloadEnd; 1006 } 1007 GetPayloadEnd()1008 inline unsigned int CEncoder::GetPayloadEnd() 1009 { 1010 return m_payloadEnd; 1011 } 1012 SetHasPrevKernel(bool v)1013 inline void CEncoder::SetHasPrevKernel(bool v) 1014 { 1015 m_hasPrevKernel = v; 1016 } 1017 HasPrevKernel()1018 inline bool CEncoder::HasPrevKernel() 1019 { 1020 return m_hasPrevKernel; 1021 } 1022 BeginForcedNoMaskRegion()1023 inline void CEncoder::BeginForcedNoMaskRegion() 1024 { 1025 ++m_nestLevelForcedNoMaskRegion; 1026 // Start submitting insts with NoMask control 1027 m_encoderState.m_noMask = true; 1028 } 1029 EndForcedNoMaskRegion()1030 inline void CEncoder::EndForcedNoMaskRegion() 1031 { 1032 --m_nestLevelForcedNoMaskRegion; 1033 IGC_ASSERT_MESSAGE(m_nestLevelForcedNoMaskRegion >= 0, "Invalid nesting of Unmasked regions"); 1034 // Out of unmasked region, return to submitting insts 1035 // with Mask control 1036 if (m_nestLevelForcedNoMaskRegion == 0) 1037 m_encoderState.m_noMask = false; 1038 } 1039 SetNoMask()1040 inline void CEncoder::SetNoMask() 1041 { 1042 m_encoderState.m_noMask = true; 1043 } 1044 SetMask(e_mask mask)1045 inline void CEncoder::SetMask(e_mask mask) 1046 { 1047 m_encoderState.m_mask = mask; 1048 } 1049 SetSimdSize(SIMDMode size)1050 inline void CEncoder::SetSimdSize(SIMDMode size) 1051 { 1052 m_encoderState.m_simdSize = size; 1053 } 1054 GetSimdSize()1055 inline SIMDMode CEncoder::GetSimdSize() 1056 { 1057 return m_encoderState.m_simdSize; 1058 } 1059 SetUniformSIMDSize(SIMDMode size)1060 inline void CEncoder::SetUniformSIMDSize(SIMDMode size) 1061 { 1062 m_encoderState.m_uniformSIMDSize = size; 1063 } 1064 SetSubSpanDestination(bool subspan)1065 inline void CEncoder::SetSubSpanDestination(bool subspan) 1066 { 1067 m_encoderState.m_SubSpanDestination = subspan; 1068 } 1069 SetSecondHalf(bool secondHalf)1070 inline void CEncoder::SetSecondHalf(bool secondHalf) 1071 { 1072 m_encoderState.m_secondHalf = secondHalf; 1073 } 1074 IsSecondHalf()1075 inline bool CEncoder::IsSecondHalf() 1076 { 1077 return m_encoderState.m_secondHalf; 1078 } 1079 SetSecondNibble(bool secondNibble)1080 inline void CEncoder::SetSecondNibble(bool secondNibble) 1081 { 1082 m_encoderState.m_secondNibble = secondNibble; 1083 } 1084 IsSecondNibble()1085 inline bool CEncoder::IsSecondNibble() 1086 { 1087 return m_encoderState.m_secondNibble; 1088 } 1089 IsSubSpanDestination()1090 inline bool CEncoder::IsSubSpanDestination() 1091 { 1092 return m_encoderState.m_SubSpanDestination; 1093 } 1094 1095 VISA_Modifier ConvertModifierToVisaType(e_modifier modifier); 1096 VISA_Cond_Mod ConvertCondModToVisaType(e_predicate condMod); 1097 VISA_Oword_Num ConvertSizeToVisaType(uint size); 1098 VISAChannelMask ConvertChannelMaskToVisaType(uint mask); 1099 VISASourceSingleChannel ConvertSingleSourceChannel(uint srcChannel); 1100 1101 1102 GenPrecision ConvertPrecisionToVisaType(PrecisionType P); 1103 } 1104