/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef _BUILDIR_H_
#define _BUILDIR_H_

#include <cstdarg>
#include <list>
#include <map>
#include <set>
#include <string>

#include "G4_Kernel.hpp"
#include "G4_IR.hpp"
#include "InstSplit.h"
#include "visa_igc_common_header.h"
#include "Common_ISA.h"
#include "Common_ISA_util.h"
#include "RT_Jitter_Interface.h"
#include "visa_wa.h"
#include "PreDefinedVars.h"
#include "CompilerStats.h"
#include "inc/common/sku_wa.h"


#define MAX_DWORD_VALUE  0x7fffffff
#define MIN_DWORD_VALUE  0x80000000
#define MAX_UDWORD_VALUE 0xffffffff
#define MIN_UDWORD_VALUE 0
#define MAX_WORD_VALUE   32767
#define MIN_WORD_VALUE   -32768
#define MAX_UWORD_VALUE  65535
#define MIN_UWORD_VALUE  0
#define MAX_CHAR_VALUE   127
#define MIN_CHAR_VALUE   -128
#define MAX_UCHAR_VALUE  255
#define MIN_UCHAR_VALUE  0

typedef struct FCCalls
{
    // callOffset is in inst number units
    unsigned int callOffset;
    const char* calleeLabelString;
} FCCalls;

enum DeclareType
{
    Regular = 0,
    Fill = 1,
    Spill = 2,
    Tmp = 3,
    AddrSpill = 4,
    CoalescedFill = 5,
    CoalescedSpill = 6
};

// forward declaration
// FIXME: our #include is a mess, need to clean it up
class CISA_IR_Builder;

namespace vISA
{
// IR_Builder has a member of type FCPatchingInfo.
// This class is expected to hold all FC-related information.
class FCPatchingInfo
{
private:
    // Flag to tell if this instance has any fast-composite
    // type calls. Callees for such call instructions are not available
    // in the same compilation unit.
    bool hasFCCalls;

    // Set to true if kernel has "Callable" attribute set in the VISA
    // stream.
    bool isFCCallableKernel;

    // Set to true if kernel was compiled with the /mCM_composable_kernel
    // FE flag.
    bool isFCComposableKernel;

    // Set to true if kernel has "Entry" attribute set in the VISA
    // stream.
    bool isFCEntryKernel;

    std::vector<FCCalls*> FCCallsToPatch;
    std::vector<unsigned int> FCReturnOffsetsToPatch;

public:
    FCPatchingInfo()
    {
        hasFCCalls = false;
        isFCCallableKernel = false;
        isFCComposableKernel = false;
        isFCEntryKernel = false;
    }

    void setHasFCCalls(bool hasFC) { hasFCCalls = hasFC; }
    bool getHasFCCalls() const { return hasFCCalls; }
    void setIsCallableKernel(bool value) { isFCCallableKernel = value; }
    bool getIsCallableKernel() const { return isFCCallableKernel; }
    void setFCComposableKernel(bool value) { isFCComposableKernel = value; }
    bool getFCComposableKernel() const { return isFCComposableKernel; }
    void setIsEntryKernel(bool value) { isFCEntryKernel = value; }
    bool getIsEntryKernel() const { return isFCEntryKernel; }
    std::vector<FCCalls*>& getFCCallsToPatch() { return FCCallsToPatch; }
    std::vector<unsigned int>& getFCReturnsToPatch() { return FCReturnOffsetsToPatch; }

    enum RegAccessType : unsigned char {
        Fully_Use = 0,
        Partial_Use = 1,
        Fully_Def = 2,
        Partial_Def = 3
    };

    enum RegAccessPipe : unsigned char {
        Pipe_ALU = 0,
        Pipe_Math = 1,
        Pipe_Send = 2,
        Pipe_Dpas = 3
    };

    struct RegAccess {
        RegAccess *Next;    // The next access on the same GRF.
        RegAccessType Type; // 'def' or 'use' of that GRF.
        unsigned RegNo;     // GRF.
        unsigned Pipe;      // Pipe where that access is issued.
        G4_INST *Inst;      // Where that GRF is accessed.
        unsigned Offset;    // Instruction offset populated finally.
        unsigned Token;     // Optional token allocation associated to 'def'.

        RegAccess() :
            Next(nullptr), Type(Fully_Use), RegNo(0), Pipe(0), Inst(nullptr),
            Offset(0), Token(0) {}
    };

    // FIXME: Need to consider the pipeline ID together with the GRF since
    // different pipes will use/def out of order. Need to synchronize all of
    // them to resolve the dependency.

    std::list<RegAccess> RegFirstAccessList;
    std::list<RegAccess> RegLastAccessList;
    // Per GRF, the first access.
    std::map<unsigned, RegAccess *> RegFirstAccessMap;
    // Per GRF, the last access.
    std::map<unsigned, RegAccess *> RegLastAccessMap;
    // Note that the tokens recorded are tokens allocated but not used in the
    // last access list.
    std::set<unsigned> AllocatedToken; // Allocated tokens.
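    // Illustrative note (editorial sketch, not part of the original header):
    // RegFirstAccessMap/RegLastAccessMap give O(1) access to the head/tail
    // RegAccess node of a given GRF, and the RegAccess::Next links let a
    // patcher walk every recorded def/use of that GRF in program order, e.g.
    //
    //     // 'regNo' is a hypothetical GRF number
    //     for (RegAccess* a = RegFirstAccessMap[regNo]; a != nullptr; a = a->Next) {
    //         /* inspect a->Type, a->Inst, a->Offset, a->Token ... */
    //     }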
};
}

namespace vISA
{
//
// hash table for holding reg vars and reg regions
//

class OperandHashTable
{
    Mem_Manager& mem;

    struct ImmKey
    {
        int64_t val;
        G4_Type valType;
        ImmKey(int64_t imm, G4_Type type) : val(imm), valType(type) {}
        bool operator==(const ImmKey& imm) const
        {
            return val == imm.val && valType == imm.valType;
        }
    };

    struct ImmKeyHash
    {
        std::size_t operator()(const ImmKey& imm) const
        {
            return (std::size_t) (imm.val ^ imm.valType);
        }
    };

    struct stringCompare
    {
        bool operator() (const char* s1, const char* s2) const
        {
            return strcmp(s1, s2) == 0;
        }
    };

    std::unordered_map<ImmKey, G4_Imm*, ImmKeyHash> immTable;
    std::unordered_map<const char *, G4_Label*, std::hash<const char*>, stringCompare> labelTable;

public:
    OperandHashTable(Mem_Manager& m) : mem(m)
    {
    }

    // Generic methods that work on both integer and floating-point types.
    // For floating-point types, 'imm' needs to be the bit pattern returned by
    // G4_Imm(<float-value>).getImm().
    G4_Imm* lookupImm(int64_t imm, G4_Type ty);
    G4_Imm* createImm(int64_t imm, G4_Type ty);
};

//
// place for holding all region descriptions
//
class RegionPool
{
    Mem_Manager& mem;
    std::vector<RegionDesc*> rgnlist;
public:
    RegionPool(Mem_Manager& m) : mem(m) {}
    const RegionDesc* createRegion(
        uint16_t vstride, uint16_t width, uint16_t hstride);
};

//
// place for holding all .declares
//
class DeclarePool
{
    Mem_Manager& mem;
    std::vector<G4_Declare*> dcllist;
    int addrSpillLocCount; // incremented in G4_RegVarAddrSpillLoc()
public:
    DeclarePool(Mem_Manager& m) : mem(m), addrSpillLocCount(0) { dcllist.reserve(2048); }
    ~DeclarePool();

    G4_Declare* createDeclare(
        const char* name,
        G4_RegFileKind regFile,
        unsigned short nElems,
        unsigned short nRows,
        G4_Type ty,
        DeclareType kind = Regular,
        G4_RegVar * base = nullptr,
        G4_Operand * repRegion = nullptr,
        G4_ExecSize execSize = G4_ExecSize(0));

    G4_Declare* createPreVarDeclare(
        PreDefinedVarsInternal index,
        unsigned short n_elems,
        unsigned short n_rows,
        G4_Type ty)
    {
        G4_Declare* dcl = new (mem) G4_Declare(getPredefinedVarString(index), G4_INPUT, n_elems * n_rows, ty, dcllist);
        G4_RegVar* regVar = new (mem) G4_RegVar(dcl, G4_RegVar::RegVarType::Default);
        dcl->setRegVar(regVar);

        return dcl;
    }

    std::vector<G4_Declare*>& getDeclareList() { return dcllist; }
};


//
// interface for creating operands and instructions
//
class IR_Builder
{
public:
    const char* curFile;
    unsigned int curLine;
    int curCISAOffset;

    static const int OrphanVISAIndex = 0xffffffff;
    int debugInfoPlaceholder = OrphanVISAIndex; // used for debug info, catch-all VISA offset for orphan instructions

private:
    class GlobalImmPool
    {
        struct ImmVal
        {
            G4_Imm* imm;
            int numElt;

            bool operator==(const ImmVal& v) const {
                return imm == v.imm && numElt == v.numElt;
            }
        };
        static const int MAX_POOL_SIZE = 8; // reg pressure control; for now just do naive first-come first-serve
        std::array<ImmVal, MAX_POOL_SIZE> immArray;
        std::array<G4_Declare*, MAX_POOL_SIZE> dclArray;
        int curSize = 0;
        IR_Builder& builder;

    public:
        GlobalImmPool(IR_Builder& b) : builder(b), immArray(), dclArray() {}
        G4_Declare* addImmVal(G4_Imm* imm, int numElt);
        int size() const { return curSize; }
        const ImmVal& getImmVal(int i) { return immArray[i]; }
        G4_Declare* getImmDcl(int i) { return dclArray[i]; }
    };

    GlobalImmPool immPool;

    const TARGET_PLATFORM platform;

    // allocator pools
    USE_DEF_ALLOCATOR useDefAllocator;

    FINALIZER_INFO* metaData = nullptr;
    CompilerStats compilerStats;

    int subroutineId = -1; // the kernel itself has id 0, as we always emit a subroutine label for the kernel too
    enum VISA_BUILD_TYPE type; // as opposed to what?

    // pre-defined declare that binds to R0 (the entire GRF)
    // when pre-emption is enabled, builtinR0 is replaced by a temp,
    // and a move is inserted at kernel entry
    // mov (8) builtinR0 realR0
    G4_Declare* builtinR0 = nullptr; // this is either r0 or the temp if pre-emption is enabled
    G4_Declare* realR0 = nullptr;    // this always refers to r0

    // pre-defined declare that binds to A0.0:ud
    G4_Declare* builtinA0 = nullptr;
    // pre-defined declare that binds to A0.2:ud
    G4_Declare* builtinA0Dot2 = nullptr; // used for splitsend's ext msg descriptor
    // pre-defined declare that binds to HWTid (R0.5:ud)
    G4_Declare* builtinHWTID = nullptr;
    // pre-defined bindless surface index (252, 1 UD)
    G4_Declare* builtinT252 = nullptr;
    // pre-defined bindless sampler index (31, 1 UD)
    G4_Declare* builtinBindlessSampler = nullptr;
    // pre-defined sampler header
    G4_Declare* builtinSamplerHeader = nullptr;

    // common message header for spill/fill intrinsics.
    // We put it here instead of in spillManager since there may be multiple rounds of spill,
    // and we want to use a common header
    G4_Declare* spillFillHeader = nullptr;

    G4_Declare* oldA0Dot2Temp = nullptr;

    G4_Declare* builtinScratchSurface = nullptr;
    G4_Declare* scratchSurfaceOffset = nullptr; // if the scratch surface is used, this will be initialized once at entry

    // The temp var for the EU fusion W/A
    G4_Declare* euFusionWATmpVar = nullptr;

    // Indicates that the sampler header cache (builtinSamplerHeader) is correctly
    // initialized with r0 contents.
    // Used only when the vISA_cacheSamplerHeader option is set.
    bool builtinSamplerHeaderInitialized;

    // function call related declares
    G4_Declare* be_sp = nullptr;
    G4_Declare* be_fp = nullptr;
    // Part FDE inst is the move that stores r125.[0-3] to a temp.
    // This is used to restore ret %ip, ret EM, and BE ptrs.
    G4_INST* savePartFDInst = nullptr;
    // FDE spill inst is the first spill instruction that writes the frame
    // descriptor to the stack.
    G4_INST* FDSpillInst = nullptr;
    G4_Declare* tmpFCRet = nullptr;
    // Used to store implicit arg and local id buffer ptrs for stackcall
    G4_Declare* implArgBufferPtr = nullptr;
    G4_Declare* localIdBufferPtr = nullptr;

    unsigned short arg_size;
    unsigned short return_var_size;

    unsigned int sampler8x8_group_id;

    // input declare of R1.
    G4_Declare* inputR1 = nullptr;

    // Populate this data structure so that after compiling all kernels
    // in the file, we can emit the patch file using this up-levelled
    // information.
    FCPatchingInfo* fcPatchInfo = nullptr;

    const WA_TABLE *m_pWaTable;
    Options *m_options = nullptr;

    std::map<const G4_INST*, G4_FCALL*> m_fcallInfo;

    // Basic region descriptors.
    RegionDesc CanonicalRegionStride0, // <0; 1, 0>
        CanonicalRegionStride1,        // <1; 1, 0>
        CanonicalRegionStride2,        // <2; 1, 0>
        CanonicalRegionStride4;        // <4; 1, 0>

    // map of all stack functions ever invoked by this builder's kernel/function
    std::map<std::string, G4_Label*> m_fcallLabels;

    G4_Label* getFcallLabel(const std::string &str)
    {
        auto it = m_fcallLabels.find(str);
        if (it == m_fcallLabels.end())
        {
            auto label = createLabel(str, LABEL_FUNCTION);
            m_fcallLabels[str] = label;
            return label;
        }
        return it->second;
    }

    class PreDefinedVars
    {
    public:
        void setHasPredefined(PreDefinedVarsInternal id, bool val) {
            hasPredefined[static_cast<int>(id)] = val;
        }

        bool isHasPredefined(PreDefinedVarsInternal id) const {
            return hasPredefined[static_cast<int>(id)];
        }

        void setPredefinedVar(PreDefinedVarsInternal id, G4_Declare *dcl) {
            predefinedVars[static_cast<int>(id)] = dcl;
        }

        G4_Declare* getPreDefinedVar(PreDefinedVarsInternal id) const {
            if (id >= PreDefinedVarsInternal::VAR_LAST) {
                return nullptr;
            }
            return predefinedVars[static_cast<int>(id)];
        }
    private:
        // records whether a vISA pre-defined var is used by the kernel;
        // some predefined vars need to be expanded (e.g., HWTid, color)
        bool hasPredefined[static_cast<int>(PreDefinedVarsInternal::VAR_LAST)] {};
        G4_Declare* predefinedVars[static_cast<int>(PreDefinedVarsInternal::VAR_LAST)] {};
    };

    bool hasNullReturnSampler = false;

    const CISA_IR_Builder* parentBuilder = nullptr;

    // stores all metadata ever allocated
    Mem_Manager metadataMem;
    std::vector<Metadata*> allMDs;
    std::vector<MDNode*> allMDNodes;

public:
    PreDefinedVars preDefVars;
    Mem_Manager& mem;           // memory for all operands and insts
    PhyRegPool phyregpool;      // all physical regs
    OperandHashTable hashtable; // all created region operands
    RegionPool rgnpool;         // all region descriptions
    DeclarePool dclpool;        // all created declares
    INST_LIST instList;         // all created insts
    // list of instructions ever allocated.
    // This list may only grow and is freed when IR_Builder is destroyed
    std::vector<G4_INST*> instAllocList;
    G4_Kernel& kernel;
    // the following fields are used for the dcl name when a new dcl is created.
    // The number of predefined variables is included.
    unsigned num_temp_dcl;
    // number of temp GRF vars created to hold spilled addr/flag
    uint32_t numAddrFlagSpillLoc = 0;
    std::vector<input_info_t*> m_inputVect;

    const Options* getOptions() const { return m_options; }
    bool getOption(vISAOptions opt) const { return m_options->getOption(opt); }
    uint32_t getuint32Option(vISAOptions opt) const { return m_options->getuInt32Option(opt); }
    void getOption(vISAOptions opt, const char *&str) const { return m_options->getOption(opt, str); }
    void addInputArg(input_info_t * inpt);
    input_info_t * getInputArg(unsigned int index) const;
    unsigned int getInputCount() const;
    input_info_t * getRetIPArg() const;

    const CISA_IR_Builder* getParent() const { return parentBuilder; }

    void dump(std::ostream &os); // not const because G4_INST::emit isn't :(

    std::stringstream& criticalMsgStream();

    const USE_DEF_ALLOCATOR& getAllocator() const { return useDefAllocator; }

    // The following enum describes the layout of r125 on entry to a function.
    // Ret_IP and Ret_EM may be altered due to callees. They'll be
    // restored right before fret.
    enum SubRegs_Stackcall
    {
        Ret_IP = 0, // :ud
        Ret_EM = 1, // :ud
        BE_SP = 2,  // :ud
        BE_FP = 3,  // :ud
        FE_FP = 2,  // :uq
        FE_SP = 3,  // :uq
    };

    enum SubRegs_ImplPtrs
    {
        ImplBufPtr = 0,    // :uq
        LocalIdBufPtr = 3, // :uq
    };

    enum ArgRet_Stackcall
    {
        Arg = 26,
        Ret = 26
    };

    // Getter/setter for be_sp and be_fp
    G4_Declare* getBESP()
    {
        if (be_sp == NULL)
        {
            be_sp = createDeclareNoLookup("be_sp", G4_GRF, 1, 1, Type_UD);
            be_sp->getRegVar()->setPhyReg(
                phyregpool.getGreg(kernel.getFPSPGRF()), SubRegs_Stackcall::BE_SP);
        }

        return be_sp;
    }

    G4_Declare* getBEFP()
    {
        if (be_fp == NULL)
        {
            be_fp = createDeclareNoLookup("be_fp", G4_GRF, 1, 1, Type_UD);
            be_fp->getRegVar()->setPhyReg(
                phyregpool.getGreg(kernel.getFPSPGRF()), SubRegs_Stackcall::BE_FP);
        }

        return be_fp;
    }

    G4_INST* getPartFDSaveInst() const { return savePartFDInst; }
    void setPartFDSaveInst(G4_INST* i) { savePartFDInst = i; }

    G4_INST* getFDSpillInst() const { return FDSpillInst; }
    void setFDSpillInst(G4_INST* i) { FDSpillInst = i; }

    G4_Declare* getStackCallArg() const {
        return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::ARG);
    }
    G4_Declare* getStackCallRet() const {
        return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::RET);
    }

    G4_Declare* getFE_SP() const {
        return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::FE_SP);
    }

    G4_Declare* getFE_FP() const {
        return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::FE_FP);
    }

    bool isPreDefArg(G4_Declare* dcl) const {
        return dcl == getStackCallArg();
    }

    bool isPreDefRet(G4_Declare* dcl) const {
        return dcl == getStackCallRet();
    }

    bool isPreDefFEStackVar(G4_Declare* dcl) const {
        return dcl == getFE_SP() || dcl == getFE_FP();
    }
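    // Illustrative sketch (editorial addition; 'befpVal' is a hypothetical name):
    // code that needs the current BE frame pointer as a scalar :ud source can do
    //
    //     G4_SrcRegRegion* befpVal =
    //         createSrc(getBEFP()->getRegVar(), 0, 0, getRegionScalar(), Type_UD);
    //
    // since getBEFP() lazily creates the declare and pins it to the BE_FP
    // subregister of the FP/SP GRF (r125 per the SubRegs_Stackcall layout above).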
    // this refers to vISA's internal stack for spill and caller/callee-save.
    // Note that this is only valid after the CFG is constructed
    // ToDo: make this a pass?
    bool usesStack() const {
        return kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc();
    }

    void bindInputDecl(G4_Declare* dcl, int grfOffset);

    uint32_t getPerThreadInputSize() const {
        return kernel.getInt32KernelAttr(Attributes::ATTR_PerThreadInputSize);
    }

    int32_t getCrossThreadInputSize() const {
        return kernel.getInt32KernelAttr(Attributes::ATTR_CrossThreadInputSize);
    }

    bool getLTOInvokeOptTarget() const {
        return kernel.getBoolKernelAttr(Attributes::ATTR_LTOInvokeOptTarget);
    }

    //
    // Check if opnd is or can be made "alignByte"-byte aligned.
    // These functions will change the underlying variable's alignment
    // (e.g., make a scalar variable GRF-aligned) when possible to satisfy
    // the alignment.
    bool isOpndAligned(G4_Operand* opnd, int alignByte) const;
    bool isOpndAligned(G4_Operand *opnd, unsigned short &offset, int align_byte) const;

    void setType(enum VISA_BUILD_TYPE _type) { type = _type; }
    bool getIsKernel() const { return type == VISA_BUILD_TYPE::KERNEL; }
    bool getIsFunction() const { return type == VISA_BUILD_TYPE::FUNCTION; }
    bool getIsPayload() const { return type == VISA_BUILD_TYPE::PAYLOAD; }
    enum VISA_BUILD_TYPE getType() const { return type; }
    void predefinedVarRegAssignment(uint8_t inputSize);
    void expandPredefinedVars();
    void setArgSize(unsigned short size) { arg_size = size; }
    unsigned short getArgSize() const { return arg_size; }
    void setRetVarSize(unsigned short size) { return_var_size = size; }
    unsigned short getRetVarSize() const { return return_var_size; }

    FCPatchingInfo* getFCPatchInfo();
    void setFCPatchInfo(FCPatchingInfo* instance) { fcPatchInfo = instance; }

    const WA_TABLE *getPWaTable() const { return m_pWaTable; }

    static const char* getNameString(Mem_Manager& mem, size_t size, const char* format, ...);

    G4_Predicate_Control vISAPredicateToG4Predicate(
        VISA_PREDICATE_CONTROL control, G4_ExecSize execSize);

    G4_FCALL* getFcallInfo(const G4_INST* inst) const;
    void addFcallInfo(const G4_INST* FcallInst, uint16_t ArgSize, uint16_t RetSize)
    {
        m_fcallInfo[FcallInst] = new (mem) G4_FCALL(ArgSize, RetSize);
    }

    // If this is true (detected in TranslateInterface.cpp), we need a sampler flush before EOT
    bool getHasNullReturnSampler() const { return hasNullReturnSampler; }

    // Initializes predefined vars for all the vISA versions
    void createPreDefinedVars();

    void createBuiltinDecls();

    G4_Declare* getSpillFillHeader();

    G4_Declare* getEUFusionWATmpVar();

    G4_Declare* getOldA0Dot2Temp();
    bool hasValidOldA0Dot2() { return oldA0Dot2Temp; }

    IR_Builder(
        TARGET_PLATFORM genPlatform,
        INST_LIST_NODE_ALLOCATOR &alloc,
        G4_Kernel &k,
        Mem_Manager &m,
        Options *options,
        CISA_IR_Builder* parent,
        FINALIZER_INFO *jitInfo,
        const WA_TABLE *pWaTable);
    ~IR_Builder();

    void rebuildPhyRegPool(unsigned int numRegisters) {
        phyregpool.rebuildRegPool(mem, numRegisters);
    }

    TARGET_PLATFORM getPlatform() const { return platform; }
    FINALIZER_INFO* getJitInfo() { return metaData; }
    CompilerStats& getcompilerStats() { return compilerStats; }

    G4_Declare* createDeclareNoLookup(
        const char* name,
        G4_RegFileKind regFile,
        unsigned short n_elems,
        unsigned short n_rows,
        G4_Type ty,
        DeclareType kind = Regular,
        G4_RegVar * base = NULL,
        G4_Operand * repRegion = NULL,
        G4_ExecSize execSize = G4_ExecSize(0));

    G4_Declare* createPreVarDeclareNoLookup(
        PreDefinedVarsInternal index,
        unsigned short n_elems,
        unsigned short n_rows,
        G4_Type ty)
    {
        G4_Declare* dcl = dclpool.createPreVarDeclare(index, n_elems, n_rows, ty);
        kernel.Declares.push_back(dcl);
        return dcl;
    }

    G4_Declare* getBuiltinR0() { return builtinR0; }
    G4_Declare* getRealR0() const { return realR0; } // undefined terminology: what's "real" here (vs. "builtin" above)?
    G4_Declare* getBuiltinA0() { return builtinA0; }
    G4_Declare* getBuiltinA0Dot2() { return builtinA0Dot2; }
    G4_Declare* getBuiltinHWTID() const { return builtinHWTID; }
    G4_Declare* getBuiltinT252() const { return builtinT252; }
    G4_Declare* getBuiltinBindlessSampler() const { return builtinBindlessSampler; }
    G4_Declare* getBuiltinSamplerHeader() const { return builtinSamplerHeader; }
    G4_Declare* getOldA0Dot2Temp() const { return oldA0Dot2Temp; }

    G4_Declare* getInputR1() { return inputR1; }
    void setInputR1(G4_Declare* r1) { inputR1 = r1; }

    bool isBindlessSampler(const G4_Operand* sampler) const {
        return sampler->isSrcRegRegion() && sampler->getTopDcl() == getBuiltinBindlessSampler();
    }

    bool isBindlessSurface(const G4_Operand* bti) const {
        return bti->isSrcRegRegion() && bti->getTopDcl() == getBuiltinT252();
    }

    // IsSLMSurface - Check whether the given surface is the SLM surface.
    static bool IsSLMSurface(const G4_Operand *surface) {
        // So far, it's only reliable to check an immediate surface.
        return surface->isImm() && surface->asImm()->getImm() == PREDEF_SURF_0;
    }

    static const G4_Declare *getDeclare(const G4_Operand *opnd) {
        const G4_Declare *dcl = opnd->getBase()->asRegVar()->getDeclare();

        while (const G4_Declare *parentDcl = dcl->getAliasDeclare())
            dcl = parentDcl;

        return dcl;
    }
    static G4_Declare *getDeclare(G4_Operand *opnd) {
        return const_cast<G4_Declare *>(getDeclare((const G4_Operand *)opnd));
    }

    bool shouldForceSplitSend(const G4_Operand* surface) const
    {
        return surface->isSrcRegRegion() &&
            (surface->getTopDcl() == getBuiltinT252() ||
             surface->getTopDcl() == getBuiltinScratchSurface());
    }

    /// getSplitEMask() calculates the new mask after splitting from the current
    /// execution mask at the given execution size.
    /// It only works with masks covering a whole GRF and thus won't generate/consume
    /// nibbles.
    static uint32_t getSplitEMask(unsigned execSize, uint32_t eMask, bool isLo);
    static uint32_t getSplitLoEMask(unsigned execSize, uint32_t eMask) {
        return getSplitEMask(execSize, eMask, true);
    }
    static uint32_t getSplitHiEMask(unsigned execSize, uint32_t eMask) {
        return getSplitEMask(execSize, eMask, false);
    }

    bool isScratchSpace(G4_Operand* bti) const {
        return bti->isSrcRegRegion() && bti->getTopDcl() == builtinScratchSurface;
    }
    G4_Declare* getBuiltinScratchSurface() const {
        return builtinScratchSurface;
    }

    G4_SrcRegRegion* createScratchExDesc(uint32_t exdesc);

    void initScratchSurfaceOffset();

    G4_Declare* getSpillSurfaceOffset() { return scratchSurfaceOffset; }

    // create a new temp GRF with the specified type/size and undefined regions
    G4_Declare* createTempVar(
        unsigned int numElements, G4_Type type, G4_SubReg_Align subAlign,
        const char* prefix = "TV", bool appendIdToName = true);


    // create a new temp GRF as the home location of a spilled addr/flag dcl
    G4_Declare* createAddrFlagSpillLoc(G4_Declare* dcl);


    // like the above, but also mark the variable as do-not-spill.
    // This is used for temp variables in macro sequences where spilling would not help.
    // FIXME: can we somehow merge this with G4_RegVarTmp/G4_RegVarTransient?
    G4_Declare* createTempVarWithNoSpill(
        unsigned int numElements, G4_Type type, G4_SubReg_Align subAlign,
        const char* prefix = "TV")
    {
        G4_Declare* dcl = createTempVar(numElements, type, subAlign, prefix);
        dcl->setDoNotSpill();
        return dcl;
    }

    //
    // Create a declare that is hardwired to some physical GRF.
    // It is useful to implement various workarounds post-RA where we want to directly
    // address some physical GRF.
    // regOff is in units of the declare type.
    // The caller is responsible for ensuring the resulting variable does not violate any HW restrictions
    // (e.g., an operand must not cross two GRFs).
    G4_Declare* createHardwiredDeclare(
        uint32_t numElements, G4_Type type, uint32_t regNum, uint32_t regOff);

    G4_INST* createPseudoKills(std::initializer_list<G4_Declare*> dcls, PseudoKillType ty);

    G4_INST* createPseudoKill(G4_Declare* dcl, PseudoKillType ty);

    G4_INST* createEUWASpill(bool addToInstList);

    // numRows is in hword units
    // offset is in hword units
    G4_INST* createSpill(
        G4_DstRegRegion* dst, G4_SrcRegRegion* header, G4_SrcRegRegion* payload,
        G4_ExecSize execSize,
        uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
        bool addToInstList);

    G4_INST* createSpill(
        G4_DstRegRegion* dst, G4_SrcRegRegion* payload,
        G4_ExecSize execSize, uint16_t numRows, uint32_t offset,
        G4_Declare* fp, G4_InstOption option, bool addToInstList);


    G4_INST* createFill(
        G4_SrcRegRegion* header,
        G4_DstRegRegion* dstData, G4_ExecSize execSize,
        uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
        bool addToInstList);
    G4_INST* createFill(
        G4_DstRegRegion* dstData, G4_ExecSize execSize,
        uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
        bool addToInstList);


    // numberOfFlags MEANS NUMBER OF WORDS (e.g., 1 means 16-bit), not the number of bits
    // or the number of data elements in operands.
    G4_Declare* createTempFlag(unsigned short numberOfFlags, const char* prefix = "TEMP_FLAG_");

    // like above, but pass numFlagElements instead. This allows us to distinguish between 1/8/16-bit flags,
    // which are all allocated as a UW. name is allocated by the caller.
    G4_Declare* createFlag(uint16_t numFlagElements, const char* name);

    G4_Declare* createTempScalar(uint16_t numFlagElements, const char* prefix);

    G4_Declare* createScalar(uint16_t numFlagElements, const char* name);

    G4_Declare* createPreVar(
        PreDefinedVarsInternal preDefVar_index, unsigned short numElements, G4_Type type);

    //
    // create <vstride; width, hstride>
    //
    // PLEASE use the getRegion* interface to get regions if possible!
    // This function will be mostly used for external regions.
    const RegionDesc* createRegionDesc(
        uint16_t vstride,
        uint16_t width,
        uint16_t hstride)
    {
        return rgnpool.createRegion(vstride, width, hstride);
    }

    // Given the execution size and region parameters, create a region
    // descriptor.
    //
    // PLEASE use the getRegion* interface to get regions if possible!
    // This function will be mostly used for external regions.
    const RegionDesc *createRegionDesc(
        uint16_t execSize, uint16_t vstride, uint16_t width, uint16_t hstride)
    {
        // Performs normalization for commonly used regions.
        switch (RegionDesc::getRegionDescKind(execSize, vstride, width, hstride)) {
        case RegionDesc::RK_Stride0: return getRegionScalar();
        case RegionDesc::RK_Stride1: return getRegionStride1();
        case RegionDesc::RK_Stride2: return getRegionStride2();
        case RegionDesc::RK_Stride4: return getRegionStride4();
        default: break;
        }
        return rgnpool.createRegion(vstride, width, hstride);
    }
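    // Illustrative usage (editorial sketch): the canonical descriptors cover the
    // common cases, e.g. a scalar/broadcast source uses <0;1,0> via getRegionScalar()
    // and a packed row uses <1;1,0> via getRegionStride1(); a non-canonical region
    // can still be requested explicitly through the pool:
    //
    //     const RegionDesc* rd = createRegionDesc(8, 4, 2);   // <8;4,2>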
    /// Helper to normalize an existing region descriptor.
    const RegionDesc *getNormalizedRegion(uint16_t execSize, const RegionDesc *rd) {
        return createRegionDesc(execSize, rd->vertStride, rd->width, rd->horzStride);
    }

    /// Get the predefined region descriptors.
    const RegionDesc *getRegionScalar() const { return &CanonicalRegionStride0; }
    const RegionDesc *getRegionStride1() const { return &CanonicalRegionStride1; }
    const RegionDesc *getRegionStride2() const { return &CanonicalRegionStride2; }
    const RegionDesc *getRegionStride4() const { return &CanonicalRegionStride4; }

    // ToDo: get rid of this version and use the message type specific ones below instead,
    // so we can avoid having to explicitly create extDesc bits
    G4_SendDescRaw * createGeneralMsgDesc(
        uint32_t desc,
        uint32_t extDesc,
        SendAccess access,
        G4_Operand* bti = nullptr,
        G4_Operand* sti = nullptr);

    G4_SendDescRaw * createReadMsgDesc(
        SFID sfid,
        uint32_t desc,
        G4_Operand* bti = nullptr);

    G4_SendDescRaw * createWriteMsgDesc(
        SFID sfid,
        uint32_t desc,
        int src1Len,
        G4_Operand* bti = nullptr);

    G4_SendDescRaw * createSyncMsgDesc(
        SFID sfid,
        uint32_t desc);

    G4_SendDescRaw * createSampleMsgDesc(
        uint32_t desc,
        bool cps,
        int src1Len,
        G4_Operand* bti,
        G4_Operand* sti);

    static SendAccess getSendAccessType(bool isRead, bool isWrite) {
        if (isRead && isWrite)
        {
            return SendAccess::READ_WRITE;
        }
        return isRead ? SendAccess::READ_ONLY : SendAccess::WRITE_ONLY;
    }

    G4_SendDescRaw* createSendMsgDesc(
        SFID sfid,
        uint32_t desc,
        uint32_t extDesc,
        int src1Len,
        SendAccess access,
        G4_Operand* bti,
        G4_ExecSize execSize,
        bool isValidFuncCtrl = true);

    G4_SendDescRaw * createSendMsgDesc(
        SFID sfid,
        uint32_t desc,
        uint32_t extDesc,
        int src1Len,
        SendAccess access,
        G4_Operand *bti,
        bool isValidFuncCtrl = true);

    G4_SendDescRaw * createSendMsgDesc(
        unsigned funcCtrl,
        unsigned regs2rcv,
        unsigned regs2snd,
        SFID funcID,
        unsigned extMsgLength,
        uint16_t extFuncCtrl,
        SendAccess access,
        G4_Operand* bti = nullptr,
        G4_Operand* sti = nullptr);

    G4_Operand* emitSampleIndexGE16(
        G4_Operand* sampler, G4_Declare* headerDecl);

    //
    // deprecated, please use the one below
    //
    G4_SrcRegRegion* createSrcRegRegion(G4_SrcRegRegion& src)
    {
        G4_SrcRegRegion* rgn = new (mem) G4_SrcRegRegion(src);
        return rgn;
    }

    // Create a new srcRegRegion allocated in mem
    G4_SrcRegRegion* createSrc(
        G4_VarBase* b,
        short roff,
        short sroff,
        const RegionDesc *rd,
        G4_Type ty,
        G4_AccRegSel regSel = ACC_UNDEFINED)
    {
        return createSrcRegRegion(G4_SrcModifier::Mod_src_undef, G4_RegAccess::Direct, b, roff, sroff, rd, ty, regSel);
    }

    // deprecated, either use createSrc or createIndirectSrc
    G4_SrcRegRegion* createSrcRegRegion(
        G4_SrcModifier m,
        G4_RegAccess a,
        G4_VarBase* b,
        short roff,
        short sroff,
        const RegionDesc *rd,
        G4_Type ty,
        G4_AccRegSel regSel = ACC_UNDEFINED)
    {
        G4_SrcRegRegion* rgn = new (mem) G4_SrcRegRegion(m, a, b, roff, sroff, rd, ty, regSel);
        return rgn;
    }
    G4_SrcRegRegion* createSrcWithNewRegOff(G4_SrcRegRegion* old, short newRegOff);
    G4_SrcRegRegion* createSrcWithNewSubRegOff(G4_SrcRegRegion* old, short newSubRegOff);
    G4_SrcRegRegion* createSrcWithNewBase(G4_SrcRegRegion* old, G4_VarBase* newBase);

    G4_SrcRegRegion* createIndirectSrc(
        G4_SrcModifier m,
        G4_VarBase* b,
        short roff,
        short sroff,
        const RegionDesc* rd,
        G4_Type ty,
        short immAddrOff)
    {
        G4_SrcRegRegion* rgn = new (mem) G4_SrcRegRegion(m, IndirGRF, b, roff, sroff, rd, ty, ACC_UNDEFINED);
        rgn->setImmAddrOff(immAddrOff);
        return rgn;
    }

    //
    // deprecated, please use the version below
    //
    G4_DstRegRegion* createDstRegRegion(G4_DstRegRegion& dst)
    {
        G4_DstRegRegion* rgn = new (mem) G4_DstRegRegion(dst);
        return rgn;
    }

    // create a direct DstRegRegion
    G4_DstRegRegion* createDst(
        G4_VarBase* b,
        short roff,
        short sroff,
        unsigned short hstride,
        G4_Type ty,
        G4_AccRegSel regSel = ACC_UNDEFINED)
    {
        return createDstRegRegion(Direct, b, roff, sroff, hstride, ty, regSel);
    }
    // create a direct DstRegRegion
    G4_DstRegRegion* createDst(G4_VarBase* b, G4_Type ty)
    {
        return createDstRegRegion(Direct, b, 0, 0, 1, ty, ACC_UNDEFINED);
    }

    // create an indirect DstRegRegion
    // b is the address variable, which only supports subreg offset
    G4_DstRegRegion* createIndirectDst(
        G4_VarBase* b,
        short sroff,
        uint16_t hstride,
        G4_Type ty,
        int16_t immOff)
    {
        auto dst = createDstRegRegion(IndirGRF, b, 0, sroff, hstride, ty);
        dst->setImmAddrOff(immOff);
        return dst;
    }

    G4_DstRegRegion* createDstWithNewSubRegOff(G4_DstRegRegion* old, short newSubRegOff);


    //
    // return the imm operand; create one if not yet created
    //
    G4_Imm* createImm(int64_t imm, G4_Type ty)
    {
        G4_Imm* i = hashtable.lookupImm(imm, ty);
        return (i != NULL) ? i : hashtable.createImm(imm, ty);
    }

    //
    // return the float operand; create one if not yet created
    //
    G4_Imm* createImm(float fp);

    //
    // return the double operand; create one if not yet created
    //
    G4_Imm* createDFImm(double fp);


    // For integer immediates use a narrower type if possible;
    // also change byte type to word type since HW does not support byte imm
    G4_Type getNewType(int64_t imm, G4_Type ty);

    //
    // return the imm operand with its lowest type (W or above); create one if not yet created
    //
    G4_Imm* createImmWithLowerType(int64_t imm, G4_Type ty)
    {
        G4_Type new_type = getNewType(imm, ty);
        G4_Imm* i = hashtable.lookupImm(imm, new_type);
        return (i != NULL) ? i : hashtable.createImm(imm, new_type);
    }
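    // Illustrative note (editorial sketch): immediates are canonicalized by
    // (value, type) through the operand hash table, so repeated requests return
    // the same object:
    //
    //     G4_Imm* a = createImm(16, Type_UD);
    //     G4_Imm* b = createImm(16, Type_UD);   // b == a
    //
    // createImmWithLowerType(16, Type_D) would instead hash under the narrower
    // type chosen by getNewType() (presumably :w here, since HW has no byte
    // immediates and small values are kept at word width or above).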
    //
    // Create an immediate operand without looking up the hash table. This operand
    // is a relocatable immediate type.
    //
    G4_Reloc_Imm* createRelocImm(G4_Type ty)
    {
        G4_Reloc_Imm* newImm;
        newImm = new (mem) G4_Reloc_Imm(ty);
        return newImm;
    }

    //
    // Create an immediate operand without looking up the hash table. This operand
    // is a relocatable immediate type. Specify the value of this imm field,
    // which will be present in the output instruction's imm value.
    //
    G4_Reloc_Imm* createRelocImm(int64_t immval, G4_Type ty)
    {
        G4_Reloc_Imm* newImm;
        newImm = new (mem) G4_Reloc_Imm(immval, ty);
        return newImm;
    }

    //
    // a new null-terminated copy of "lab" is created for the new label, so the
    // caller does not have to allocate memory for lab
    //
    G4_Label* createLabel(const std::string &lab, VISA_Label_Kind kind)
    {
        auto labStr = lab.c_str();
        size_t len = strlen(labStr) + 1; // +1 for the null that ends the string
        char* new_str = (char*)mem.alloc(len);
        memcpy_s(new_str, len, labStr, len);
        return new (mem) G4_Label(new_str);
    }

    G4_Predicate* createPredicate(
        G4_PredState s, G4_VarBase* flag, unsigned short srOff,
        G4_Predicate_Control ctrl = PRED_DEFAULT)
    {
        G4_Predicate* pred = new (mem) G4_Predicate(s, flag, srOff, ctrl);
        return pred;
    }

    G4_Predicate* createPredicate(G4_Predicate& prd)
    {
        G4_Predicate* p = new (mem) G4_Predicate(prd);
        return p;
    }

    G4_CondMod* createCondMod(G4_CondModifier m, G4_VarBase* flag, unsigned short off)
    {
        G4_CondMod* p = new (mem) G4_CondMod(m, flag, off);
        return p;
    }

    //
    // return the condition modifier; create one if not yet created
    //
    G4_CondMod* createCondMod(G4_CondMod& mod)
    {
        G4_CondMod* p = new (mem) G4_CondMod(mod);
        return p;
    }

    //
    // create a register address expression normalized to (&reg +/- exp)
    //
    G4_AddrExp* createAddrExp(G4_RegVar* reg, int offset, G4_Type ty)
    {
        return new (mem) G4_AddrExp(reg, offset, ty);
    }

private:

    // Avoid calling this directly; use createDst and createIndirectDst instead
    G4_DstRegRegion* createDstRegRegion(
        G4_RegAccess a,
        G4_VarBase* b,
        short roff,
        short sroff,
        unsigned short hstride,
        G4_Type ty,
        G4_AccRegSel regSel = ACC_UNDEFINED)
    {
        G4_DstRegRegion* rgn = new (mem) G4_DstRegRegion(a, b, roff, sroff, hstride, ty, regSel);
        return rgn;
    }

    // please leave all createInst() as private and use the public wrappers below

    // cond+sat+binary+line
    G4_INST* createInst(
        G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
        G4_ExecSize size,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
        G4_InstOpts options,
        bool addToInstList);

    // TODO: remove
    // old template:
    // cond+sat+binary+line
    G4_INST* createInst(
        G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
        int execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
        G4_InstOpts options,
        bool addToInstList)
    {
        G4_ExecSize sz((unsigned char)execSize);
        return createInst(prd, op, mod, sat, sz, dst, src0, src1, options, addToInstList);
    }
    // cond+sat+ternary
    G4_INST* createInst(
        G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
        G4_InstOpts options,
        bool addToInstList);

public:

    G4_INST* createIf(G4_Predicate* prd, G4_ExecSize execSize, G4_InstOpts options);
    G4_INST* createElse(G4_ExecSize execSize, G4_InstOpts options);
    G4_INST* createEndif(G4_ExecSize execSize, G4_InstOpts options);
    G4_INST* createLabelInst(G4_Label* label, bool appendToInstList);
    G4_INST* createJmp(
        G4_Predicate* pred, G4_Operand* jmpTarget, G4_InstOpts options, bool appendToInstList);
    G4_INST* createGoto(G4_Predicate* pred, G4_ExecSize execSize, G4_Label* target, G4_InstOpts options, bool addToInstList)
    {
        // jip is computed later during CFG construction
        return createCFInst(pred, G4_goto, execSize, nullptr, target, options, addToInstList);
    }

    // ToDo: make createInternalInst() private as well and add wrappers for them
    G4_INST* createInternalInst(
        G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
        G4_InstOpts options);

    G4_INST* createInternalInst(
        G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
        G4_InstOpts options);

    G4_INST* createCFInst(
        G4_Predicate* prd, G4_opcode op, G4_ExecSize execSize,
        G4_Label* jip, G4_Label* uip,
        G4_InstOpts options,
        bool addToInstList);

    G4_INST* createInternalCFInst(
        G4_Predicate* prd, G4_opcode op, G4_ExecSize execSize,
        G4_Label* jip, G4_Label* uip,
        G4_InstOpts options);

    G4_InstSend* createSendInst(
        G4_Predicate* prd, G4_opcode op, G4_ExecSize execSize,
        G4_DstRegRegion* postDst, G4_SrcRegRegion* payload,
        G4_Operand* msg,
        G4_InstOpts options, // FIXME: re-order options to follow all operands
        G4_SendDesc *msgDesc,
        bool addToInstList);
    G4_InstSend* createInternalSendInst(
        G4_Predicate* prd, G4_opcode op,
        G4_ExecSize execSize,
        G4_DstRegRegion* postDst, G4_SrcRegRegion* payload,
        G4_Operand* msg,
        G4_InstOpts options, // FIXME: re-order options to follow all operands
        G4_SendDesc *msgDescs);

    G4_InstSend* createSplitSendInst(
        G4_Predicate* prd, G4_opcode op,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
        G4_Operand* msg, G4_InstOpts options,
        G4_SendDesc* msgDesc,
        G4_Operand* src3,
        bool addToInstList);

    G4_InstSend* createInternalSplitSendInst(
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
        // TODO: reorder parameters to put options last
        G4_Operand* msg, G4_InstOpts options,
        G4_SendDesc* msgDesc,
        G4_Operand* src3);

    G4_INST* createMathInst(
        G4_Predicate* prd, G4_Sat sat, G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_MathOp mathOp,
        G4_InstOpts options, bool addToInstList);
    G4_INST* createInternalMathInst(
        G4_Predicate* prd, G4_Sat sat, G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_MathOp mathOp,
        G4_InstOpts options);

    G4_INST* createIntrinsicInst(
        G4_Predicate* prd, Intrinsic intrinId, G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
        G4_InstOpts options, bool addToInstList);

    G4_INST* createInternalIntrinsicInst(
        G4_Predicate* prd, Intrinsic intrinId, G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
        G4_InstOpts options);

    G4_INST* createIntrinsicAddrMovInst(
        Intrinsic intrinId, G4_DstRegRegion* dst,
        G4_Operand* src0, G4_Operand* src1, G4_Operand* src2, G4_Operand* src3,
        G4_Operand* src4, G4_Operand* src5, G4_Operand* src6, G4_Operand* src7,
        bool addToInstList);

    G4_INST* createNop(G4_InstOpts options);
    G4_INST* createSync(G4_opcode syncOp, G4_Operand* src);

    G4_INST* createMov(
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0,
        G4_InstOpts options,
        bool appendToInstList);

    G4_INST* createBinOp(
        G4_opcode op, G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
        G4_InstOpts options,
        bool appendToInstList)
    {
        return createBinOp(nullptr, op, execSize, dst, src0, src1, options, appendToInstList);
    }

    G4_INST* createBinOp(
        G4_Predicate *pred,
        G4_opcode op, G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
        G4_InstOpts options,
        bool appendToInstList = true);

    G4_INST* createMach(
        G4_ExecSize execSize,
        G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
        G4_InstOpts options, G4_Type accType);

    G4_INST* createMacl(
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_Operand* src0, G4_Operand* src1, G4_InstOpts options, G4_Type accType);

    G4_INST* createMadm(
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_SrcRegRegion* src0, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
        G4_InstOpts options)
    {
        return createMadm(nullptr, execSize, dst, src0, src1, src2, options);
    }

    G4_INST* createMadm(
        G4_Predicate* pred,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_SrcRegRegion* src0, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
        G4_InstOpts options);

    static G4_MathOp Get_MathFuncCtrl(ISA_Opcode op, G4_Type type);

    void resizePredefinedStackVars();

    template <typename T>
    T* duplicateOperand(T* opnd) { return static_cast<T *>(duplicateOpndImpl(opnd)); }
    G4_Operand* duplicateOpndImpl(G4_Operand* opnd);


    G4_DstRegRegion *createSubDstOperand(
        G4_DstRegRegion* dst, uint16_t start, uint8_t size);
    G4_SrcRegRegion *createSubSrcOperand(
        G4_SrcRegRegion* src, uint16_t start, uint8_t size, uint16_t newVs, uint16_t newWd);
    G4_INST *makeSplittingInst(G4_INST *inst, G4_ExecSize execSize);
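    // Illustrative sketch (editorial addition; 'tmp' and the chosen sub-register
    // alignment are hypothetical): a simple "add (8) tmp<1>:d tmp<8;8,1>:d 1:w"
    // could be built with the wrappers above roughly as
    //
    //     G4_Declare* tmp = createTempVar(8, Type_D, Any);   // subAlign assumed
    //     G4_DstRegRegion* dst = createDst(tmp->getRegVar(), 0, 0, 1, Type_D);
    //     G4_SrcRegRegion* src = createSrc(tmp->getRegVar(), 0, 0, getRegionStride1(), Type_D);
    //     createBinOp(G4_add, G4_ExecSize(8), dst, src,
    //                 createImmWithLowerType(1, Type_D), InstOpt_NoOpt, true);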
    G4_InstSend *createSendInst(
        G4_Predicate *pred,
        G4_DstRegRegion *postDst, G4_SrcRegRegion *payload,
        G4_ExecSize execSize,
        G4_SendDescRaw *msgDesc,
        G4_InstOpts options,
        bool is_sendc);

    G4_InstSend *createSplitSendInst(
        G4_Predicate *pred,
        G4_DstRegRegion *dst, G4_SrcRegRegion *src1, G4_SrcRegRegion *src2,
        G4_ExecSize execSize,
        G4_SendDescRaw *msgDesc,
        G4_InstOpts options,
        bool is_sendc);

    // TODO: move to TranslateSendLdStLsc or elide VISA_Exec_Size (pre-convert)
    G4_SendDescRaw* createLscMsgDesc(
        LSC_OP op,
        LSC_SFID lscSfid,
        VISA_Exec_Size execSizeEnum,
        LSC_CACHE_OPTS cacheOpts,
        LSC_ADDR addr,
        LSC_DATA_SHAPE shape,
        G4_Operand *surface,
        uint32_t dstLen,
        uint32_t addrRegs);

    // ToDo: unify this with the above function
    G4_SendDescRaw* createLscDesc(
        SFID sfid,
        uint32_t desc,
        uint32_t extDesc,
        int src1Len,
        SendAccess access,
        G4_Operand* bti);

    G4_InstSend *createLscSendInst(
        G4_Predicate *pred,
        G4_DstRegRegion *dst, G4_SrcRegRegion *src0, G4_SrcRegRegion *src1,
        G4_ExecSize execsize,
        G4_SendDescRaw *msgDesc,
        G4_InstOpts option,
        LSC_ADDR_TYPE addrType,
        bool emitA0RegDef);
    G4_SrcRegRegion* getScratchSurfaceStatusIndex();

    void RestoreA0();

    G4_InstSend* createLscSendInstToScratch(
        G4_Predicate* pred,
        G4_DstRegRegion* dst, G4_SrcRegRegion* src0, G4_SrcRegRegion* src1,
        G4_ExecSize execSize, G4_SendDescRaw* msgDesc, G4_InstOpts option,
        bool usesBti);

    G4_InstSend *createSplitSendToRenderTarget(
        G4_Predicate *pred,
        G4_DstRegRegion *dst,
        G4_SrcRegRegion *src1,
        G4_SrcRegRegion *src2,
        G4_SrcRegRegion *extDesc,
        G4_ExecSize execSize,
        G4_SendDescRaw *msgDesc,
        G4_InstOpts option);

    G4_InstSend* createSendInst(
        G4_Predicate* pred,
        G4_DstRegRegion* postDst,
        G4_SrcRegRegion* payload,
        unsigned regs2snd,
        unsigned regs2rcv,
        G4_ExecSize execsize,
        unsigned fc,
        SFID tf_id,
        bool head_present,
        SendAccess access,
        G4_Operand* bti,
        G4_Operand* sti,
        G4_InstOpts options,
        bool is_sendc);

    G4_InstSend* createSplitSendInst(
        G4_Predicate* pred, G4_DstRegRegion* dst,
        G4_SrcRegRegion* src1, unsigned regs2snd1,
        G4_SrcRegRegion* src2, unsigned regs2snd2,
        unsigned regs2rcv,
        G4_ExecSize execSize,
        unsigned fc,
        SFID tf_id,
        bool head_present,
        SendAccess access,
        G4_Operand* bti, G4_Operand* sti,
        G4_InstOpts option,
        bool is_sendc);

    // helper functions
    G4_Declare *createSendPayloadDcl(unsigned num_elt, G4_Type type);

    void createMovR0Inst(
        G4_Declare* dcl,
        short refOff,
        short subregOff,
        bool use_nomask = false,
        G4_InstOpts options = InstOpt_NoOpt);

    void createMovInst(
        G4_Declare* dcl,
        short refOff,
        short subregOff,
        G4_ExecSize execsize,
        G4_Predicate* pred,
        G4_CondMod* condMod,
        G4_Operand* src_opnd,
        bool use_nomask = false,
        G4_InstOpts options = InstOpt_NoOpt);
    void createAddInst(
        G4_Declare* dcl,
        short regOff,
        short subregOff,
        G4_ExecSize execSize,
        G4_Predicate* pred,
        G4_CondMod* condMod,
        G4_Operand* src0_opnd,
        G4_Operand* src1_opnd,
        G4_InstOption options);
    void createMovSendSrcInst(
        G4_Declare* dcl,
        short refOff,
        short subregOff,
        unsigned num_dword,
        G4_Operand* src_opnd,
        G4_InstOpts options);
    // shorthand for creating a dstRegRegion
    G4_DstRegRegion* createDstRegRegion(G4_Declare* dcl, unsigned short hstride);
    G4_SrcRegRegion* createSrcRegRegion(G4_Declare* dcl, const RegionDesc* rd);

    // Create a null dst with scalar region and the given type
    G4_DstRegRegion* createNullDst(G4_Type dstType);

    // Create a null src with scalar region and the given type
    G4_SrcRegRegion* createNullSrc(G4_Type dstType);

    G4_DstRegRegion* checkSendDst(G4_DstRegRegion *dst_opnd);

    G4_INST* createDpasInst(
        G4_opcode opc,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_Operand* src0,
        G4_Operand* src1,
        G4_Operand* src2,
        G4_Operand* src3,
        G4_InstOpts options,
        GenPrecision A,
        GenPrecision W,
        uint8_t D,
        uint8_t C,
        bool addToInstList);

    G4_INST* createInternalDpasInst(
        G4_opcode opc,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_Operand* src0,
        G4_Operand* src1,
        G4_Operand* src2,
        G4_Operand* src3,
        G4_InstOpts options,
        GenPrecision A,
        GenPrecision W,
        uint8_t D,
        uint8_t C);

    G4_INST* createBfnInst(
        uint8_t booleanFuncCtrl,
        G4_Predicate* prd,
        G4_CondMod* mod,
        G4_Sat sat,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_Operand* src0,
        G4_Operand* src1,
        G4_Operand* src2,
        G4_InstOpts options,
        bool addToInstList);

    G4_INST* createInternalBfnInst(
        uint8_t booleanFuncCtrl,
        G4_Predicate* prd,
        G4_CondMod* mod,
        G4_Sat sat,
        G4_ExecSize execSize,
        G4_DstRegRegion* dst,
        G4_Operand* src0,
        G4_Operand* src1,
        G4_Operand* src2,
        G4_InstOpts options);


    ///////////////////////////////////////////////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////
    // translateXXXXXXX functions translate specific vISA instructions into
    // sequences of G4 IR that implement the operation
    //
    // Implementations are split amongst various files based on category.
    // Comments below will point the user to the correct implementation file.
    // Please keep implementations in the same sequential order as in the header.
    // (Feel free to re-order, but reflect the re-order here.)
    //
    // During the refactor anything that was in TranslationInterface.cpp was moved
    // to one of the VisaToG4/Translate* files, but some methods have nothing
    // to do with vISA and make sense in BuildIRImpl.cpp and could be
    // moved there. Please move the declaration (prototype) upwards in
    // that case.
    ///////////////////////////////////////////////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////
    // Members related to general arithmetic/logic/shift ops are in
    // VisaToG4/TranslateALU.cpp
    int translateVISAAddrInst(
        ISA_Opcode opcode,
        VISA_Exec_Size execSize,
        VISA_EMask_Ctrl emask,
        G4_DstRegRegion *dst_opnd,
        G4_Operand *src0_opnd,
        G4_Operand *src1_opnd);

    int translateVISAArithmeticInst(
        ISA_Opcode opcode,
        VISA_Exec_Size execSize,
        VISA_EMask_Ctrl emask,
        G4_Predicate *predOpnd,
        G4_Sat saturate,
        G4_CondMod* condMod,
        G4_DstRegRegion *dstOpnd,
        G4_Operand *src0Opnd,
        G4_Operand *src1Opnd,
        G4_Operand *src2Opnd,
        G4_DstRegRegion *carryBorrow);

    int translateVISADpasInst(
        VISA_Exec_Size executionSize,
        VISA_EMask_Ctrl emask,
        G4_opcode opc,
        G4_DstRegRegion *dstOpnd,
        G4_SrcRegRegion *src0Opnd,
        G4_SrcRegRegion *src1Opnd,
        G4_SrcRegRegion *src2Opnd,
        G4_SrcRegRegion *src3Opnd,
        GenPrecision A, GenPrecision W,
        uint8_t D, uint8_t C);
    int translateVISABfnInst(
        uint8_t booleanFuncCtrl,
        VISA_Exec_Size executionSize,
        VISA_EMask_Ctrl emask,
        G4_Predicate *predOpnd,
        G4_Sat saturate,
        G4_CondMod* condMod,
        G4_DstRegRegion *dstOpnd,
        G4_Operand *src0Opnd,
        G4_Operand *src1Opnd,
        G4_Operand *src2Opnd);

    int translateVISACompareInst(
        ISA_Opcode opcode,
        VISA_Exec_Size execSize,
        VISA_EMask_Ctrl emask,
        VISA_Cond_Mod relOp,
        G4_Declare* predDst,
        G4_Operand *src0_opnd,
        G4_Operand *src1_opnd);

    int translateVISACompareInst(
        ISA_Opcode opcode,
        VISA_Exec_Size execSize,
        VISA_EMask_Ctrl emask,
        VISA_Cond_Mod relOp,
        G4_DstRegRegion *dstOpnd,
        G4_Operand *src0Opnd,
        G4_Operand *src1Opnd);

    int translateVISALogicInst(
        ISA_Opcode opcode,
        G4_Predicate *pred_opnd,
        G4_Sat saturate,
        VISA_Exec_Size executionSize,
        VISA_EMask_Ctrl emask,
        G4_DstRegRegion* dst,
        G4_Operand* src0,
        G4_Operand* src1,
        G4_Operand* src2,
        G4_Operand* src3);

    int translateVISADataMovementInst(
        ISA_Opcode opcode,
        CISA_MIN_MAX_SUB_OPCODE subOpcode,
        G4_Predicate *pred_opnd,
        VISA_Exec_Size executionSize,
        VISA_EMask_Ctrl emask,
        G4_Sat saturate,
        G4_DstRegRegion *dst,
        G4_Operand *src0,
        G4_Operand *src1);

    ///////////////////////////////////////////////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////
    // Control flow, function call, and branching ops are located in
    // VisaToG4/TranslateBranch.cpp
    int translateVISACFSwitchInst(
        G4_Operand *indexOpnd,
        uint8_t numLabels,
        G4_Label** lab);

    int translateVISACFLabelInst(G4_Label* lab);

    int translateVISACFCallInst(
        VISA_Exec_Size execsize,
        VISA_EMask_Ctrl emask,
        G4_Predicate *predOpnd,
        G4_Label* lab);

    int translateVISACFJumpInst(G4_Predicate *predOpnd, G4_Label* lab);

    int translateVISACFFCallInst(
        VISA_Exec_Size execsize,
        VISA_EMask_Ctrl emask,
        G4_Predicate *predOpnd,
        std::string funcName,
        uint8_t argSize,
        uint8_t returnSize);

    int translateVISACFIFCallInst(
        VISA_Exec_Size execsize,
        VISA_EMask_Ctrl emask,
        G4_Predicate *predOpnd,
        G4_Operand* funcAddr,
        uint8_t argSize,
        uint8_t returnSize);
int translateVISACFSymbolInst(
    const std::string& symbolName, G4_DstRegRegion* dst);

int translateVISACFFretInst(
    VISA_Exec_Size execsize, VISA_EMask_Ctrl emask, G4_Predicate *predOpnd);

int translateVISACFRetInst(
    VISA_Exec_Size execsize, VISA_EMask_Ctrl emask, G4_Predicate *predOpnd);

int translateVISAGotoInst(
    G4_Predicate *predOpnd, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
    G4_Label *label);


///////////////////////////////////////////////////////////////////////////
// members related to special math sequences are in VisaToG4/TranslateMath.cpp
void expandFdiv(
    G4_ExecSize exsize, G4_Predicate *predOpnd, G4_Sat saturate,
    G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd,
    uint32_t instOpt);

void expandPow(
    G4_ExecSize exsize, G4_Predicate *predOpnd, G4_Sat saturate,
    G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd,
    uint32_t instOpt);


int translateVISAArithmeticDoubleInst(
    ISA_Opcode opcode, VISA_Exec_Size execSize, VISA_EMask_Ctrl emask,
    G4_Predicate *predOpnd, G4_Sat saturate,
    G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd);

int translateVISAArithmeticSingleDivideIEEEInst(
    ISA_Opcode opcode, VISA_Exec_Size execSize, VISA_EMask_Ctrl emask,
    G4_Predicate *predOpnd, G4_Sat saturate, G4_CondMod* condMod,
    G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd);

int translateVISAArithmeticSingleSQRTIEEEInst(
    ISA_Opcode opcode, VISA_Exec_Size execSize, VISA_EMask_Ctrl emask,
    G4_Predicate *predOpnd, G4_Sat saturate, G4_CondMod* condMod,
    G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd);

int translateVISAArithmeticDoubleSQRTInst(
    ISA_Opcode opcode, VISA_Exec_Size execSize, VISA_EMask_Ctrl emask,
    G4_Predicate *predOpnd, G4_Sat saturate, G4_CondMod* condMod,
    G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd);


///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// Members related to miscellaneous instructions that don't fit any other
// category are in VisaToG4/TranslateMisc.cpp
//
// (As stated above, some of this could move to BuildIRImpl.cpp.)
static bool isNoMask(VISA_EMask_Ctrl eMask);
static G4_ExecSize toExecSize(VISA_Exec_Size execSize);

VISA_Exec_Size roundUpExecSize(VISA_Exec_Size execSize);

G4_Declare* getImmDcl(G4_Imm* val, int numElt);

struct PayloadSource {
    G4_SrcRegRegion *opnd;
    G4_ExecSize      execSize;
    G4_InstOpts      instOpt;
};

/// preparePayload - This method prepares a payload from the specified header
/// and sources.
///
/// \param msgs             Message(s) prepared. This 2-element array must
///                         be cleared before calling preparePayload().
/// \param sizes            Size(s) (in GRFs) of each message prepared. This
///                         2-element array must be cleared before calling
///                         preparePayload().
/// \param batchExSize      When sources must be copied, batchExSize
///                         specifies the SIMD width of the copy.
/// \param splitSendEnabled Whether the split-send feature is available. When
///                         it is, this function checks whether two
///                         consecutive regions could be prepared instead of
///                         one, to take advantage of split-send.
/// \param sources          The array of sources (including the header, if
///                         present).
/// \param len              The length of the array of sources.
///
void preparePayload(
    G4_SrcRegRegion *msgs[2], unsigned sizes[2],
    G4_ExecSize batchExSize, bool splitSendEnabled,
    PayloadSource sources[], unsigned len);
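// Illustrative usage sketch (not part of the builder API itself); `header`,
// `data`, `execSize`, and `instOpt` are placeholders supplied by the caller,
// and g4::SIMD8 / InstOpt_WriteEnable are assumed to be the usual G4 IR
// SIMD-width and instruction-option constants:
//
//   PayloadSource srcs[2];
//   srcs[0] = { header, g4::SIMD8, InstOpt_WriteEnable }; // optional header
//   srcs[1] = { data,   execSize,  instOpt };             // payload data
//   G4_SrcRegRegion* msgs[2]  = { nullptr, nullptr };     // must be cleared
//   unsigned         sizes[2] = { 0, 0 };                 // must be cleared
//   preparePayload(msgs, sizes, execSize, useSends(), srcs, 2);
//   // msgs[]/sizes[] now describe one or two regions that can be fed to a
//   // send (or split-send) instruction.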
// Coalesce multiple payloads into a single region. Pads each source region
// out to an optional alignment (e.g. a GRF size). The source region sizes
// are determined by the source dimension, so use an alias if you are using
// a subregion. All copies are made under no-mask semantics using the
// maximal SIMD width for the current device.
//
// A second alignment option allows a caller to align the full payload
// to some total.
//
// If all parameters are nullptr or the null register, we return the null
// register.
//
// Some examples:
//
//   1. coalescePayload(GRF_SIZE, GRF_SIZE, ...);
//      Coalesces each source into a single region. Each source is padded
//      out to a full GRF, and the total result is also padded out to
//      a full GRF.
//
//   2. coalescePayload(1, GRF_SIZE, ...);
//      Coalesces each source into a single region, packing the sources
//      together but padding the result. E.g. one could copy a QW and then
//      a DW and pad the result out to a GRF.
//
G4_SrcRegRegion *coalescePayload(
    unsigned alignSourcesTo,
    unsigned alignPayloadTo,
    uint32_t payloadSize,
    uint32_t srcSize,
    std::initializer_list<G4_SrcRegRegion *> srcs,
    VISA_EMask_Ctrl emask);
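// Illustrative call sketch for example 1 above (not part of the API);
// `GRF_SIZE`, `payloadBytes`, `srcBytes`, `src0`, `src1`, and `emask` are
// placeholders supplied by the caller, with GRF_SIZE standing for the byte
// size of one GRF on the target platform:
//
//   G4_SrcRegRegion* payload = coalescePayload(
//       GRF_SIZE, GRF_SIZE,       // pad each source and the total to a GRF
//       payloadBytes, srcBytes,   // payload / source sizes
//       { src0, src1 },           // regions to coalesce, in payload order
//       emask);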

// emask is an InstOption
void Copy_SrcRegRegion_To_Payload(
    G4_Declare* payload, unsigned int& regOff,
    G4_SrcRegRegion* src, G4_ExecSize execSize, uint32_t emask);
unsigned int getByteOffsetSrcRegion(G4_SrcRegRegion* srcRegion);

// Only used in TranslateSend3D; consider moving there if no one else
// uses them.
bool checkIfRegionsAreConsecutive(
    G4_SrcRegRegion* first, G4_SrcRegRegion* second, G4_ExecSize execSize);
bool checkIfRegionsAreConsecutive(
    G4_SrcRegRegion* first, G4_SrcRegRegion* second, G4_ExecSize execSize,
    G4_Type type);

int generateDebugInfoPlaceholder(); // TODO: move to BuildIRImpl.cpp?

// legitimately belongs in Misc
int translateVISALifetimeInst(unsigned char properties, G4_Operand* var);


///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// members related to 3D and sampler ops are in VisaToG4/TranslateSend3D.cpp
int translateVISASampleInfoInst(
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, ChannelMask chMask,
    G4_Operand* surface, G4_DstRegRegion* dst);

int translateVISAResInfoInst(
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, ChannelMask chMask,
    G4_Operand* surface, G4_SrcRegRegion* lod, G4_DstRegRegion* dst);

int translateVISAURBWrite3DInst(
    G4_Predicate* pred, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
    uint8_t numOut, uint16_t globalOffset,
    G4_SrcRegRegion* channelMask, G4_SrcRegRegion* urbHandle,
    G4_SrcRegRegion* perSlotOffset, G4_SrcRegRegion* vertexData);

int translateVISARTWrite3DInst(
    G4_Predicate* pred, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
    G4_Operand *surface, G4_SrcRegRegion *r1HeaderOpnd, G4_Operand *rtIndex,
    vISA_RT_CONTROLS cntrls, G4_SrcRegRegion *sampleIndexOpnd,
    G4_Operand *cpsCounter, unsigned int numParms, G4_SrcRegRegion ** msgOpnds);


int splitSampleInst(
    VISASampler3DSubOpCode actualop, bool pixelNullMask, bool cpsEnable,
    G4_Predicate* pred, ChannelMask srcChannel, int numChannels,
    G4_Operand *aoffimmi, G4_Operand *sampler, G4_Operand *surface,
    G4_DstRegRegion* dst, VISA_EMask_Ctrl emask, bool useHeader,
    unsigned numRows, // msg length for each simd8
    unsigned int numParms, G4_SrcRegRegion ** params,
    bool uniformSampler = true);

void doSamplerHeaderMove(G4_Declare* header, G4_Operand* sampler);
G4_Declare* getSamplerHeader(bool isBindlessSampler, bool samplerIndexGE16);
uint32_t getSamplerResponseLength(
    int numChannels, bool isFP16, int execSize, bool pixelNullMask, bool nullDst);

int translateVISASampler3DInst(
    VISASampler3DSubOpCode actualop, bool pixelNullMask, bool cpsEnable,
    bool uniformSampler, G4_Predicate* pred,
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, ChannelMask srcChannel,
    G4_Operand* aoffimmi, G4_Operand *sampler, G4_Operand *surface,
    G4_DstRegRegion* dst, unsigned int numParms, G4_SrcRegRegion ** params);

int translateVISALoad3DInst(
    VISASampler3DSubOpCode actualop, bool pixelNullMask, G4_Predicate *pred,
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl em, ChannelMask channelMask,
    G4_Operand* aoffimmi, G4_Operand* surface, G4_DstRegRegion* dst,
    uint8_t numOpnds, G4_SrcRegRegion ** opndArray);

int translateVISAGather3dInst(
    VISASampler3DSubOpCode actualop, bool pixelNullMask, G4_Predicate* pred,
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl em, ChannelMask channelMask,
    G4_Operand* aoffimmi, G4_Operand* sampler, G4_Operand* surface,
    G4_DstRegRegion* dst, unsigned int numOpnds, G4_SrcRegRegion ** opndArray);

int translateVISASamplerNormInst(
    G4_Operand* surface, G4_Operand* sampler,
    ChannelMask channel, unsigned numEnabledChannels,
    G4_Operand* deltaUOpnd, G4_Operand* uOffOpnd,
    G4_Operand* deltaVOpnd, G4_Operand* vOffOpnd,
    G4_DstRegRegion* dst_opnd);
int translateVISASamplerInst(
    unsigned simdMode, G4_Operand* surface, G4_Operand* sampler,
    ChannelMask channel, unsigned numEnabledChannels,
    G4_Operand* uOffOpnd, G4_Operand* vOffOpnd, G4_Operand* rOffOpnd,
    G4_DstRegRegion* dstOpnd);

///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// basic load and store (typed and untyped) are located in
// VisaToG4/TranslateSendLdStLegacy.cpp

// isMessageHeaderOptional - Check whether the message header is optional.
bool isMessageHeaderOptional(G4_Operand *surface, G4_Operand *Offset) const;

// isStatelessSurface - Check whether the given surface is a stateless surface.
static bool isStatelessSurface(const G4_Operand *surface) {
    // So far, it's only reliable to check an immediate surface.
    return surface->isImm() &&
        (surface->asImm()->getImm() == PREDEF_SURF_255 ||
         surface->asImm()->getImm() == PREDEF_SURF_253);
}

int translateVISAQWGatherInst(
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, G4_Predicate* pred,
    VISA_SVM_Block_Num numBlocks, G4_SrcRegRegion* surface,
    G4_SrcRegRegion* addresses, G4_DstRegRegion* dst);

int translateVISAQWScatterInst(
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, G4_Predicate* pred,
    VISA_SVM_Block_Num numBlocks, G4_SrcRegRegion* surface,
    G4_SrcRegRegion* addresses, G4_SrcRegRegion* src);

uint32_t setOwordForDesc(uint32_t desc, int numOword, bool isSLM = false) const;

int translateVISAOwordLoadInst(
    ISA_Opcode opcode, bool modified, G4_Operand* surface,
    VISA_Oword_Num size, G4_Operand* offOpnd, G4_DstRegRegion* dstOpnd);

int translateVISAOwordStoreInst(
    G4_Operand* surface, VISA_Oword_Num size,
    G4_Operand* offOpnd, G4_SrcRegRegion* srcOpnd);

int translateVISAGatherInst(
    VISA_EMask_Ctrl emask, bool modified, GATHER_SCATTER_ELEMENT_SIZE eltSize,
    VISA_Exec_Size executionSize, G4_Operand* surface, G4_Operand* gOffOpnd,
    G4_SrcRegRegion* eltOffOpnd, G4_DstRegRegion* dstOpnd);

int translateVISAScatterInst(
    VISA_EMask_Ctrl emask, GATHER_SCATTER_ELEMENT_SIZE eltSize,
    VISA_Exec_Size executionSize, G4_Operand* surface, G4_Operand* gOffOpnd,
    G4_SrcRegRegion* eltOffOpnd, G4_SrcRegRegion* srcOpnd);

int translateVISAGather4Inst(
    VISA_EMask_Ctrl emask, bool modified, ChannelMask chMask,
    VISA_Exec_Size executionSize, G4_Operand* surface, G4_Operand* gOffOpnd,
    G4_SrcRegRegion* eltOffOpnd, G4_DstRegRegion* dstOpnd);

int translateVISAScatter4Inst(
    VISA_EMask_Ctrl emask, ChannelMask chMask,
    VISA_Exec_Size executionSize, G4_Operand* surface, G4_Operand* gOffOpnd,
    G4_SrcRegRegion* eltOffOpnd, G4_SrcRegRegion* srcOpnd);

int translateVISADwordAtomicInst(
    VISAAtomicOps subOpc, bool is16Bit, G4_Predicate *pred,
    VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    G4_Operand* surface, G4_SrcRegRegion* offsets,
    G4_SrcRegRegion* src0, G4_SrcRegRegion* src1, G4_DstRegRegion* dst);
void buildTypedSurfaceAddressPayload(
    G4_SrcRegRegion* u, G4_SrcRegRegion* v, G4_SrcRegRegion* r, G4_SrcRegRegion* lod,
    G4_ExecSize execSize, G4_InstOpts instOpt, PayloadSource sources[], uint32_t& len);

int translateVISAGather4TypedInst(
    G4_Predicate *pred, VISA_EMask_Ctrl emask, ChannelMask chMask,
    G4_Operand *surfaceOpnd, VISA_Exec_Size executionSize,
    G4_SrcRegRegion *uOffsetOpnd, G4_SrcRegRegion *vOffsetOpnd,
    G4_SrcRegRegion *rOffsetOpnd, G4_SrcRegRegion *lodOpnd,
    G4_DstRegRegion *dstOpnd);

int translateVISAScatter4TypedInst(
    G4_Predicate *pred, VISA_EMask_Ctrl emask, ChannelMask chMask,
    G4_Operand *surfaceOpnd, VISA_Exec_Size executionSize,
    G4_SrcRegRegion *uOffsetOpnd, G4_SrcRegRegion *vOffsetOpnd,
    G4_SrcRegRegion *rOffsetOpnd, G4_SrcRegRegion *lodOpnd,
    G4_SrcRegRegion *srcOpnd);

int translateVISATypedAtomicInst(
    VISAAtomicOps atomicOp, bool is16Bit, G4_Predicate *pred,
    VISA_EMask_Ctrl emask, VISA_Exec_Size execSize, G4_Operand *surface,
    G4_SrcRegRegion *uOffsetOpnd, G4_SrcRegRegion *vOffsetOpnd,
    G4_SrcRegRegion *rOffsetOpnd, G4_SrcRegRegion *lodOpnd,
    G4_SrcRegRegion *src0, G4_SrcRegRegion *src1, G4_DstRegRegion *dst);

void applySideBandOffset(G4_Operand* sideBand, const G4_SendDescRaw* sendMsgDesc);

int translateVISAGather4ScaledInst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    ChannelMask chMask, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_DstRegRegion *dst);

int translateVISAScatter4ScaledInst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    ChannelMask chMask, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_SrcRegRegion *src);

int translateGather4Inst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    ChannelMask chMask, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_DstRegRegion *dst);

int translateScatter4Inst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    ChannelMask chMask, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_SrcRegRegion *src);

int translateVISAGatherScaledInst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    VISA_SVM_Block_Num numBlocks, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_DstRegRegion *dst);

int translateVISAScatterScaledInst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    VISA_SVM_Block_Num numBlocks, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_SrcRegRegion *src);

int translateByteGatherInst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    VISA_SVM_Block_Num numBlocks, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_DstRegRegion *dst);

int translateByteScatterInst(
    G4_Predicate *pred, VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask,
    VISA_SVM_Block_Num numBlocks, G4_Operand *surface, G4_Operand *globalOffset,
    G4_SrcRegRegion *offsets, G4_SrcRegRegion *src);
int translateVISASVMBlockReadInst(
    VISA_Oword_Num numOword, bool unaligned,
    G4_Operand* address, G4_DstRegRegion* dst);

int translateVISASVMBlockWriteInst(
    VISA_Oword_Num numOword, G4_Operand* address, G4_SrcRegRegion* src);

int translateVISASVMScatterReadInst(
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, G4_Predicate* pred,
    VISA_SVM_Block_Type blockSize, VISA_SVM_Block_Num numBlocks,
    G4_SrcRegRegion* addresses, G4_DstRegRegion* dst);

int translateVISASVMScatterWriteInst(
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, G4_Predicate* pred,
    VISA_SVM_Block_Type blockSize, VISA_SVM_Block_Num numBlocks,
    G4_SrcRegRegion* addresses, G4_SrcRegRegion* src);

int translateVISASVMAtomicInst(
    VISAAtomicOps op, unsigned short bitwidth,
    VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask, G4_Predicate* pred,
    G4_SrcRegRegion* addresses, G4_SrcRegRegion* src0, G4_SrcRegRegion* src1,
    G4_DstRegRegion* dst);

// return globalOffset + offsets as a contiguous operand
G4_SrcRegRegion* getSVMOffset(
    G4_Operand* globalOffset, G4_SrcRegRegion* offsets, uint16_t exSize,
    G4_Predicate* pred, uint32_t mask);

int translateSVMGather4Inst(
    VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask, ChannelMask chMask,
    G4_Predicate *pred, G4_Operand *address, G4_SrcRegRegion *offsets,
    G4_DstRegRegion *dst);

int translateSVMScatter4Inst(
    VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask, ChannelMask chMask,
    G4_Predicate *pred, G4_Operand *address, G4_SrcRegRegion *offsets,
    G4_SrcRegRegion *src);

int translateVISASVMGather4ScaledInst(
    VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask, ChannelMask chMask,
    G4_Predicate *pred, G4_Operand *address, G4_SrcRegRegion *offsets,
    G4_DstRegRegion *dst);

int translateVISASVMScatter4ScaledInst(
    VISA_Exec_Size execSize, VISA_EMask_Ctrl eMask, ChannelMask chMask,
    G4_Predicate *pred, G4_Operand *address, G4_SrcRegRegion *offsets,
    G4_SrcRegRegion *src);

// Minimum execution size for LSC on this platform.
// The minimum is generally half the full size, except in rare cases.
// The full LSC SIMD size is 32 for PVC derivatives and 16 for DG2 derivatives.
G4_ExecSize lscMinExecSize(LSC_SFID lscSfid) const;
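// Illustrative expectation (not a separate API), following the comment
// above and assuming the LSC_UGM SFID enumerator and the usual g4::SIMDxx
// constants:
//
//   lscMinExecSize(LSC_UGM);  // normally g4::SIMD16 on PVC-class parts
//                             // (full size 32) and g4::SIMD8 on DG2-class
//                             // parts (full size 16), barring the rare
//                             // exceptions noted above.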

///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// New LSC-based load and store (typed and untyped) are located in
// VisaToG4/TranslateSendLdStLsc.cpp
int translateLscUntypedInst(
    LSC_OP op,
    LSC_SFID lscSfid,
    G4_Predicate *pred,
    VISA_Exec_Size execSize,
    VISA_EMask_Ctrl emask,
    LSC_CACHE_OPTS cacheOpts,
    LSC_ADDR addrInfo,
    LSC_DATA_SHAPE shape,
    G4_Operand *surface,                // surface/bti
    G4_DstRegRegion *dstData,
    G4_SrcRegRegion *src0AddrOrBlockY,
    G4_Operand *src0AddrStrideOrBlockX, // only for strided and block2d
    G4_SrcRegRegion *src1Data,          // store data/extra atomic operands
    G4_SrcRegRegion *src2Data           // only for fcas/icas
);

int translateLscUntypedBlock2DInst(
    LSC_OP op,
    LSC_SFID lscSfid,
    G4_Predicate *pred,
    VISA_Exec_Size execSize,
    VISA_EMask_Ctrl emask,
    LSC_CACHE_OPTS cacheOpts,
    LSC_DATA_SHAPE_BLOCK2D shape,
    G4_DstRegRegion *dstData,
    G4_Operand *src0Addrs[LSC_BLOCK2D_ADDR_PARAMS],
    G4_SrcRegRegion *src1Data);

int translateLscTypedInst(
    LSC_OP op,
    G4_Predicate *pred,
    VISA_Exec_Size execSize,
    VISA_EMask_Ctrl emask,
    LSC_CACHE_OPTS cacheOpts,
    LSC_ADDR_TYPE addrModel,
    LSC_ADDR_SIZE addrSize,
    LSC_DATA_SHAPE shape,
    G4_Operand *surface,       // surface/bti
    G4_DstRegRegion *dstData,  // dst on load/atomic
    G4_SrcRegRegion *src0AddrUs,
    G4_SrcRegRegion *src0AddrVs,
    G4_SrcRegRegion *src0AddrRs,
    G4_SrcRegRegion *src0AddrLODs,
    G4_SrcRegRegion *src1Data, // store data/extra atomic operands
    G4_SrcRegRegion *src2Data  // icas/fcas only
);

LSC_DATA_ELEMS lscGetElementNum(unsigned eNum) const;
int lscEncodeAddrSize(LSC_ADDR_SIZE addr_size, uint32_t &desc, int &status) const;
int lscEncodeDataSize(LSC_DATA_SIZE data_size, uint32_t &desc, int &status) const;
int lscEncodeDataElems(LSC_DATA_ELEMS data_elems, uint32_t &desc, int &status) const;
void lscEncodeDataOrder(LSC_DATA_ORDER t, uint32_t &desc, int &status) const;
void lscEncodeCachingOpts(
    const LscOpInfo &opInfo, LSC_CACHE_OPTS cacheOpts,
    uint32_t &desc, int &status) const;
void lscEncodeAddrType(LSC_ADDR_TYPE at, uint32_t &desc, int &status) const;

G4_SrcRegRegion *lscBuildStridedPayload(
    G4_Predicate *pred,
    G4_SrcRegRegion *src0AddrBase,
    G4_Operand *src0AddrStride,
    int dataSizeBytes, int vecSize, bool transposed);
G4_SrcRegRegion *lscBuildBlock2DPayload(
    LSC_DATA_SHAPE_BLOCK2D dataShape2D,
    G4_Predicate *pred,
    G4_Operand *src0Addrs[6]);

//
// LSC allows users to pass an immediate scale and an immediate addend.
// Hardware may be able to take advantage of them if they satisfy
// various constraints. This also broadcasts the address if needed.
G4_SrcRegRegion *lscLoadEffectiveAddress(
    LSC_OP lscOp,
    LSC_SFID lscSfid,
    G4_Predicate *pred,
    G4_ExecSize execSize,
    VISA_EMask_Ctrl execCtrl,
    LSC_ADDR addrInfo,
    int bytesPerDataElem,
    const G4_Operand *surface,
    G4_SrcRegRegion *addr,
    uint32_t &exDesc
);
G4_SrcRegRegion *lscCheckRegion(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src);

G4_SrcRegRegion *lscMulAdd(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src, int16_t mulImm16, int64_t addImm64);
G4_SrcRegRegion *lscMul(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src, int16_t mulImm16);
G4_SrcRegRegion *lscAdd(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src, int64_t addImm64);
G4_SrcRegRegion *lscAdd64AosNative(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src, int64_t addImm64);
G4_SrcRegRegion *lscAdd64AosEmu(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src, int64_t addImm64);
G4_SrcRegRegion *lscMul64Aos(
    G4_Predicate *pred, G4_ExecSize execSize, VISA_EMask_Ctrl execCtrl,
    G4_SrcRegRegion *src, int16_t mulImm16);

///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// members related to media and VME are in VisaToG4/TranslateSendMedia.cpp
int translateVISAMediaLoadInst(
    MEDIA_LD_mod mod, G4_Operand* surface, unsigned planeID,
    unsigned blockWidth, unsigned blockHeight,
    G4_Operand* xOffOpnd, G4_Operand* yOffOpnd, G4_DstRegRegion* dst_opnd);

int translateVISAMediaStoreInst(
    MEDIA_ST_mod mod, G4_Operand* surface, unsigned planeID,
    unsigned blockWidth, unsigned blockHeight,
    G4_Operand* xOffOpnd, G4_Operand* yOffOpnd, G4_SrcRegRegion* srcOpnd);

int translateVISAVmeImeInst(
    uint8_t stream_mode, uint8_t search_ctrl,
    G4_Operand* surfaceOpnd, G4_Operand* uniInputOpnd, G4_Operand* imeInputOpnd,
    G4_Operand* ref0Opnd, G4_Operand* ref1Opnd, G4_Operand* costCenterOpnd,
    G4_DstRegRegion* outputOpnd);

int translateVISAVmeSicInst(
    G4_Operand* surfaceOpnd, G4_Operand* uniInputOpnd,
    G4_Operand* sicInputOpnd, G4_DstRegRegion* outputOpnd);

int translateVISAVmeFbrInst(
    G4_Operand* surfaceOpnd, G4_Operand* unitInputOpnd, G4_Operand* fbrInputOpnd,
    G4_Operand* fbrMbModOpnd, G4_Operand* fbrSubMbShapeOpnd,
    G4_Operand* fbrSubPredModeOpnd, G4_DstRegRegion* outputOpnd);

int translateVISAVmeIdmInst(
    G4_Operand* surfaceOpnd, G4_Operand* unitInputOpnd,
    G4_Operand* idmInputOpnd, G4_DstRegRegion* outputOpnd);

int translateVISASamplerVAGenericInst(
    G4_Operand* surface, G4_Operand* sampler,
    G4_Operand* uOffOpnd, G4_Operand* vOffOpnd,
    G4_Operand* vSizeOpnd, G4_Operand* hSizeOpnd, G4_Operand* mmfMode,
    unsigned char cntrl, unsigned char msgSeq, VA_fopcode fopcode,
    G4_DstRegRegion* dstOpnd, G4_Type dstType, unsigned dstSize,
    bool isBigKernel = false);

int translateVISAAvsInst(
    G4_Operand* surface, G4_Operand* sampler,
    ChannelMask channel, unsigned numEnabledChannels,
    G4_Operand* deltaUOpnd, G4_Operand* uOffOpnd,
    G4_Operand* deltaVOpnd, G4_Operand* vOffOpnd,
    G4_Operand* u2dOpnd,
    G4_Operand* groupIDOpnd, G4_Operand* verticalBlockNumberOpnd,
    unsigned char cntrl, G4_Operand* v2dOpnd, unsigned char execMode,
    G4_Operand* eifbypass, G4_DstRegRegion* dstOpnd);

int translateVISAVaSklPlusGeneralInst(
    ISA_VA_Sub_Opcode sub_opcode,
    G4_Operand* surface, G4_Operand* sampler,
    unsigned char mode, unsigned char functionality,
    G4_Operand* uOffOpnd, G4_Operand* vOffOpnd,

    // 1-pixel convolve
    G4_Operand* offsetsOpnd,

    // FloodFill
    G4_Operand* loopCountOpnd, G4_Operand* pixelHMaskOpnd,
    G4_Operand* pixelVMaskLeftOpnd, G4_Operand* pixelVMaskRightOpnd,

    // LBP Correlation
    G4_Operand* disparityOpnd,

    // Correlation Search
    G4_Operand* verticalOriginOpnd, G4_Operand* horizontalOriginOpnd,
    G4_Operand* xDirectionSizeOpnd, G4_Operand* yDirectionSizeOpnd,
    G4_Operand* xDirectionSearchSizeOpnd, G4_Operand* yDirectionSearchSizeOpnd,

    G4_DstRegRegion* dstOpnd, G4_Type dstType, unsigned dstSize,

    // HDC
    unsigned char pixelSize,
    G4_Operand* dstSurfaceOpnd, G4_Operand* dstXOpnd, G4_Operand* dstYOpnd,
    bool hdcMode);


///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// Raw send related members are in VisaToG4/TranslateSendRaw.cpp
int translateVISARawSendInst(
    G4_Predicate *predOpnd, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
    uint8_t modifiers, unsigned int exDesc, uint8_t numSrc, uint8_t numDst,
    G4_Operand* msgDescOpnd, G4_SrcRegRegion* msgOpnd, G4_DstRegRegion* dstOpnd);

int translateVISARawSendsInst(
    G4_Predicate *predOpnd, VISA_Exec_Size executionSize, VISA_EMask_Ctrl emask,
    uint8_t modifiers, G4_Operand* exDesc,
    uint8_t numSrc0, uint8_t numSrc1, uint8_t numDst,
    G4_Operand* msgDescOpnd, G4_Operand* msgOpnd0, G4_Operand* msgOpnd1,
    G4_DstRegRegion* dstOpnd, unsigned ffid, bool hasEOT = false);


///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// Synchronization (fence, barrier, etc.) related members are in
// VisaToG4/TranslateSendSync.cpp
G4_INST* translateLscFence(
    SFID sfid, LSC_FENCE_OP fenceOp, LSC_SCOPE scope, int& status);

G4_INST* translateLscFence(SFID sfid, LSC_FENCE_OP fenceOp, LSC_SCOPE scope)
{
    int status = VISA_SUCCESS;
    return translateLscFence(sfid, fenceOp, scope, status);
}

enum class NamedBarrierType
{
    PRODUCER,
    CONSUMER,
    BOTH
};

void generateNamedBarrier(
    int numProducer, int numConsumer, NamedBarrierType type, G4_Operand* barrierId);

void generateNamedBarrier(G4_Operand* barrierId, G4_SrcRegRegion* threadValue);

void generateSingleBarrier();
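// Illustrative sketch of the expected signal/wait pairing for the named
// barrier translations declared just below (not a separate API); `nbID`
// and `nThreads` are placeholder operands supplied by the caller, e.g.
// immediates holding the barrier id and participating thread count:
//
//   translateVISANamedBarrierSignal(nbID, nThreads); // signal barrier nbID
//   // ... other work ...
//   translateVISANamedBarrierWait(nbID);             // wait on the same id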
int translateVISANamedBarrierWait(G4_Operand* barrierId);
int translateVISANamedBarrierSignal(G4_Operand* barrierId, G4_Operand* threadCount);

G4_INST* createFenceInstruction(
    uint8_t flushParam, bool commitEnable, bool globalMemFence, bool isSendc);

G4_INST* createSLMFence();

int translateVISAWaitInst(G4_Operand* mask);

void generateBarrierSend();
void generateBarrierWait();

int translateVISASyncInst(ISA_Opcode opcode, unsigned int mask);

int translateVISASplitBarrierInst(bool isSignal);

///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

// Return either 253 or 255 for A64 messages, depending on whether we want
// I/A coherency or not.
uint8_t getA64BTI() const
{
    return m_options->getOption(vISA_noncoherentStateless) ? 0xFD : 0xFF;
}

bool useSends() const
{
    return getPlatform() >= GENX_SKL && m_options->getOption(vISA_UseSends) &&
        !(VISA_WA_CHECK(m_pWaTable, WaDisableSendsSrc0DstOverlap));
}

Metadata* allocateMD()
{
    Metadata* newMD = new (metadataMem) Metadata();
    allMDs.push_back(newMD);
    return newMD;
}

MDNode* allocateMDString(const std::string& str)
{
    auto newNode = new (metadataMem) MDString(str);
    allMDNodes.push_back(newNode);
    return newNode;
}

MDLocation* allocateMDLocation(int line, const char* file)
{
    auto newNode = new (metadataMem) MDLocation(line, file);
    allMDNodes.push_back(newNode);
    return newNode;
}

MDTokenLocation* allocateMDTokenLocation(unsigned short token, unsigned globalID)
{
    auto newNode = new (metadataMem) MDTokenLocation(token, globalID);
    allMDNodes.push_back(newNode);
    return newNode;
}

///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////
// Generic IR simplification tasks
G4_Imm* foldConstVal(G4_Imm* const1, G4_Imm* const2, G4_opcode op);
void doConsFolding(G4_INST *inst);
void doSimplification(G4_INST *inst);

static G4_Type findConstFoldCommonType(G4_Type type1, G4_Type type2);
///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

void materializeGlobalImm(G4_BB* entryBB); // FIXME: why is this in FlowGraph.cpp?

#include "HWCaps.inc"

private:
    G4_SrcRegRegion* createBindlessExDesc(uint32_t exdesc);
};
} // namespace vISA

// G4 IR instructions added by the JIT that do not result from lowering
// any CISA bytecode will be assigned CISA offset = 0xffffffff.
// This includes pseudo nodes, G4_labels, and the mov introduced for copying
// r0 for pre-emption support.
constexpr int UNMAPPABLE_VISA_INDEX = vISA::IR_Builder::OrphanVISAIndex;

#endif