1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef _BUILDIR_H_
10 #define _BUILDIR_H_
11 
12 #include <cstdarg>
13 #include <list>
14 #include <map>
15 #include <set>
16 #include <string>
17 
18 #include "G4_Kernel.hpp"
19 #include "G4_IR.hpp"
20 #include "InstSplit.h"
21 #include "visa_igc_common_header.h"
22 #include "Common_ISA.h"
23 #include "Common_ISA_util.h"
24 #include "RT_Jitter_Interface.h"
25 #include "visa_wa.h"
26 #include "PreDefinedVars.h"
27 #include "CompilerStats.h"
28 #include "inc/common/sku_wa.h"
29 
30 
31 #define MAX_DWORD_VALUE  0x7fffffff
32 #define MIN_DWORD_VALUE  0x80000000
33 #define MAX_UDWORD_VALUE 0xffffffff
34 #define MIN_UDWORD_VALUE 0
35 #define MAX_WORD_VALUE 32767
36 #define MIN_WORD_VALUE -32768
37 #define MAX_UWORD_VALUE 65535
38 #define MIN_UWORD_VALUE 0
39 #define MAX_CHAR_VALUE 127
40 #define MIN_CHAR_VALUE -128
41 #define MAX_UCHAR_VALUE 255
42 #define MIN_UCHAR_VALUE 0
43 
44 typedef struct FCCalls
45 {
46     // callOffset is in inst number units
47     unsigned int callOffset;
48     const char* calleeLabelString;
49 } FCCalls;
50 
51 enum DeclareType
52 {
53     Regular = 0,
54     Fill = 1,
55     Spill = 2,
56     Tmp = 3,
57     AddrSpill = 4,
58     CoalescedFill = 5,
59     CoalescedSpill = 6
60 };
61 
62 // forward declaration
63 // FIXME: our #include is a mess, need to clean it up
64 class CISA_IR_Builder;
65 
66 namespace vISA
67 {
68 // IR_Builder class has a member of type FCPatchingInfo
69 // as a member. This class is expected to hold all FC
70 // related information.
71 class FCPatchingInfo
72 {
73 private:
74     // Flag to tell if this instance has any fast-composite
75     // type calls. Callees for such call instructions are not available
76     // in same compilation unit.
77     bool hasFCCalls;
78 
79     // Set to true if kernel has "Callable" attribute set in VISA
80     // stream.
81     bool isFCCallableKernel;
82 
83     // Set to true if kernel was compiled with /mCM_composable_kernel
84     // FE flag.
85     bool isFCComposableKernel;
86 
87     // Set to true if kernel has "Entry" attribute set in VISA
88     // stream.
89     bool isFCEntryKernel;
90 
91     std::vector<FCCalls*> FCCallsToPatch;
92     std::vector<unsigned int> FCReturnOffsetsToPatch;
93 
94 public:
FCPatchingInfo()95     FCPatchingInfo()
96     {
97         hasFCCalls = false;
98         isFCCallableKernel = false;
99         isFCComposableKernel = false;
100         isFCEntryKernel = false;
101     }
102 
setHasFCCalls(bool hasFC)103     void setHasFCCalls(bool hasFC) { hasFCCalls = hasFC; }
getHasFCCalls()104     bool getHasFCCalls() const { return hasFCCalls; }
setIsCallableKernel(bool value)105     void setIsCallableKernel(bool value) { isFCCallableKernel = value; }
getIsCallableKernel()106     bool getIsCallableKernel() const { return isFCCallableKernel; }
setFCComposableKernel(bool value)107     void setFCComposableKernel(bool value) { isFCComposableKernel = value; }
getFCComposableKernel()108     bool getFCComposableKernel() const { return isFCComposableKernel; }
setIsEntryKernel(bool value)109     void setIsEntryKernel(bool value) { isFCEntryKernel = value; }
getIsEntryKernel()110     bool getIsEntryKernel() const { return isFCEntryKernel; }
getFCCallsToPatch()111     std::vector<FCCalls*>& getFCCallsToPatch() { return FCCallsToPatch; }
getFCReturnsToPatch()112     std::vector<unsigned int>& getFCReturnsToPatch() { return FCReturnOffsetsToPatch; }
113 
114     enum RegAccessType : unsigned char {
115       Fully_Use = 0,
116       Partial_Use = 1,
117       Fully_Def = 2,
118       Partial_Def = 3
119     };
120 
121     enum RegAccessPipe : unsigned char {
122       Pipe_ALU = 0,
123       Pipe_Math = 1,
124       Pipe_Send = 2,
125       Pipe_Dpas = 3
126     };
127 
128     struct RegAccess {
129       RegAccess *Next;    // The next access on the same GRF.
130       RegAccessType Type; // 'def' or 'use' of that GRF.
131       unsigned RegNo;     // GRF.
132       unsigned Pipe;      // Pipe.
133       // where that access is issued.
134       G4_INST *Inst;        // Where that GRF is accessed.
135       unsigned Offset;      // Instruction offset populated finally.
136       // Token associated with that access.
137       unsigned Token; // Optional token allocation associated to 'def'.
138 
RegAccessRegAccess139       RegAccess() :
140         Next(nullptr), Type(Fully_Use), RegNo(0), Pipe(0), Inst(nullptr),
141         Offset(0), Token(0) {}
142     };
143 
144     // FIXME: Need to consider the pipeline ID together with GRF since
145     // different pipe will use/def out-of-order. Need to synchronize all of
146     // them to resolve the dependency.
147 
148     std::list<RegAccess> RegFirstAccessList;
149     std::list<RegAccess> RegLastAccessList;
150     // Per GRF, the first access.
151     std::map<unsigned, RegAccess *> RegFirstAccessMap;
152     // Per GRF, the last access.
153     std::map<unsigned, RegAccess *> RegLastAccessMap;
154     // Note that tokens recorded are tokens allocated but not used in last
155     // access list.
156     std::set<unsigned> AllocatedToken;  // Allocated token.
157 };
158 }
159 
160 namespace vISA
161 {
162 //
163 // hash table for holding reg vars and reg region
164 //
165 
166 class OperandHashTable
167 {
168     Mem_Manager& mem;
169 
170     struct ImmKey
171     {
172         int64_t val;
173         G4_Type valType;
ImmKeyImmKey174         ImmKey(int64_t imm, G4_Type type) : val(imm), valType(type) {}
175         bool operator==(const ImmKey& imm) const
176         {
177             return val == imm.val && valType == imm.valType;
178         }
179     };
180 
181     struct ImmKeyHash
182     {
operatorImmKeyHash183         std::size_t operator()(const ImmKey& imm) const
184         {
185             return (std::size_t) (imm.val ^ imm.valType);
186         }
187     };
188 
189     struct stringCompare
190     {
operatorstringCompare191         bool operator() (const char* s1, const char* s2) const
192         {
193             return strcmp(s1, s2) == 0;
194         }
195     };
196 
197     std::unordered_map<ImmKey, G4_Imm*, ImmKeyHash> immTable;
198     std::unordered_map<const char *, G4_Label*, std::hash<const char*>, stringCompare> labelTable;
199 
200 public:
OperandHashTable(Mem_Manager & m)201     OperandHashTable(Mem_Manager& m) : mem(m)
202     {
203     }
204 
205     // Generic methods that work on both integer and floating-point types.
206     // For floating-point types, 'imm' needs to be G4_Imm(<float-value>.getImm().
207     G4_Imm*          lookupImm(int64_t imm, G4_Type ty);
208     G4_Imm*          createImm(int64_t imm, G4_Type ty);
209 };
210 
211 //
212 // place for holding all region descriptions
213 //
214 class RegionPool
215 {
216     Mem_Manager& mem;
217     std::vector<RegionDesc*> rgnlist;
218 public:
RegionPool(Mem_Manager & m)219     RegionPool(Mem_Manager& m) : mem(m) {}
220     const RegionDesc* createRegion(
221         uint16_t vstride, uint16_t width, uint16_t hstride);
222 };
223 
224 //
225 // place for hbolding all .declare
226 //
227 class DeclarePool
228 {
229     Mem_Manager& mem;
230     std::vector<G4_Declare*> dcllist;
231     int addrSpillLocCount; //incremented in G4_RegVarAddrSpillLoc()
232 public:
DeclarePool(Mem_Manager & m)233     DeclarePool(Mem_Manager& m) : mem(m), addrSpillLocCount(0) { dcllist.reserve(2048); }
234     ~DeclarePool();
235 
236     G4_Declare* createDeclare(
237         const char*    name,
238         G4_RegFileKind regFile,
239         unsigned short nElems,
240         unsigned short nRows,
241         G4_Type        ty,
242         DeclareType    kind = Regular,
243         G4_RegVar *    base = nullptr,
244         G4_Operand *   repRegion = nullptr,
245         G4_ExecSize    execSize = G4_ExecSize(0));
246 
createPreVarDeclare(PreDefinedVarsInternal index,unsigned short n_elems,unsigned short n_rows,G4_Type ty)247     G4_Declare* createPreVarDeclare(
248                               PreDefinedVarsInternal index,
249                               unsigned short n_elems,
250                               unsigned short n_rows,
251                               G4_Type        ty)
252     {
253 
254         G4_Declare* dcl = new (mem)G4_Declare(getPredefinedVarString(index), G4_INPUT, n_elems * n_rows, ty, dcllist);
255         G4_RegVar * regVar;
256         regVar = new (mem) G4_RegVar(dcl, G4_RegVar::RegVarType::Default);
257         dcl->setRegVar(regVar);
258 
259         return dcl;
260     }
261 
getDeclareList()262     std::vector<G4_Declare*>& getDeclareList() {return dcllist;}
263 };
264 
265 
266 //
267 // interface for creating operands and instructions
268 //
269 class IR_Builder
270 {
271 public:
272     const char* curFile;
273     unsigned int curLine;
274     int curCISAOffset;
275 
276     static const int OrphanVISAIndex = 0xffffffff;
277     int debugInfoPlaceholder = OrphanVISAIndex; // used for debug info, catch all VISA offset for orphan instructions
278 
279 private:
280     class GlobalImmPool
281     {
282         struct ImmVal
283         {
284             G4_Imm* imm;
285             int numElt;
286 
287             bool operator==(const ImmVal& v) const {
288                 return imm == v.imm && numElt == v.numElt;
289             }
290         };
291         static const int MAX_POOL_SIZE = 8;   // reg pressure control, for now just do naive first-come first-serve
292         std::array<ImmVal, MAX_POOL_SIZE> immArray;
293         std::array<G4_Declare*, MAX_POOL_SIZE> dclArray;
294         int curSize = 0;
295         IR_Builder& builder;
296 
297     public:
GlobalImmPool(IR_Builder & b)298         GlobalImmPool(IR_Builder& b) : builder(b), immArray(), dclArray() {}
299         G4_Declare* addImmVal(G4_Imm* imm, int numElt);
size()300         int size() const { return curSize; }
getImmVal(int i)301         const ImmVal& getImmVal(int i) {return immArray[i];}
getImmDcl(int i)302         G4_Declare* getImmDcl(int i) {return dclArray[i];}
303     };
304 
305     GlobalImmPool immPool;
306 
307     const TARGET_PLATFORM platform;
308 
309     //allocator pools
310     USE_DEF_ALLOCATOR     useDefAllocator;
311 
312     FINALIZER_INFO*       metaData = nullptr;
313     CompilerStats         compilerStats;
314 
315     int                   subroutineId = -1;   // the kernel itself has id 0, as we always emit a subroutine label for kernel too
316     enum VISA_BUILD_TYPE type; // as opposed to what?
317 
318     // pre-defined declare that binds to R0 (the entire GRF)
319     // when pre-emption is enabled, builtinR0 is replaced by a temp,
320     // and a move is inserted at kernel entry
321     // mov (8) builtinR0 realR0
322     G4_Declare* builtinR0 = nullptr; // this is either r0 or the temp if pre-emption is enabled
323     G4_Declare* realR0 = nullptr; // this always refers to r0
324 
325     // pre-defined declare that binds to A0.0:ud
326     G4_Declare* builtinA0 = nullptr;
327     // pre-defined declare that binds to A0.2:ud
328     G4_Declare* builtinA0Dot2 = nullptr; //used for splitsend's ext msg descriptor
329     // pre-defind declare that binds to HWTid (R0.5:ud)
330     G4_Declare* builtinHWTID = nullptr;
331     // pre-defined bindless surface index (252, 1 UD)
332     G4_Declare* builtinT252 = nullptr;
333     // pre-defined bindless sampler index (31, 1 UD)
334     G4_Declare* builtinBindlessSampler = nullptr;
335     // pre-defined sampler header
336     G4_Declare* builtinSamplerHeader = nullptr;
337 
338     // common message header for spill/fill intrinsics
339     // We put them here instead of spillManager since there may be multiple rounds of spill,
340     // and we want to use a common header
341     G4_Declare* spillFillHeader = nullptr;
342 
343     G4_Declare* oldA0Dot2Temp = nullptr;
344 
345     G4_Declare* builtinScratchSurface = nullptr;
346     G4_Declare* scratchSurfaceOffset = nullptr; // if scratch surface is used, this will be initialized once at entry
347 
348     //The temp var for eu fusion W/A
349     G4_Declare* euFusionWATmpVar = nullptr;
350 
351     // Indicates that sampler header cache (builtinSamplerHeader) is correctly
352     // initialized with r0 contents.
353     // Used only when vISA_cacheSamplerHeader option is set.
354     bool builtinSamplerHeaderInitialized;
355 
356     // function call related declares
357     G4_Declare* be_sp = nullptr;
358     G4_Declare* be_fp = nullptr;
359     // Part FDE inst is the move that stores r125.[0-3] to a temp.
360     // This is used to restore ret %ip, ret EM, and BE ptrs.
361     G4_INST* savePartFDInst = nullptr;
362     // FDE spill inst is first spill instruction that writes frame
363     // descriptor to stack.
364     G4_INST* FDSpillInst = nullptr;
365     G4_Declare* tmpFCRet = nullptr;
366     // Used to store implicit arg, local id buffer ptrs for stackcall
367     G4_Declare* implArgBufferPtr = nullptr;
368     G4_Declare* localIdBufferPtr = nullptr;
369 
370     unsigned short arg_size;
371     unsigned short return_var_size;
372 
373     unsigned int sampler8x8_group_id;
374 
375     // input declare of R1.
376     G4_Declare* inputR1 = nullptr;
377 
378     // Populate this data structure so after compiling all kernels
379     // in file, we can emit out patch file using this up-levelled
380     // information.
381     FCPatchingInfo* fcPatchInfo = nullptr;
382 
383     const WA_TABLE *m_pWaTable;
384     Options *m_options = nullptr;
385 
386     std::map<const G4_INST*, G4_FCALL*> m_fcallInfo;
387 
388     // Basic region descriptors.
389     RegionDesc CanonicalRegionStride0, // <0; 1, 0>
390                CanonicalRegionStride1, // <1; 1, 0>
391                CanonicalRegionStride2, // <2; 1, 0>
392                CanonicalRegionStride4; // <4; 1, 0>
393 
394     // map of all stack functioncs ever invoked by this builder's kernel/function
395     std::map<std::string, G4_Label*> m_fcallLabels;
396 
getFcallLabel(const std::string & str)397     G4_Label* getFcallLabel(const std::string &str)
398     {
399         auto it = m_fcallLabels.find(str);
400         if (it == m_fcallLabels.end())
401         {
402             auto label = createLabel(str, LABEL_FUNCTION);
403             m_fcallLabels[str] = label;
404             return label;
405         }
406         return it->second;
407     }
408 
409     class PreDefinedVars
410     {
411     public:
setHasPredefined(PreDefinedVarsInternal id,bool val)412         void setHasPredefined(PreDefinedVarsInternal id, bool val) {
413             hasPredefined[static_cast<int>(id)] = val;
414         }
415 
isHasPredefined(PreDefinedVarsInternal id)416         bool isHasPredefined(PreDefinedVarsInternal id) const {
417             return hasPredefined[static_cast<int>(id)];
418         }
419 
setPredefinedVar(PreDefinedVarsInternal id,G4_Declare * dcl)420         void setPredefinedVar(PreDefinedVarsInternal id, G4_Declare *dcl) {
421             predefinedVars[static_cast<int>(id)] = dcl;
422         }
423 
getPreDefinedVar(PreDefinedVarsInternal id)424         G4_Declare* getPreDefinedVar(PreDefinedVarsInternal id) const {
425             if (id >= PreDefinedVarsInternal::VAR_LAST) {
426                 return nullptr;
427             }
428             return predefinedVars[static_cast<int>(id)];
429         }
430     private:
431         // records whether a vISA pre-defined var is used by the kernel
432         // some predefined need to be expanded (e.g., HWTid, color)
433         bool hasPredefined[static_cast<int>(PreDefinedVarsInternal::VAR_LAST)] {};
434         G4_Declare* predefinedVars[static_cast<int>(PreDefinedVarsInternal::VAR_LAST)] {};
435     };
436 
437     bool hasNullReturnSampler = false;
438 
439     const CISA_IR_Builder* parentBuilder = nullptr;
440 
441     // stores all metadata ever allocated
442     Mem_Manager            metadataMem;
443     std::vector<Metadata*> allMDs;
444     std::vector<MDNode*>   allMDNodes;
445 
446 public:
447     PreDefinedVars      preDefVars;
448     Mem_Manager&        mem;        // memory for all operands and insts
449     PhyRegPool          phyregpool; // all physical regs
450     OperandHashTable    hashtable;  // all created region operands
451     RegionPool          rgnpool;    // all region description
452     DeclarePool         dclpool;    // all created decalres
453     INST_LIST           instList;   // all created insts
454     // list of instructions ever allocated
455     // This list may only grow and is freed when IR_Builder is destroyed
456     std::vector<G4_INST*> instAllocList;
457     G4_Kernel&          kernel;
458     // the following fileds are used for dcl name when a new dcl is created.
459     // number of predefined variables are included.
460     unsigned            num_temp_dcl;
461     // number of temp GRF vars created to hold spilled addr/flag
462     uint32_t            numAddrFlagSpillLoc = 0;
463     std::vector<input_info_t*> m_inputVect;
464 
getOptions()465     const Options* getOptions() const { return m_options; }
getOption(vISAOptions opt)466     bool           getOption(vISAOptions opt) const {return m_options->getOption(opt); }
getuint32Option(vISAOptions opt)467     uint32_t       getuint32Option(vISAOptions opt) const { return m_options->getuInt32Option(opt); }
getOption(vISAOptions opt,const char * & str)468     void           getOption(vISAOptions opt, const char *&str) const {return m_options->getOption(opt, str); }
469     void           addInputArg(input_info_t * inpt);
470     input_info_t * getInputArg(unsigned int index) const;
471     unsigned int   getInputCount() const;
472     input_info_t * getRetIPArg() const;
473 
getParent()474     const CISA_IR_Builder* getParent() const { return parentBuilder; }
475 
476     void dump(std::ostream &os); // not const because G4_INST::emit isn't :(
477 
478     std::stringstream& criticalMsgStream();
479 
getAllocator()480     const USE_DEF_ALLOCATOR& getAllocator() const { return useDefAllocator; }
481 
482     // Following enum describes layout of r125 on entry to a function.
483     // Ret_IP and Ret_EM may be altered due to callees. They'll be
484     // restored right before fret.
485     enum SubRegs_Stackcall
486     {
487         Ret_IP = 0, // :ud
488         Ret_EM = 1, // :ud
489         BE_SP = 2, // :ud
490         BE_FP = 3, // :ud
491         FE_FP = 2, // :uq
492         FE_SP = 3, // :uq
493     };
494 
495     enum SubRegs_ImplPtrs
496     {
497         ImplBufPtr = 0, // :uq
498         LocalIdBufPtr = 3, // :uq
499     };
500 
501     enum ArgRet_Stackcall
502     {
503         Arg = 26,
504         Ret = 26
505     };
506 
507     // Getter/setter for be_sp and be_fp
getBESP()508     G4_Declare* getBESP()
509     {
510         if (be_sp == NULL)
511         {
512             be_sp = createDeclareNoLookup("be_sp", G4_GRF, 1, 1, Type_UD);
513             be_sp->getRegVar()->setPhyReg(
514                 phyregpool.getGreg(kernel.getFPSPGRF()), SubRegs_Stackcall::BE_SP);
515         }
516 
517         return be_sp;
518     }
519 
getBEFP()520     G4_Declare* getBEFP()
521     {
522         if (be_fp == NULL)
523         {
524             be_fp = createDeclareNoLookup("be_fp", G4_GRF, 1, 1, Type_UD);
525             be_fp->getRegVar()->setPhyReg(
526                 phyregpool.getGreg(kernel.getFPSPGRF()), SubRegs_Stackcall::BE_FP);
527         }
528 
529         return be_fp;
530     }
531 
getPartFDSaveInst()532     G4_INST* getPartFDSaveInst() const { return savePartFDInst; }
setPartFDSaveInst(G4_INST * i)533     void setPartFDSaveInst(G4_INST* i) { savePartFDInst = i; }
534 
getFDSpillInst()535     G4_INST* getFDSpillInst() const { return FDSpillInst; }
setFDSpillInst(G4_INST * i)536     void setFDSpillInst(G4_INST* i) { FDSpillInst = i; }
537 
getStackCallArg()538     G4_Declare* getStackCallArg() const {
539         return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::ARG);
540     }
getStackCallRet()541     G4_Declare* getStackCallRet() const {
542         return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::RET);
543     }
544 
getFE_SP()545     G4_Declare* getFE_SP() const {
546         return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::FE_SP);
547     }
548 
getFE_FP()549     G4_Declare* getFE_FP() const {
550         return preDefVars.getPreDefinedVar(PreDefinedVarsInternal::FE_FP);
551     }
552 
isPreDefArg(G4_Declare * dcl)553     bool isPreDefArg(G4_Declare* dcl) const {
554         return dcl == getStackCallArg();
555     }
556 
isPreDefRet(G4_Declare * dcl)557     bool isPreDefRet(G4_Declare* dcl) const {
558         return dcl == getStackCallRet();
559     }
560 
isPreDefFEStackVar(G4_Declare * dcl)561     bool isPreDefFEStackVar(G4_Declare* dcl) const {
562         return dcl == getFE_SP() || dcl == getFE_FP();
563     }
564 
565     // this refers to vISA's internal stack for spill and caller/callee-save
566     // Note that this is only valid after CFG is constructed
567     // ToDo: make this a pass?
usesStack()568     bool usesStack() const {
569         return kernel.fg.getHasStackCalls() || kernel.fg.getIsStackCallFunc();
570     }
571 
572     void bindInputDecl(G4_Declare* dcl, int grfOffset);
573 
getPerThreadInputSize()574     uint32_t getPerThreadInputSize() const {
575         return kernel.getInt32KernelAttr(Attributes::ATTR_PerThreadInputSize);
576     }
577 
getCrossThreadInputSize()578     int32_t getCrossThreadInputSize() const {
579         return kernel.getInt32KernelAttr(Attributes::ATTR_CrossThreadInputSize);
580     }
581 
getLTOInvokeOptTarget()582     bool getLTOInvokeOptTarget() const {
583         return kernel.getBoolKernelAttr(Attributes::ATTR_LTOInvokeOptTarget);
584     }
585 
586     //
587     // Check if opnd is or can be made "alignByte"-byte aligned.
588     // These functions will change the underlying variable's alignment
589     // (e.g., make a scalar variable GRF-aligned) when possible to satisfy
590     // the alignment
591     bool isOpndAligned(G4_Operand* opnd, int alignByte) const;
592     bool isOpndAligned(G4_Operand *opnd, unsigned short &offset, int align_byte) const;
593 
setType(enum VISA_BUILD_TYPE _type)594     void setType(enum VISA_BUILD_TYPE _type) { type = _type; }
getIsKernel()595     bool getIsKernel() const { return type == VISA_BUILD_TYPE::KERNEL; }
getIsFunction()596     bool getIsFunction() const { return type == VISA_BUILD_TYPE::FUNCTION; }
getIsPayload()597     bool getIsPayload() const { return type == VISA_BUILD_TYPE::PAYLOAD; }
getType()598     enum VISA_BUILD_TYPE getType() const { return type; }
599     void predefinedVarRegAssignment(uint8_t inputSize);
600     void expandPredefinedVars();
setArgSize(unsigned short size)601     void setArgSize(unsigned short size) { arg_size = size; }
getArgSize()602     unsigned short getArgSize() const { return arg_size; }
setRetVarSize(unsigned short size)603     void setRetVarSize(unsigned short size) { return_var_size = size; }
getRetVarSize()604     unsigned short getRetVarSize() const { return return_var_size; }
605 
606     FCPatchingInfo* getFCPatchInfo();
setFCPatchInfo(FCPatchingInfo * instance)607     void setFCPatchInfo(FCPatchingInfo* instance) { fcPatchInfo = instance; }
608 
getPWaTable()609     const WA_TABLE *getPWaTable() const { return m_pWaTable; }
610 
611     static const char* getNameString(Mem_Manager& mem, size_t size, const char* format, ...);
612 
613     G4_Predicate_Control vISAPredicateToG4Predicate(
614         VISA_PREDICATE_CONTROL control, G4_ExecSize execSize);
615 
616     G4_FCALL* getFcallInfo(const G4_INST* inst) const;
addFcallInfo(const G4_INST * FcallInst,uint16_t ArgSize,uint16_t RetSize)617     void addFcallInfo(const G4_INST* FcallInst, uint16_t ArgSize, uint16_t RetSize)
618     {
619         m_fcallInfo[FcallInst] = new (mem) G4_FCALL(ArgSize, RetSize);
620     }
621 
622     // If this is true (detected in TranslateInterface.cpp), we need a sampler flush before EOT
getHasNullReturnSampler()623     bool getHasNullReturnSampler() const { return hasNullReturnSampler; }
624 
625     //    Initializes predefined vars for all the vISA versions
626     void createPreDefinedVars();
627 
628     void createBuiltinDecls();
629 
630     G4_Declare* getSpillFillHeader();
631 
632     G4_Declare* getEUFusionWATmpVar();
633 
634     G4_Declare* getOldA0Dot2Temp();
hasValidOldA0Dot2()635     bool hasValidOldA0Dot2() { return oldA0Dot2Temp; }
636 
637     IR_Builder(
638         TARGET_PLATFORM genPlatform,
639         INST_LIST_NODE_ALLOCATOR &alloc,
640         G4_Kernel &k,
641         Mem_Manager &m,
642         Options *options,
643         CISA_IR_Builder* parent,
644         FINALIZER_INFO *jitInfo,
645         const WA_TABLE *pWaTable);
646 
647     ~IR_Builder();
648 
rebuildPhyRegPool(unsigned int numRegisters)649     void rebuildPhyRegPool(unsigned int numRegisters) {
650         phyregpool.rebuildRegPool(mem, numRegisters);
651     }
652 
getPlatform()653     TARGET_PLATFORM getPlatform() const {return platform;}
getJitInfo()654     FINALIZER_INFO* getJitInfo() {return metaData;}
getcompilerStats()655     CompilerStats &getcompilerStats() {return compilerStats;}
656 
657     G4_Declare* createDeclareNoLookup(
658         const char*    name,
659         G4_RegFileKind regFile,
660         unsigned short n_elems,
661         unsigned short n_rows,
662         G4_Type        ty,
663         DeclareType    kind = Regular,
664         G4_RegVar *    base = NULL,
665         G4_Operand *   repRegion = NULL,
666         G4_ExecSize    execSize = G4_ExecSize(0));
667 
668 
createPreVarDeclareNoLookup(PreDefinedVarsInternal index,unsigned short n_elems,unsigned short n_rows,G4_Type ty)669     G4_Declare* createPreVarDeclareNoLookup(
670         PreDefinedVarsInternal index,
671         unsigned short n_elems,
672         unsigned short n_rows,
673         G4_Type        ty)
674     {
675         G4_Declare* dcl = dclpool.createPreVarDeclare(index, n_elems, n_rows, ty);
676         kernel.Declares.push_back(dcl);
677         return dcl;
678     }
679 
getBuiltinR0()680     G4_Declare* getBuiltinR0() {return builtinR0;}
getRealR0()681     G4_Declare* getRealR0() const {return realR0;} // undefined terminology: what's "real" here (vs "builtin" above)?
getBuiltinA0()682     G4_Declare* getBuiltinA0() {return builtinA0;}
getBuiltinA0Dot2()683     G4_Declare* getBuiltinA0Dot2() {return builtinA0Dot2;}
getBuiltinHWTID()684     G4_Declare* getBuiltinHWTID() const {return builtinHWTID;}
getBuiltinT252()685     G4_Declare* getBuiltinT252() const {return builtinT252;}
getBuiltinBindlessSampler()686     G4_Declare* getBuiltinBindlessSampler() const {return builtinBindlessSampler; }
getBuiltinSamplerHeader()687     G4_Declare* getBuiltinSamplerHeader() const { return builtinSamplerHeader; }
getOldA0Dot2Temp()688     G4_Declare* getOldA0Dot2Temp() const { return oldA0Dot2Temp; }
689 
getInputR1()690     G4_Declare* getInputR1() { return inputR1; }
setInputR1(G4_Declare * r1)691     void setInputR1(G4_Declare* r1) { inputR1 = r1; }
692 
isBindlessSampler(const G4_Operand * sampler)693     bool isBindlessSampler(const G4_Operand* sampler) const {
694         return sampler->isSrcRegRegion() && sampler->getTopDcl() == getBuiltinBindlessSampler();
695     }
696 
isBindlessSurface(const G4_Operand * bti)697     bool isBindlessSurface(const G4_Operand* bti) const {
698         return bti->isSrcRegRegion() && bti->getTopDcl() == getBuiltinT252();
699     }
700 
701     // IsSLMSurface - Check whether the given surface is SLM surface.
IsSLMSurface(const G4_Operand * surface)702     static bool IsSLMSurface(const G4_Operand *surface) {
703         // So far, it's only reliable to check an immediate surface.
704         return surface->isImm() && surface->asImm()->getImm() == PREDEF_SURF_0;
705     }
706 
707 
getDeclare(const G4_Operand * opnd)708     static const G4_Declare *getDeclare(const G4_Operand *opnd) {
709         const G4_Declare *dcl = opnd->getBase()->asRegVar()->getDeclare();
710 
711         while (const G4_Declare *parentDcl = dcl->getAliasDeclare())
712             dcl = parentDcl;
713 
714         return dcl;
715     }
getDeclare(G4_Operand * opnd)716     static G4_Declare *getDeclare(G4_Operand *opnd) {
717         return const_cast<G4_Declare *>(getDeclare((const G4_Operand *)opnd));
718     }
719 
shouldForceSplitSend(const G4_Operand * surface)720     bool shouldForceSplitSend(const G4_Operand* surface) const
721     {
722         return surface->isSrcRegRegion() &&
723             (surface->getTopDcl() == getBuiltinT252() ||
724                 surface->getTopDcl() == getBuiltinScratchSurface());
725     }
726 
727     /// getSplitEMask() calculates the new mask after splitting from the current
728     /// execution mask at the given execution size.
729     /// It only works with masks covering whole GRF and thus won't generate/consume
730     /// nibbles.
731     static uint32_t getSplitEMask(unsigned execSize, uint32_t eMask, bool isLo);
getSplitLoEMask(unsigned execSize,uint32_t eMask)732     static uint32_t getSplitLoEMask(unsigned execSize, uint32_t eMask) {
733         return getSplitEMask(execSize, eMask, true);
734     }
getSplitHiEMask(unsigned execSize,uint32_t eMask)735     static uint32_t getSplitHiEMask(unsigned execSize, uint32_t eMask) {
736         return getSplitEMask(execSize, eMask, false);
737     }
738 
isScratchSpace(G4_Operand * bti)739     bool isScratchSpace(G4_Operand* bti) const {
740         return bti->isSrcRegRegion() && bti->getTopDcl() == builtinScratchSurface;
741     }
getBuiltinScratchSurface()742     G4_Declare* getBuiltinScratchSurface() const {
743         return builtinScratchSurface;
744     }
745 
746     G4_SrcRegRegion* createScratchExDesc(uint32_t exdesc);
747 
748     void initScratchSurfaceOffset();
749 
getSpillSurfaceOffset()750     G4_Declare* getSpillSurfaceOffset() {return scratchSurfaceOffset;}
751 
752     // create a new temp GRF with the specified type/size and undefined regions
753     G4_Declare* createTempVar(
754         unsigned int numElements, G4_Type type, G4_SubReg_Align subAlign,
755         const char* prefix = "TV", bool appendIdToName = true);
756 
757 
758     // create a new temp GRF as the home location of a spilled addr/flag dcl
759     G4_Declare* createAddrFlagSpillLoc(G4_Declare* dcl);
760 
761 
762     // like the above, but also mark the variable as don't spill
763     // this is used for temp variables in macro sequences where spilling woul not help
764     // FIXME: can we somehow merge this with G4_RegVarTmp/G4_RegVarTransient?
765     G4_Declare* createTempVarWithNoSpill(
766         unsigned int numElements, G4_Type type, G4_SubReg_Align subAlign,
767         const char* prefix = "TV")
768     {
769         G4_Declare* dcl = createTempVar(numElements, type, subAlign, prefix);
770         dcl->setDoNotSpill();
771         return dcl;
772     }
773 
774     //
775     // Create a declare that is hardwired to some phyiscal GRF.
776     // It is useful to implement various workarounds post RA where we want to directly
777     // address some physical GRF.
778     // regOff is in unit of the declare type.
779     // caller is responsible for ensuring the resulting variable does not violate any HW restrictions
780     // (e.g., operand does not cross two GRF)
781     G4_Declare* createHardwiredDeclare(
782         uint32_t numElements, G4_Type type, uint32_t regNum, uint32_t regOff);
783 
784     G4_INST* createPseudoKills(std::initializer_list<G4_Declare*> dcls, PseudoKillType ty);
785 
786     G4_INST* createPseudoKill(G4_Declare* dcl, PseudoKillType ty);
787 
788     G4_INST* createEUWASpill(bool addToInstList);
789 
790     // numRows is in hword units
791     // offset is in hword units
792     G4_INST* createSpill(
793         G4_DstRegRegion* dst, G4_SrcRegRegion* header, G4_SrcRegRegion* payload,
794         G4_ExecSize execSize,
795         uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
796         bool addToInstList);
797 
798     G4_INST* createSpill(
799         G4_DstRegRegion* dst, G4_SrcRegRegion* payload,
800         G4_ExecSize execSize, uint16_t numRows, uint32_t offset,
801         G4_Declare* fp, G4_InstOption option, bool addToInstList);
802 
803 
804     G4_INST* createFill(
805         G4_SrcRegRegion* header,
806         G4_DstRegRegion* dstData, G4_ExecSize execSize,
807         uint16_t numRows, uint32_t offset, G4_Declare* fp, G4_InstOption option,
808         bool addToInstList);
809     G4_INST* createFill(
810         G4_DstRegRegion* dstData, G4_ExecSize execSize,
811         uint16_t numRows, uint32_t offset, G4_Declare* fp , G4_InstOption option,
812         bool addToInstList);
813 
814 
815     // numberOfFlags MEANS NUMBER OF WORDS (e.g., 1 means 16-bit), not number of bits or number of data elements in operands.
816     G4_Declare* createTempFlag(unsigned short numberOfFlags, const char* prefix = "TEMP_FLAG_");
817 
818     // like above, but pass numFlagElements instead. This allows us to distinguish between 1/8/16-bit flags,
819     // which are all allocated as a UW. name is allocated by caller
820     G4_Declare* createFlag(uint16_t numFlagElements, const char* name);
821 
822     G4_Declare* createTempScalar(uint16_t numFlagElements, const char* prefix);
823 
824     G4_Declare* createScalar(uint16_t numFlagElements, const char* name);
825 
826     G4_Declare* createPreVar(
827         PreDefinedVarsInternal preDefVar_index, unsigned short numElements, G4_Type type);
828 
829     //
830     // create <vstride; width, hstride>
831     //
832     // PLEASE use getRegion* interface to get regions if possible!
833     // This function will be mostly used for external regions.
createRegionDesc(uint16_t vstride,uint16_t width,uint16_t hstride)834     const RegionDesc* createRegionDesc(
835         uint16_t vstride,
836         uint16_t width,
837         uint16_t hstride)
838     {
839         return rgnpool.createRegion(vstride, width, hstride);
840     }
841 
842     // Given the execution size and region parameters, create a region
843     // descriptor.
844     //
845     // PLEASE use getRegion* interface to get regions if possible!
846     // This function will be mostly used for external regions.
createRegionDesc(uint16_t execSize,uint16_t vstride,uint16_t width,uint16_t hstride)847     const RegionDesc *createRegionDesc(
848         uint16_t execSize, uint16_t vstride, uint16_t width, uint16_t hstride)
849     {
850         // Performs normalization for commonly used regions.
851         switch (RegionDesc::getRegionDescKind(execSize, vstride, width, hstride)) {
852         case RegionDesc::RK_Stride0: return getRegionScalar();
853         case RegionDesc::RK_Stride1: return getRegionStride1();
854         case RegionDesc::RK_Stride2: return getRegionStride2();
855         case RegionDesc::RK_Stride4: return getRegionStride4();
856         default: break;
857         }
858         return rgnpool.createRegion(vstride, width, hstride);
859     }
860 
861     /// Helper to normalize an existing region descriptor.
getNormalizedRegion(uint16_t execSize,const RegionDesc * rd)862     const RegionDesc *getNormalizedRegion(uint16_t execSize, const RegionDesc *rd) {
863         return createRegionDesc(execSize, rd->vertStride, rd->width, rd->horzStride);
864     }
865 
866     /// Get the predefined region descriptors.
getRegionScalar()867     const RegionDesc *getRegionScalar()  const { return &CanonicalRegionStride0; }
getRegionStride1()868     const RegionDesc *getRegionStride1() const { return &CanonicalRegionStride1; }
getRegionStride2()869     const RegionDesc *getRegionStride2() const { return &CanonicalRegionStride2; }
getRegionStride4()870     const RegionDesc *getRegionStride4() const { return &CanonicalRegionStride4; }
871 
872     // ToDo: get rid of this version and use the message type specific ones below instead,
873     // so we can avoid having to explicitly create extDesc bits
874     G4_SendDescRaw * createGeneralMsgDesc(
875         uint32_t desc,
876         uint32_t extDesc,
877         SendAccess access,
878         G4_Operand* bti = nullptr,
879         G4_Operand* sti = nullptr);
880 
881     G4_SendDescRaw * createReadMsgDesc(
882         SFID sfid,
883         uint32_t desc,
884         G4_Operand* bti = nullptr);
885 
886     G4_SendDescRaw * createWriteMsgDesc(
887         SFID sfid,
888         uint32_t desc,
889         int src1Len,
890         G4_Operand* bti = nullptr);
891 
892     G4_SendDescRaw * createSyncMsgDesc(
893         SFID sfid,
894         uint32_t desc);
895 
896     G4_SendDescRaw * createSampleMsgDesc(
897         uint32_t desc,
898         bool cps,
899         int src1Len,
900         G4_Operand* bti,
901         G4_Operand* sti);
902 
getSendAccessType(bool isRead,bool isWrite)903     static SendAccess getSendAccessType(bool isRead, bool isWrite) {
904         if (isRead && isWrite)
905         {
906             return SendAccess::READ_WRITE;
907         }
908         return isRead ? SendAccess::READ_ONLY : SendAccess::WRITE_ONLY;
909     }
910 
911     G4_SendDescRaw* createSendMsgDesc(
912         SFID sfid,
913         uint32_t desc,
914         uint32_t extDesc,
915         int src1Len,
916         SendAccess access,
917         G4_Operand* bti,
918         G4_ExecSize execSize,
919         bool isValidFuncCtrl = true);
920 
921     G4_SendDescRaw * createSendMsgDesc(
922         SFID sfid,
923         uint32_t desc,
924         uint32_t extDesc,
925         int src1Len,
926         SendAccess access,
927         G4_Operand *bti,
928         bool isValidFuncCtrl = true);
929 
930     G4_SendDescRaw * createSendMsgDesc(
931         unsigned funcCtrl,
932         unsigned regs2rcv,
933         unsigned regs2snd,
934         SFID funcID,
935         unsigned extMsgLength,
936         uint16_t extFuncCtrl,
937         SendAccess access,
938         G4_Operand* bti = nullptr,
939         G4_Operand* sti = nullptr);
940 
941     G4_Operand* emitSampleIndexGE16(
942         G4_Operand* sampler, G4_Declare* headerDecl);
943 
944     //
945     // deprecated, please use the one below
946     //
createSrcRegRegion(G4_SrcRegRegion & src)947     G4_SrcRegRegion* createSrcRegRegion(G4_SrcRegRegion& src)
948     {
949         G4_SrcRegRegion* rgn = new (mem)G4_SrcRegRegion(src);
950         return rgn;
951     }
952 
953     // Create a new srcregregion allocated in mem
954     G4_SrcRegRegion* createSrc(
955         G4_VarBase* b,
956         short roff,
957         short sroff,
958         const RegionDesc *rd,
959         G4_Type ty,
960         G4_AccRegSel regSel = ACC_UNDEFINED)
961     {
962         return createSrcRegRegion(G4_SrcModifier::Mod_src_undef, G4_RegAccess::Direct, b, roff, sroff, rd, ty, regSel);
963     }
964 
965     // deprecated, either use createSrc or createIndirectSrc
966     G4_SrcRegRegion* createSrcRegRegion(
967         G4_SrcModifier m,
968         G4_RegAccess   a,
969         G4_VarBase*    b,
970         short roff,
971         short sroff,
972         const RegionDesc *rd,
973         G4_Type        ty,
974         G4_AccRegSel regSel = ACC_UNDEFINED)
975     {
976         G4_SrcRegRegion* rgn = new (mem)G4_SrcRegRegion(m, a, b, roff, sroff, rd, ty, regSel);
977         return rgn;
978     }
979 
980     G4_SrcRegRegion* createSrcWithNewRegOff(G4_SrcRegRegion* old, short newRegOff);
981     G4_SrcRegRegion* createSrcWithNewSubRegOff(G4_SrcRegRegion* old, short newSubRegOff);
982     G4_SrcRegRegion* createSrcWithNewBase(G4_SrcRegRegion* old, G4_VarBase* newBase);
983 
createIndirectSrc(G4_SrcModifier m,G4_VarBase * b,short roff,short sroff,const RegionDesc * rd,G4_Type ty,short immAddrOff)984     G4_SrcRegRegion* createIndirectSrc(
985         G4_SrcModifier m,
986         G4_VarBase* b,
987         short roff,
988         short sroff,
989         const RegionDesc* rd,
990         G4_Type ty,
991         short immAddrOff)
992     {
993         G4_SrcRegRegion* rgn = new (mem) G4_SrcRegRegion(m, IndirGRF, b, roff, sroff, rd, ty, ACC_UNDEFINED);
994         rgn->setImmAddrOff(immAddrOff);
995         return rgn;
996     }
997 
998     //
999     // deprecated, please use the version below
1000     //
createDstRegRegion(G4_DstRegRegion & dst)1001     G4_DstRegRegion* createDstRegRegion(G4_DstRegRegion& dst)
1002     {
1003         G4_DstRegRegion* rgn = new (mem) G4_DstRegRegion(dst);
1004         return rgn;
1005     }
1006 
1007     // create a direct DstRegRegion
1008     G4_DstRegRegion* createDst(
1009         G4_VarBase* b,
1010         short roff,
1011         short sroff,
1012         unsigned short hstride,
1013         G4_Type        ty,
1014         G4_AccRegSel regSel = ACC_UNDEFINED)
1015     {
1016         return createDstRegRegion(Direct, b, roff, sroff, hstride, ty, regSel);
1017     }
1018     // create a direct DstRegRegion
createDst(G4_VarBase * b,G4_Type ty)1019     G4_DstRegRegion* createDst(G4_VarBase* b, G4_Type ty)
1020     {
1021         return createDstRegRegion(Direct, b, 0, 0, 1, ty, ACC_UNDEFINED);
1022     }
1023 
1024     // create a indirect DstRegRegion
1025     // b is the address variable, which only supports subreg offset
createIndirectDst(G4_VarBase * b,short sroff,uint16_t hstride,G4_Type ty,int16_t immOff)1026     G4_DstRegRegion* createIndirectDst(G4_VarBase* b,
1027         short sroff,
1028         uint16_t hstride,
1029         G4_Type ty,
1030         int16_t immOff)
1031     {
1032         auto dst = createDstRegRegion(IndirGRF, b, 0, sroff, hstride, ty);
1033         dst->setImmAddrOff(immOff);
1034         return dst;
1035     }
1036 
1037     G4_DstRegRegion* createDstWithNewSubRegOff(G4_DstRegRegion* old, short newSubRegOff);
1038 
1039 
1040     //
1041     // return the imm operand; create one if not yet created
1042     //
createImm(int64_t imm,G4_Type ty)1043     G4_Imm* createImm(int64_t imm, G4_Type ty)
1044     {
1045         G4_Imm* i = hashtable.lookupImm(imm, ty);
1046         return (i != NULL)? i : hashtable.createImm(imm, ty);
1047     }
1048 
1049     //
1050     // return the float operand; create one if not yet created
1051     //
1052     G4_Imm* createImm(float fp);
1053 
1054     //
1055     // return the double operand; create one if not yet created
1056     //
1057     G4_Imm* createDFImm(double fp);
1058 
1059 
1060     // For integer immediates use a narrower type if possible
1061     // also change byte type to word type since HW does not support byte imm
1062     G4_Type getNewType(int64_t imm, G4_Type ty);
1063 
1064     //
1065     // return the imm operand with its lowest type(W or above); create one if not yet created
1066     //
createImmWithLowerType(int64_t imm,G4_Type ty)1067     G4_Imm* createImmWithLowerType(int64_t imm, G4_Type ty)
1068     {
1069         G4_Type new_type = getNewType(imm, ty);
1070         G4_Imm* i = hashtable.lookupImm(imm, new_type);
1071         return (i != NULL)? i : hashtable.createImm(imm, new_type);
1072     }
1073 
1074     //
1075     // Create immediate operand without looking up hash table. This operand
1076     // is a relocatable immediate type.
1077     //
createRelocImm(G4_Type ty)1078     G4_Reloc_Imm* createRelocImm(G4_Type ty)
1079     {
1080         G4_Reloc_Imm* newImm;
1081         newImm = new (mem)G4_Reloc_Imm(ty);
1082         return newImm;
1083     }
1084 
1085     //
1086     // Create immediate operand without looking up hash table. This operand
1087     // is a relocatable immediate type. Specify the value of this imm field,
1088     // which will present in the output instruction's imm value.
1089     //
createRelocImm(int64_t immval,G4_Type ty)1090     G4_Reloc_Imm* createRelocImm(int64_t immval, G4_Type ty)
1091     {
1092         G4_Reloc_Imm* newImm;
1093         newImm = new (mem)G4_Reloc_Imm(immval, ty);
1094         return newImm;
1095     }
1096 
1097     //
1098     // a new null-terminated copy of "lab" is created for the new label, so
1099     // caller does not have to allocate memory for lab
1100     //
createLabel(const std::string & lab,VISA_Label_Kind kind)1101     G4_Label* createLabel(const std::string &lab, VISA_Label_Kind kind)
1102     {
1103         auto labStr = lab.c_str();
1104         size_t len = strlen(labStr) + 1;
1105         char* new_str = (char*)mem.alloc(len);  // +1 for null that ends the string
1106         memcpy_s(new_str, len, labStr, len);
1107         return new (mem) G4_Label(new_str);
1108     }
1109 
1110     G4_Predicate* createPredicate(
1111         G4_PredState s, G4_VarBase* flag, unsigned short srOff,
1112         G4_Predicate_Control ctrl = PRED_DEFAULT)
1113     {
1114         G4_Predicate* pred = new (mem)G4_Predicate(s, flag, srOff, ctrl);
1115         return pred;
1116     }
1117 
createPredicate(G4_Predicate & prd)1118     G4_Predicate* createPredicate(G4_Predicate& prd)
1119     {
1120         G4_Predicate* p = new (mem) G4_Predicate(prd);
1121         return p;
1122     }
1123 
createCondMod(G4_CondModifier m,G4_VarBase * flag,unsigned short off)1124     G4_CondMod* createCondMod(G4_CondModifier m, G4_VarBase* flag, unsigned short off)
1125     {
1126         G4_CondMod* p = new (mem)G4_CondMod(m, flag, off);
1127         return p;
1128     }
1129 
1130     //
1131     // return the condition modifier; create one if not yet created
1132     //
createCondMod(G4_CondMod & mod)1133     G4_CondMod* createCondMod(G4_CondMod& mod)
1134     {
1135         G4_CondMod* p = new (mem) G4_CondMod(mod);
1136         return p;
1137     }
1138 
1139     //
1140     // create register address expression normalized to (&reg +/- exp)
1141     //
createAddrExp(G4_RegVar * reg,int offset,G4_Type ty)1142     G4_AddrExp* createAddrExp(G4_RegVar* reg, int offset, G4_Type ty)
1143     {
1144         return new (mem) G4_AddrExp(reg, offset, ty);
1145     }
1146 
1147  private:
1148 
1149     // Avoid calling this directly, use createDst and createIndirectDst instead
1150     G4_DstRegRegion* createDstRegRegion(
1151         G4_RegAccess a,
1152         G4_VarBase* b,
1153         short roff,
1154         short sroff,
1155         unsigned short hstride,
1156         G4_Type        ty,
1157         G4_AccRegSel regSel = ACC_UNDEFINED)
1158     {
1159         G4_DstRegRegion* rgn = new (mem) G4_DstRegRegion(a, b, roff, sroff, hstride, ty, regSel);
1160         return rgn;
1161     }
1162 
1163     // please leave all createInst() as private and use the public wrappers below
1164 
1165      // cond+sat+binary+line
1166      G4_INST* createInst(
1167          G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
1168          G4_ExecSize size,
1169          G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
1170          G4_InstOpts options,
1171          bool addToInstList);
1172 
1173      // TODO: remove
1174      // old template:
1175      // cond+sat+binary+line
createInst(G4_Predicate * prd,G4_opcode op,G4_CondMod * mod,G4_Sat sat,int execSize,G4_DstRegRegion * dst,G4_Operand * src0,G4_Operand * src1,G4_InstOpts options,bool addToInstList)1176      G4_INST* createInst(
1177          G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
1178          int execSize,
1179          G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
1180          G4_InstOpts options,
1181           bool addToInstList)
1182      {
1183          G4_ExecSize sz((unsigned char)execSize);
1184          return createInst(prd, op, mod, sat, sz, dst, src0, src1, options, addToInstList);
1185      }
1186 
1187      // cond+sat+ternary
1188      G4_INST* createInst(
1189          G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
1190          G4_ExecSize execSize,
1191          G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
1192          G4_InstOpts options,
1193          bool addToInstList);
1194 
1195 public:
1196 
1197     G4_INST* createIf(G4_Predicate* prd, G4_ExecSize execSize, G4_InstOpts options);
1198     G4_INST* createElse(G4_ExecSize execSize, G4_InstOpts options);
1199     G4_INST* createEndif(G4_ExecSize execSize, G4_InstOpts options);
1200     G4_INST* createLabelInst(G4_Label* label, bool appendToInstList);
1201     G4_INST* createJmp(
1202         G4_Predicate* pred, G4_Operand* jmpTarget, G4_InstOpts options, bool appendToInstList);
createGoto(G4_Predicate * pred,G4_ExecSize execSize,G4_Label * target,G4_InstOpts options,bool addToInstList)1203     G4_INST* createGoto(G4_Predicate* pred, G4_ExecSize execSize, G4_Label* target, G4_InstOpts options, bool addToInstList)
1204     {
1205         // jip is computed later during CFG construction
1206         return createCFInst(pred, G4_goto, execSize, nullptr, target, options, addToInstList);
1207     }
1208 
1209     // ToDo: make createInternalInst() private as well and add wraper for them
1210     G4_INST* createInternalInst(
1211         G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
1212         G4_ExecSize execSize,
1213         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
1214         G4_InstOpts options);
1215 
1216     G4_INST* createInternalInst(
1217         G4_Predicate* prd, G4_opcode op, G4_CondMod* mod, G4_Sat sat,
1218         G4_ExecSize execSize,
1219         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
1220         G4_InstOpts options);
1221 
1222     G4_INST* createCFInst(
1223         G4_Predicate* prd, G4_opcode op, G4_ExecSize execSize,
1224         G4_Label* jip, G4_Label* uip,
1225         G4_InstOpts options,
1226         bool addToInstList);
1227 
1228     G4_INST* createInternalCFInst(
1229         G4_Predicate* prd, G4_opcode op, G4_ExecSize execSize,
1230         G4_Label* jip, G4_Label* uip,
1231         G4_InstOpts options);
1232 
1233     G4_InstSend* createSendInst(
1234         G4_Predicate* prd, G4_opcode op, G4_ExecSize execSize,
1235         G4_DstRegRegion* postDst, G4_SrcRegRegion* payload,
1236         G4_Operand* msg,
1237         G4_InstOpts options, // FIXME: re-order options to follow all operands
1238         G4_SendDesc *msgDesc,
1239         bool addToInstList);
1240     G4_InstSend* createInternalSendInst(
1241         G4_Predicate* prd, G4_opcode op,
1242         G4_ExecSize execSize,
1243         G4_DstRegRegion* postDst, G4_SrcRegRegion* payload,
1244         G4_Operand* msg,
1245         G4_InstOpts options, // FIXME: re-order options to follow all operands
1246         G4_SendDesc *msgDescs);
1247 
1248     G4_InstSend* createSplitSendInst(
1249         G4_Predicate* prd, G4_opcode op,
1250         G4_ExecSize execSize,
1251         G4_DstRegRegion* dst, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
1252         G4_Operand* msg, G4_InstOpts options,
1253         G4_SendDesc* msgDesc,
1254         G4_Operand* src3,
1255         bool addToInstList);
1256 
1257     G4_InstSend* createInternalSplitSendInst(
1258         G4_ExecSize execSize,
1259         G4_DstRegRegion* dst, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
1260         // TODO: reorder parameters to put options last
1261         G4_Operand* msg, G4_InstOpts options,
1262         G4_SendDesc*msgDesc,
1263         G4_Operand* src3);
1264 
1265     G4_INST* createMathInst(
1266         G4_Predicate* prd, G4_Sat sat, G4_ExecSize execSize,
1267         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_MathOp mathOp,
1268         G4_InstOpts options, bool addToInstList);
1269 
1270     G4_INST* createInternalMathInst(
1271         G4_Predicate* prd, G4_Sat sat, G4_ExecSize execSize,
1272         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_MathOp mathOp,
1273         G4_InstOpts options);
1274 
1275     G4_INST* createIntrinsicInst(
1276         G4_Predicate* prd, Intrinsic intrinId, G4_ExecSize execSize,
1277         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
1278         G4_InstOpts options, bool addToInstList);
1279 
1280     G4_INST* createInternalIntrinsicInst(
1281         G4_Predicate* prd, Intrinsic intrinId, G4_ExecSize execSize,
1282         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2,
1283         G4_InstOpts options);
1284 
1285     G4_INST* createIntrinsicAddrMovInst(Intrinsic intrinId, G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1, G4_Operand* src2, G4_Operand* src3, G4_Operand* src4, G4_Operand* src5, G4_Operand* src6, G4_Operand* src7, bool addToInstList);
1286 
1287     G4_INST* createNop(G4_InstOpts options);
1288     G4_INST* createSync(G4_opcode syncOp, G4_Operand* src);
1289 
1290     G4_INST* createMov(
1291         G4_ExecSize execSize,
1292         G4_DstRegRegion* dst, G4_Operand* src0,
1293         G4_InstOpts options,
1294         bool appendToInstList);
1295 
createBinOp(G4_opcode op,G4_ExecSize execSize,G4_DstRegRegion * dst,G4_Operand * src0,G4_Operand * src1,G4_InstOpts options,bool appendToInstList)1296     G4_INST* createBinOp(
1297         G4_opcode op, G4_ExecSize execSize,
1298         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
1299         G4_InstOpts options,
1300         bool appendToInstList)
1301     {
1302         return createBinOp(nullptr, op, execSize, dst, src0, src1, options, appendToInstList);
1303     }
1304 
1305     G4_INST* createBinOp(
1306         G4_Predicate *pred,
1307         G4_opcode op, G4_ExecSize execSize,
1308         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
1309         G4_InstOpts options,
1310         bool appendToInstList = true);
1311 
1312     G4_INST* createMach(
1313         G4_ExecSize execSize,
1314         G4_DstRegRegion* dst, G4_Operand* src0, G4_Operand* src1,
1315         G4_InstOpts options, G4_Type accType);
1316 
1317     G4_INST* createMacl(
1318         G4_ExecSize execSize,
1319         G4_DstRegRegion* dst,
1320         G4_Operand* src0, G4_Operand* src1, G4_InstOpts options, G4_Type accType);
1321 
createMadm(G4_ExecSize execSize,G4_DstRegRegion * dst,G4_SrcRegRegion * src0,G4_SrcRegRegion * src1,G4_SrcRegRegion * src2,G4_InstOpts options)1322     G4_INST* createMadm(
1323         G4_ExecSize execSize,
1324         G4_DstRegRegion* dst,
1325         G4_SrcRegRegion* src0, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
1326         G4_InstOpts options)
1327     {
1328         return createMadm(nullptr, execSize, dst, src0, src1, src2, options);
1329     }
1330 
1331     G4_INST* createMadm(
1332         G4_Predicate* pred,
1333         G4_ExecSize execSize,
1334         G4_DstRegRegion* dst,
1335         G4_SrcRegRegion* src0, G4_SrcRegRegion* src1, G4_SrcRegRegion* src2,
1336         G4_InstOpts options);
1337 
1338     static G4_MathOp Get_MathFuncCtrl(ISA_Opcode op, G4_Type type);
1339 
1340     void resizePredefinedStackVars();
1341 
1342     template <typename T>
duplicateOperand(T * opnd)1343     T*          duplicateOperand(T* opnd) {return static_cast<T *>(duplicateOpndImpl(opnd));}
1344     G4_Operand* duplicateOpndImpl(G4_Operand* opnd);
1345 
1346 
1347     G4_DstRegRegion *createSubDstOperand(
1348         G4_DstRegRegion* dst, uint16_t start, uint8_t size);
1349     G4_SrcRegRegion *createSubSrcOperand(
1350         G4_SrcRegRegion* src, uint16_t start, uint8_t size, uint16_t newVs, uint16_t newWd);
1351     G4_INST *makeSplittingInst(G4_INST *inst, G4_ExecSize execSize);
1352 
1353     G4_InstSend *createSendInst(
1354         G4_Predicate *pred,
1355         G4_DstRegRegion *postDst, G4_SrcRegRegion *payload,
1356         G4_ExecSize execSize,
1357         G4_SendDescRaw *msgDesc,
1358         G4_InstOpts options,
1359         bool is_sendc);
1360 
1361     G4_InstSend *createSplitSendInst(
1362         G4_Predicate *pred,
1363         G4_DstRegRegion *dst, G4_SrcRegRegion *src1, G4_SrcRegRegion *src2,
1364         G4_ExecSize execSize,
1365         G4_SendDescRaw *msgDesc,
1366         G4_InstOpts options,
1367         bool is_sendc);
1368 
1369     // TODO: move to TranslateSendLdStLsc or elide VISA_Exec_Size (pre-convert)
1370     G4_SendDescRaw* createLscMsgDesc(
1371         LSC_OP                op,
1372         LSC_SFID              lscSfid,
1373         VISA_Exec_Size        execSizeEnum,
1374         LSC_CACHE_OPTS        cacheOpts,
1375         LSC_ADDR              addr,
1376         LSC_DATA_SHAPE        shape,
1377         G4_Operand           *surface,
1378         uint32_t              dstLen,
1379         uint32_t              addrRegs);
1380 
1381     // ToDo: unify this with above function
1382     G4_SendDescRaw* createLscDesc(
1383         SFID sfid,
1384         uint32_t desc,
1385         uint32_t extDesc,
1386         int src1Len,
1387         SendAccess access,
1388         G4_Operand* bti);
1389 
1390     G4_InstSend *createLscSendInst(
1391         G4_Predicate *pred,
1392         G4_DstRegRegion *dst, G4_SrcRegRegion *src0, G4_SrcRegRegion *src1,
1393         G4_ExecSize execsize,
1394         G4_SendDescRaw *msgDesc,
1395         G4_InstOpts option,
1396         LSC_ADDR_TYPE addrType,
1397         bool emitA0RegDef);
1398     G4_SrcRegRegion* getScratchSurfaceStatusIndex();
1399 
1400     void RestoreA0();
1401 
1402     G4_InstSend* createLscSendInstToScratch(
1403         G4_Predicate* pred,
1404         G4_DstRegRegion* dst, G4_SrcRegRegion* src0, G4_SrcRegRegion* src1,
1405         G4_ExecSize execSize, G4_SendDescRaw* msgDesc, G4_InstOpts option,
1406         bool usesBti);
1407 
1408     G4_InstSend *createSplitSendToRenderTarget(
1409         G4_Predicate *pred,
1410         G4_DstRegRegion *dst,
1411         G4_SrcRegRegion *src1,
1412         G4_SrcRegRegion *src2,
1413         G4_SrcRegRegion *extDesc,
1414         G4_ExecSize execSize,
1415         G4_SendDescRaw *msgDesc,
1416         G4_InstOpts option);
1417 
1418     G4_InstSend* createSendInst(
1419         G4_Predicate* pred,
1420         G4_DstRegRegion* postDst,
1421         G4_SrcRegRegion* payload,
1422         unsigned regs2snd,
1423         unsigned regs2rcv,
1424         G4_ExecSize execsize,
1425         unsigned fc,
1426         SFID tf_id,
1427         bool head_present,
1428         SendAccess access,
1429         G4_Operand* bti,
1430         G4_Operand* sti,
1431         G4_InstOpts options,
1432         bool is_sendc);
1433 
1434     G4_InstSend* createSplitSendInst(
1435         G4_Predicate* pred, G4_DstRegRegion* dst,
1436         G4_SrcRegRegion* src1, unsigned regs2snd1,
1437         G4_SrcRegRegion* src2, unsigned regs2snd2,
1438         unsigned regs2rcv,
1439         G4_ExecSize execSize,
1440         unsigned fc,
1441         SFID tf_id,
1442         bool head_present,
1443         SendAccess access,
1444         G4_Operand* bti, G4_Operand* sti,
1445         G4_InstOpts option,
1446         bool is_sendc);
1447 
1448     // helper functions
1449     G4_Declare *createSendPayloadDcl(unsigned num_elt, G4_Type type);
1450 
1451     void createMovR0Inst(
1452         G4_Declare* dcl,
1453         short refOff,
1454         short subregOff,
1455         bool use_nomask = false,
1456         G4_InstOpts options = InstOpt_NoOpt);
1457 
1458     void createMovInst(
1459         G4_Declare* dcl,
1460         short refOff,
1461         short subregOff,
1462         G4_ExecSize execsize,
1463         G4_Predicate* pred,
1464         G4_CondMod* condMod,
1465         G4_Operand* src_opnd,
1466         bool use_nomask = false,
1467         G4_InstOpts options = InstOpt_NoOpt);
1468     void createAddInst(
1469         G4_Declare* dcl,
1470         short         regOff,
1471         short         subregOff,
1472         G4_ExecSize   execSize,
1473         G4_Predicate* pred,
1474         G4_CondMod*   condMod,
1475         G4_Operand*   src0_opnd,
1476         G4_Operand*   src1_opnd,
1477         G4_InstOption options);
1478     void createMovSendSrcInst(
1479         G4_Declare* dcl,
1480         short refOff,
1481         short subregOff,
1482         unsigned num_dword,
1483         G4_Operand* src_opnd,
1484         G4_InstOpts options);
1485 
1486     // short hand for creating a dstRegRegion
1487     G4_DstRegRegion* createDstRegRegion(G4_Declare* dcl, unsigned short hstride);
1488     G4_SrcRegRegion* createSrcRegRegion(G4_Declare* dcl, const RegionDesc* rd);
1489 
1490     // Create a null dst with scalar region and the given type
1491     G4_DstRegRegion* createNullDst(G4_Type dstType);
1492 
1493     // Create a null src with scalar region and the given type
1494     G4_SrcRegRegion* createNullSrc(G4_Type dstType);
1495 
1496     G4_DstRegRegion* checkSendDst(G4_DstRegRegion *dst_opnd);
1497 
1498     G4_INST* createDpasInst(
1499         G4_opcode opc,
1500         G4_ExecSize execSize,
1501         G4_DstRegRegion* dst,
1502         G4_Operand* src0,
1503         G4_Operand* src1,
1504         G4_Operand* src2,
1505         G4_Operand* src3,
1506         G4_InstOpts options,
1507         GenPrecision A,
1508         GenPrecision W,
1509         uint8_t D,
1510         uint8_t C,
1511         bool addToInstList);
1512 
1513     G4_INST* createInternalDpasInst(
1514         G4_opcode opc,
1515         G4_ExecSize execSize,
1516         G4_DstRegRegion* dst,
1517         G4_Operand* src0,
1518         G4_Operand* src1,
1519         G4_Operand* src2,
1520         G4_Operand* src3,
1521         G4_InstOpts options,
1522         GenPrecision A,
1523         GenPrecision W,
1524         uint8_t D,
1525         uint8_t C);
1526 
1527     G4_INST* createBfnInst(
1528         uint8_t booleanFuncCtrl,
1529         G4_Predicate* prd,
1530         G4_CondMod* mod,
1531         G4_Sat sat,
1532         G4_ExecSize execSize,
1533         G4_DstRegRegion* dst,
1534         G4_Operand* src0,
1535         G4_Operand* src1,
1536         G4_Operand* src2,
1537         G4_InstOpts options,
1538         bool addToInstLis);
1539 
1540     G4_INST* createInternalBfnInst(
1541         uint8_t booleanFuncCtrl,
1542         G4_Predicate* prd,
1543         G4_CondMod* mod,
1544         G4_Sat sat,
1545         G4_ExecSize execSize,
1546         G4_DstRegRegion* dst,
1547         G4_Operand* src0,
1548         G4_Operand* src1,
1549         G4_Operand* src2,
1550         G4_InstOpts options);
1551 
1552 
1553     ///////////////////////////////////////////////////////////////////////////
1554     ///////////////////////////////////////////////////////////////////////////
1555     ///////////////////////////////////////////////////////////////////////////
1556     // translateXXXXXXX functions translate specific vISA instructions into
1557     // sequences of G4 IR that implement the operation
1558     //
1559     // Implementations are split amongst various files based on category.
1560     // Comments below will point the user to the correct implementation file.
1561     // Please keep implementations the same sequential order as in the header.
1562     // (Feel free to re-order, but reflect the re-order here.)
1563     //
1564     // During refactor anything that was in TranslationInterface.cpp was moved
1565     // to one of this VisaToG4/Translate* files, but some methods have nothing
1566     // to do with vISA and make sense in the BuildIRImpl.cpp and could be
1567     // moved there.  Please move the declaration (prototype) upwards in
1568     // that case.
1569 
1570     ///////////////////////////////////////////////////////////////////////////
1571     ///////////////////////////////////////////////////////////////////////////
1572     // Members related to general arithmetic/logic/shift ops are in
1573     // VisaToG4/TranslateALU.cpp
1574     int translateVISAAddrInst(
1575         ISA_Opcode opcode,
1576         VISA_Exec_Size execSize,
1577         VISA_EMask_Ctrl emask,
1578         G4_DstRegRegion *dst_opnd,
1579         G4_Operand *src0_opnd,
1580         G4_Operand *src1_opnd);
1581 
1582     int translateVISAArithmeticInst(
1583         ISA_Opcode opcode,
1584         VISA_Exec_Size execSize,
1585         VISA_EMask_Ctrl emask,
1586         G4_Predicate *predOpnd,
1587         G4_Sat saturate,
1588         G4_CondMod* condMod,
1589         G4_DstRegRegion *dstOpnd,
1590         G4_Operand *src0Opnd,
1591         G4_Operand *src1Opnd,
1592         G4_Operand *src2Opnd,
1593         G4_DstRegRegion *carryBorrow);
1594 
1595     int translateVISADpasInst(
1596         VISA_Exec_Size executionSize,
1597         VISA_EMask_Ctrl emask,
1598         G4_opcode opc,
1599         G4_DstRegRegion *dstOpnd,
1600         G4_SrcRegRegion *src0Opnd,
1601         G4_SrcRegRegion *src1Opnd,
1602         G4_SrcRegRegion *src2Opnd,
1603         G4_SrcRegRegion* src3Opnd,
1604         GenPrecision A, GenPrecision W,
1605         uint8_t D, uint8_t C);
1606     int translateVISABfnInst(
1607         uint8_t booleanFuncCtrl,
1608         VISA_Exec_Size executionSize,
1609         VISA_EMask_Ctrl emask,
1610         G4_Predicate *predOpnd,
1611         G4_Sat saturate,
1612         G4_CondMod* condMod,
1613         G4_DstRegRegion *dstOpnd,
1614         G4_Operand *src0Opnd,
1615         G4_Operand *src1Opnd,
1616         G4_Operand *src2Opnd);
1617 
1618     int translateVISACompareInst(
1619         ISA_Opcode opcode,
1620         VISA_Exec_Size execSize,
1621         VISA_EMask_Ctrl emask,
1622         VISA_Cond_Mod relOp,
1623         G4_Declare* predDst,
1624         G4_Operand *src0_opnd,
1625         G4_Operand *src1_opnd);
1626 
1627     int translateVISACompareInst(
1628         ISA_Opcode opcode,
1629         VISA_Exec_Size execSize,
1630         VISA_EMask_Ctrl emask,
1631         VISA_Cond_Mod relOp,
1632         G4_DstRegRegion *dstOpnd,
1633         G4_Operand *src0Opnd,
1634         G4_Operand *src1Opnd);
1635 
1636     int translateVISALogicInst(
1637         ISA_Opcode opcode,
1638         G4_Predicate *pred_opnd,
1639         G4_Sat saturate,
1640         VISA_Exec_Size executionSize,
1641         VISA_EMask_Ctrl emask,
1642         G4_DstRegRegion* dst,
1643         G4_Operand* src0,
1644         G4_Operand* src1,
1645         G4_Operand* src2,
1646         G4_Operand* src3);
1647 
1648     int translateVISADataMovementInst(
1649         ISA_Opcode opcode,
1650         CISA_MIN_MAX_SUB_OPCODE subOpcode,
1651         G4_Predicate *pred_opnd,
1652         VISA_Exec_Size executionSize,
1653         VISA_EMask_Ctrl emask,
1654         G4_Sat saturate,
1655         G4_DstRegRegion *dst,
1656         G4_Operand *src0,
1657         G4_Operand *src1);
1658 
1659     ///////////////////////////////////////////////////////////////////////////
1660     ///////////////////////////////////////////////////////////////////////////
1661     // Control flow, function call, and branching ops are located in
1662     // VisaToG4/TranslateBranch.cpp
1663     int translateVISACFSwitchInst(
1664         G4_Operand *indexOpnd,
1665         uint8_t numLabels,
1666         G4_Label** lab);
1667 
1668     int translateVISACFLabelInst(G4_Label* lab);
1669 
1670     int translateVISACFCallInst(
1671         VISA_Exec_Size execsize,
1672         VISA_EMask_Ctrl emask,
1673         G4_Predicate *predOpnd,
1674         G4_Label* lab);
1675 
1676     int translateVISACFJumpInst(G4_Predicate *predOpnd, G4_Label* lab);
1677 
1678     int translateVISACFFCallInst(
1679         VISA_Exec_Size execsize,
1680         VISA_EMask_Ctrl emask,
1681         G4_Predicate *predOpnd,
1682         std::string funcName,
1683         uint8_t argSize,
1684         uint8_t returnSize);
1685 
1686     int translateVISACFIFCallInst(
1687         VISA_Exec_Size execsize,
1688         VISA_EMask_Ctrl emask,
1689         G4_Predicate *predOpnd,
1690         G4_Operand* funcAddr,
1691         uint8_t argSize,
1692         uint8_t returnSize);
1693 
1694     int translateVISACFSymbolInst(
1695         const std::string& symbolName,
1696         G4_DstRegRegion* dst);
1697 
1698     int translateVISACFFretInst(
1699         VISA_Exec_Size execsize,
1700         VISA_EMask_Ctrl emask,
1701         G4_Predicate *predOpnd);
1702 
1703     int translateVISACFRetInst(
1704         VISA_Exec_Size execsize,
1705         VISA_EMask_Ctrl emask,
1706         G4_Predicate *predOpnd);
1707 
1708     int translateVISAGotoInst(
1709         G4_Predicate *predOpnd,
1710         VISA_Exec_Size executionSize,
1711         VISA_EMask_Ctrl emask,
1712         G4_Label *label);
1713 
1714 
1715     ///////////////////////////////////////////////////////////////////////////
1716     // members related to special math sequences VisaToG4/TranslateMath.cpp
1717     void expandFdiv(
1718         G4_ExecSize exsize, G4_Predicate *predOpnd, G4_Sat saturate,
1719         G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd,
1720         uint32_t instOpt);
1721 
1722     void expandPow(
1723         G4_ExecSize exsize, G4_Predicate *predOpnd, G4_Sat saturate,
1724         G4_DstRegRegion *dstOpnd, G4_Operand *src0Opnd, G4_Operand *src1Opnd,
1725         uint32_t instOpt);
1726 
1727 
1728     int translateVISAArithmeticDoubleInst(
1729         ISA_Opcode opcode,
1730         VISA_Exec_Size execSize,
1731         VISA_EMask_Ctrl emask,
1732         G4_Predicate *predOpnd,
1733         G4_Sat saturate,
1734         G4_DstRegRegion *dstOpnd,
1735         G4_Operand *src0Opnd,
1736         G4_Operand *src1Opnd);
1737 
1738     int translateVISAArithmeticSingleDivideIEEEInst(
1739         ISA_Opcode opcode,
1740         VISA_Exec_Size execSize,
1741         VISA_EMask_Ctrl emask,
1742         G4_Predicate *predOpnd,
1743         G4_Sat saturate,
1744         G4_CondMod* condMod,
1745         G4_DstRegRegion *dstOpnd,
1746         G4_Operand *src0Opnd,
1747         G4_Operand *src1Opnd);
1748 
1749     int translateVISAArithmeticSingleSQRTIEEEInst(
1750         ISA_Opcode opcode,
1751         VISA_Exec_Size execSize,
1752         VISA_EMask_Ctrl emask,
1753         G4_Predicate *predOpnd,
1754         G4_Sat saturate,
1755         G4_CondMod* condMod,
1756         G4_DstRegRegion *dstOpnd,
1757         G4_Operand *src0Opnd);
1758 
1759     int translateVISAArithmeticDoubleSQRTInst(
1760         ISA_Opcode opcode,
1761         VISA_Exec_Size execSize,
1762         VISA_EMask_Ctrl emask,
1763         G4_Predicate *predOpnd,
1764         G4_Sat saturate,
1765         G4_CondMod* condMod,
1766         G4_DstRegRegion *dstOpnd,
1767         G4_Operand *src0Opnd);
1768 
1769 
1770     ///////////////////////////////////////////////////////////////////////////
1771     ///////////////////////////////////////////////////////////////////////////
1772     // Members related miscellaneous instructions that don't fit any other
1773     // category are in VisaToG4/TranslateMisc.cpp
1774     //
1775     // (As stated above some of this could move to BuildIRImpl.cpp.)
1776     static bool isNoMask(VISA_EMask_Ctrl eMask);
1777     static G4_ExecSize toExecSize(VISA_Exec_Size execSize);
1778 
1779     VISA_Exec_Size roundUpExecSize(VISA_Exec_Size execSize);
1780 
1781     G4_Declare* getImmDcl(G4_Imm* val, int numElt);
1782 
1783     struct PayloadSource {
1784         G4_SrcRegRegion  *opnd;
1785         G4_ExecSize       execSize;
1786         G4_InstOpts       instOpt;
1787     };
1788 
1789     /// preparePayload - This method prepares payload from the specified header
1790     /// and sources.
1791     ///
1792     /// \param msgs             Message(s) prepared. That 2-element array must
1793     ///                         be cleared before calling preparePayload().
1794     /// \param sizes            Size(s) (in GRF) of each message prepared. That
1795     ///                         2-element array must be cleared before calling
1796     ///                         preparePayload().
1797     /// \param batchExSize      When it's required to copy sources, batchExSize
1798     ///                         specifies the SIMD width of copy.
1799     /// \param splitSendEnabled Whether feature split-send is available. When
1800     ///                         feature split-send is available, this function
1801     ///                         will check whether two consecutive regions
1802     ///                         could be prepared instead of one to take
1803     ///                         advantage of split-send.
1804     /// \param srcs             The array of sources (including header if
1805     ///                         present).
1806     /// \param len              The length of the array of sources.
1807     ///
1808     void preparePayload(
1809         G4_SrcRegRegion *msgs[2], unsigned sizes[2],
1810         G4_ExecSize batchExSize, bool splitSendEnabled,
1811         PayloadSource sources[], unsigned len);
1812 
1813     // Coalesce multiple payloads into a single region.  Pads each region with
1814     // an optional alignment argument (e.g. a GRF size).  The source region
1815     // sizes are determined by source dimension, so use an alias if you are
1816     // using a subregion.  All copies are made under no mask semantics using
1817     // the maximal SIMD width for the current device.
1818     //
1819     // A second alignment option allows a caller to align the full payload
1820     // to some total.
1821     //
1822     // If all parameters are nullptr or the null register, we return the null
1823     // register.
1824     //
1825     // Some examples:
1826     //
1827     // 1. coalescePayloads(GRF_SIZE,GRF_SIZE,...);
1828     //    Coalesces each source into a single region.  Each source is padded
1829     //    out to a full GRF, and the sum total result is also padded out to
1830     //    a full GRF.
1831     //
1832     // 2. coalescePayloads(1,GRF_SIZE,...);
1833     //    Coalesces each source into a single region packing each source
1834     //    together, but padding the result.  E.g. one could copy a QW and then
1835     //    a DW and pad the result out to a GRF.
1836     //
1837     G4_SrcRegRegion *coalescePayload(
1838         unsigned alignSourcesTo,
1839         unsigned alignPayloadTo,
1840         uint32_t payloadSize,
1841         uint32_t srcSize,
1842         std::initializer_list<G4_SrcRegRegion *> srcs,
1843         VISA_EMask_Ctrl emask);
1844 
1845     // emask is InstOption
1846     void Copy_SrcRegRegion_To_Payload(
1847         G4_Declare* payload,
1848         unsigned int& regOff,
1849         G4_SrcRegRegion* src,
1850         G4_ExecSize execSize,
1851         uint32_t emask);
1852     unsigned int getByteOffsetSrcRegion(G4_SrcRegRegion* srcRegion);
1853 
1854     // only used in TranslateSend3D, maybe consider moving there if no
1855     // one else uses them.
1856     bool checkIfRegionsAreConsecutive(
1857         G4_SrcRegRegion* first,
1858         G4_SrcRegRegion* second,
1859         G4_ExecSize execSize);
1860     bool checkIfRegionsAreConsecutive(
1861         G4_SrcRegRegion* first,
1862         G4_SrcRegRegion* second,
1863         G4_ExecSize execSize,
1864         G4_Type type);
1865 
1866     int generateDebugInfoPlaceholder(); // TODO: move to BuildIRImpl.cpp?
1867 
1868     // legitimiately belongs in Misc
1869     int translateVISALifetimeInst(unsigned char properties, G4_Operand* var);
1870 
1871 
1872     ///////////////////////////////////////////////////////////////////////////
1873     ///////////////////////////////////////////////////////////////////////////
1874     // members related to 3D and sampler ops are in VisaToG4/TranslateSend3D.cpp
1875     int translateVISASampleInfoInst(
1876         VISA_Exec_Size executionSize,
1877         VISA_EMask_Ctrl emask,
1878         ChannelMask chMask,
1879         G4_Operand* surface,
1880         G4_DstRegRegion* dst);
1881 
1882     int translateVISAResInfoInst(
1883         VISA_Exec_Size executionSize,
1884         VISA_EMask_Ctrl emask,
1885         ChannelMask chMask,
1886         G4_Operand* surface,
1887         G4_SrcRegRegion* lod,
1888         G4_DstRegRegion* dst);
1889 
1890     int translateVISAURBWrite3DInst(
1891         G4_Predicate* pred,
1892         VISA_Exec_Size executionSize,
1893         VISA_EMask_Ctrl emask,
1894         uint8_t numOut,
1895         uint16_t globalOffset,
1896         G4_SrcRegRegion* channelMask,
1897         G4_SrcRegRegion* urbHandle,
1898         G4_SrcRegRegion* perSlotOffset,
1899         G4_SrcRegRegion* vertexData);
1900 
1901     int translateVISARTWrite3DInst(
1902         G4_Predicate* pred,
1903         VISA_Exec_Size executionSize,
1904         VISA_EMask_Ctrl emask,
1905         G4_Operand *surface,
1906         G4_SrcRegRegion *r1HeaderOpnd,
1907         G4_Operand *rtIndex,
1908         vISA_RT_CONTROLS cntrls,
1909         G4_SrcRegRegion *sampleIndexOpnd,
1910         G4_Operand *cpsCounter,
1911         unsigned int numParms,
1912         G4_SrcRegRegion ** msgOpnds);
1913 
1914 
1915     int splitSampleInst(
1916         VISASampler3DSubOpCode actualop,
1917         bool pixelNullMask,
1918         bool cpsEnable,
1919         G4_Predicate* pred,
1920         ChannelMask srcChannel,
1921         int numChannels,
1922         G4_Operand *aoffimmi,
1923         G4_Operand *sampler,
1924         G4_Operand *surface,
1925         G4_DstRegRegion* dst,
1926         VISA_EMask_Ctrl emask,
1927         bool useHeader,
1928         unsigned numRows, // msg length for each simd8
1929         unsigned int numParms,
1930         G4_SrcRegRegion ** params,
1931         bool uniformSampler = true);
1932 
1933     void doSamplerHeaderMove(G4_Declare* header, G4_Operand* sampler);
1934     G4_Declare* getSamplerHeader(bool isBindlessSampler, bool samplerIndexGE16);
1935     uint32_t getSamplerResponseLength(
1936         int numChannels, bool isFP16, int execSize, bool pixelNullMask, bool nullDst);
1937 
1938     int translateVISASampler3DInst(
1939         VISASampler3DSubOpCode actualop,
1940         bool pixelNullMask,
1941         bool cpsEnable,
1942         bool uniformSampler,
1943         G4_Predicate* pred,
1944         VISA_Exec_Size executionSize,
1945         VISA_EMask_Ctrl emask,
1946         ChannelMask srcChannel,
1947         G4_Operand* aoffimmi,
1948         G4_Operand *sampler,
1949         G4_Operand *surface,
1950         G4_DstRegRegion* dst,
1951         unsigned int numParms,
1952         G4_SrcRegRegion ** params);
1953 
1954     int translateVISALoad3DInst(
1955         VISASampler3DSubOpCode actualop,
1956         bool pixelNullMask,
1957         G4_Predicate *pred,
1958         VISA_Exec_Size exeuctionSize,
1959         VISA_EMask_Ctrl em,
1960         ChannelMask channelMask,
1961         G4_Operand* aoffimmi,
1962         G4_Operand* surface,
1963         G4_DstRegRegion* dst,
1964         uint8_t numOpnds,
1965         G4_SrcRegRegion ** opndArray);
1966 
1967     int translateVISAGather3dInst(
1968         VISASampler3DSubOpCode actualop,
1969         bool pixelNullMask,
1970         G4_Predicate* pred,
1971         VISA_Exec_Size exeuctionSize,
1972         VISA_EMask_Ctrl em,
1973         ChannelMask channelMask,
1974         G4_Operand* aoffimmi,
1975         G4_Operand* sampler,
1976         G4_Operand* surface,
1977         G4_DstRegRegion* dst,
1978         unsigned int numOpnds,
1979         G4_SrcRegRegion ** opndArray);
1980 
1981     int translateVISASamplerNormInst(
1982         G4_Operand* surface,
1983         G4_Operand* sampler,
1984         ChannelMask channel,
1985         unsigned numEnabledChannels,
1986         G4_Operand* deltaUOpnd,
1987         G4_Operand* uOffOpnd,
1988         G4_Operand* deltaVOpnd,
1989         G4_Operand* vOffOpnd,
1990         G4_DstRegRegion* dst_opnd);
1991 
1992     int translateVISASamplerInst(
1993         unsigned simdMode,
1994         G4_Operand* surface,
1995         G4_Operand* sampler,
1996         ChannelMask channel,
1997         unsigned numEnabledChannels,
1998         G4_Operand* uOffOpnd,
1999         G4_Operand* vOffOpnd,
2000         G4_Operand* rOffOpnd,
2001         G4_DstRegRegion* dstOpnd);
2002 
2003     ///////////////////////////////////////////////////////////////////////////
2004     ///////////////////////////////////////////////////////////////////////////
2005     // basic load and store (type and untyped) are located in
2006     // VisaToG4/TranslateSendLdStLegacy.cpp
2007 
2008     // IsHeaderOptional - Check whether the message header is optional.
2009     bool isMessageHeaderOptional(G4_Operand *surface, G4_Operand *Offset) const;
2010 
2011     // IsStatelessSurface - Check whether the give surface is statelesss surface.
isStatelessSurface(const G4_Operand * surface)2012     static bool isStatelessSurface(const G4_Operand *surface) {
2013         // So far, it's only reliable to check an immediate surface.
2014         return surface->isImm() &&
2015             (surface->asImm()->getImm() == PREDEF_SURF_255 || surface->asImm()->getImm() == PREDEF_SURF_253);
2016     }
2017 
2018     int translateVISAQWGatherInst(
2019         VISA_Exec_Size executionSize,
2020         VISA_EMask_Ctrl emask,
2021         G4_Predicate* pred,
2022         VISA_SVM_Block_Num numBlocks,
2023         G4_SrcRegRegion* surface,
2024         G4_SrcRegRegion* addresses,
2025         G4_DstRegRegion* dst);
2026 
2027     int translateVISAQWScatterInst(
2028         VISA_Exec_Size executionSize,
2029         VISA_EMask_Ctrl emask,
2030         G4_Predicate* pred,
2031         VISA_SVM_Block_Num numBlocks,
2032         G4_SrcRegRegion* surface,
2033         G4_SrcRegRegion* addresses,
2034         G4_SrcRegRegion* src);
2035 
2036     uint32_t setOwordForDesc(uint32_t desc, int numOword, bool isSLM = false) const;
2037     int translateVISAOwordLoadInst(
2038         ISA_Opcode opcode,
2039         bool modified,
2040         G4_Operand* surface,
2041         VISA_Oword_Num size,
2042         G4_Operand* offOpnd,
2043         G4_DstRegRegion* dstOpnd);
2044 
2045     int translateVISAOwordStoreInst(
2046         G4_Operand* surface,
2047         VISA_Oword_Num size,
2048         G4_Operand* offOpnd,
2049         G4_SrcRegRegion* srcOpnd);
2050 
2051     int translateVISAGatherInst(
2052         VISA_EMask_Ctrl emask,
2053         bool modified,
2054         GATHER_SCATTER_ELEMENT_SIZE eltSize,
2055         VISA_Exec_Size executionSize,
2056         G4_Operand* surface,
2057         G4_Operand* gOffOpnd,
2058         G4_SrcRegRegion* eltOFfOpnd,
2059         G4_DstRegRegion* dstOpnd);
2060 
2061     int translateVISAScatterInst(
2062         VISA_EMask_Ctrl emask,
2063         GATHER_SCATTER_ELEMENT_SIZE eltSize,
2064         VISA_Exec_Size executionSize,
2065         G4_Operand* surface,
2066         G4_Operand* gOffOpnd,
2067         G4_SrcRegRegion* eltOffOpnd,
2068         G4_SrcRegRegion* srcOpnd);
2069 
2070     int translateVISAGather4Inst(
2071         VISA_EMask_Ctrl emask,
2072         bool modified,
2073         ChannelMask chMask,
2074         VISA_Exec_Size executionSize,
2075         G4_Operand* surface,
2076         G4_Operand* gOffOpnd,
2077         G4_SrcRegRegion* eltOffOpnd,
2078         G4_DstRegRegion* dstOpnd);
2079 
2080     int translateVISAScatter4Inst(
2081         VISA_EMask_Ctrl emask,
2082         ChannelMask chMask,
2083         VISA_Exec_Size executionSize,
2084         G4_Operand* surface,
2085         G4_Operand* gOffOpnd,
2086         G4_SrcRegRegion* eltOffOpnd,
2087         G4_SrcRegRegion* srcOpnd);
2088 
2089     int translateVISADwordAtomicInst(
2090         VISAAtomicOps    subOpc,
2091         bool             is16Bit,
2092         G4_Predicate    *pred,
2093         VISA_Exec_Size   execSize,
2094         VISA_EMask_Ctrl  eMask,
2095         G4_Operand*      surface,
2096         G4_SrcRegRegion* offsets,
2097         G4_SrcRegRegion* src0,
2098         G4_SrcRegRegion* src1,
2099         G4_DstRegRegion* dst);
2100 
2101     void buildTypedSurfaceAddressPayload(
2102         G4_SrcRegRegion* u, G4_SrcRegRegion* v, G4_SrcRegRegion* r, G4_SrcRegRegion* lod,
2103         G4_ExecSize execSize, G4_InstOpts instOpt, PayloadSource sources[], uint32_t& len);
2104 
2105     int translateVISAGather4TypedInst(
2106         G4_Predicate   *pred,
2107         VISA_EMask_Ctrl emask,
2108         ChannelMask     chMask,
2109         G4_Operand     *surfaceOpnd,
2110         VISA_Exec_Size  executionSize,
2111         G4_SrcRegRegion *uOffsetOpnd,
2112         G4_SrcRegRegion *vOffsetOpnd,
2113         G4_SrcRegRegion *rOffsetOpnd,
2114         G4_SrcRegRegion *lodOpnd,
2115         G4_DstRegRegion *dstOpnd);
2116 
2117     int translateVISAScatter4TypedInst(
2118         G4_Predicate           *pred,
2119         VISA_EMask_Ctrl emask,
2120         ChannelMask chMask,
2121         G4_Operand *surfaceOpnd,
2122         VISA_Exec_Size executionSize,
2123         G4_SrcRegRegion *uOffsetOpnd,
2124         G4_SrcRegRegion *vOffsetOpnd,
2125         G4_SrcRegRegion *rOffsetOpnd,
2126         G4_SrcRegRegion *lodOpnd,
2127         G4_SrcRegRegion *srcOpnd);
2128 
2129     int translateVISATypedAtomicInst(
2130         VISAAtomicOps    atomicOp,
2131         bool             is16Bit,
2132         G4_Predicate    *pred,
2133         VISA_EMask_Ctrl  emask,
2134         VISA_Exec_Size   execSize,
2135         G4_Operand      *surface,
2136         G4_SrcRegRegion *uOffsetOpnd,
2137         G4_SrcRegRegion *vOffsetOpnd,
2138         G4_SrcRegRegion *rOffsetOpnd,
2139         G4_SrcRegRegion *lodOpnd,
2140         G4_SrcRegRegion *src0,
2141         G4_SrcRegRegion *src1,
2142         G4_DstRegRegion *dst);
2143 
2144     void applySideBandOffset(G4_Operand* sideBand, const G4_SendDescRaw * sendMsgDesc);
2145 
2146     int translateVISAGather4ScaledInst(
2147         G4_Predicate *pred,
2148         VISA_Exec_Size execSize,
2149         VISA_EMask_Ctrl eMask,
2150         ChannelMask chMask,
2151         G4_Operand *surface,
2152         G4_Operand *globalOffset,
2153         G4_SrcRegRegion *offsets,
2154         G4_DstRegRegion *dst);
2155 
2156     int translateVISAScatter4ScaledInst(
2157         G4_Predicate *pred,
2158         VISA_Exec_Size execSize,
2159         VISA_EMask_Ctrl eMask,
2160         ChannelMask chMask,
2161         G4_Operand *surface,
2162         G4_Operand *globalOffset,
2163         G4_SrcRegRegion *offsets,
2164         G4_SrcRegRegion *src);
2165 
2166     int translateGather4Inst(
2167         G4_Predicate *pred,
2168         VISA_Exec_Size execSize,
2169         VISA_EMask_Ctrl eMask,
2170         ChannelMask chMask,
2171         G4_Operand *surface,
2172         G4_Operand *globalOffset,
2173         G4_SrcRegRegion *offsets,
2174         G4_DstRegRegion *dst);
2175 
2176     int translateScatter4Inst(
2177         G4_Predicate *pred,
2178         VISA_Exec_Size execSize,
2179         VISA_EMask_Ctrl eMask,
2180         ChannelMask chMask,
2181         G4_Operand *surface,
2182         G4_Operand *globalOffset,
2183         G4_SrcRegRegion *offsets,
2184         G4_SrcRegRegion *src);
2185 
2186     int translateVISAGatherScaledInst(
2187         G4_Predicate *pred,
2188         VISA_Exec_Size execSize,
2189         VISA_EMask_Ctrl eMask,
2190         VISA_SVM_Block_Num numBlocks,
2191         G4_Operand *surface,
2192         G4_Operand *globalOffset,
2193         G4_SrcRegRegion *offsets,
2194         G4_DstRegRegion *dst);
2195 
2196     int translateVISAScatterScaledInst(
2197         G4_Predicate *pred,
2198         VISA_Exec_Size execSize,
2199         VISA_EMask_Ctrl eMask,
2200         VISA_SVM_Block_Num numBlocks,
2201         G4_Operand *surface,
2202         G4_Operand *globalOffset,
2203         G4_SrcRegRegion *offsets,
2204         G4_SrcRegRegion *src);
2205 
2206     int translateByteGatherInst(
2207         G4_Predicate *pred,
2208         VISA_Exec_Size execSize,
2209         VISA_EMask_Ctrl eMask,
2210         VISA_SVM_Block_Num numBlocks,
2211         G4_Operand *surface,
2212         G4_Operand *globalOffset,
2213         G4_SrcRegRegion *offsets,
2214         G4_DstRegRegion *dst);
2215 
2216     int translateByteScatterInst(
2217         G4_Predicate *pred,
2218         VISA_Exec_Size execSize,
2219         VISA_EMask_Ctrl eMask,
2220         VISA_SVM_Block_Num numBlocks,
2221         G4_Operand *surface,
2222         G4_Operand *globalOffset,
2223         G4_SrcRegRegion *offsets,
2224         G4_SrcRegRegion *src);
2225 
2226     int translateVISASVMBlockReadInst(
2227         VISA_Oword_Num numOword,
2228         bool unaligned,
2229         G4_Operand* address,
2230         G4_DstRegRegion* dst);
2231 
2232     int translateVISASVMBlockWriteInst(
2233         VISA_Oword_Num numOword,
2234         G4_Operand* address,
2235         G4_SrcRegRegion* src);
2236 
2237     int translateVISASVMScatterReadInst(
2238         VISA_Exec_Size executionSize,
2239         VISA_EMask_Ctrl emask,
2240         G4_Predicate* pred,
2241         VISA_SVM_Block_Type blockSize,
2242         VISA_SVM_Block_Num numBlocks,
2243         G4_SrcRegRegion* addresses,
2244         G4_DstRegRegion* dst);
2245 
2246     int translateVISASVMScatterWriteInst(
2247         VISA_Exec_Size      executionSize,
2248         VISA_EMask_Ctrl     emask,
2249         G4_Predicate*       pred,
2250         VISA_SVM_Block_Type blockSize,
2251         VISA_SVM_Block_Num  numBlocks,
2252         G4_SrcRegRegion*    addresses,
2253         G4_SrcRegRegion*    src);
2254 
2255     int translateVISASVMAtomicInst(
2256         VISAAtomicOps       op,
2257         unsigned short      bitwidth,
2258         VISA_Exec_Size      executionSize,
2259         VISA_EMask_Ctrl     emask,
2260         G4_Predicate*       pred,
2261         G4_SrcRegRegion*    addresses,
2262         G4_SrcRegRegion*    src0,
2263         G4_SrcRegRegion*    src1,
2264         G4_DstRegRegion*    dst);
2265 
2266     // return globalOffset + offsets as a contiguous operand
2267     G4_SrcRegRegion* getSVMOffset(
2268         G4_Operand* globalOffset,
2269         G4_SrcRegRegion* offsets,
2270         uint16_t exSize,
2271         G4_Predicate* pred,
2272         uint32_t mask);
2273 
2274     int translateSVMGather4Inst(
2275         VISA_Exec_Size      execSize,
2276         VISA_EMask_Ctrl     eMask,
2277         ChannelMask         chMask,
2278         G4_Predicate       *pred,
2279         G4_Operand         *address,
2280         G4_SrcRegRegion    *offsets,
2281         G4_DstRegRegion    *dst);
2282 
2283     int translateSVMScatter4Inst(
2284         VISA_Exec_Size      execSize,
2285         VISA_EMask_Ctrl     eMask,
2286         ChannelMask         chMask,
2287         G4_Predicate       *pred,
2288         G4_Operand         *address,
2289         G4_SrcRegRegion    *offsets,
2290         G4_SrcRegRegion    *src);
2291 
2292     int translateVISASVMGather4ScaledInst(
2293         VISA_Exec_Size      execSize,
2294         VISA_EMask_Ctrl     eMask,
2295         ChannelMask         chMask,
2296         G4_Predicate       *pred,
2297         G4_Operand         *address,
2298         G4_SrcRegRegion    *offsets,
2299         G4_DstRegRegion    *dst);
2300 
2301     int translateVISASVMScatter4ScaledInst(
2302         VISA_Exec_Size      execSize,
2303         VISA_EMask_Ctrl     eMask,
2304         ChannelMask         chMask,
2305         G4_Predicate       *pred,
2306         G4_Operand         *address,
2307         G4_SrcRegRegion    *offsets,
2308         G4_SrcRegRegion    *src);
2309 
2310     // Minimum execution size for LSC on this platform
2311     // Minimum is generally half the full size except rare cases.
2312     // Full LSC SIMD size for PVC derivatives is 32, and 16 for DG2 derivatives
2313     G4_ExecSize lscMinExecSize(LSC_SFID lscSfid) const;
2314 
2315     ///////////////////////////////////////////////////////////////////////////
2316     ///////////////////////////////////////////////////////////////////////////
2317     // New LSC-based load and store (type and untyped) are located in
2318     // VisaToG4/TranslateSendLdStLsc.cpp
2319     int translateLscUntypedInst(
2320         LSC_OP                  op,
2321         LSC_SFID                lscSfid,
2322         G4_Predicate           *pred,
2323         VISA_Exec_Size          execSize,
2324         VISA_EMask_Ctrl         emask,
2325         LSC_CACHE_OPTS          cacheOpts,
2326         LSC_ADDR                addrInfo,
2327         LSC_DATA_SHAPE          shape,
2328         G4_Operand             *surface,  // surface/bti
2329         G4_DstRegRegion        *dstData,
2330         G4_SrcRegRegion        *src0AddrOrBlockY,
2331         G4_Operand             *src0AddrStrideOrBlockX, // only for strided and block2d
2332         G4_SrcRegRegion        *src1Data, // store data/extra atomic operands
2333         G4_SrcRegRegion        *src2Data // only for fcas/icas
2334     );
2335 
2336     int translateLscUntypedBlock2DInst(
2337         LSC_OP                  op,
2338         LSC_SFID                lscSfid,
2339         G4_Predicate           *pred,
2340         VISA_Exec_Size          execSize,
2341         VISA_EMask_Ctrl         emask,
2342         LSC_CACHE_OPTS          cacheOpts,
2343         LSC_DATA_SHAPE_BLOCK2D  shape,
2344         G4_DstRegRegion        *dstData,
2345         G4_Operand             *src0Addrs[LSC_BLOCK2D_ADDR_PARAMS],
2346         G4_SrcRegRegion        *src1Data);
2347     int translateLscTypedInst(
2348         LSC_OP                  op,
2349         G4_Predicate           *pred,
2350         VISA_Exec_Size          execSize,
2351         VISA_EMask_Ctrl         emask,
2352         LSC_CACHE_OPTS          cacheOpts,
2353         LSC_ADDR_TYPE           addrModel,
2354         LSC_ADDR_SIZE           addrSize,
2355         LSC_DATA_SHAPE          shape,
2356         G4_Operand             *surface,  // surface/bti
2357         G4_DstRegRegion        *dstData,  // dst on load/atomic
2358         G4_SrcRegRegion        *src0AddrUs,
2359         G4_SrcRegRegion        *src0AddrVs,
2360         G4_SrcRegRegion        *src0AddrRs,
2361         G4_SrcRegRegion        *src0AddrLODs,
2362         G4_SrcRegRegion        *src1Data, // store data/extra atomic operands
2363         G4_SrcRegRegion        *src2Data // icas/fcas only
2364     );
2365 
2366     LSC_DATA_ELEMS lscGetElementNum(unsigned eNum) const;
2367     int  lscEncodeAddrSize(LSC_ADDR_SIZE addr_size, uint32_t &desc, int &status) const;
2368     int  lscEncodeDataSize(LSC_DATA_SIZE data_size, uint32_t &desc, int &status) const;
2369     int  lscEncodeDataElems(LSC_DATA_ELEMS data_elems, uint32_t &desc, int &status) const;
2370     void lscEncodeDataOrder(LSC_DATA_ORDER t, uint32_t &desc, int &status) const;
2371     void lscEncodeCachingOpts(
2372         const LscOpInfo &opInfo,
2373         LSC_CACHE_OPTS cacheOpts,
2374         uint32_t &desc,
2375         int &status) const;
2376     void lscEncodeAddrType(LSC_ADDR_TYPE at, uint32_t &desc, int &status) const;
2377 
2378     G4_SrcRegRegion *lscBuildStridedPayload(
2379         G4_Predicate        *pred,
2380         G4_SrcRegRegion     *src0AddrBase,
2381         G4_Operand          *src0AddrStride,
2382         int dataSizeBytes, int vecSize, bool transposed);
2383     G4_SrcRegRegion *lscBuildBlock2DPayload(
2384         LSC_DATA_SHAPE_BLOCK2D  dataShape2D,
2385         G4_Predicate           *pred,
2386         G4_Operand             *src0Addrs[6]);
2387 
2388     //
2389     // LSC allows users to pass an immediate scale and immediate addend.
2390     // Hardware may be able to take advantage of that if they satisfy
2391     // various constraints.  This also broadcasts if needed.
2392     G4_SrcRegRegion *lscLoadEffectiveAddress(
2393         LSC_OP                    lscOp,
2394         LSC_SFID                  lscSfid,
2395         G4_Predicate             *pred,
2396         G4_ExecSize               execSize,
2397         VISA_EMask_Ctrl           execCtrl,
2398         LSC_ADDR                  addrInfo,
2399         int                       bytesPerDataElem,
2400         const G4_Operand         *surface,
2401         G4_SrcRegRegion          *addr,
2402         uint32_t                 &exDesc
2403     );
2404     G4_SrcRegRegion *lscCheckRegion(
2405         G4_Predicate             *pred,
2406         G4_ExecSize               execSize,
2407         VISA_EMask_Ctrl           execCtrl,
2408         G4_SrcRegRegion          *src);
2409 
2410     G4_SrcRegRegion *lscMulAdd(
2411         G4_Predicate             *pred,
2412         G4_ExecSize               execSize,
2413         VISA_EMask_Ctrl           execCtrl,
2414         G4_SrcRegRegion          *src,
2415         int16_t                   mulImm16,
2416         int64_t                   addImm64);
2417     G4_SrcRegRegion *lscMul(
2418         G4_Predicate             *pred,
2419         G4_ExecSize               execSize,
2420         VISA_EMask_Ctrl           execCtrl,
2421         G4_SrcRegRegion          *src,
2422         int16_t                   mulImm16);
2423     G4_SrcRegRegion *lscAdd(
2424         G4_Predicate             *pred,
2425         G4_ExecSize               execSize,
2426         VISA_EMask_Ctrl           execCtrl,
2427         G4_SrcRegRegion          *src,
2428         int64_t                   addImm64);
2429     G4_SrcRegRegion *lscAdd64AosNative(
2430         G4_Predicate             *pred,
2431         G4_ExecSize               execSize,
2432         VISA_EMask_Ctrl           execCtrl,
2433         G4_SrcRegRegion          *src,
2434         int64_t                   addImm64);
2435     G4_SrcRegRegion *lscAdd64AosEmu(
2436         G4_Predicate             *pred,
2437         G4_ExecSize               execSize,
2438         VISA_EMask_Ctrl           execCtrl,
2439         G4_SrcRegRegion          *src,
2440         int64_t                   addImm64);
2441     G4_SrcRegRegion *lscMul64Aos(
2442         G4_Predicate             *pred,
2443         G4_ExecSize               execSize,
2444         VISA_EMask_Ctrl           execCtrl,
2445         G4_SrcRegRegion          *src,
2446         int16_t                   mulImm16);
2447 
2448     ///////////////////////////////////////////////////////////////////////////
2449     ///////////////////////////////////////////////////////////////////////////
2450     // members related to media and VME are in VisaToG4/TranslateSendMedia.cpp
2451     int translateVISAMediaLoadInst(
2452         MEDIA_LD_mod mod,
2453         G4_Operand* surface,
2454         unsigned planeID,
2455         unsigned blockWidth,
2456         unsigned blockHeight,
2457         G4_Operand* xOffOpnd,
2458         G4_Operand* yOffOpnd,
2459         G4_DstRegRegion* dst_opnd);
2460 
2461     int translateVISAMediaStoreInst(
2462         MEDIA_ST_mod mod,
2463         G4_Operand* surface,
2464         unsigned planeID,
2465         unsigned blockWidth,
2466         unsigned blockHeight,
2467         G4_Operand* xOffOpnd,
2468         G4_Operand* yOffOpnd,
2469         G4_SrcRegRegion* srcOpnd);
2470 
2471     int translateVISAVmeImeInst(
2472         uint8_t stream_mode,
2473         uint8_t search_ctrl,
2474         G4_Operand* surfaceOpnd,
2475         G4_Operand* uniInputOpnd,
2476         G4_Operand* imeInputOpnd,
2477         G4_Operand* ref0Opnd,
2478         G4_Operand* ref1Opnd,
2479         G4_Operand* costCenterOpnd,
2480         G4_DstRegRegion* outputOpnd);
2481 
2482     int translateVISAVmeSicInst(
2483         G4_Operand* surfaceOpnd,
2484         G4_Operand* uniInputOpnd,
2485         G4_Operand* sicInputOpnd,
2486         G4_DstRegRegion* outputOpnd);
2487 
2488     int translateVISAVmeFbrInst(
2489         G4_Operand* surfaceOpnd,
2490         G4_Operand* unitInputOpnd,
2491         G4_Operand* fbrInputOpnd,
2492         G4_Operand* fbrMbModOpnd,
2493         G4_Operand* fbrSubMbShapeOpnd,
2494         G4_Operand* fbrSubPredModeOpnd,
2495         G4_DstRegRegion* outputOpnd);
2496 
2497     int translateVISAVmeIdmInst(
2498         G4_Operand* surfaceOpnd,
2499         G4_Operand* unitInputOpnd,
2500         G4_Operand* idmInputOpnd,
2501         G4_DstRegRegion* outputOpnd);
2502 
2503     int translateVISASamplerVAGenericInst(
2504         G4_Operand*   surface,
2505         G4_Operand*   sampler,
2506         G4_Operand*   uOffOpnd,
2507         G4_Operand*   vOffOpnd,
2508         G4_Operand*   vSizeOpnd,
2509         G4_Operand*   hSizeOpnd,
2510         G4_Operand*   mmfMode,
2511         unsigned char cntrl,
2512         unsigned char msgSeq,
2513         VA_fopcode    fopcode,
2514         G4_DstRegRegion*   dstOpnd,
2515         G4_Type       dstType,
2516         unsigned      dstSize,
2517         bool isBigKernel = false);
2518 
2519     int translateVISAAvsInst(
2520         G4_Operand* surface,
2521         G4_Operand* sampler,
2522         ChannelMask channel,
2523         unsigned numEnabledChannels,
2524         G4_Operand* deltaUOpnd,
2525         G4_Operand* uOffOpnd,
2526         G4_Operand* deltaVOpnd,
2527         G4_Operand* vOffOpnd,
2528         G4_Operand* u2dOpnd,
2529         G4_Operand* groupIDOpnd,
2530         G4_Operand* verticalBlockNumberOpnd,
2531         unsigned char cntrl,
2532         G4_Operand* v2dOpnd,
2533         unsigned char execMode,
2534         G4_Operand* eifbypass,
2535         G4_DstRegRegion* dstOpnd);
2536 
2537     int translateVISAVaSklPlusGeneralInst(
2538         ISA_VA_Sub_Opcode sub_opcode,
2539         G4_Operand* surface,
2540         G4_Operand* sampler,
2541         unsigned char mode,
2542         unsigned char functionality,
2543         G4_Operand* uOffOpnd,
2544         G4_Operand* vOffOpnd,
2545         //1pixel convolve
2546         G4_Operand * offsetsOpnd,
2547 
2548         //FloodFill
2549         G4_Operand* loopCountOpnd,
2550         G4_Operand* pixelHMaskOpnd,
2551         G4_Operand* pixelVMaskLeftOpnd,
2552         G4_Operand* pixelVMaskRightOpnd,
2553 
2554         //LBP Correlation
2555         G4_Operand* disparityOpnd,
2556 
2557         //Correlation Search
2558         G4_Operand* verticalOriginOpnd,
2559         G4_Operand* horizontalOriginOpnd,
2560         G4_Operand* xDirectionSizeOpnd,
2561         G4_Operand* yDirectionSizeOpnd,
2562         G4_Operand* xDirectionSearchSizeOpnd,
2563         G4_Operand* yDirectionSearchSizeOpnd,
2564 
2565         G4_DstRegRegion* dstOpnd,
2566         G4_Type dstType,
2567         unsigned dstSize,
2568 
2569         //HDC
2570         unsigned char pixelSize,
2571         G4_Operand* dstSurfaceOpnd,
2572         G4_Operand *dstXOpnd,
2573         G4_Operand* dstYOpnd,
2574         bool hdcMode);
2575 
2576 
2577     ///////////////////////////////////////////////////////////////////////////
2578     ///////////////////////////////////////////////////////////////////////////
2579     // Raw send related members are in VisaToG4/TranslateSendRaw.cpp
2580     int translateVISARawSendInst(
2581         G4_Predicate *predOpnd,
2582         VISA_Exec_Size executionSize,
2583         VISA_EMask_Ctrl emask,
2584         uint8_t modifiers,
2585         unsigned int exDesc,
2586         uint8_t numSrc,
2587         uint8_t numDst,
2588         G4_Operand* msgDescOpnd,
2589         G4_SrcRegRegion* msgOpnd,
2590         G4_DstRegRegion* dstOpnd);
2591 
2592     int translateVISARawSendsInst(
2593         G4_Predicate *predOpnd,
2594         VISA_Exec_Size executionSize,
2595         VISA_EMask_Ctrl emask,
2596         uint8_t modifiers,
2597         G4_Operand* exDesc,
2598         uint8_t numSrc0,
2599         uint8_t numSrc1,
2600         uint8_t numDst,
2601         G4_Operand* msgDescOpnd,
2602         G4_Operand* msgOpnd0,
2603         G4_Operand* msgOpnd1,
2604         G4_DstRegRegion* dstOpnd,
2605         unsigned ffid,
2606         bool hasEOT = false);
2607 
2608 
2609 
2610     ///////////////////////////////////////////////////////////////////////////
2611     ///////////////////////////////////////////////////////////////////////////
2612     // Raw send related members are in VisaToG4/TranslateSendSync.cpp
2613     G4_INST* translateLscFence(
2614         SFID                    sfid,
2615         LSC_FENCE_OP            fenceOp,
2616         LSC_SCOPE               scope,
2617         int&                    status);
2618 
translateLscFence(SFID sfid,LSC_FENCE_OP fenceOp,LSC_SCOPE scope)2619     G4_INST* translateLscFence(
2620         SFID                    sfid,
2621         LSC_FENCE_OP            fenceOp,
2622         LSC_SCOPE               scope)
2623     {
2624         int status = VISA_SUCCESS;
2625         return translateLscFence(sfid, fenceOp, scope, status);
2626     }
2627     enum class NamedBarrierType
2628     {
2629         PRODUCER,
2630         CONSUMER,
2631         BOTH
2632     };
2633 
2634     void generateNamedBarrier(
2635         int numProducer, int numConsumer, NamedBarrierType type, G4_Operand* barrierId);
2636 
2637     void generateNamedBarrier(G4_Operand* barrierId, G4_SrcRegRegion* threadValue);
2638 
2639     void generateSingleBarrier();
2640 
2641     int translateVISANamedBarrierWait(G4_Operand* barrierId);
2642     int translateVISANamedBarrierSignal(G4_Operand* barrierId, G4_Operand* threadCount);
2643 
2644     G4_INST* createFenceInstruction(
2645         uint8_t flushParam, bool commitEnable, bool globalMemFence, bool isSendc);
2646 
2647     G4_INST* createSLMFence();
2648 
2649     int translateVISAWaitInst(G4_Operand* mask);
2650 
2651     void generateBarrierSend();
2652     void generateBarrierWait();
2653 
2654     int translateVISASyncInst(ISA_Opcode opcode, unsigned int mask);
2655 
2656     int translateVISASplitBarrierInst(bool isSignal);
2657 
2658     ///////////////////////////////////////////////////////////////////////////
2659     ///////////////////////////////////////////////////////////////////////////
2660 
2661     // return either 253 or 255 for A64 messages, depending on whether we want I/A coherency or not
getA64BTI()2662     uint8_t getA64BTI() const { return m_options->getOption(vISA_noncoherentStateless) ? 0xFD : 0xFF; }
2663 
useSends()2664     bool useSends() const
2665     {
2666         return getPlatform() >= GENX_SKL && m_options->getOption(vISA_UseSends) &&
2667             !(VISA_WA_CHECK(m_pWaTable, WaDisableSendsSrc0DstOverlap));
2668     }
2669 
allocateMD()2670     Metadata* allocateMD()
2671     {
2672         Metadata* newMD = new (metadataMem) Metadata();
2673         allMDs.push_back(newMD);
2674         return newMD;
2675     }
2676 
allocateMDString(const std::string & str)2677     MDNode* allocateMDString(const std::string& str)
2678     {
2679         auto newNode = new (metadataMem) MDString(str);
2680         allMDNodes.push_back(newNode);
2681         return newNode;
2682     }
2683 
allocateMDLocation(int line,const char * file)2684     MDLocation* allocateMDLocation(int line, const char* file)
2685     {
2686         auto newNode = new (metadataMem) MDLocation(line, file);
2687         allMDNodes.push_back(newNode);
2688         return newNode;
2689     }
2690 
allocateMDTokenLocation(unsigned short token,unsigned globalID)2691     MDTokenLocation* allocateMDTokenLocation(unsigned short token, unsigned globalID)
2692     {
2693         auto newNode = new (metadataMem) MDTokenLocation(token, globalID);
2694         allMDNodes.push_back(newNode);
2695         return newNode;
2696     }
2697 
2698     ///////////////////////////////////////////////////////////////////////////
2699     ///////////////////////////////////////////////////////////////////////////
2700     // Generic IR simplification tasks
2701     G4_Imm* foldConstVal(G4_Imm* const1, G4_Imm* const2, G4_opcode op);
2702     void doConsFolding(G4_INST *inst);
2703     void doSimplification(G4_INST *inst);
2704 
2705     static G4_Type findConstFoldCommonType(G4_Type type1, G4_Type type2);
2706     ///////////////////////////////////////////////////////////////////////////
2707     ///////////////////////////////////////////////////////////////////////////
2708 
2709     void materializeGlobalImm(G4_BB* entryBB); // why is in FlowGraph.cpp???
2710 
2711 #include "HWCaps.inc"
2712 
2713 private:
2714     G4_SrcRegRegion* createBindlessExDesc(uint32_t exdesc);
2715 };
2716 } // namespace vISA
2717 
2718 // G4IR instructions added by JIT that do not result from lowering
2719 // any CISA bytecode will be assigned CISA offset = 0xffffffff.
2720 // This includes pseudo nodes, G4_labels, mov introduced for copying
2721 // r0 for pre-emption support.
2722 constexpr int UNMAPPABLE_VISA_INDEX = vISA::IR_Builder::OrphanVISAIndex;
2723 
2724 #endif
2725