1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef _GEN4_IR_HPP_
10 #define _GEN4_IR_HPP_
11 
12 
13 #include <set>
14 #include <list>
15 #include <string>
16 #include <bitset>
17 #include <vector>
18 #include <climits>
19 #include <cstdlib>
20 #include <cstddef>
21 #include <fstream>
22 #include <iostream>
23 #include <algorithm>
24 #include <iomanip>
25 #include <stack>
26 #include <optional>
27 #include <array>
28 
29 #include "Mem_Manager.h"
30 #include "G4_Opcode.h"
31 #include "G4_SendDescs.hpp"
32 #include "Option.h"
33 #include "visa_igc_common_header.h"
34 #include "Common_ISA.h"
35 #include "Common_GEN.h"
36 #include "Attributes.hpp"
37 #include "JitterDataStruct.h"
38 #include "Metadata.h"
39 #include "BitSet.h"
40 #include "IGC/common/StringMacros.hpp"
41 
42 #include <memory>
43 
44 namespace vISA
45 {
46     template <class T>
47     class std_arena_based_allocator
48     {
49     protected:
50     std::shared_ptr<Mem_Manager> mem_manager_ptr;
51 
52     public:
53 
54         //for allocator_traits
55         typedef std::size_t    size_type;
56         typedef std::ptrdiff_t difference_type;
57         typedef T*             pointer;
58         typedef const T*       const_pointer;
59         typedef T&             reference;
60         typedef const T&       const_reference;
61         typedef T              value_type;
62 
std_arena_based_allocator(std::shared_ptr<Mem_Manager> _other_ptr)63         explicit std_arena_based_allocator(std::shared_ptr<Mem_Manager> _other_ptr)
64             :mem_manager_ptr(_other_ptr)
65         {
66         }
67 
std_arena_based_allocator()68         explicit std_arena_based_allocator()
69             :mem_manager_ptr(nullptr)
70         {
71             //This implicitly calls Mem_manager constructor.
72         mem_manager_ptr = std::make_shared<Mem_Manager>(4096);
73         }
74 
std_arena_based_allocator(const std_arena_based_allocator & other)75         explicit std_arena_based_allocator(const std_arena_based_allocator& other)
76             : mem_manager_ptr(other.mem_manager_ptr)
77         {}
78 
79 
80         template <class U>
std_arena_based_allocator(const std_arena_based_allocator<U> & other)81         std_arena_based_allocator(const std_arena_based_allocator<U>& other)
82             : mem_manager_ptr(other.mem_manager_ptr)
83         {}
84 
85         template <class U>
operator =(const std_arena_based_allocator<U> & other)86         std_arena_based_allocator& operator=(const std_arena_based_allocator<U>& other)
87         {
88             mem_manager_ptr = other.mem_manager_ptr;
89             return *this;
90         }
91 
92         template <class U>
93         struct rebind { typedef std_arena_based_allocator<U> other; };
94 
95         template <class U> friend class std_arena_based_allocator;
96 
allocate(size_type n,const void * =0)97         pointer allocate(size_type n, const void * = 0)
98         {
99             T* t = (T*)mem_manager_ptr->alloc(n * sizeof(T));
100             return t;
101         }
102 
deallocate(void * p,size_type)103         void deallocate(void* p, size_type)
104         {
105             //No deallocation for arena allocator.
106         }
107 
address(reference x) const108         pointer           address(reference x) const { return &x; }
address(const_reference x) const109         const_pointer     address(const_reference x) const { return &x; }
110 
operator =(const std_arena_based_allocator &)111         std_arena_based_allocator<T>&  operator=(const std_arena_based_allocator&)
112         {
113             return *this;
114         }
115 
construct(pointer p,const T & val)116         void              construct(pointer p, const T& val)
117         {
118             new ((T*)p) T(val);
119         }
destroy(pointer p)120         void              destroy(pointer p) { p->~T(); }
121 
max_size() const122         size_type         max_size() const { return size_t(-1); }
123 
operator ==(const std_arena_based_allocator &) const124         bool operator==(const std_arena_based_allocator &) const { return true; }
125 
operator !=(const std_arena_based_allocator & a) const126         bool operator!=(const std_arena_based_allocator & a) const { return !operator==(a); }
127     };
128 }
129 
130 // We use memory manager.  Memory manager will free all the space at once so that
131 // there is no need to call destructor or delete to free up space.
132 #ifdef _MSC_VER
133 #pragma warning (disable: 4291)
134 #pragma warning (disable: 4996)
135 #endif
136 
137 namespace vISA
138 {
139 // forward declaration
140 class G4_INST;
141 class G4_Areg;
142 class G4_RegVar;
143 class G4_Declare;
144 class G4_Operand;
145 class G4_CondMod;
146 class G4_Predicate;
147 class GlobalRA;
148 
149 class G4_Imm;
150 class G4_Greg;
151 class G4_Label;
152 class G4_AddrExp;
153 class G4_DstRegRegion;
154 class G4_SrcRegRegion;
155 
156 class IR_Builder;
157 
158 class LocalLiveRange;
159 class G4_Kernel;
160 class G4_VarBase;
161 
162 class G4_SpillIntrinsic;
163 class G4_FillIntrinsic;
164 class G4_PseudoAddrMovIntrinsic;
165 
166 
167 }
168 
169 // Forward declarations for global opt report related functions
170 void getOptReportStream(std::ofstream& reportStream, const Options *options);
171 void closeOptReportStream(std::ofstream& reportStream);
172 
173 vISA::G4_Declare* GetTopDclFromRegRegion(vISA::G4_Operand* opnd);
174 
// Bank-conflict categories. The enumerators take the values 0..4 in the
// order listed.
enum BankConflict
{
    BANK_CONFLICT_NONE             = 0,
    BANK_CONFLICT_FIRST_HALF_EVEN  = 1,
    BANK_CONFLICT_FIRST_HALF_ODD   = 2,
    BANK_CONFLICT_SECOND_HALF_EVEN = 3,
    BANK_CONFLICT_SECOND_HALF_ODD  = 4
};
181 
// Math function selectors. The numeric values double as indices into the
// MathOpNames table; value 8 has no enumerator.
enum G4_MathOp
{
    MATH_RESERVED     = 0,
    MATH_INV          = 1,
    MATH_LOG          = 2,
    MATH_EXP          = 3,
    MATH_SQRT         = 4,
    MATH_RSQ          = 5,
    MATH_SIN          = 6,
    MATH_COS          = 7,
    // 8 is skipped
    MATH_FDIV         = 9,
    MATH_POW          = 10,
    MATH_INT_DIV      = 11,
    MATH_INT_DIV_QUOT = 12,
    MATH_INT_DIV_REM  = 13,
    MATH_INVM         = 14,
    MATH_RSQRTM       = 15
};
201 
// Printable name for each of the 16 possible math selector values,
// indexed by the numeric value.
inline constexpr const char* MathOpNames[16] =
{
    "reserved",   // 0
    "inv",        // 1
    "log",        // 2
    "exp",        // 3
    "sqrt",       // 4
    "rsq",        // 5
    "sin",        // 6
    "cos",        // 7
    "undefined",  // 8
    "fdiv",       // 9
    "pow",        // 10
    "intdiv",     // 11
    "quot",       // 12
    "rem",        // 13
    "invm",       // 14
    "rsqrtm"      // 15
};
221 
// Pipe identifiers. Value 5 has no enumerator.
enum _SB_INST_PIPE
{
    PIPE_NONE  = 0,
    PIPE_INT   = 1,
    PIPE_FLOAT = 2,
    PIPE_LONG  = 3,
    PIPE_MATH  = 4,
    PIPE_DPAS  = 6,
    PIPE_SEND  = 7,
};
typedef _SB_INST_PIPE SB_INST_PIPE;
232 
// Bit-field view of a 32-bit descriptor word. The field widths add up to
// exactly 32 bits; the bracketed ranges record the bit positions each field
// is meant to occupy.
struct lsc_descriptor {
    uint32_t opcode     : 6; // [5:0]
    uint32_t reserved6  : 1; // [6]
    uint32_t addr_size  : 2; // [8:7]
    uint32_t data_size  : 3; // [11:9]
    uint32_t data_vec   : 3; // [14:12]
    uint32_t data_order : 1; // [15]
    uint32_t reserved16 : 1; // [16]
    uint32_t cache_opts : 3; // [19:17]
    uint32_t rlen       : 5; // [24:20]
    uint32_t mlen       : 5; // [29:25]
    uint32_t addr_type  : 2; // [31:30]
};
246 
247 typedef vISA::std_arena_based_allocator<vISA::G4_INST*> INST_LIST_NODE_ALLOCATOR;
248 
249 typedef std::list<vISA::G4_INST*, INST_LIST_NODE_ALLOCATOR>           INST_LIST;
250 typedef std::list<vISA::G4_INST*, INST_LIST_NODE_ALLOCATOR>::iterator INST_LIST_ITER;
251 typedef std::list<vISA::G4_INST*, INST_LIST_NODE_ALLOCATOR>::const_iterator INST_LIST_CITER;
252 typedef std::list<vISA::G4_INST*, INST_LIST_NODE_ALLOCATOR>::reverse_iterator INST_LIST_RITER;
253 
254 typedef std::pair<vISA::G4_INST*, Gen4_Operand_Number> USE_DEF_NODE;
255 typedef vISA::std_arena_based_allocator<USE_DEF_NODE> USE_DEF_ALLOCATOR;
256 
257 typedef std::list<USE_DEF_NODE, USE_DEF_ALLOCATOR > USE_EDGE_LIST;
258 typedef std::list<USE_DEF_NODE, USE_DEF_ALLOCATOR >::iterator USE_EDGE_LIST_ITER;
259 typedef std::list<USE_DEF_NODE, USE_DEF_ALLOCATOR > DEF_EDGE_LIST;
260 typedef std::list<USE_DEF_NODE, USE_DEF_ALLOCATOR >::iterator DEF_EDGE_LIST_ITER;
261 
262 namespace vISA
263 {
264 
265 
266 
267 //forward declaration for the binary of an instruction
268 class BinInst;
269 
270 class G4_FCALL
271 {
272     uint16_t argSize;
273     uint16_t retSize;
274 
275 public:
G4_FCALL(uint16_t argVarSz,uint16_t retVarSz)276     G4_FCALL(uint16_t argVarSz, uint16_t retVarSz) : argSize(argVarSz), retSize(retVarSz)
277     {}
278 
operator new(size_t sz,Mem_Manager & m)279     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
280 
getArgSize() const281     uint16_t getArgSize() const { return argSize; }
getRetSize() const282     uint16_t getRetSize() const { return retSize; }
283 };
284 
285 //forward references.
286 class G4_InstMath;
287 class G4_InstCF;
288 class G4_InstIntrinsic;
289 class G4_PseudoAddrMovIntrinsic;
290 class G4_InstSend;
291 class G4_InstBfn;
292 class G4_InstDpas;
293 
294 class G4_INST
295 {
296     friend class G4_SendDesc;
297     friend class IR_Builder;
298 
299 protected:
300     G4_opcode        op;
301     std::array<G4_Operand*, G4_MAX_SRCS> srcs;
302     G4_DstRegRegion* dst;
303     G4_Predicate*    predicate;
304     G4_CondMod*      mod;
305     unsigned int     option;     // inst option
306     G4_Operand*             implAccSrc;
307     G4_DstRegRegion*        implAccDst;
308 
309     // def-use chain: list of <inst, opndPos> such that this[dst/condMod] defines inst[opndPos]
310     // opndNum must be one of src0, src1, src2, pred, implAccSrc
311     USE_EDGE_LIST useInstList;
312 
313     // use-def chain: list of <inst, opndPos> such that inst[dst/condMod] defines this[opndPos]
314     DEF_EDGE_LIST defInstList;
315 
316     // instruction's id in BB. Each optimization should re-initialize before using
317     int32_t   localId;
318 
319     static const int UndefinedCisaOffset = -1;
320     int srcCISAoff = UndefinedCisaOffset; // record CISA inst offset that resulted in this instruction
321 
322     Metadata* MD = nullptr;
323 
324 #define UNDEFINED_GEN_OFFSET -1
325     int64_t genOffset = UNDEFINED_GEN_OFFSET;
326 
327     void emit_options(std::ostream& output) const;
328 
329     //WARNING: if adding new options, please make sure that bitfield does not
330     //overflow.
331     unsigned short sat : 1;
332     // during optimization, an inst may become redundant and be marked dead
333     unsigned short dead : 1;
334     unsigned short evenlySplitInst : 1;
335     unsigned short skipPostRA : 1;  // for NoMaskWA
336     G4_ExecSize    execSize;
337 
338     BinInst *bin;
339 
    // kept non-public so that only the IR_Builder (a friend class) can create new instructions
operator new(size_t sz,Mem_Manager & m)341     void *operator new(size_t sz, Mem_Manager& m) { return m.alloc(sz); }
342     uint32_t global_id = (uint32_t) -1;
343 
344     const IR_Builder& builder;  // link to builder to access the various compilation options
345 
346 public:
347     enum SWSBTokenType {
348         TOKEN_NONE,
349         SB_SET,
350         NoACCSBSet,
351         AFTER_READ,
352         AFTER_WRITE,
353         READ_ALL,
354         WRITE_ALL,
355     };
356 
357     enum DistanceType {
358         DIST_NONE,
359         DIST,
360         DISTALL,
361         DISTINT,
362         DISTFLOAT,
363         DISTLONG,
364         DISTMATH
365     };
366 typedef struct _SWSBInfo
367 {
368     unsigned short depDistance : 3;
369     unsigned short distType : 4;
370     unsigned short SBToken : 5;
371     unsigned short tokenType : 4;
_SWSBInfovISA::G4_INST::_SWSBInfo372     _SWSBInfo()
373     {
374         depDistance = 0;
375         distType = DIST_NONE;
376         SBToken = 0;
377         tokenType = TOKEN_NONE;
378     }
379 } SWSBInfo;
380 
381 protected:
382     //unsigned char depDistance = 0;
383     bool operandTypeIndicated = false;
384     bool isClosestALUType_ = false;
385 
386     SWSBInfo  swsb;
387 
388 public:
389 
setDistance(unsigned char dep_distance)390     void setDistance(unsigned char dep_distance)
391     {
392         assert(swsb.depDistance <= 7);
393         swsb.depDistance = dep_distance;
394     }
getDistance() const395     unsigned char getDistance() const { return swsb.depDistance; }
396 
setDistanceTypeXe(DistanceType type)397     void setDistanceTypeXe(DistanceType type) { swsb.distType = type; }
getDistanceTypeXe() const398     DistanceType getDistanceTypeXe() const { return (DistanceType)swsb.distType; }
399 
setToken(unsigned short token)400     void setToken(unsigned short token) {swsb.SBToken = token;}
getToken() const401     unsigned short getToken() const { return swsb.SBToken; }
setTokenType(SWSBTokenType type)402     void setTokenType(SWSBTokenType type) {  swsb.tokenType = (unsigned short)type; }
getTokenType() const403     SWSBTokenType getTokenType() const { return (SWSBTokenType)swsb.tokenType; }
404 
setSetToken(unsigned short token)405     void setSetToken(unsigned short token) {swsb.SBToken = token; swsb.tokenType = SB_SET;}
getSetToken() const406     unsigned short getSetToken() const { if (swsb.tokenType == SB_SET) return swsb.SBToken; else return -1; }
407 
setNoACCSBSet()408     void setNoACCSBSet() { swsb.tokenType = NoACCSBSet;}
hasNoACCSBSet()409     bool hasNoACCSBSet() { return swsb.tokenType == NoACCSBSet;}
410 
setOperandTypeIndicated(bool indicated)411     void setOperandTypeIndicated(bool indicated) { operandTypeIndicated = indicated; }
setIsClosestALUType(bool indicated)412     void setIsClosestALUType(bool indicated) { isClosestALUType_ = indicated; }
413 
isOperandTypeIndicated() const414     bool isOperandTypeIndicated() const {return operandTypeIndicated;}
isClosestALUType() const415     bool isClosestALUType() const { return isClosestALUType_; }
416 
isDpas() const417     bool isDpas() const { return (op == G4_dpas || op == G4_dpasw); }
asDpasInst() const418     G4_InstDpas* asDpasInst() const
419     {
420         MUST_BE_TRUE(isDpas(), ERROR_UNKNOWN);
421         return (G4_InstDpas*) this;
422     }
423 
424 public:
G4_INST(const IR_Builder & irb,G4_Predicate * prd,G4_opcode o,G4_CondMod * m,G4_Sat s,G4_ExecSize size,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_InstOpts opt)425     G4_INST(
426         const IR_Builder& irb,
427         G4_Predicate* prd,
428         G4_opcode o,
429         G4_CondMod* m,
430         G4_Sat s,
431         G4_ExecSize size,
432         G4_DstRegRegion* d,
433         G4_Operand* s0,
434         G4_Operand* s1,
435         G4_InstOpts opt)
436         : G4_INST(irb, prd, o, m, s, size, d, s0, s1, nullptr, nullptr, opt)
437     { }
438 
G4_INST(const IR_Builder & irb,G4_Predicate * prd,G4_opcode o,G4_CondMod * m,G4_Sat s,G4_ExecSize size,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_Operand * s2,G4_InstOpts opt)439     G4_INST(
440         const IR_Builder& irb,
441         G4_Predicate* prd,
442         G4_opcode o,
443         G4_CondMod* m,
444         G4_Sat s,
445         G4_ExecSize size,
446         G4_DstRegRegion* d,
447         G4_Operand* s0,
448         G4_Operand* s1,
449         G4_Operand* s2,
450         G4_InstOpts opt)
451         : G4_INST(irb, prd, o, m, s, size, d, s0, s1, s2, nullptr, opt)
452     { }
453 
454     G4_INST(
455         const IR_Builder& builder,
456         G4_Predicate* prd,
457         G4_opcode o,
458         G4_CondMod* m,
459         G4_Sat s,
460         G4_ExecSize size,
461         G4_DstRegRegion* d,
462         G4_Operand* s0,
463         G4_Operand* s1,
464         G4_Operand* s2,
465         G4_Operand* s3,
466         G4_InstOpts opt);
467 
~G4_INST()468     virtual ~G4_INST()
469     {
470     }
471 
472     // The method is declared virtual so subclasses of G4_INST
473     // should also implement this method to populate members
474     // unique to them.
475     virtual G4_INST* cloneInst();
isBaseInst() const476     virtual bool isBaseInst() const { return true; }
isCFInst() const477     virtual bool isCFInst() const { return false; }
478 
getLexicalId() const479     uint32_t getLexicalId() const { return global_id; }
setLexicalId(uint32_t id)480     void setLexicalId(uint32_t id) { global_id = id; }
481 
482     void setPredicate(G4_Predicate* p);
getPredicate() const483     G4_Predicate* getPredicate() const {return predicate;}
484 
setSaturate(G4_Sat s)485     void setSaturate(G4_Sat s) {sat = s == g4::SAT ? 1 : 0;}
setSaturate(bool z)486     void setSaturate(bool z) {sat = z ? 1 : 0;}
getSaturate() const487     G4_Sat getSaturate() const {return sat ? g4::SAT : g4::NOSAT;}
488 
opcode() const489     G4_opcode opcode()  const {return op;}
490 
491     void setOpcode(G4_opcode opcd);
492 
getDst() const493     G4_DstRegRegion* getDst() const { return dst; }
494     bool supportsNullDst() const;
495 
496     bool isPseudoKill() const;
497     bool isLifeTimeEnd() const;
498     bool isSpillIntrinsic() const;
499     bool isFlagSpillIntrinsic() const;
500     G4_SpillIntrinsic* asSpillIntrinsic() const;
501     bool isFillIntrinsic() const;
502     G4_FillIntrinsic* asFillIntrinsic() const;
503     bool isPseudoAddrMovIntrinsic() const;
504     bool isSplitIntrinsic() const;
505     bool isCallerSave() const;
506     bool isCallerRestore() const;
507     bool isCalleeSave() const;
508     bool isCalleeRestore() const;
509     bool isRelocationMov() const;
isMov() const510     bool isMov() const { return G4_Inst_Table[op].instType == InstTypeMov; }
isLogic() const511     bool isLogic() const { return G4_Inst_Table[op].instType == InstTypeLogic; }
isCompare() const512     bool isCompare() const
513     {
514         return G4_Inst_Table[op].instType == InstTypeCompare;
515     }
isFlowControl() const516     bool isFlowControl() const
517     {
518         return G4_Inst_Table[op].instType == InstTypeFlow;
519     }
isArithmetic() const520     bool isArithmetic() const
521     {
522         return G4_Inst_Table[op].instType == InstTypeArith;
523     }
isVector() const524     bool isVector() const
525     {
526         return G4_Inst_Table[op].instType == InstTypeVector;
527     }
isLabel() const528     bool isLabel() const { return op == G4_label; }
isCall() const529     bool isCall() const { return op == G4_call; }
isFCall() const530     bool isFCall() const { return op == G4_pseudo_fcall; }
isReturn() const531     bool isReturn() const { return op == G4_return; }
isFReturn() const532     bool isFReturn() const { return (op == G4_pseudo_fret); }
isMath() const533     bool isMath() const { return op == G4_math; }
isIntrinsic() const534     bool isIntrinsic() const { return op == G4_intrinsic; }
isSend() const535     bool isSend() const { return op == G4_send || op == G4_sendc || op == G4_sends || op == G4_sendsc; }
isSplitSend() const536     bool isSplitSend() const { return op == G4_sends || op == G4_sendsc; }
isRSWADivergentInst() const537     bool isRSWADivergentInst() const { return op == G4_goto || op == G4_while || op == G4_if || op == G4_break; }
isBfn() const538     bool isBfn() const { return op == G4_bfn; }
539 
540     // ToDo: get rid of these functions which don't make sense for non-sends
isEOT() const541     virtual bool isEOT() const { return false; }
getMsgDesc() const542     virtual G4_SendDesc * getMsgDesc() const { return nullptr; }
543 
getMsgDescRaw() const544     const G4_SendDescRaw * getMsgDescRaw() const {
545         const auto *msgDesc = getMsgDesc();
546         if (msgDesc == nullptr || !getMsgDesc()->isRaw())
547             return nullptr;
548         return (const G4_SendDescRaw *)msgDesc;
549     }
getMsgDescRaw()550     G4_SendDescRaw * getMsgDescRaw() {
551         auto *msgDesc = getMsgDesc();
552         if (msgDesc == nullptr || !getMsgDesc()->isRaw())
553             return nullptr;
554         return (G4_SendDescRaw *)msgDesc;
555     }
getMsgDescLdSt() const556     const G4_SendDescLdSt * getMsgDescLdSt() const {
557         const auto *msgDesc = getMsgDesc();
558         if (msgDesc == nullptr || !getMsgDesc()->isRaw())
559             return nullptr;
560         return (const G4_SendDescLdSt *)msgDesc;
561     }
562 
mayExceedTwoGRF() const563     virtual bool mayExceedTwoGRF() const
564     {
565         return false;
566     }
567     // special instructions(e.g., send) should override
568     virtual void computeRightBound(G4_Operand* opnd);
569 
isWait() const570     bool isWait() const { return op == G4_wait; }
isSyncOpcode(G4_opcode opcode)571     static bool isSyncOpcode(G4_opcode opcode) { return opcode == G4_sync_nop || opcode == G4_sync_allrd || opcode == G4_sync_allwr; }
isSWSBSync() const572     bool isSWSBSync() const
573     {
574         return G4_INST::isSyncOpcode(op);
575     }
576 
isPseudoLogic() const577     bool isPseudoLogic() const
578     {
579         return op == G4_pseudo_and || op == G4_pseudo_or || op == G4_pseudo_xor || op == G4_pseudo_not;
580     }
581 
582     bool isPartialWrite() const;
583     bool isPartialWriteForSpill(bool inSIMDCF) const;
584     bool isArithAddr() const;
585     bool isMovAddr() const;
586     bool isAccSrcInst() const;
587     bool isAccDstInst() const;
588 
asMathInst() const589     G4_InstMath* asMathInst() const
590     {
591         MUST_BE_TRUE(isMath(), ERROR_UNKNOWN);
592         return ((G4_InstMath*) this);
593     }
594 
asCFInst() const595     G4_InstCF* asCFInst() const
596     {
597         MUST_BE_TRUE(isFlowControl(), ERROR_UNKNOWN);
598         return ((G4_InstCF*) this);
599     }
600 
asIntrinsicInst() const601     G4_InstIntrinsic* asIntrinsicInst() const
602     {
603         MUST_BE_TRUE(isIntrinsic(), ERROR_UNKNOWN);
604         return (G4_InstIntrinsic*) this;
605     }
606 
asPseudoAddrMovIntrinsic() const607     G4_PseudoAddrMovIntrinsic* asPseudoAddrMovIntrinsic() const
608     {
609         MUST_BE_TRUE(isPseudoAddrMovIntrinsic(), "not a fill intrinsic");
610         return const_cast<G4_PseudoAddrMovIntrinsic*>(reinterpret_cast<const G4_PseudoAddrMovIntrinsic*>(this));
611     }
612 
asSendInst() const613     const G4_InstSend* asSendInst() const
614     {
615         if (!isSend())
616         {
617             return nullptr;
618         }
619         return reinterpret_cast<const G4_InstSend*>(this);
620     }
asSendInst()621     G4_InstSend* asSendInst()
622     {
623         if (!isSend())
624         {
625             return nullptr;
626         }
627         return reinterpret_cast<G4_InstSend*>(this);
628     }
629 
asBfnInst() const630     G4_InstBfn* asBfnInst() const
631     {
632         MUST_BE_TRUE(isBfn(), ERROR_UNKNOWN);
633         return (G4_InstBfn*) this;
634     }
635 
636     bool isPseudoUse() const;
637     G4_Type getExecType() const;
638     G4_Type getExecType2() const;
isComprInst() const639     bool isComprInst() const
640     {
641         return detectComprInst();
642     }
643     bool isComprInvariantSrcRegion(G4_SrcRegRegion* src, int srcPos);
644 
645     G4_Operand* getOperand(Gen4_Operand_Number opnd_num);
646 
647     G4_Operand* getSrc(unsigned i) const;
648     void setSrc(G4_Operand* opnd, unsigned i);
649     int getNumSrc() const;
650     int getNumDst() const;
651 
src_begin() const652     auto src_begin() const { return srcs.begin(); }
src_begin()653     auto src_begin() { return srcs.begin(); }
src_end() const654     auto src_end() const { return srcs.begin() + getNumSrc(); }
src_end()655     auto src_end() { return srcs.begin() + getNumSrc(); }
656 
    // This assumes we don't have to recompute bounds for the swapped sources.
    // Note that the def-use chain is not maintained after this; call swapDefUse
    // if you want to update the du-chain.
swapSrc(int src1,int src2)660     void swapSrc(int src1, int src2)
661     {
662         assert(src1 >= 0 && src1 < getNumSrc() && src2 >= 0 && src2 < getNumSrc() && "illegal src number");
663         std::swap(srcs[src1], srcs[src2]);
664     }
665 
getLabel()666     G4_Label* getLabel()
667     {
668         MUST_BE_TRUE(op == G4_label, "inst must be a label");
669         return (G4_Label*) getSrc(0);
670     }
671 
672     void setDest(G4_DstRegRegion* opnd);
673     void setExecSize(G4_ExecSize s);
674 
675     void computeARFRightBound();
676 
isMaskOption(G4_InstOption opt)677     static bool isMaskOption(G4_InstOption opt)
678     {
679         return (opt & InstOpt_QuarterMasks) != 0;
680     }
681 
setOptions(unsigned int o)682     void setOptions(unsigned int o)
683     {
684         unsigned int oldMaskOffset = getMaskOffset();
685         option = o;
686         unsigned int newMaskOffset = getMaskOffset();
687 
688         if (oldMaskOffset != newMaskOffset)
689         {
690             // Change in mask offset requires change in
691             // bounds for pred/cond mod/impl acc src/dst
692             computeARFRightBound();
693         }
694     }
695 
setOptionOn(G4_InstOption o)696     void setOptionOn(G4_InstOption o)
697     {
698         assert(!isMaskOption(o) && "use setMaskOption() to change emask instead");
699         option |= o;
700     }
701 
setOptionOff(G4_InstOption o)702     void setOptionOff(G4_InstOption o)
703     {
704         assert(!isMaskOption(o) && "use setMaskOption() to change emask instead");
705         option &= (~o);
706     }
getOption() const707     unsigned int getOption() const {return option;}
getMaskOption() const708     unsigned int getMaskOption() const {return option & InstOpt_Masks;}
setMaskOption(G4_InstOption opt)709     void setMaskOption(G4_InstOption opt)
710     {
711         // mask options are mutually exclusive, so we have to clear any previous setting
712         // note that this does not clear NoMask
713         MUST_BE_TRUE(opt & InstOpt_QuarterMasks, "opt is not a valid mask option");
714         setOptions((option & ~InstOpt_QuarterMasks) | opt);
715     }
716 
setNoMask(bool clearEMask)717     void setNoMask(bool clearEMask)
718     {
719         if (clearEMask)
720         {
721             // Clear the M0/M4/M8 emask as well
722             setOptions((getOption() & ~InstOpt_Masks) | InstOpt_WriteEnable);
723         }
724         else
725         {
726             setOptionOn(InstOpt_WriteEnable);
727         }
728     }
729 
is1QInst() const730     bool is1QInst() const { return execSize == g4::SIMD8 && getMaskOffset() == 0; }
isWriteEnableInst() const731     bool isWriteEnableInst() const { return (option & InstOpt_WriteEnable) ? true : false; }
isYieldInst() const732     bool isYieldInst() const { return (option & InstOpt_Switch) ? true : false; }
isNoPreemptInst() const733     bool isNoPreemptInst() const { return (option & InstOpt_NoPreempt) ? true : false; }
734 
735     void emit_inst(std::ostream& output, bool symbol_dst, bool *symbol_srcs);
736     void emit(std::ostream& output, bool symbolreg = false, bool dotStyle = false);
737     void emitDefUse(std::ostream& output) const;
738     void emitInstIds(std::ostream& output) const;
739     void print(std::ostream& OS) const;
740     void dump() const;
741     bool isValidSymbolOperand(bool &dst_valid, bool *srcs_valid) const;
742     const char *getLabelStr() const;
743 
    // Get the SIMD lane mask for this instruction. For example,
    //      add  (8|M8) ...
    // will have 0xFF00, i.e. lanes 8-15.
747     uint32_t getExecLaneMask() const;
getExecSize() const748     G4_ExecSize getExecSize() const {return execSize;}
getCondMod() const749     const G4_CondMod*    getCondMod() const {return mod;}
getCondMod()750           G4_CondMod*    getCondMod()       {return mod;}
751     const G4_VarBase*    getCondModBase() const;
getCondModBase()752           G4_VarBase*    getCondModBase() {
753               return const_cast<G4_VarBase*>(((const G4_INST*)this)->getCondModBase());
754           }
755     void setCondMod(G4_CondMod* m);
756 
isDead() const757     bool isDead() const {return dead;}
markDead()758     void markDead() {dead = true;}
759 
isAligned1Inst() const760     bool isAligned1Inst() const { return !isAligned16Inst(); }
isAligned16Inst() const761     bool isAligned16Inst() const { return (option & InstOpt_Align16)    ? true : false; }
isAccWrCtrlInst() const762     bool isAccWrCtrlInst() const { return (option & InstOpt_AccWrCtrl) ? true : false; }
isAtomicInst() const763     bool isAtomicInst()    const { return (option & InstOpt_Atomic)     ? true : false; }
isNoDDChkInst() const764     bool isNoDDChkInst()   const { return (option & InstOpt_NoDDChk)    ? true : false; }
isNoDDClrInst() const765     bool isNoDDClrInst()   const { return (option & InstOpt_NoDDClr)    ? true : false; }
isBreakPointInst() const766     bool isBreakPointInst() const { return (option & InstOpt_BreakPoint) ? true : false; }
767 
768     // true if inst reads/writes acc either implicitly or explicitly
useAcc() const769     bool useAcc() const
770     {
771         return isAccDstInst() || isAccSrcInst() || implAccDst != NULL || implAccSrc != NULL;
772     }
773 
defAcc() const774     bool defAcc() const {
775         return isAccDstInst() || implAccDst != NULL;
776     }
777 
setCompacted()778     void setCompacted()      { option = option | InstOpt_Compacted; }
setNoCompacted()779     void setNoCompacted()    { option = option | InstOpt_NoCompact; }
isCompactedInst() const780     bool isCompactedInst()  const { return (option & InstOpt_Compacted) ? true : false; }
isNoCompactedInst() const781     bool isNoCompactedInst() const { return (option & InstOpt_NoCompact) ? true : false; }
782 
setLocalId(int32_t lid)783     void setLocalId(int32_t lid)  { localId = lid; }
getLocalId() const784     int32_t getLocalId() const { return localId; }
785 
setEvenlySplitInst(bool val)786     void setEvenlySplitInst(bool val) { evenlySplitInst = val; }
getEvenlySplitInst()787     bool getEvenlySplitInst() { return evenlySplitInst; }
788 
setCISAOff(int offset)789     void setCISAOff(int offset) { srcCISAoff = offset; }
getCISAOff() const790     int getCISAOff() const { return srcCISAoff; }
isCISAOffValid() const791     bool isCISAOffValid() const { return getCISAOff() != UndefinedCisaOffset; }
792 
793     bool isOptBarrier() const;
hasImplicitAccSrc() const794     bool hasImplicitAccSrc() const
795     {
796        return op == G4_mac || op == G4_mach || op == G4_sada2;
797     }
798 
hasImplicitAccDst() const799     bool hasImplicitAccDst() const
800     {
801         return op == G4_addc || op == G4_subb;
802     }
803 
804     bool mayExpandToAccMacro() const;
805 
getSrcOperandNum(int srcPos) const806     Gen4_Operand_Number getSrcOperandNum(int srcPos) const
807     {
808         switch (srcPos)
809         {
810         case 0:
811             return Opnd_src0;
812         case 1:
813             return Opnd_src1;
814         case 2:
815             return Opnd_src2;
816         case 3:
817             return Opnd_src3;
818         default:
819             MUST_BE_TRUE(false, "bad source id");
820             return Opnd_src0;
821         }
822     }
getSrcNum(Gen4_Operand_Number opndNum)823     static int getSrcNum(Gen4_Operand_Number opndNum)
824     {
825         MUST_BE_TRUE(isSrcNum(opndNum), "not a source number");
826         return opndNum - 1;
827     }
isSrcNum(Gen4_Operand_Number opndNum)828     static bool isSrcNum(Gen4_Operand_Number opndNum)
829     {
830         return opndNum == Opnd_src0 || opndNum == Opnd_src1 ||
831                opndNum == Opnd_src2 || opndNum == Opnd_src3 ||
832                opndNum == Opnd_src4 || opndNum == Opnd_src5 ||
833                opndNum == Opnd_src6 || opndNum == Opnd_src7;
834     }
isInstrinsicOnlySrcNum(Gen4_Operand_Number opndNum)835     static bool isInstrinsicOnlySrcNum(Gen4_Operand_Number opndNum)
836     {
837         return opndNum == Opnd_src4 || opndNum == Opnd_src5 ||
838             opndNum == Opnd_src6 || opndNum == Opnd_src7;
839     }
840     const G4_Operand* getOperand(Gen4_Operand_Number opnd_num) const;
841 
    /// Remove all definitions that contribute to this[opndNum] and remove all
843     /// uses from their corresponding definitions. To maintain def-use's, this
844     /// is required while resetting a source operand.
845     void removeDefUse(Gen4_Operand_Number opndNum);
    /// Remove a use from this instruction and update its corresponding def.
847     /// Returns the next use iterator of this instruction.
848     USE_EDGE_LIST_ITER eraseUse(USE_EDGE_LIST_ITER iter);
849     /// Remove all uses defined by this. To maintain def-use's, this is
850     /// required to clear useInstList.
851     void removeAllUses();
    /// Remove all defs that are used by this. To maintain def-use's, this is
853     /// required to clear defInstList.
854     void removeAllDefs();
855     void transferDef(G4_INST *inst2, Gen4_Operand_Number opndNum1,
856                      Gen4_Operand_Number opndNum2);
857     void transferUse(G4_INST *inst2, bool keepExisting = false);
858     /// Copy this[opndNum1]'s definition to inst2[opndNum2]'s definition.
859     void copyDef(G4_INST *inst2, Gen4_Operand_Number opndNum1,
860                  Gen4_Operand_Number opndNum2, bool checked = false);
    /// Copy this instruction's defs to inst2. If checked is true, then only
862     /// copy those effective defs.
863     void copyDefsTo(G4_INST *inst2, bool checked);
864     /// Copy this instruction's uses to inst2. If checked is true, then only
865     /// copy those effective uses.
866     void copyUsesTo(G4_INST *inst2, bool checked);
867     void removeUseOfInst();
868     void trimDefInstList();
869     bool isDFInstruction() const;
870     bool isMathPipeInst() const;
871     bool distanceHonourInstruction() const;
872     bool tokenHonourInstruction() const;
873     bool hasNoPipe();
874     bool isLongPipeType(G4_Type type) const;
875     bool isIntegerPipeType(G4_Type type) const;
876     bool isJEUPipeInstructionXe() const;
877     bool isLongPipeInstructionXe() const;
878     bool isIntegerPipeInstructionXe() const;
879     bool isFloatPipeInstructionXe() const;
880     SB_INST_PIPE getDataTypePipeXe(G4_Type type);
881     SB_INST_PIPE getInstructionPipeXe();
882 
883     void swapDefUse(
884         Gen4_Operand_Number srcIxA = Opnd_src0,
885         Gen4_Operand_Number srcIxB = Opnd_src1);
886     void addDefUse(G4_INST* use, Gen4_Operand_Number usePos);
uniqueDefUse()887     void uniqueDefUse()
888     {
889         useInstList.unique();
890         defInstList.unique();
891     }
clearUse()892     void clearUse() { useInstList.clear(); }
clearDef()893     void clearDef() { defInstList.clear(); }
useEmpty() const894     bool useEmpty() const { return useInstList.empty(); }
hasOneUse() const895     bool hasOneUse() const { return useInstList.size() == 1; }
896     /// Returns its definition if this's operand has a single definition. Returns
897     /// 0 otherwise.
898     G4_INST *getSingleDef(Gen4_Operand_Number opndNum, bool MakeUnique = false);
use_begin() const899     USE_EDGE_LIST::const_iterator use_begin() const { return useInstList.begin(); }
use_begin()900     USE_EDGE_LIST::iterator       use_begin()       { return useInstList.begin(); }
use_end() const901     USE_EDGE_LIST::const_iterator use_end() const { return useInstList.end(); }
use_end()902     USE_EDGE_LIST::iterator       use_end()       { return useInstList.end(); }
use_front()903     USE_EDGE_LIST::reference      use_front() { return useInstList.front(); }
use_back()904     USE_EDGE_LIST::reference      use_back() { return useInstList.back(); }
def_begin() const905     DEF_EDGE_LIST::const_iterator def_begin() const { return defInstList.begin(); }
def_begin()906     DEF_EDGE_LIST::iterator       def_begin()       { return defInstList.begin(); }
def_end() const907     DEF_EDGE_LIST::const_iterator def_end() const { return defInstList.end(); }
def_end()908     DEF_EDGE_LIST::iterator       def_end()       { return defInstList.end(); }
def_front()909     DEF_EDGE_LIST::reference      def_front() { return defInstList.front(); }
def_back()910     DEF_EDGE_LIST::reference      def_back() { return defInstList.back(); }
use_size() const911     size_t use_size() const { return useInstList.size(); }
def_size() const912     size_t def_size() const { return defInstList.size(); }
913     void dumpDefUse(std::ostream &os = std::cerr);
sortUses(Compare Cmp)914     template <typename Compare> void sortUses(Compare Cmp)
915     {
916         useInstList.sort(Cmp);
917     }
918 
919     void fixMACSrc2DefUse();
920     void setImplAccSrc(G4_Operand* opnd);
921     void setImplAccDst(G4_DstRegRegion* opnd);
922 
923     bool isWAWdep(G4_INST *inst); /* not const: may compute bound */
924     bool isWARdep(G4_INST *inst); /* not const: may compute bound */
925     bool isRAWdep(G4_INST *inst); /* not const: may compute bound */
getImplAccSrc() const926     const G4_Operand* getImplAccSrc() const { return implAccSrc; }
getImplAccSrc()927           G4_Operand* getImplAccSrc()       { return implAccSrc; }
getImplAccDst() const928     const G4_DstRegRegion* getImplAccDst() const { return implAccDst; }
getImplAccDst()929           G4_DstRegRegion* getImplAccDst()       { return implAccDst; }
930     uint16_t getMaskOffset() const;
931     static G4_InstOption offsetToMask(int execSize, int offset, bool nibOk);
932     bool isRawMov() const;
933     bool hasACCSrc() const;
934     bool hasACCOpnd() const;
935     G4_Type getOpExecType(int& extypesize);
936     bool canHoistTo(const G4_INST *defInst, bool simdBB) const;
937     enum MovType {
938         Copy        = 0,        // MOV is a copy.
939         ZExt        = 1,        // MOV is a zero extension.
940         SExt        = 2,        // MOV is a sign extension.
941         Trunc       = 3,        // MOV is a truncation.
942         IntToFP     = 4,        // MOV is a conversion from Int to Float.
943         FPToInt     = 5,        // MOV is a conversion from Float to Int.
944         FPUpConv    = 6,        // MOV is a conversion from low precision to
945                                 // high precision.
946         FPDownConv  = 7,        // MOV is a conversion from high precision to
947                                 // low precision.
948         FPDownConvSafe  = 8,        // Float down conversion for DX shaders.
949         SuperMov        = 9,        // MOV is a mov with other effects.
950     };
951     MovType canPropagate() const;
952     //
953     // check if this is a simple integer add that can be propagated to a
954     // ternary instruction potentially
955     //
    // has to be:
957     //   {add,mul} (E)  dst.0<1>:d  src0:{d,w} {src1|imm32}:{d,w}
958     //   {add,mul} (1)  dst.X<1>:d  src0.X:{d,w} {src1|imm32}:{d,w}
959     // And there are other various constraints....
960     bool canPropagateBinaryToTernary() const;
961 
962     G4_Type getPropType(Gen4_Operand_Number opndNum, MovType MT, const G4_INST *mov) const;
963     bool isSignSensitive(Gen4_Operand_Number opndNum) const;
964     bool canPropagateTo(G4_INST* useInst, Gen4_Operand_Number opndNum, MovType MT, bool inSimdFlow, bool statelessAddrss = false);
965     bool canHoist(bool simdBB, const Options *opt) const;
966     bool isCommutative() const;
967 
968     bool hasNULLDst() const;
969     bool goodTwoGRFDst(bool& evenSplitDst);
getBinInst() const970     const BinInst *getBinInst() const { return bin; };
getBinInst()971           BinInst *getBinInst()       { return bin; };
setBinInst(BinInst * _bin)972     void        setBinInst(BinInst *_bin) { bin = _bin; };
setGenOffset(int64_t off)973     void setGenOffset(int64_t off) { genOffset = off; }
getGenOffset() const974     int64_t getGenOffset() const { return genOffset; }
975 
976     void computeLeftBoundForImplAcc(G4_Operand* opnd);
977 
setNoSrcDepSet(bool val)978     void setNoSrcDepSet(bool val)
979     {
980          if (val)
981          {
982              option |= InstOpt_NoSrcDepSet;
983          }
984          else
985          {
986              option &= ~InstOpt_NoSrcDepSet;
987          }
988     }
989 
isNoSrcDepSet() const990     bool isNoSrcDepSet() const
991     {
992         return (option & InstOpt_NoSrcDepSet) != 0;
993     }
994     bool isMixedMode() const;
995     bool canSupportCondMod() const;
996     bool canSwapSource() const;
997     bool canSupportSaturate() const;
998     bool canSupportSrcModifier() const;
999 
1000     bool writesFlag() const;
1001 
usesFlag() const1002     bool usesFlag() const
1003     {
1004         return predicate != nullptr || (op != G4_sel && mod != nullptr);
1005     }
1006 
is2SrcAlign16() const1007     bool is2SrcAlign16() const
1008     {
1009         return op == G4_dp2 || op == G4_dp3 || op == G4_dp4 || op == G4_dph;
1010     }
1011     bool isFastHFInstruction(void) const;
1012 
1013     bool isAlign1Ternary() const;
1014 
1015     // if instruction requries operansd to have DW (D/UD) type
needsDWType() const1016     bool needsDWType() const
1017     {
1018         return op == G4_mulh || op == G4_madw;
1019     }
1020 
1021     bool canExecSizeBeAcc(Gen4_Operand_Number opndNum) const;
1022     bool canDstBeAcc() const;
1023     bool canSrcBeAcc(Gen4_Operand_Number opndNum) const;
1024 
1025     bool canSrcBeAccBeforeHWConform(Gen4_Operand_Number opndNum) const;
1026 
1027     bool canSrcBeAccAfterHWConform(Gen4_Operand_Number opndNum) const;
1028 
1029     TARGET_PLATFORM getPlatform() const;
1030 
1031     void setMetadata(const std::string& key, MDNode* value);
1032 
getMetadata(const std::string & key) const1033     MDNode* getMetadata(const std::string& key) const
1034     {
1035         return MD ? MD->getMetadata(key) : nullptr;
1036     }
1037 
getTokenLocationNum() const1038     unsigned getTokenLocationNum() const
1039     {
1040         auto tokenLoc = getMetadata(Metadata::TokenLoc);
1041         if (!tokenLoc)
1042         {
1043             return 0;
1044         }
1045         MDTokenLocation *token = tokenLoc->asMDTokenLocation();
1046         if (token != nullptr)
1047         {
1048             return token->getTokenLocationNum();
1049         }
1050         else
1051         {
1052             return 0;
1053         }
1054     }
1055 
getTokenLoc(int i,unsigned short & tokenID) const1056     unsigned getTokenLoc(int i, unsigned short &tokenID) const
1057     {
1058         auto tokenLoc = getMetadata(Metadata::TokenLoc);
1059         if (!tokenLoc)
1060         {
1061             return 0;
1062         }
1063         MDTokenLocation *token = tokenLoc->asMDTokenLocation();
1064         if (token != nullptr)
1065         {
1066             tokenID = token->getToken(i);
1067             return token->getTokenLocation(i);
1068         }
1069         else
1070         {
1071             return 0;
1072         }
1073     }
1074 
1075     void setTokenLoc(unsigned short token, unsigned globalID);
1076 
1077     // adds a comment to this instruction
1078     // this appends the comment to any existing comment separating it with
1079     // some separator e.g. "foo; bar"
1080     void addComment(const std::string& comment);
1081 
1082     // replaces any old comments with this
1083     // prefer addComment if don't wish to stomp earlier comments
1084     void setComments(const std::string& comments);
1085 
1086     // For NoMaskWA. Set in PreRA WA for all instructions. PostRA WA will
1087     // apply on new instructions created by RA only.
getSkipPostRA() const1088     bool getSkipPostRA() const { return skipPostRA; }
setSkipPostRA(bool V)1089     void setSkipPostRA(bool V) { skipPostRA = V; }
1090 
getComments() const1091     std::string getComments() const
1092     {
1093         auto comments = getMetadata(Metadata::InstComment);
1094         return comments && comments->isMDString() ? comments->asMDString()->getData() : "";
1095     }
1096 
getLocation() const1097     MDLocation* getLocation() const
1098     {
1099         auto location = getMetadata(Metadata::InstLoc);
1100         return (location && location->isMDLocation()) ? location->asMDLocation() : nullptr;
1101     }
1102 
getLineNo() const1103     int getLineNo() const
1104     {
1105         auto location = getLocation();
1106         return location ? location->getLineNo() : 0;
1107     }
1108 
getSrcFilename() const1109     const char* getSrcFilename() const
1110     {
1111         auto location = getLocation();
1112         return location ? location->getSrcFilename() : nullptr;
1113     }
1114 
1115     void inheritDIFrom(const G4_INST* inst);
1116 
1117     void inheritSWSBFrom(const G4_INST* inst);
1118 
getBuilder()1119     const IR_Builder& getBuilder() { return builder; }
1120 
1121 private:
1122 
1123     // use inheritDIFrom() instead
setLocation(MDLocation * loc)1124     void setLocation(MDLocation* loc)
1125     {
1126         setMetadata(Metadata::InstLoc, loc);
1127     }
1128     bool detectComprInst() const;
1129     bool isLegalType(G4_Type type, Gen4_Operand_Number opndNum) const;
1130     bool isFloatOnly() const;
1131 };
1132 } // namespace vISA
1133 
1134 std::ostream& operator<<(std::ostream& os, vISA::G4_INST& inst);
1135 
1136 namespace vISA
1137 {
1138 
1139 class G4_InstBfn : public G4_INST
1140 {
1141     uint8_t funcCtrl;
1142 public:
G4_InstBfn(const IR_Builder & builder,G4_Predicate * prd,G4_CondMod * m,G4_Sat sat,G4_ExecSize size,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_Operand * s2,G4_InstOpts opt,uint8_t mBooleanFuncCtrl)1143     G4_InstBfn(
1144         const IR_Builder& builder,
1145         G4_Predicate* prd,
1146         G4_CondMod* m,
1147         G4_Sat sat,
1148         G4_ExecSize size,
1149         G4_DstRegRegion* d,
1150         G4_Operand* s0,
1151         G4_Operand* s1,
1152         G4_Operand* s2,
1153         G4_InstOpts opt,
1154         uint8_t mBooleanFuncCtrl) :
1155         G4_INST(builder, prd, G4_bfn, m, sat, size, d, s0, s1, s2, opt),
1156         funcCtrl(mBooleanFuncCtrl)
1157     {
1158     }
1159 
1160     G4_INST* cloneInst() override;
1161 
getBooleanFuncCtrl() const1162     uint8_t getBooleanFuncCtrl() const { return funcCtrl; }
1163 };
1164 
1165 
1166 class G4_InstDpas : public G4_INST
1167 {
1168     GenPrecision Src1Precision;   // Weights
1169     GenPrecision Src2Precision;   // Activation
1170     uint8_t      SystolicDepth;   // 1|2|4|8
1171     uint8_t      RepeatCount;     // 1-8
1172 
1173     enum {
1174         OPS_PER_CHAN_1 = 1,
1175         OPS_PER_CHAN_2 = 2,
1176         OPS_PER_CHAN_4 = 4,
1177         OPS_PER_CHAN_8 = 8
1178     };
1179 
1180     public:
GetPrecisionSizeInBits(GenPrecision P)1181         static uint32_t GetPrecisionSizeInBits(GenPrecision P)
1182         {
1183             return GenPrecisionTable[(int)P].BitSize;
1184         }
1185 
G4_InstDpas(const IR_Builder & builder,G4_opcode o,G4_ExecSize size,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_Operand * s2,G4_Operand * s3,G4_InstOpts opt,GenPrecision a,GenPrecision w,uint8_t sd,uint8_t rc)1186         G4_InstDpas(
1187             const IR_Builder& builder,
1188             G4_opcode o,
1189             G4_ExecSize size,
1190             G4_DstRegRegion* d,
1191             G4_Operand* s0,
1192             G4_Operand* s1,
1193             G4_Operand* s2,
1194             G4_Operand* s3,
1195             G4_InstOpts opt,
1196             GenPrecision a,
1197             GenPrecision w,
1198             uint8_t      sd,
1199             uint8_t      rc) :
1200             G4_INST(builder, nullptr, o, nullptr, g4::NOSAT, size, d, s0, s1, s2, s3, opt),
1201             Src2Precision(a), Src1Precision(w), SystolicDepth(sd), RepeatCount(rc)
1202         {
1203         }
1204 
1205         G4_INST* cloneInst() override;
1206 
1207         // Check if this is int dpas or half-float dpas
isBF16() const1208         bool isBF16() const { return Src1Precision == GenPrecision::BF16; }
isFP16() const1209         bool isFP16() const { return Src1Precision == GenPrecision::FP16; }
isBF8() const1210         bool isBF8() const { return Src1Precision == GenPrecision::BF8; }
isTF32() const1211         bool isTF32() const { return Src1Precision == GenPrecision::TF32; }
1212         bool isInt() const;
1213         bool is2xInt8() const; // true if it is 2xint8 dpas
1214 
1215         uint8_t getOpsPerChan() const;
getSystolicDepth() const1216         uint8_t getSystolicDepth() const { return SystolicDepth; }
getRepeatCount() const1217         uint8_t getRepeatCount() const { return RepeatCount; }
getSrc1Precision() const1218         GenPrecision getSrc1Precision() const { return Src1Precision; }
getSrc2Precision() const1219         GenPrecision getSrc2Precision() const { return Src2Precision; }
1220 
setRepeatCount(uint8_t rc)1221         void setRepeatCount(uint8_t rc) { RepeatCount = rc; }
1222         // data size per lane (data size per each systolic depth)
getPrecisionSizePerLaneInByte(GenPrecision P) const1223         uint32_t getPrecisionSizePerLaneInByte(GenPrecision P) const {
1224             uint32_t PBits = G4_InstDpas::GetPrecisionSizeInBits(P);
1225             return (PBits * getOpsPerChan() / 8);
1226         }
getSrc1SizePerLaneInByte() const1227         uint32_t getSrc1SizePerLaneInByte() const {
1228             return getPrecisionSizePerLaneInByte(Src1Precision);
1229         }
getSrc2SizePerLaneInByte() const1230         uint32_t getSrc2SizePerLaneInByte() const {
1231             return getPrecisionSizePerLaneInByte(Src2Precision);
1232         }
1233 
mayExceedTwoGRF() const1234         bool mayExceedTwoGRF() const override { return true; }
1235         void computeRightBound(G4_Operand* opnd) override;
1236 };
1237 
1238 class G4_InstMath : public G4_INST
1239 {
1240     G4_MathOp mathOp;
1241 public:
1242 
G4_InstMath(const IR_Builder & builder,G4_Predicate * prd,G4_opcode o,G4_CondMod * m,G4_Sat sat,G4_ExecSize execSize,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_InstOpts opt,G4_MathOp mOp=MATH_RESERVED)1243     G4_InstMath(
1244         const IR_Builder& builder,
1245         G4_Predicate* prd,
1246         G4_opcode o,
1247         G4_CondMod* m,
1248         G4_Sat sat,
1249         G4_ExecSize execSize,
1250         G4_DstRegRegion* d,
1251         G4_Operand* s0,
1252         G4_Operand* s1,
1253         G4_InstOpts opt,
1254         G4_MathOp mOp = MATH_RESERVED) :
1255         G4_INST(builder, prd, o, m, sat, execSize, d, s0, s1, opt),
1256         mathOp(mOp)
1257     {
1258 
1259     }
1260 
1261     G4_INST* cloneInst() override;
1262 
isIEEEMath() const1263     bool isIEEEMath() const { return mathOp == MATH_INVM || mathOp == MATH_RSQRTM; }
isMathIntDiv() const1264     bool isMathIntDiv() const { return mathOp >= MATH_INT_DIV && mathOp < MATH_INVM; }
isOneSrcMath() const1265     bool isOneSrcMath() const
1266     {
1267         return
1268             mathOp == MATH_INV ||
1269             mathOp == MATH_LOG ||
1270             mathOp == MATH_EXP ||
1271             mathOp == MATH_SQRT ||
1272             mathOp == MATH_RSQ ||
1273             mathOp == MATH_SIN ||
1274             mathOp == MATH_COS ||
1275             mathOp == MATH_RSQRTM;
1276     }
1277 
getMathCtrl() const1278     G4_MathOp getMathCtrl() const { return mathOp; }
1279 };
1280 
1281 class G4_InstCF : public G4_INST
1282 {
1283     // operands for CF instructions
1284     // -- if, else, endif, while, break, cont, goto: JIP and UIP are used for the branch target.
1285     // -- jmpi: src0 stores the branch target, and it can be either a label (direct) or a SrcRegRegion (indirect)
1286     // -- call: src0 stores the callee address, and it must be a label
1287     // -- fcall: src0 stores the callee address, and it can be either a label (direct) or a SrcRegRegion (indirect).
1288     //           dst contains the ret IP and call mask.
1289     // -- ret, fret: src0 contains the ret IP and call mask
1290     // Note that for call/ret the retIP variable is not created till RA
1291     G4_Label*       jip; // GT JIP.
1292     G4_Label*         uip; // GT UIP.
1293     // list of labels that this instruction could jump to.  Only used for switch jmps
1294     std::list<G4_Label*> indirectJmpTarget;
1295 
1296     // True if this is a backward branch (including while)
1297     bool isBackwardBr;
1298 
1299     // True if this branch is a uniform. By uniform, it means that all active lanes
1300     // at the branch goes to the same target (Valid for if/while/break/goto/jmpi only.
1301     // This info could be encoded in instOpt.).  Note that all active lanes at the
1302     // branch could be subset of all active lanes on entry to shader/kernel.
1303     bool isUniformBr;
1304 
1305 public:
1306 
1307     static const uint32_t unknownCallee = 0xFFFF;
1308 
1309     // used by non jmp/call/ret instructions
G4_InstCF(const IR_Builder & builder,G4_Predicate * prd,G4_opcode op,G4_ExecSize size,G4_Label * jipLabel,G4_Label * uipLabel,G4_InstOpts instOpt)1310     G4_InstCF(const IR_Builder& builder,
1311         G4_Predicate* prd,
1312         G4_opcode op,
1313         G4_ExecSize size,
1314         G4_Label* jipLabel,
1315         G4_Label* uipLabel,
1316         G4_InstOpts instOpt) :
1317         G4_INST(builder, prd, op, nullptr, g4::NOSAT, size, nullptr, nullptr, nullptr, instOpt),
1318         jip(jipLabel), uip(uipLabel), isBackwardBr(op == G4_while), isUniformBr(false)
1319     {
1320         isUniformBr = (op == G4_jmpi ||
1321                        (op == G4_goto && (size == g4::SIMD1 || prd == nullptr)));
1322     }
1323 
1324     // used by jump/call/ret
G4_InstCF(const IR_Builder & builder,G4_Predicate * prd,G4_opcode o,G4_CondMod * m,G4_ExecSize size,G4_DstRegRegion * d,G4_Operand * s0,G4_InstOpts opt)1325     G4_InstCF(
1326         const IR_Builder& builder,
1327         G4_Predicate* prd,
1328         G4_opcode o,
1329         G4_CondMod* m,
1330         G4_ExecSize size,
1331         G4_DstRegRegion* d,
1332         G4_Operand* s0,
1333         G4_InstOpts opt) :
1334         G4_INST(builder, prd, o, m, g4::NOSAT, size, d, s0, nullptr, opt),
1335         jip(NULL), uip(NULL), isBackwardBr(o == G4_while), isUniformBr(false)
1336     {
1337         isUniformBr = (op == G4_jmpi ||
1338             (op == G4_goto && (size == g4::SIMD1 || prd == nullptr)));
1339     }
1340 
isCFInst() const1341     bool isCFInst() const override { return true; }
1342 
setJip(G4_Label * opnd)1343     void setJip(G4_Label* opnd) {jip = opnd;}
getJip() const1344     const G4_Label* getJip() const {return jip;}
getJip()1345           G4_Label* getJip()       {return jip;}
1346     const char* getJipLabelStr() const;
1347 
setUip(G4_Label * opnd)1348     void setUip(G4_Label* opnd) {uip = opnd;}
getUip() const1349     const G4_Label* getUip() const {return uip;}
getUip()1350           G4_Label* getUip()       {return uip;}
1351     const char* getUipLabelStr() const;
1352 
addIndirectJmpLabel(G4_Label * label)1353     void addIndirectJmpLabel(G4_Label* label)
1354     {
1355         MUST_BE_TRUE(isIndirectJmp(), "may only be called for indirect jmp");
1356         indirectJmpTarget.push_back(label);
1357     }
getIndirectJmpLabels()1358     const std::list<G4_Label*>& getIndirectJmpLabels()
1359     {
1360         MUST_BE_TRUE(isIndirectJmp(), "may only be called for indirect jmp");
1361         return indirectJmpTarget;
1362     }
1363 
setBackward(bool val)1364     void setBackward(bool val) {isBackwardBr = val;}
1365 
isBackward() const1366     bool isBackward() const {return isBackwardBr;}
1367 
setUniform(bool val)1368     void setUniform(bool val) { isUniformBr = val; }
isUniform() const1369     bool isUniform() const { return isUniformBr; }
1370 
1371     bool isIndirectJmp() const;
1372 
1373     bool isUniformGoto(unsigned KernelSimdSize) const;
1374 
1375     bool isIndirectCall() const;
1376 
1377     // for direct call, this is null till after the compilation units are stitched together
1378     // for indirect call, this is src0
getCalleeAddress() const1379     G4_Operand* getCalleeAddress() const
1380     {
1381         if (op == G4_pseudo_fcall)
1382         {
1383             return getSrc(0);
1384         }
1385         else
1386         {
1387             return nullptr;
1388         }
1389     }
1390 
pseudoCallToCall()1391     void pseudoCallToCall()
1392     {
1393         assert(isFCall() || op == G4_pseudo_fc_call);
1394         setOpcode(G4_call);
1395     }
1396 
pseudoRetToRet()1397     void pseudoRetToRet()
1398     {
1399         assert(isFReturn() || op == G4_pseudo_fc_ret);
1400         setOpcode(G4_return);
1401     }
1402 
callToFCall()1403     void callToFCall()
1404     {
1405         assert(isCall());
1406         setOpcode(G4_pseudo_fcall);
1407     }
1408 
retToFRet()1409     void retToFRet()
1410     {
1411         if (isFReturn()) return;
1412         assert(isReturn());
1413         setOpcode(G4_pseudo_fret);
1414     }
1415 }; // G4_InstCF
1416 
1417 class G4_InstSend : public G4_INST
1418 {
1419     // Once initialized, remain unchanged as it could be shared among several sends.
1420     G4_SendDesc* msgDesc;
1421 
1422 public:
1423 
1424     // send (one source)
1425     // desc is either imm or a0.0 and in src1
1426     // extDesc is always immediate and encoded in md
1427     G4_InstSend(
1428         const IR_Builder& builder,
1429         G4_Predicate* prd,
1430         G4_opcode o,
1431         G4_ExecSize execSize,
1432         G4_DstRegRegion* dst,
1433         G4_SrcRegRegion* payload,
1434         G4_Operand* desc,
1435         G4_InstOpts opt,
1436         G4_SendDesc* md);
1437 
1438     // split send (two source)
1439     // desc is either imm or a0.0 and in src2
1440     // extDesc is either imm or a0.N and in src3
1441     G4_InstSend(
1442         const IR_Builder& builder,
1443         G4_Predicate* prd,
1444         G4_opcode o,
1445         G4_ExecSize execSize,
1446         G4_DstRegRegion* dst,
1447         G4_SrcRegRegion* payload,
1448         G4_SrcRegRegion* src1,
1449         G4_Operand* desc,
1450         G4_Operand* extDesc,
1451         G4_InstOpts opt,
1452         G4_SendDesc* md);
1453 
1454     G4_INST* cloneInst() override;
1455 
isSendc() const1456     bool isSendc() const { return op == G4_sendc || op == G4_sendsc; }
setSendc()1457     void setSendc()
1458     {
1459         // no effect if op is already G4_sendc/G4_sendsc
1460         if (op == G4_send)
1461         {
1462             op = G4_sendc;
1463         }
1464         else if (op == G4_sends)
1465         {
1466             op = G4_sendsc;
1467         }
1468     }
mayExceedTwoGRF() const1469     bool mayExceedTwoGRF() const override { return true; }
1470 
getMsgDescOperand() const1471     G4_Operand* getMsgDescOperand() const
1472     {
1473         return isSplitSend() ? srcs[2] : srcs[1];
1474     }
1475 
getMsgExtDescOperand() const1476     G4_Operand* getMsgExtDescOperand() const
1477     {
1478         assert(isSplitSend() && "must be a split send instruction");
1479         return srcs[3];
1480     }
1481 
getMsgDesc() const1482     G4_SendDesc *getMsgDesc() const override
1483     {
1484         return msgDesc;
1485     }
1486 
1487     void setMsgDesc(G4_SendDesc *in);
1488 
1489     // restrictions on whether a send may be EOT:
1490     // -- The posted destination operand must be null
1491     // -- A thread must terminate with a send instruction with message to a shared function on the output message bus;
1492     //    therefore, it cannot terminate with a send instruction with message to the following shared functions: Sampler unit, NULL function
1493     //    For example, a thread may terminate with a URB write message or a render cache write message.
1494     // -- A root thread originated from the media (generic) pipeline must terminate
1495     //    with a send instruction with message to the Thread Spawner unit. A child
1496     //    thread should also terminate with a send to TS.
canBeEOT() const1497     bool canBeEOT() const
1498     {
1499         if (!msgDesc->isRaw())
1500             return false;
1501         bool canEOT = getMsgDesc()->getDstLenRegs() == 0 &&
1502             (getMsgDesc()->getSFID() != SFID::NULL_SFID &&
1503                 getMsgDesc()->getSFID() != SFID::SAMPLER);
1504 
1505         return canEOT;
1506     }
1507 
isFence() const1508     bool isFence() const {return getMsgDesc()->isFence();}
1509 
isEOT() const1510     bool isEOT() const override {return msgDesc->isEOT();}
1511 
1512     bool isDirectSplittableSend();
1513 
1514     void computeRightBound(G4_Operand* opnd) override;
1515 
1516     void emit_send(std::ostream& output, bool symbol_dst, bool *symbol_srcs);
1517     void emit_send(std::ostream& output, bool dotStyle = false);
1518     void emit_send_desc(std::ostream& output);
1519 
setSerialize()1520     void setSerialize() {option = option | InstOpt_Serialize;}
isSerializedInst() const1521     bool isSerializedInst() const { return (option & InstOpt_Serialize) != 0; }
1522 }; // G4_InstSend
1523 
1524 }
1525 
// Kinds of pseudo-kill. The numbering is explicit and starts at 1.
enum PseudoKillType
{
    FromLiveness = 1,
    Src          = 2,
    Other        = 3,
};
1532 
1533 // a special intrinsic instruction for any pseudo operations. An intrinsic inst has the following characteristics
1534 // -- it is modeled as a call to some unknown function
// -- 1 dst and up to 3 srcs are allowed for most intrinsics (pseudo_addr_mov is listed with 8 srcs in G4_Intrinsics)
// -- conditional modifier and saturation are currently not allowed (can add later)
1537 // -- an intrinsic may reserve additional GRF/addr/flag for its code gen, which RA needs to honor
1538 // -- it must be lowered/deleted before certain phases in the finalizer (no later than binary encoding)
1539 
// All intrinsic opcodes go here.
// The order must match that of the G4_Intrinsics table; NumIntrinsics is
// the count of real entries and must stay last.
enum class Intrinsic
{
    Wait,           // 0
    Use,            // 1
    MemFence,       // 2
    PseudoKill,     // 3
    // ToDo: can we merge Use and PseudoUse? former is from input while latter
    // is internally generated.
    PseudoUse,      // 4
    Spill,          // 5
    Fill,           // 6
    Split,          // 7
    CallerSave,     // 8
    CallerRestore,  // 9
    CalleeSave,     // 10
    CalleeRestore,  // 11
    FlagSpill,      // 12
    PseudoAddrMov,  // 13
    NumIntrinsics
};
1560 
// Phases referenced by IntrinsicInfo::loweredBy, in declaration order.
enum class Phase
{
    CFG,            // 0
    Optimizer,      // 1
    HWConformity,   // 2
    RA,             // 3
    Scheduler,      // 4
    BinaryEncoding  // 5
};
1570 
1571 struct IntrinsicInfo
1572 {
1573     Intrinsic id;
1574     const char* name;
1575     int numDst;
1576     int numSrc;
1577     Phase loweredBy;    //intrinsic must be lowered before entering this phase
1578     struct {
1579         int numTmpGRF;       //number of tmp GRFs needed for this intrinsic
1580         int numTmpAddr;     //number of tmp addresses needed (in unit of uw)
1581         int numTmpFlag;     //number of tmp flags needed (in unit of 16-bit)
1582         bool useR0;
1583         bool useA0;
1584     } temps;
1585 };
1586 
// Per-intrinsic property table. G4_InstIntrinsic indexes it with the integer
// value of its Intrinsic id, so the entries must stay in the same order as the
// enumerators of Intrinsic and there must be exactly NumIntrinsics of them.
// Column meaning follows the fields of IntrinsicInfo; the trailing braces fill
// IntrinsicInfo::temps (numTmpGRF, numTmpAddr, numTmpFlag, useR0, useA0).
static const IntrinsicInfo G4_Intrinsics[(int)Intrinsic::NumIntrinsics] =
{
    //  id                      name            numDst  numSrc  loweredBy               temp
    {Intrinsic::Wait,           "wait",         0,      0,      Phase::Optimizer,       { 0, 0, 0, false, false } },
    {Intrinsic::Use,            "use",          0,      1,      Phase::Scheduler,       { 0, 0, 0, false, false } },
    {Intrinsic::MemFence,       "mem_fence",    0,      0,      Phase::BinaryEncoding,  { 0, 0, 0, false, false } },
    {Intrinsic::PseudoKill,     "pseudo_kill",  1,      1,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::PseudoUse,      "pseudo_use",   0,      1,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::Spill,          "spill",        1,      2,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::Fill,           "fill",         1,      1,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::Split,          "split",        1,      1,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::CallerSave,     "caller_save",  1,      0,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::CallerRestore,  "caller_restore", 0,    1,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::CalleeSave,     "callee_save",  1,      0,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::CalleeRestore,  "callee_restore", 0,    1,      Phase::RA,              { 0, 0, 0, false, false } },
    {Intrinsic::FlagSpill,            "flagSpill",          0,      1,      Phase::RA,       { 0, 0, 0, false, false } },
    {Intrinsic::PseudoAddrMov,            "pseudo_addr_mov",          1,      8,      Phase::Optimizer,       { 0, 0, 0, false, false } },
};
1605 
1606 namespace vISA
1607 {
1608 class G4_InstIntrinsic : public G4_INST
1609 {
1610     const Intrinsic intrinsicId;
1611     std::array<G4_Operand*, G4_MAX_INTRINSIC_SRCS> srcs;
1612 
1613     // these should be set by RA if intrinsic requires tmp GRF/addr/flag
1614     int tmpGRFStart;
1615     int tmpAddrStart;
1616     int tmpFlagStart;
1617 
1618 public:
1619 
G4_InstIntrinsic(const IR_Builder & builder,G4_Predicate * prd,Intrinsic intrinId,G4_ExecSize execSize,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_Operand * s2,G4_InstOpts opt)1620     G4_InstIntrinsic(
1621         const IR_Builder& builder,
1622         G4_Predicate* prd,
1623         Intrinsic intrinId,
1624         G4_ExecSize execSize,
1625         G4_DstRegRegion* d,
1626         G4_Operand* s0,
1627         G4_Operand* s1,
1628         G4_Operand* s2,
1629         G4_InstOpts opt) :
1630         G4_INST(builder, prd, G4_intrinsic, nullptr, g4::NOSAT, execSize, d, s0, s1, s2, opt),
1631         intrinsicId(intrinId), tmpGRFStart(-1), tmpAddrStart(-1), tmpFlagStart(-1)
1632     {
1633 
1634     }
1635 
1636     G4_InstIntrinsic(
1637         const IR_Builder& builder,
1638         G4_Predicate* prd,
1639         Intrinsic intrinId,
1640         G4_ExecSize execSize,
1641         G4_DstRegRegion* d,
1642         G4_Operand* s0,
1643         G4_Operand* s1,
1644         G4_Operand* s2,
1645         G4_Operand* s3,
1646         G4_Operand* s4,
1647         G4_Operand* s5,
1648         G4_Operand* s6,
1649         G4_Operand* s7,
1650         G4_InstOpts opt);
1651 
1652     G4_Operand* getIntrinsicSrc(unsigned i) const;
1653     G4_Operand* getOperand(Gen4_Operand_Number opnd_num) const;
1654 
1655     void setIntrinsicSrc(G4_Operand* opnd, unsigned i);
1656 
1657     G4_INST* cloneInst() override;
1658 
getNumDst() const1659     int getNumDst() const { return G4_Intrinsics[(int) intrinsicId].numDst; }
getNumSrc() const1660     int getNumSrc() const { return G4_Intrinsics[(int) intrinsicId].numSrc; }
1661 
getIntrinsicId() const1662     Intrinsic   getIntrinsicId()    const { return intrinsicId; }
getName() const1663     const char* getName()           const { return G4_Intrinsics[(int) intrinsicId].name; }
getLoweredByPhase() const1664     Phase       getLoweredByPhase() const { return G4_Intrinsics[(int)intrinsicId].loweredBy; }
1665 
getTmpGRFStart() const1666     int getTmpGRFStart() const { return tmpGRFStart; }
setTmpGRFStart(int startGRF)1667     void setTmpGRFStart(int startGRF) { tmpGRFStart = startGRF; }
getTmpAddrStart() const1668     int getTmpAddrStart() const { return tmpAddrStart; }
setTmpAddrStart(int startAddr)1669     void setTmpAddrStart(int startAddr) { tmpAddrStart = startAddr; }
getTmpFlagStart() const1670     int getTmpFlagStart() const { return tmpFlagStart; }
setTmpFlagStart(int startFlag)1671     void setTmpFlagStart(int startFlag) { tmpFlagStart = startFlag; }
1672 };
1673 }
1674 
1675 // RegionWH and RegionV are special for the different modes of source register indirect addressing
1676 // RegionWH = <width, horzStride>, we set vertStride to UNDEFINED_SHORT
1677 // RegionV = <horzStride>, we set both vertStride and width to UNDEFINED_SHORT
1678 //
1679 struct RegionDesc
1680 {
1681     const uint16_t vertStride;
1682     const uint16_t width;
1683     const uint16_t horzStride;
1684 
RegionDescRegionDesc1685     RegionDesc(uint16_t vs, uint16_t w, uint16_t hs) : vertStride(vs), width(w), horzStride(hs)
1686     {
1687         assert(isLegal() && "illegal region desc");
1688     }
operator newRegionDesc1689     void* operator new(size_t sz, vISA::Mem_Manager& m) {return m.alloc(sz);}
1690 
1691     // The legal values for Width are {1, 2, 4, 8, 16}.
1692     // The legal values for VertStride are {0, 1, 2, 4, 8, 16, 32}.
1693     // The legal values for HorzStride are {0, 1, 2, 4}.
isLegalRegionDesc1694     bool isLegal() const {return isLegal(vertStride, width, horzStride);}
1695 
1696     static bool isLegal(unsigned vs, unsigned w, unsigned hs);
1697 
1698     enum RegionDescKind {
1699         RK_Other,   // all others like <4; 2, 1> etc.
1700         RK_Stride0, // <0;1,0> aka scalar
1701         RK_Stride1, // <1;1,0> aka contiguous
1702         RK_Stride2, // <2;1,0>
1703         RK_Stride4  // <4;1,0>
1704     };
1705 
1706     // Determine the region description kind. Strided case only.
1707     static RegionDescKind getRegionDescKind(uint16_t size, uint16_t vstride,
1708                                             uint16_t width, uint16_t hstride);
1709 
1710 
isRegionWHRegionDesc1711     bool isRegionWH() const {return vertStride == UNDEFINED_SHORT && width != UNDEFINED_SHORT;}
isRegionVRegionDesc1712     bool isRegionV() const {return vertStride == UNDEFINED_SHORT && width == UNDEFINED_SHORT;}
isScalarRegionDesc1713     bool isScalar() const { return (vertStride == 0 && horzStride == 0) || (width == 1 && vertStride == 0); }        // to support decompression
isRegionSWRegionDesc1714     bool isRegionSW() const {return vertStride != UNDEFINED_SHORT && width == UNDEFINED_SHORT && horzStride == UNDEFINED_SHORT;}
isEqualRegionDesc1715     bool isEqual(const RegionDesc *r) const { return vertStride == r->vertStride && width == r->width && horzStride == r->horzStride; }        // to support re-compression
1716     void emit(std::ostream& output) const;
isPackedRegionRegionDesc1717     bool isPackedRegion() const { return ((horzStride == 0 && vertStride <= 1) || (horzStride == 1 && vertStride <= width)); }
isFlatRegionRegionDesc1718     bool isFlatRegion() const { return (isScalar() || vertStride == horzStride * width); }
isRepeatRegionRegionDesc1719     bool isRepeatRegion(unsigned short execSize) const { return (!isScalar() && (execSize > width && vertStride < horzStride * width)); }
1720 
1721     // Contiguous regions are:
1722     // (1) ExSize is 1, or
1723     // (2) <1; 1, *> with arbitrary ExSize, or
1724     // (3) <N; N, 1> with arbitrary ExSize, or
1725     // (4) <*; N, 1> with ExSize == N.
1726     //
1727     // A region is contiguous iff sequence
1728     // { f(0, 0), f(0, 1), ..., f(1, 0), ..., f(ExSize / width - 1, width - 1) }
1729     // has a common difference 1, where
1730     //
1731     // f(i, j) = i x vstride + j x hstride
1732     //
1733     // for 0 <= i < ExSize / width and 0 <= j < width
1734     bool isContiguous(unsigned ExSize) const;
1735     bool isSingleNonUnitStride(uint32_t execSize, uint16_t& stride) const;
1736     bool isSingleStride(uint32_t execSize, uint16_t &stride) const;
isSingleStrideRegionDesc1737     bool isSingleStride(uint32_t execSize) const
1738     {
1739         uint16_t stride = 0;
1740         return isSingleStride(execSize, stride);
1741     }
1742 };
1743 
1744 namespace vISA
1745 {
1746     class LiveIntervalInfo
1747     {
1748     public:
1749         enum DebugLiveIntervalState
1750         {
1751             Open = 0,
1752             Closed = 1
1753         };
1754 
1755     private:
1756         std::list<std::pair<uint32_t, uint32_t>> liveIntervals;
1757         uint32_t cleanedAt;
1758         DebugLiveIntervalState state;
1759         uint32_t openIntervalVISAIndex;
1760 
1761     public:
operator new(size_t sz,Mem_Manager & m)1762         void *operator new(size_t sz, Mem_Manager& m) { return m.alloc(sz); }
1763 
1764         void addLiveInterval(uint32_t start, uint32_t end);
1765         void liveAt(uint32_t cisaOff);
1766         void getLiveIntervals(std::vector<std::pair<uint32_t, uint32_t>>& intervals);
clearLiveIntervals()1767         void clearLiveIntervals() { liveIntervals.clear(); }
1768 
getState() const1769         DebugLiveIntervalState getState() const { return state; }
1770 
setStateOpen(uint32_t VISAIndex)1771         void setStateOpen(uint32_t VISAIndex)
1772         {
1773             //MUST_BE_TRUE(state == Closed, "Cannot open internal in Open state");
1774             state = Open;
1775             openIntervalVISAIndex = VISAIndex;
1776         }
1777 
setStateClosed(uint32_t VISAIndex)1778         void setStateClosed(uint32_t VISAIndex)
1779         {
1780             //MUST_BE_TRUE(state == Open, "Cannot close interval in Close state");
1781             state = Closed;
1782             addLiveInterval(VISAIndex, openIntervalVISAIndex);
1783         }
1784 
isLiveAt(uint32_t VISAIndex) const1785         bool isLiveAt(uint32_t VISAIndex) const
1786         {
1787             for (auto& k : liveIntervals)
1788             {
1789                 if (k.first <= VISAIndex && k.second >= VISAIndex)
1790                     return true;
1791             }
1792             return false;
1793         }
1794 
LiveIntervalInfo()1795         LiveIntervalInfo() { cleanedAt = 0; state = Closed; openIntervalVISAIndex = 0; }
1796     };
1797 }
1798 
// Classification of the mask associated with a variable.
// NOTE(review): presumably consumed by the register allocator's augmentation
// step -- confirm against the users of this enum.
// The surrounding typedef re-declares the enum under its own name; it is
// redundant in C++ and kept only to leave the declaration unchanged.
typedef enum class AugmentationMasks
{
    Undetermined = 0,
    Default16Bit = 1,
    Default32Bit = 2,
    Default64Bit = 3,
    DefaultPredicateMask = 4,
    NonDefault = 5,
} AugmentationMasks;
1808 
1809 namespace vISA
1810 {
1811 
// Number of elements of type T that fit in one GRF: getGRFSize() divided by
// TypeSize(T). The division is integral, so any remainder is dropped.
template <G4_Type T>
unsigned int numEltPerGRF() {return getGRFSize() / TypeSize(T);}
// Explicit instantiations for the element types listed below.
// NOTE(review): these are explicit instantiation definitions placed in a
// header; confirm this is intended for every translation unit that includes it.
template unsigned int numEltPerGRF<Type_UD>();
template unsigned int numEltPerGRF<Type_D>();
template unsigned int numEltPerGRF<Type_UW>();
template unsigned int numEltPerGRF<Type_W>();
template unsigned int numEltPerGRF<Type_UB>();
template unsigned int numEltPerGRF<Type_B>();
template unsigned int numEltPerGRF<Type_F>();
template unsigned int numEltPerGRF<Type_VF>();
template unsigned int numEltPerGRF<Type_V>();
template unsigned int numEltPerGRF<Type_DF>();
template unsigned int numEltPerGRF<Type_BOOL>();
template unsigned int numEltPerGRF<Type_UV>();
template unsigned int numEltPerGRF<Type_Q>();
template unsigned int numEltPerGRF<Type_UQ>();
template unsigned int numEltPerGRF<Type_HF>();
template unsigned int numEltPerGRF<Type_NF>();
template unsigned int numEltPerGRF<Type_BF>();
1831 
numEltPerGRF(G4_Type t)1832 inline unsigned int numEltPerGRF(G4_Type t)
1833 {
1834     return getGRFSize() / TypeSize(t);
1835 }
1836 
1837 class G4_Declare
1838 {
1839     friend class IR_Builder;
1840 
1841     const char*        name;        // Var_Name
1842     G4_RegFileKind     regFile;     // from which reg file
1843     G4_Type            elemType;    // element type
1844 
1845     G4_RegVar*        regVar;        // corresponding reg var
1846 
1847     G4_Declare *    AliasDCL;    // Alias Declare
1848     unsigned        AliasOffset;    // Alias Offset
1849 
1850     unsigned        startID;
1851 
1852     uint16_t spillFlag : 1;    // Indicate this declare gets spill reg
1853     uint16_t addressed : 1;     // whether this declare is address-taken
1854 
1855     uint16_t doNotSpill : 1;    // indicates that this declare should never be spilled
1856 
1857     uint16_t liveIn : 1;   // indicate if this varaible has "Input" or "Input_Output" attribute
1858     uint16_t liveOut : 1;  // indicate if this varaible has "Output" or "Input_Output" attribute
1859     uint16_t payloadLiveOut : 1;  // indicate if this varaible has "Output" attribute for the payload section
1860 
1861     // This is an optimization *hint* to indicate if optimizer should skip
1862     // widening this variable or not (e.g. byte to word).
1863     uint16_t noWidening : 1;
1864 
1865     uint16_t capableOfReuse : 1;
1866     uint16_t isSplittedDcl : 1;
1867     uint16_t isPartialDcl : 1;
1868     uint16_t refInSend : 1;
1869     uint16_t PreDefinedVar : 1;  // indicate if this dcl is created from preDefinedVars.
1870 
1871     unsigned declId;     // global decl id for this builder
1872 
1873     uint32_t numElements;
1874     unsigned numFlagElements;
1875 
1876     // byte offset of this declare from the base declare.  For top-level declares this value is 0
1877     int offsetFromBase;
1878 
1879     // if set to nonzero, indicates the declare is only used by subroutine "scopeID".
1880     // it is used to prevent a subroutin-local declare from escaping its subroutine when doing liveness
1881     unsigned scopeID;
1882 
1883     // For GRFs, store byte offset of allocated GRF
1884     unsigned GRFBaseOffset;
1885 
1886     // fields that are only ever referenced by RA and spill code
1887     // ToDo: they should be moved out of G4_Declare and stored as maps in RA/spill
1888     G4_Declare* spillDCL;  // if an addr/flag var is spilled, SpillDCL is the location (GRF) holding spilled value
1889 
1890     G4_Declare* addrTakenSpillFillDcl; // dcl to use for address taken spill/fill temp
1891 
1892     // this should only be called by builder
setNumberFlagElements(uint8_t numEl)1893     void setNumberFlagElements(uint8_t numEl)
1894     {
1895         assert(regFile == G4_FLAG && "may only be called on a flag");
1896         numFlagElements = numEl;
1897     }
1898 
1899 public:
G4_Declare(const char * n,G4_RegFileKind k,uint32_t numElems,G4_Type ty,std::vector<G4_Declare * > & dcllist)1900     G4_Declare(const char*    n,
1901                G4_RegFileKind k,
1902                uint32_t numElems,
1903                G4_Type        ty,
1904                std::vector<G4_Declare*>& dcllist) :
1905       name(n), regFile(k), elemType(ty), addressed(false), liveIn(false),
1906       liveOut(false), payloadLiveOut(false), noWidening(false), isSplittedDcl(false), isPartialDcl(false),
1907       refInSend(false), PreDefinedVar(false), numElements(numElems), offsetFromBase(-1)
1908     {
1909         //
1910         // set the rest values to default uninitialized values
1911         //
1912 
1913         regVar        = NULL;
1914         AliasDCL = NULL;
1915         AliasOffset = 0;
1916 
1917         if (k == G4_FLAG)
1918         {
1919             //need original number of elements for any*
1920             numFlagElements = numElems * 16;
1921         }
1922         else
1923         {
1924             numFlagElements = 0;
1925         }
1926 
1927         spillFlag = false;
1928         spillDCL = NULL;
1929 
1930         addrTakenSpillFillDcl = NULL;
1931 
1932         startID = 0;
1933 
1934         doNotSpill = false;
1935         capableOfReuse = false;
1936 
1937         scopeID = 0;
1938 
1939         GRFBaseOffset = 0;
1940         declId = (unsigned)dcllist.size();
1941         dcllist.push_back(this);
1942     }
1943 
operator new(size_t sz,Mem_Manager & m)1944     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
1945 
setGRFBaseOffset(unsigned int offset)1946     void setGRFBaseOffset(unsigned int offset) { GRFBaseOffset = offset; }
getGRFBaseOffset() const1947     unsigned int getGRFBaseOffset() const { return GRFBaseOffset; }
1948 
setLiveIn()1949     void setLiveIn() { liveIn = true; }
isLiveIn() const1950     bool isLiveIn() const { return liveIn; }
setLiveOut()1951     void setLiveOut() { liveOut = true; }
resetLiveOut()1952     void resetLiveOut() { liveOut = false; }
setPayloadLiveOut()1953     void setPayloadLiveOut() { payloadLiveOut = true; }
1954 
setDoNotWiden()1955     void setDoNotWiden() { noWidening = true; }
doNotWiden() const1956     bool doNotWiden() const { return noWidening; }
1957 
getScopeID() const1958     unsigned getScopeID() const { return scopeID;  }
updateScopeID(unsigned id)1959     void updateScopeID(unsigned id) { if (!isInput() && (scopeID < id)) scopeID = id; }
1960 
1961     //
1962     // functions for setting values
1963     //
setRegVar(G4_RegVar * rv)1964     void setRegVar(G4_RegVar* rv)
1965     {
1966         MUST_BE_TRUE(regVar == NULL, ERROR_UNKNOWN);
1967         regVar = rv;
1968     }
1969 
1970     // caller manages the name str
setName(const char * newName)1971     void setName(const char* newName)
1972     {
1973         name = newName;
1974     }
1975 
getByteSize() const1976     unsigned int getByteSize() const { return numElements * getElemSize(); }
1977 
getWordSize() const1978     unsigned int getWordSize() const {return (getByteSize() + 1)/2;}
1979 
resizeNumRows(unsigned int numrows)1980     void resizeNumRows(unsigned int numrows)
1981     {
1982         int byteSize = numrows * numEltPerGRF<Type_UB>();
1983         setTotalElems(byteSize / getElemSize());
1984     }
1985 
setAddrTakenSpillFill(G4_Declare * dcl)1986     void setAddrTakenSpillFill(G4_Declare* dcl)
1987     {
1988         addrTakenSpillFillDcl = dcl;
1989     }
1990 
getAddrTakenSpillFill() const1991     const G4_Declare* getAddrTakenSpillFill() const { return addrTakenSpillFillDcl; }
getAddrTakenSpillFill()1992           G4_Declare* getAddrTakenSpillFill()       { return addrTakenSpillFillDcl; }
1993 
1994     // declare this to be aliased to dcl+offset
1995     // This is an error if dcl+offset is not aligned to the type of this dcl
setAliasDeclare(G4_Declare * dcl,unsigned int offset)1996     void setAliasDeclare(G4_Declare* dcl, unsigned int offset)
1997     {
1998         AliasDCL = dcl;
1999         AliasOffset = offset;
2000         offsetFromBase = -1;
2001     }
2002 
resetSpillFlag()2003     void resetSpillFlag()
2004     {
2005         // This function is invoked from rematerialization pass.
2006         if (getAliasDeclare())
2007             getAliasDeclare()->resetSpillFlag();
2008         spillFlag = false;
2009     }
setSpillFlag()2010     void setSpillFlag()
2011     {
2012         if (getAliasDeclare())
2013         {
2014             // Iterate to top level dcl to set spill flag
2015             getAliasDeclare()->setSpillFlag();
2016         }
2017         spillFlag = true;
2018     }
isSpilled() const2019     bool isSpilled() const
2020     {
2021         if (getAliasDeclare() != NULL)
2022         {
2023             return getAliasDeclare()->isSpilled();
2024         }
2025 
2026         // Following executed only if G4_Declare doesnt have an alias
2027         return spillFlag;
2028     }
2029 
2030     bool isEvenAlign() const;
2031     G4_SubReg_Align getSubRegAlign() const;
2032     void        setEvenAlign();
2033     void        setSubRegAlign(G4_SubReg_Align subAl);
2034 
2035     void copyAlign(G4_Declare* dcl);
2036 
getByteAlignment() const2037     unsigned getByteAlignment() const
2038     {
2039         // we only consider subalign here
2040         unsigned byteAlign = getSubRegAlign() * TypeSize(Type_UW);
2041         return byteAlign < TypeSize(elemType) ?
2042             TypeSize(elemType) : byteAlign;
2043     }
2044 
setRegFile(G4_RegFileKind rfile)2045     void setRegFile(G4_RegFileKind rfile) { regFile = rfile; }
2046 
useGRF() const2047     bool useGRF() const { return (regFile & (G4_GRF | G4_INPUT)) != 0; }
isInput() const2048     bool isInput() const { return liveIn || ((regFile & G4_INPUT) != 0); }
isOutput() const2049     bool isOutput() const { return liveOut; }
isPayloadLiveOut() const2050     bool isPayloadLiveOut() const { return payloadLiveOut; }
2051 
2052     //
2053     // retrieving functions
2054     //
getAliasOffset() const2055     unsigned          getAliasOffset() const {return AliasOffset;}
getAliasDeclare() const2056     const G4_Declare *getAliasDeclare() const {return AliasDCL;}
getAliasDeclare()2057           G4_Declare *getAliasDeclare()       {return AliasDCL;}
getRootDeclare() const2058     const G4_Declare *getRootDeclare() const
2059     {
2060         const G4_Declare* rootDcl = this;
2061         while (rootDcl->getAliasDeclare() != NULL)
2062         {
2063             rootDcl = rootDcl->getAliasDeclare();
2064         }
2065         return rootDcl;
2066     }
getRootDeclare()2067     G4_Declare       *getRootDeclare() {
2068         return const_cast<G4_Declare*>(((const G4_Declare *)this)->getRootDeclare());
2069     }
2070 
2071     // like above, but also return the alias offset in bytes
getRootDeclare(uint32_t & offset) const2072     const G4_Declare*    getRootDeclare(uint32_t& offset) const
2073     {
2074         const G4_Declare* rootDcl = this;
2075         offset = 0;
2076         while (rootDcl->getAliasDeclare() != NULL)
2077         {
2078             offset += AliasOffset;
2079             rootDcl = rootDcl->getAliasDeclare();
2080         }
2081         return rootDcl;
2082     }
getRootDeclare(uint32_t & offset)2083     G4_Declare* getRootDeclare(uint32_t& offset) {
2084         return const_cast<G4_Declare*>(((const G4_Declare *)this)->getRootDeclare(offset));
2085     }
2086 
getName() const2087     const char*       getName() const {return name;}
getRegFile() const2088     G4_RegFileKind getRegFile() const {return regFile;}
2089 
2090     // returns number of elements per row
getNumElems() const2091     unsigned short getNumElems() const
2092     {
2093         return getNumRows() > 1 ? numEltPerGRF<Type_UB>() / getElemSize() : numElements;
2094     }
getNumRows() const2095     unsigned short getNumRows() const
2096     {
2097         return (getByteSize() + (numEltPerGRF<Type_UB>() - 1))/numEltPerGRF<Type_UB>();
2098     }
getTotalElems() const2099     unsigned short getTotalElems() const
2100     {
2101         return (unsigned short)numElements;
2102     }
2103 
setTotalElems(uint32_t numElems)2104     void setTotalElems(uint32_t numElems) { numElements = numElems; }
getNumberFlagElements() const2105     unsigned short getNumberFlagElements() const
2106     {
2107         assert(regFile == G4_FLAG && "should only be called for flag vars");
2108         return numFlagElements;
2109     }
2110 
getElemType() const2111     G4_Type          getElemType() const {return elemType;}
getElemSize() const2112     uint16_t         getElemSize() const {return TypeSize(elemType);}
getRegVar() const2113     const G4_RegVar *getRegVar() const {return regVar;}
getRegVar()2114           G4_RegVar *getRegVar()       {return regVar;}
2115 
getOffsetFromBase()2116     int getOffsetFromBase()
2117     {
2118         if (offsetFromBase == -1)
2119         {
2120             offsetFromBase = 0;
2121             for (const G4_Declare *dcl = this; dcl->getAliasDeclare() != NULL; dcl = dcl->getAliasDeclare())
2122             {
2123                 offsetFromBase += dcl->getAliasOffset();
2124             }
2125         }
2126         return offsetFromBase;
2127     }
2128 
setSpilledDeclare(G4_Declare * sd)2129     void        setSpilledDeclare(G4_Declare* sd) {spillDCL = sd;}
getSpilledDeclare() const2130     const G4_Declare* getSpilledDeclare() const {return spillDCL;}
getSpilledDeclare()2131           G4_Declare* getSpilledDeclare()  {return spillDCL;}
2132 
setDeclId(unsigned id)2133     void setDeclId(unsigned id) { declId = id; }
getDeclId() const2134     unsigned getDeclId() const { return declId; }
2135 
setIsSplittedDcl(bool b)2136     void setIsSplittedDcl(bool b) { isSplittedDcl = b; }
getIsSplittedDcl() const2137     bool getIsSplittedDcl() const { return isSplittedDcl; }
2138 
setIsPartialDcl(bool b)2139     void setIsPartialDcl(bool b) { isPartialDcl = b; }
getIsPartialDcl() const2140     bool getIsPartialDcl() const { return isPartialDcl; }
2141 
setIsRefInSendDcl(bool b)2142     void setIsRefInSendDcl(bool b) { refInSend |= b;}
getIsRefInSendDcl() const2143     bool getIsRefInSendDcl() const { return refInSend; }
2144 
setSplitVarStartID(unsigned id)2145     void        setSplitVarStartID(unsigned id) { startID = id; };
getSplitVarStartID() const2146     unsigned    getSplitVarStartID() const { return startID; };
2147 
setDoNotSpill()2148     void setDoNotSpill()      { doNotSpill = true; }
isDoNotSpill() const2149     bool isDoNotSpill() const { return doNotSpill; }
2150 
isMsgDesc() const2151     bool isMsgDesc() const { return regFile == G4_ADDRESS && elemType == Type_UD; }
2152 
setCapableOfReuse()2153     void setCapableOfReuse()       { capableOfReuse = true; }
getCapableOfReuse() const2154     bool getCapableOfReuse() const { return capableOfReuse; }
2155 
setAddressed()2156     void    setAddressed() { addressed = true; }
getAddressed() const2157     bool    getAddressed() const {
2158         if (addressed)
2159         {
2160             return true;
2161         }
2162         if (AliasDCL)
2163         {
2164             return AliasDCL->getAddressed();
2165         }
2166         else
2167         {
2168             return false;
2169         }
2170     }
2171 
setPreDefinedVar(bool b)2172     void setPreDefinedVar(bool b) { PreDefinedVar = b; }
isPreDefinedVar() const2173     bool isPreDefinedVar() const { return PreDefinedVar; }
2174 
2175     void emit(std::ostream& output) const;
2176 
dump() const2177     void dump() const { emit(std::cerr); }
2178 
2179     void prepareForRealloc(G4_Kernel*);
2180 };
2181 }
// Convenience aliases for a vector of declare pointers and its iterator.
typedef std::vector<vISA::G4_Declare*> DECLARE_LIST;
typedef std::vector<vISA::G4_Declare*>::iterator DECLARE_LIST_ITER;
2184 
2185 namespace vISA
2186 {
2187 class G4_VarBase;
2188 
2189 class G4_Operand
2190 {
2191     friend class G4_INST;
2192     friend class G4_InstSend;
2193     friend class G4_FillIntrinsic;
2194     friend class G4_SpillIntrinsic;
2195     friend class G4_PseudoMovInstrinsic;
2196     friend class G4_InstDpas;
2197 
2198 public:
2199     enum Kind {
2200         immediate,
2201         srcRegRegion,
2202         dstRegRegion,
2203         predicate,      // instruction predicate
2204         condMod,        // condition modifier
2205         addrExp,
2206         label
2207     };
~G4_Operand()2208     virtual ~G4_Operand() {}
2209 protected:
2210     Kind kind;
2211     G4_Type type;
2212     G4_INST *inst;
2213 
2214     // fields used to compare operands
2215     G4_Declare *top_dcl;
2216     G4_VarBase *base;
2217 
2218     uint64_t bitVec[2];  // bit masks at byte granularity (for flags, at bit granularity)
2219 
2220     bool rightBoundSet;
2221     unsigned byteOffset;
2222     G4_AccRegSel accRegSel;
2223 
2224     // [left_bound, right_bound] describes the region in the root variable that this operand touches.
2225     // for variables and addresses:
2226     //  lb = offset of the first byte of the first element
2227     //  rb = offset of the last byte of the last element
2228     //  for non-send instructions, (rb - lb) < 64 always holds since operand can't cross 2GRF boundary
2229     //  for send instructions, rb is determined by the message/response length
2230     // for flags:
2231     //  lb = bit offset of the first flag bit
2232     //  rb = bit offset of the last flag bit
2233     //  (rb - lb) < 32 always holds for flags
    //  for predicate and conditional modifiers, the bounds are also affected by the quarter control
2235     unsigned left_bound;
2236     unsigned right_bound;
2237 
G4_Operand(Kind k,G4_Type ty=Type_UNDEF,G4_VarBase * base=nullptr)2238     explicit G4_Operand(
2239         Kind k, G4_Type ty = Type_UNDEF, G4_VarBase *base = nullptr)
2240         : kind(k), type(ty), inst(nullptr), top_dcl(nullptr), base(base),
2241           rightBoundSet(false), byteOffset(0), accRegSel(ACC_UNDEFINED),
2242           left_bound(0), right_bound(0)
2243     {
2244         bitVec[0] = bitVec[1] = 0;
2245     }
2246 
G4_Operand(Kind k,G4_VarBase * base)2247     G4_Operand(Kind k, G4_VarBase *base)
2248         : kind(k), type(Type_UNDEF), inst(nullptr), top_dcl(nullptr), base(base),
2249           rightBoundSet(false), byteOffset(0), accRegSel(ACC_UNDEFINED),
2250           left_bound(0), right_bound(0)
2251     {
2252         bitVec[0] = bitVec[1] = 0;
2253     }
2254 
2255 public:
getKind() const2256     Kind getKind() const { return kind; }
getType() const2257     G4_Type getType() const { return type; }
getTypeSize() const2258     unsigned short getTypeSize() const { return TypeSize(getType()); }
2259 
isImm() const2260     bool isImm() const { return kind == Kind::immediate; }
isSrcRegRegion() const2261     bool isSrcRegRegion() const { return kind == Kind::srcRegRegion; }
isDstRegRegion() const2262     bool isDstRegRegion() const { return kind == Kind::dstRegRegion; }
isRegRegion() const2263     bool isRegRegion() const
2264     {
2265         return kind == srcRegRegion || kind == dstRegRegion;
2266     }
isPredicate() const2267     bool isPredicate() const { return kind == predicate; }
isCondMod() const2268     bool isCondMod() const { return kind == condMod; }
isLabel() const2269     bool isLabel() const { return kind == label; }
isAddrExp() const2270     bool isAddrExp() const { return kind == addrExp; }
2271 
getTopDcl() const2272     const G4_Declare *getTopDcl() const { return top_dcl; }
getTopDcl()2273           G4_Declare* getTopDcl() { return top_dcl; }
2274 
getBase() const2275     const G4_VarBase *getBase() const { return base; }
getBase()2276           G4_VarBase *getBase() { return base; }
setBase(G4_VarBase * b)2277     void setBase(G4_VarBase *b) { base = b; }
2278     G4_RegAccess getRegAccess() const;
2279 
2280     const G4_Declare *getBaseRegVarRootDeclare() const;
2281           G4_Declare *getBaseRegVarRootDeclare();
2282 
isRelocImm() const2283     virtual bool isRelocImm() const { return false; }
2284     virtual void emit(std::ostream &output, bool symbolreg = false) = 0;
2285     void dump() const;
2286 
2287     bool isGreg() const;
2288     bool isAreg() const;
2289     bool isNullReg() const;
2290     bool isIpReg() const;
2291     bool isNReg() const;
2292     bool isAccReg() const;
2293     bool isFlag() const;
2294     bool isMaskReg() const;
2295     bool isMsReg() const;
2296     bool isSrReg() const;
2297     bool isCrReg() const;
2298     bool isDbgReg() const;
2299     bool isTmReg() const;
2300     bool isTDRReg() const;
2301     bool isA0() const;
2302     bool isAddress() const;
2303     bool isScalarAddr() const;
2304 
asAddrExp() const2305     const G4_AddrExp* asAddrExp() const
2306     {
2307 #ifdef _DEBUG
2308         if (!isAddrExp())
2309         {
2310             return nullptr;
2311         }
2312 #endif
2313         return reinterpret_cast<const G4_AddrExp*>(this);
2314     }
asAddrExp()2315     G4_AddrExp* asAddrExp() {
2316         return const_cast<G4_AddrExp*>(((const G4_Operand *)this)->asAddrExp());
2317     }
2318 
asDstRegRegion() const2319     const G4_DstRegRegion* asDstRegRegion() const
2320     {
2321 #ifdef _DEBUG
2322         if (!isDstRegRegion())
2323         {
2324             return nullptr;
2325         }
2326 #endif
2327         return reinterpret_cast<const G4_DstRegRegion*>(this);
2328     }
asDstRegRegion()2329     G4_DstRegRegion* asDstRegRegion() {
2330         return const_cast<G4_DstRegRegion*>(((const G4_Operand *)this)->asDstRegRegion());
2331     }
2332 
asSrcRegRegion() const2333     const G4_SrcRegRegion* asSrcRegRegion() const
2334     {
2335 #ifdef _DEBUG
2336         if (!isSrcRegRegion())
2337         {
2338             return nullptr;
2339         }
2340 #endif
2341         return reinterpret_cast<const G4_SrcRegRegion*>(this);
2342     }
asSrcRegRegion()2343     G4_SrcRegRegion* asSrcRegRegion() {
2344         return const_cast<G4_SrcRegRegion*>(((const G4_Operand *)this)->asSrcRegRegion());
2345     }
2346 
asImm() const2347     const G4_Imm* asImm() const
2348     {
2349 #ifdef _DEBUG
2350         if (!isImm())
2351         {
2352             return nullptr;
2353         }
2354 #endif
2355         return reinterpret_cast<const G4_Imm*>(this);
2356     }
asImm()2357     G4_Imm* asImm() {
2358         return const_cast<G4_Imm*>(((const G4_Operand *)this)->asImm());
2359     }
2360 
asPredicate() const2361     const G4_Predicate* asPredicate() const
2362     {
2363 #ifdef _DEBUG
2364         if (!isPredicate())
2365         {
2366             return nullptr;
2367         }
2368 #endif
2369         return reinterpret_cast<const G4_Predicate*>(this);
2370     }
asPredicate()2371     G4_Predicate* asPredicate() {
2372         return const_cast<G4_Predicate*>(((const G4_Operand *)this)->asPredicate());
2373     }
2374 
asCondMod() const2375     const G4_CondMod* asCondMod() const {
2376 #ifdef _DEBUG
2377         if (!isCondMod())
2378         {
2379             return nullptr;
2380         }
2381 #endif
2382         return reinterpret_cast<const G4_CondMod*>(this);
2383     }
2384 
asCondMod()2385     G4_CondMod* asCondMod()
2386     {
2387         return const_cast<G4_CondMod*>(((const G4_Operand *)this)->asCondMod());
2388     }
2389 
asLabel() const2390     const G4_Label *asLabel() const
2391     {
2392 #ifdef _DEBUG
2393         if (!isLabel())
2394         {
2395             return nullptr;
2396         }
2397 #endif
2398         return reinterpret_cast<const G4_Label*>(this);
2399     }
asLabel()2400     G4_Label* asLabel()
2401     {
2402         return const_cast<G4_Label*>(((const G4_Operand *)this)->asLabel());
2403     }
2404 
isSrc() const2405     bool isSrc() const
2406     {
2407         return isImm() || isAddrExp() || isSrcRegRegion();
2408     }
2409 
2410     bool isScalarSrc() const;
2411 
crossGRF()2412     bool crossGRF()
2413     {
2414         return getRightBound() / numEltPerGRF<Type_UB>() !=
2415                getLeftBound() / numEltPerGRF<Type_UB>();
2416     }
2417 
getLeftBound()2418     unsigned getLeftBound()
2419     {
2420         // The default left bound does not take emask into account for flags.
2421         // Compute the right bound in which updates the left bound accordingly.
2422         if (isRightBoundSet() == false && !isNullReg())
2423         {
2424             inst->computeRightBound(this);
2425         }
2426         return left_bound;
2427     }
getRightBound()2428     unsigned getRightBound()
2429     {
2430         if (isRightBoundSet() == false && !isNullReg())
2431         {
2432             inst->computeRightBound(this);
2433         }
2434         return right_bound;
2435     }
isRightBoundSet() const2436     bool isRightBoundSet() const { return rightBoundSet; }
getBitVecL()2437     uint64_t getBitVecL()
2438     {
2439         if (isRightBoundSet() == false && !isNullReg())
2440         {
2441             // computeRightBound also computes bitVec
2442             inst->computeRightBound(this);
2443         }
2444         return bitVec[0];
2445     }
getBitVecH()2446     uint64_t getBitVecH()
2447     {
2448         if (isRightBoundSet() == false && !isNullReg())
2449         {
2450             // computeRightBound also computes bitVec
2451             inst->computeRightBound(this);
2452         }
2453         if (getGRFSize() == 32)
2454         {
2455             assert(bitVec[1] == 0 && "upper bits should be 0");
2456         }
2457         return bitVec[1];
2458     }
2459     /*
2460         For operands that do use it, it is computed during left bound compuation.
2461     */
getByteOffset() const2462     unsigned getByteOffset() const { return byteOffset; }
2463 
2464     // ToDo: get rid of this setter
setBitVecL(uint64_t bvl)2465     void setBitVecL(uint64_t bvl)
2466     {
2467         bitVec[0] = bvl;
2468     }
2469 
2470     void setBitVecFromSize(uint32_t NBytes);
2471 
2472     void updateFootPrint(BitSet& footprint, bool isSet);
2473 
computeRightBound(uint8_t exec_size)2474     virtual unsigned computeRightBound(uint8_t exec_size) { return left_bound; }
setRightBound(unsigned val)2475     void setRightBound(unsigned val)
2476     {
2477         rightBoundSet = true;
2478         right_bound = val;
2479     }
unsetRightBound()2480     void unsetRightBound() { rightBoundSet = false; }
setLeftBound(unsigned val)2481     void setLeftBound(unsigned val) { left_bound = val; }
getInst() const2482     const G4_INST* getInst() const { return inst; }
getInst()2483           G4_INST* getInst()       { return inst; }
setInst(G4_INST * op)2484     void setInst(G4_INST* op) { inst = op; }
setAccRegSel(G4_AccRegSel value)2485     void setAccRegSel(G4_AccRegSel value) { accRegSel = value; }
getAccRegSel() const2486     G4_AccRegSel getAccRegSel() const { return accRegSel; }
isAccRegValid() const2487     bool isAccRegValid() const { return accRegSel != ACC_UNDEFINED;}
2488 
2489     unsigned getLinearizedStart();
2490     unsigned getLinearizedEnd();
2491 
2492     // compare if this operand is the same as the input w.r.t physical register in the end
compareOperand(G4_Operand * opnd)2493     virtual G4_CmpRelation compareOperand(G4_Operand *opnd)
2494     {
2495         return Rel_disjoint;
2496     }
2497 
2498     // should only be called post-RA, return true if this operand has overlapping GRF with other
2499     // ToDo: extend to non-GRF operands?
hasOverlappingGRF(G4_Operand * other)2500     bool hasOverlappingGRF(G4_Operand* other)
2501     {
2502         if (!other || !isGreg() || !other->isGreg())
2503         {
2504             return false;
2505         }
2506         auto LB = getLinearizedStart(), RB = getLinearizedEnd();
2507         auto otherLB = other->getLinearizedStart(), otherRB = other->getLinearizedEnd();
2508         return !(RB < otherLB || LB > otherRB);
2509     }
2510 
GetNonVectorImmType(G4_Type type)2511     static G4_Type GetNonVectorImmType(G4_Type type)
2512     {
2513         switch (type)
2514         {
2515             case Type_V:
2516                 return Type_W;
2517             case Type_UV:
2518                 return Type_UW;
2519             case Type_VF:
2520                 return Type_F;
2521             default:
2522                 return type;
2523         }
2524     }
2525 };
2526 
2527 class G4_VarBase
2528 {
2529 public:
2530     enum G4_VarKind {
2531         VK_regVar,  // register allocation candidate
2532         VK_phyGReg, // physical general register
2533         VK_phyAReg  // physical architecture register
2534     };
2535 
2536 protected:
2537     G4_VarKind Kind;
G4_VarBase(G4_VarKind K)2538     explicit G4_VarBase(G4_VarKind K) : Kind(K) {}
2539 
2540 public:
getKind() const2541     G4_VarKind getKind() const { return Kind; }
2542 
isRegVar() const2543     bool isRegVar() const { return getKind() == VK_regVar; }
isPhyReg() const2544     bool isPhyReg() const { return !isRegVar(); }
isPhyGreg() const2545     bool isPhyGreg() const { return getKind() == VK_phyGReg; }
isPhyAreg() const2546     bool isPhyAreg() const { return getKind() == VK_phyAReg; }
2547 
asRegVar() const2548     G4_RegVar *asRegVar() const
2549     {
2550         MUST_BE_TRUE(isRegVar(), ERROR_UNKNOWN);
2551         return (G4_RegVar *)this;
2552     }
asGreg() const2553     G4_Greg *asGreg() const
2554     {
2555         MUST_BE_TRUE(isPhyGreg(), ERROR_UNKNOWN);
2556         return (G4_Greg *)this;
2557     }
asAreg() const2558     G4_Areg *asAreg() const
2559     {
2560         MUST_BE_TRUE(isPhyAreg(), ERROR_UNKNOWN);
2561         return (G4_Areg *)this;
2562     }
2563 
2564     bool isAreg() const;
2565     bool isGreg() const;
2566     bool isNullReg() const;
2567     bool isIpReg() const;
2568     bool isFlag() const;
2569     bool isNReg() const;
2570     bool isAccReg() const;
2571     bool isMaskReg() const;
2572     bool isMsReg() const;
2573     bool isSrReg() const;
2574     bool isCrReg() const;
2575     bool isDbgReg() const;
2576     bool isTmReg() const;
2577     bool isTDRReg() const;
2578     bool isSpReg() const;
2579     bool isA0() const;
2580     bool isAddress() const;
2581     bool isRegAllocPartaker() const;
2582 
2583     bool noScoreBoard() const;
2584     bool isScalarAddr() const;
2585     G4_Areg* getAreg() const;
2586 
ExRegNum(bool & valid)2587     virtual unsigned short ExRegNum(bool &valid)
2588     {
2589         valid = false;
2590         return UNDEFINED_SHORT;
2591     }
2592 
ExSubRegNum(bool & valid)2593     virtual unsigned short ExSubRegNum(bool &valid)
2594     {
2595         valid = false;
2596         return UNDEFINED_SHORT;
2597     }
2598 
2599     virtual void emit(std::ostream &output, bool symbolreg = false) = 0;
2600 };
2601 
2602 //
2603 // General Register File
2604 //
2605 class G4_Greg final : public G4_VarBase
2606 {
2607     const unsigned RegNum;
2608 public:
G4_Greg(unsigned num)2609     explicit G4_Greg(unsigned num) : G4_VarBase(VK_phyGReg), RegNum(num) {}
operator new(size_t sz,Mem_Manager & m)2610     void *operator new(size_t sz, Mem_Manager &m) { return m.alloc(sz); }
getRegFile() const2611     G4_RegFileKind getRegFile() const { return G4_GRF; }
2612 
getRegNum() const2613     unsigned getRegNum() const { return RegNum; }
2614 
ExRegNum(bool & valid)2615     unsigned short ExRegNum(bool &valid) override
2616     {
2617         valid = true;
2618         return (unsigned short)getRegNum();
2619     }
2620 
2621     void emit(std::ostream &output, bool symbolreg = false) override;
2622 };
2623 
2624 //
2625 // Architecture Register File
2626 //
2627 class G4_Areg final : public G4_VarBase
2628 {
2629     const G4_ArchRegKind ArchRegType;
2630 public:
G4_Areg(G4_ArchRegKind k)2631     explicit G4_Areg(G4_ArchRegKind k)
2632         : G4_VarBase(VK_phyAReg), ArchRegType(k) {}
operator new(size_t sz,Mem_Manager & m)2633     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
2634 
getArchRegType() const2635     G4_ArchRegKind getArchRegType() const { return ArchRegType; }
2636 
2637     void emit(std::ostream& output, bool symbolreg=false) override;
2638 
isNullReg() const2639     bool isNullReg() const { return getArchRegType() == AREG_NULL; }
isFlag() const2640     bool isFlag() const
2641     {
2642         switch (getArchRegType())
2643         {
2644             case AREG_F0:
2645             case AREG_F1:
2646             case AREG_F2:
2647             case AREG_F3:
2648                 return true;
2649             default:
2650                 return false;
2651         }
2652     }
isIpReg() const2653     bool isIpReg()  const { return getArchRegType() == AREG_IP;      }
isA0() const2654     bool isA0()     const { return getArchRegType() == AREG_A0;      }
isNReg() const2655     bool isNReg()   const { return getArchRegType() == AREG_N0      ||
2656                                getArchRegType() == AREG_N1;      }
isAcc0Reg() const2657     bool isAcc0Reg() const { return getArchRegType() == AREG_ACC0;    }
isAccReg() const2658     bool isAccReg()  const { return getArchRegType() == AREG_ACC0    ||
2659                                getArchRegType() == AREG_ACC1;    }
isMaskReg() const2660     bool isMaskReg() const { return getArchRegType() == AREG_MASK0;   }
isMsReg() const2661     bool isMsReg()   const { return getArchRegType() == AREG_MS0;     }
isDbgReg() const2662     bool isDbgReg()  const { return getArchRegType() == AREG_DBG;     }
isSrReg() const2663     bool isSrReg()   const { return getArchRegType() == AREG_SR0;     }
isCrReg() const2664     bool isCrReg()   const { return getArchRegType() == AREG_CR0;     }
isTmReg() const2665     bool isTmReg()   const { return getArchRegType() == AREG_TM0;     }
isTDRReg() const2666     bool isTDRReg()  const { return getArchRegType() == AREG_TDR0;    }
isSpReg() const2667     bool isSpReg()   const { return getArchRegType() == AREG_SP;      }
2668 
ExRegNum(bool & valid)2669     unsigned short ExRegNum(bool &valid) override
2670     {
2671         unsigned short rNum = UNDEFINED_SHORT;
2672         valid = true;
2673 
2674         if (isFlag())
2675         {
2676             return getFlagNum();
2677         }
2678 
2679         switch (getArchRegType()) {
2680         case AREG_NULL:
2681         case AREG_A0:
2682         case AREG_ACC0:
2683         case AREG_MASK0:
2684         case AREG_MS0:
2685         case AREG_DBG:
2686         case AREG_SR0:
2687         case AREG_CR0:
2688         case AREG_TM0:
2689         case AREG_N0:
2690         case AREG_IP:
2691         case AREG_TDR0:
2692         case AREG_SP:
2693             rNum = 0;
2694             break;
2695         case AREG_ACC1:
2696         case AREG_N1:
2697             rNum = 1;
2698             break;
2699         default:
2700             valid = false;
2701         }
2702         return rNum;
2703     }
2704 
getFlagNum() const2705     int getFlagNum() const
2706     {
2707         switch (getArchRegType())
2708         {
2709         case AREG_F0:
2710             return 0;
2711         case AREG_F1:
2712             return 1;
2713         case AREG_F2:
2714             return 2;
2715         case AREG_F3:
2716             return 3;
2717         default:
2718             assert(false && "should only be called on flag ARF");
2719             return -1;
2720         }
2721     }
2722 };
2723 
2724 class G4_Imm : public G4_Operand
2725 {
2726     // Requirement for the immediate value 'imm'
2727     //   Given a value V of type T, and let <V-as-uint> be its bit pattern as
2728     //   unsigned integer type whose size == sizeof(T). Let 'imm' be the
2729     //   immediate for V, the following must hold:
2730     //     (uint64_t)(<V-as-uint>) == (uint64_t)imm.num
2731     //     i.e.  int16_t v ---> (uint64_t)(*(uint16_t*)&v) == (uint64_t)imm.num
2732     //           float f   ---> (uint64_t)(*(uint32_t*)&f) == (uint64_t)imm.num
2733     union {
2734         int64_t  num;
2735         uint32_t num32;
2736         double   fp;
2737         float    fp32;
2738     } imm;
2739 
2740 public:
G4_Imm(int64_t i,G4_Type ty)2741     G4_Imm(int64_t i, G4_Type ty)
2742         : G4_Operand(G4_Operand::immediate, ty)
2743     {
2744         imm.num = i;
2745     }
2746 
G4_Imm(double fp,G4_Type ty)2747     G4_Imm(double fp, G4_Type ty)
2748         : G4_Operand(G4_Operand::immediate, ty)
2749     {
2750         imm.fp = fp;
2751     }
2752 
G4_Imm(float fp)2753     G4_Imm(float fp)
2754         : G4_Operand(G4_Operand::immediate, Type_F)
2755     {
2756         imm.num = 0;  // make sure to clear all the bits
2757         imm.fp32 = fp;
2758     }
2759 
operator new(size_t sz,Mem_Manager & m)2760     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
getImm() const2761     int64_t getImm() const {return imm.num;}  // Get bits of imm AS integer.
getInt() const2762     int64_t getInt() const
2763     {
2764         MUST_BE_TRUE(!IS_TYPE_F32_F64(type), ERROR_UNKNOWN);
2765         return imm.num;
2766     }
getFloat() const2767     float getFloat() const
2768     {
2769         // if fp32 is sNAN, it will return qNAN. Be careful!
2770         MUST_BE_TRUE(IS_FTYPE(type), ERROR_UNKNOWN);
2771         return imm.fp32;
2772     }
getDouble() const2773     double getDouble() const
2774     {
2775         MUST_BE_TRUE(IS_DFTYPE(type), ERROR_UNKNOWN);
2776         return imm.fp;
2777     }
2778     bool isZero() const;
2779     // True if this is a signed integer and its sign bit(s) are 0.
2780     bool isSignBitZero() const;
2781     void emit(std::ostream& output, bool symbolreg=false) override;
2782     void emitAutoFmt(std::ostream& output);
2783 
2784     bool isEqualTo(G4_Imm& imm1) const;
isEqualTo(G4_Imm * imm1) const2785     bool isEqualTo(G4_Imm* imm1) const { return isEqualTo(*imm1); }
2786 
2787     G4_CmpRelation compareOperand(G4_Operand *opnd) override;
getRegFile() const2788     G4_RegFileKind getRegFile() const { return G4_UndefinedRF; }
2789 
2790     static bool isInTypeRange(int64_t imm, G4_Type ty);
2791 
2792     static int64_t typecastVals(int64_t value, G4_Type type);
2793 };
2794 
2795 class G4_Reloc_Imm : public G4_Imm
2796 {
2797 
2798 public:
operator new(size_t sz,Mem_Manager & m)2799     void *operator new(size_t sz, Mem_Manager& m) { return m.alloc(sz); }
isRelocImm() const2800     bool isRelocImm() const override { return true; }
2801 
2802     // G4_Reloc_Imm is the relocation target field. If the value is not given,
2803     // a magic number 0x6e10ca2e will present in final output
G4_Reloc_Imm(G4_Type ty)2804     G4_Reloc_Imm(G4_Type ty) : G4_Imm((int64_t)0x6e10ca2e, ty)
2805     {
2806     }
2807 
G4_Reloc_Imm(int64_t val,G4_Type ty)2808     G4_Reloc_Imm(int64_t val, G4_Type ty) : G4_Imm(val, ty)
2809     {
2810     }
2811 };
2812 
2813 class G4_Label: public G4_Operand
2814 {
2815     friend class IR_Builder;
2816 
2817     const char* label;
2818     bool funcLabel;
2819     bool start_loop_label;
2820     bool isFC;
2821 
G4_Label(const char * l)2822     G4_Label(const char* l) : G4_Operand(G4_Operand::label), label(l)
2823     {
2824         funcLabel = false;
2825         start_loop_label = false;
2826         isFC = false;
2827     }
2828 
2829 public:
2830 
getLabel() const2831     const char* getLabel() const {return label;}
operator new(size_t sz,Mem_Manager & m)2832     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
2833     void emit(std::ostream& output, bool symbolreg = false) override;
setFuncLabel(bool val)2834     void setFuncLabel(bool val) { funcLabel = val; }
isFuncLabel() const2835     bool isFuncLabel() const { return funcLabel; }
setStartLoopLabel()2836     void setStartLoopLabel() { start_loop_label = true; }
isStartLoopLabel() const2837     bool isStartLoopLabel() const { return start_loop_label; }
isFCLabel() const2838     bool isFCLabel() const { return isFC; }
setFCLabel(bool fcLabel)2839     void setFCLabel(bool fcLabel) { isFC = fcLabel; }
2840 };
2841 }
2842 //
2843 // Since the sub regs of address reg a0 can be allocated individually,
2844 // we use subRegOff to indicate the sub registers
2845 //
2846 struct AssignedReg
2847 {
2848     vISA::G4_VarBase* phyReg = nullptr;
2849     unsigned    subRegOff = 0;
2850 };
2851 
2852 namespace vISA
2853 {
2854     class G4_RegVar : public G4_VarBase
2855     {
2856         friend class G4_Declare;
2857 
2858     public:
2859         enum RegVarType
2860         {
2861             Default = 0,
2862             GRFSpillTmp = 1,
2863             AddrSpillLoc = 2,
2864             Transient = 3,
2865             Coalesced = 4,
2866         };
2867 
2868     private:
2869         // G4_RegVar now has an enum that holds its type. Each subclass of G4_RegVar
2870         // will initialize the type according to its specific class. For eg,
2871         // Spill/Fill transient ranges will set this type to RegVarType::Transient.
2872         unsigned    id;        // id for register allocation
2873         const RegVarType type;
2874         G4_Declare* const decl;    // corresponding declare
2875         AssignedReg reg;    // assigned physical register; set after reg alloc
2876         unsigned    disp;   // displacement offset in spill memory
2877         G4_SubReg_Align subAlign;    // To support sub register alignment
2878         bool evenAlignment = false; // Align this regVar to even GRFs regardless of its size
2879 
2880     public:
2881 
2882         // To support sub register alignment
G4_RegVar(G4_Declare * d,RegVarType t)2883         G4_RegVar(G4_Declare* d, RegVarType t) :
2884             G4_VarBase(VK_regVar), id(UNDEFINED_VAL), type(t), decl(d),
2885             disp(UINT_MAX), subAlign(Any)
2886         {
2887         }
operator new(size_t sz,Mem_Manager & m)2888         void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
getId() const2889         unsigned    getId() const { return id; }
setId(unsigned i)2890         void        setId(unsigned i) { id = i; }
getName() const2891         const char*        getName() const { return decl->getName(); }
getDeclare() const2892         const G4_Declare* getDeclare() const { return decl; }
getDeclare()2893               G4_Declare* getDeclare()       { return decl; }
isPhyRegAssigned() const2894         bool        isPhyRegAssigned() const { return reg.phyReg != NULL; }
isFlag() const2895         bool        isFlag()    const { return decl->getRegFile() == G4_FLAG; }
isAreg() const2896         bool        isAreg()    const { return (reg.phyReg != NULL) && (reg.phyReg->isAreg()); }
isA0() const2897         bool        isA0()      const { return (reg.phyReg != NULL) && (reg.phyReg->isA0()); }
isCrReg() const2898         bool        isCrReg()   const { return (reg.phyReg != NULL) && (reg.phyReg->isCrReg()); }
isDbgReg() const2899         bool        isDbgReg()  const { return (reg.phyReg != NULL) && (reg.phyReg->isDbgReg()); }
isGreg() const2900         bool        isGreg()    const { return (reg.phyReg != NULL) && (reg.phyReg->isGreg()); }
isNReg() const2901         bool        isNReg()    const { return (reg.phyReg != NULL) && (reg.phyReg->isNReg()); }
isNullReg() const2902         bool        isNullReg() const { return (reg.phyReg != NULL) && (reg.phyReg->isNullReg()); }
isSrReg() const2903         bool        isSrReg()   const { return (reg.phyReg != NULL) && (reg.phyReg->isSrReg()); }
isTDRReg() const2904         bool        isTDRReg()  const { return (reg.phyReg != NULL) && (reg.phyReg->isTDRReg()); }
isTmReg() const2905         bool        isTmReg()   const { return (reg.phyReg != NULL) && (reg.phyReg->isTmReg()); }
isAccReg() const2906         bool        isAccReg()  const { return (reg.phyReg != NULL) && (reg.phyReg->isAccReg()); }
isIpReg() const2907         bool        isIpReg()   const { return (reg.phyReg != NULL) && (reg.phyReg->isIpReg()); }
isMaskReg() const2908         bool        isMaskReg() const { return (reg.phyReg != NULL) && (reg.phyReg->isMaskReg()); }
isMsReg() const2909         bool        isMsReg()   const { return (reg.phyReg != NULL) && (reg.phyReg->isMsReg()); }
isSpReg() const2910         bool        isSpReg()   const { return (reg.phyReg != NULL) && (reg.phyReg->isSpReg()); }
2911 
isRegAllocPartaker() const2912         bool        isRegAllocPartaker() const { return id != UNDEFINED_VAL; }
getRegAllocPartaker() const2913         unsigned    getRegAllocPartaker() const { return id;  }
isAddress() const2914         bool        isAddress()  const { return decl->getRegFile() == G4_ADDRESS; }
isScalarAddr() const2915         bool        isScalarAddr()  const { return decl->getRegFile() == G4_SCALAR; }
getPhyReg() const2916         const G4_VarBase* getPhyReg() const { return reg.phyReg; }
getPhyReg()2917               G4_VarBase* getPhyReg()       { return reg.phyReg; }
2918         unsigned    getByteAddr() const;
getPhyRegOff() const2919         unsigned    getPhyRegOff() const { return reg.subRegOff; }
setPhyReg(G4_VarBase * pr,unsigned off)2920         void        setPhyReg(G4_VarBase* pr, unsigned off)
2921         {
2922             MUST_BE_TRUE(pr == NULL || pr->isPhyReg(), ERROR_UNKNOWN);
2923             reg.phyReg = pr;
2924             reg.subRegOff = off;
2925         }
resetPhyReg()2926         void        resetPhyReg() { reg.phyReg = NULL; reg.subRegOff = 0; }
isSpilled() const2927         bool        isSpilled() const { return decl->isSpilled(); }
setDisp(unsigned offset)2928         void        setDisp(unsigned offset) { disp = offset; }
getDisp() const2929         unsigned    getDisp() const { return disp; }
isAliased() const2930         bool        isAliased() const { return decl->getAliasDeclare() != NULL; }
2931         unsigned getLocId() const;
2932 
isRegVarTransient() const2933         bool isRegVarTransient() const { return type == RegVarType::Transient; }
2934         bool isRegVarSpill() const;
2935         bool isRegVarFill()  const;
2936 
isRegVarTmp() const2937         bool isRegVarTmp()          const { return type == RegVarType::GRFSpillTmp; }
isRegVarAddrSpillLoc() const2938         bool isRegVarAddrSpillLoc() const { return type == RegVarType::AddrSpillLoc; }
2939 
isRegVarCoalesced() const2940         bool isRegVarCoalesced() const { return type == RegVarType::Coalesced; }
2941 
2942         G4_RegVar * getBaseRegVar();
2943         G4_RegVar * getAbsBaseRegVar();
2944 
2945         G4_RegVar * getNonTransientBaseRegVar();
2946 
2947         void emit(std::ostream& output, bool symbolreg = false) override;
2948 
ExRegNum(bool & valid)2949         unsigned short ExRegNum(bool &valid) override { return reg.phyReg->ExRegNum(valid); }
ExSubRegNum(bool & valid)2950         unsigned short ExSubRegNum(bool &valid) override { valid = true; return (unsigned short)reg.subRegOff; }
2951 
2952     protected:
isEvenAlign() const2953         bool isEvenAlign() const { return evenAlignment; }
setEvenAlign()2954         void setEvenAlign() { evenAlignment = true; }
getSubRegAlignment() const2955         G4_SubReg_Align getSubRegAlignment() const
2956         {
2957             return subAlign;
2958         }
2959 
2960         void setSubRegAlignment(G4_SubReg_Align subAlg);
2961     };
2962 
2963     class G4_RegVarTransient : public G4_RegVar
2964     {
2965     public:
2966         enum TransientType
2967         {
2968             Spill = 0,
2969             Fill = 1,
2970         };
2971 
2972     private:
2973         G4_RegVar*  baseRegVar;
2974         G4_Operand* repRegion;
2975         G4_ExecSize  execSize;
2976         TransientType type;
2977 
2978     public:
G4_RegVarTransient(G4_Declare * d,G4_RegVar * base,G4_Operand * reprRegion,G4_ExecSize eSize,TransientType t)2979         G4_RegVarTransient(G4_Declare* d, G4_RegVar* base, G4_Operand* reprRegion,
2980             G4_ExecSize eSize, TransientType t) :
2981             G4_RegVar(d, Transient), baseRegVar(base), repRegion(reprRegion),
2982             execSize(eSize), type(t)
2983         {
2984         }
2985 
operator new(size_t sz,Mem_Manager & m)2986         void *operator new(size_t sz, Mem_Manager& m) { return m.alloc(sz); }
2987 
getBaseRegVar()2988         G4_RegVar * getBaseRegVar() { return baseRegVar; }
2989 
getRepRegion() const2990         G4_Operand * getRepRegion() const { return repRegion; }
2991         G4_RegVar * getAbsBaseRegVar();
2992         G4_RegVar * getNonTransientBaseRegVar();
getExecSize() const2993         G4_ExecSize getExecSize() const { return execSize; }
2994 
isRegVarSpill() const2995         bool isRegVarSpill() const { return type == TransientType::Spill; }
isRegVarFill() const2996         bool isRegVarFill() const { return type == TransientType::Fill; }
getDstRepRegion() const2997         G4_DstRegRegion* getDstRepRegion() const { return repRegion->asDstRegRegion(); }
getSrcRepRegion() const2998         G4_SrcRegRegion* getSrcRepRegion() const { return repRegion->asSrcRegRegion(); }
2999 
3000     };
3001 
3002     class G4_RegVarTmp : public G4_RegVar
3003     {
3004         G4_RegVar * const baseRegVar;
3005 
3006     public:
G4_RegVarTmp(G4_Declare * d,G4_RegVar * base)3007         G4_RegVarTmp(G4_Declare * d, G4_RegVar * base) :
3008             G4_RegVar(d, RegVarType::GRFSpillTmp), baseRegVar(base)
3009         {
3010             assert(base->isRegVarTransient() == false);
3011             assert(base == base->getBaseRegVar());
3012         }
operator new(size_t sz,Mem_Manager & m)3013         void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
3014 
getBaseRegVar()3015         G4_RegVar * getBaseRegVar() { return baseRegVar; }
3016         G4_RegVar * getAbsBaseRegVar();
getNonTransientBaseRegVar()3017         G4_RegVar * getNonTransientBaseRegVar()
3018         {
3019             return baseRegVar;
3020         }
3021     };
3022 
3023     class G4_RegVarAddrSpillLoc : public G4_RegVar
3024     {
3025         unsigned         loc_id;
3026     public:
G4_RegVarAddrSpillLoc(G4_Declare * d,int & loc_count)3027         G4_RegVarAddrSpillLoc(G4_Declare * d, int& loc_count) : G4_RegVar(d, RegVarType::AddrSpillLoc)
3028         {
3029             if (d->getAliasDeclare() != NULL)
3030             {
3031                 unsigned elemSize = d->getRegVar()->getDeclare()->getElemSize();
3032                 loc_id = d->getRegVar()->getLocId() + d->getAliasOffset() / elemSize;
3033             }
3034             else {
3035                 loc_id = (++loc_count) * getNumAddrRegisters();
3036             }
3037         }
operator new(size_t sz,Mem_Manager & m)3038         void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
getLocId() const3039         unsigned getLocId() const { return loc_id; }
3040     };
3041 
3042     class G4_RegVarCoalesced : public G4_RegVar
3043     {
3044         // If spill, set f to false
3045         bool f;
3046     public:
G4_RegVarCoalesced(G4_Declare * dcl,bool fill)3047         G4_RegVarCoalesced(G4_Declare* dcl, bool fill) : G4_RegVar(dcl, RegVarType::Coalesced)
3048         {
3049             f = fill;
3050         }
3051 
operator new(size_t sz,Mem_Manager & m)3052         void *operator new(size_t sz, Mem_Manager& m) { return m.alloc(sz); }
isSpill() const3053         bool isSpill() const { return !f; }
isFill() const3054         bool isFill() const { return f; }
3055     };
3056 
3057     class G4_SrcRegRegion final : public G4_Operand
3058     {
3059         friend class IR_Builder;
3060 
3061         const static int max_swizzle = 5;
3062         char swizzle[max_swizzle];     // this should only be set in binary encoding
3063 
3064         G4_SrcModifier mod;
3065         const G4_RegAccess acc;
3066         const RegionDesc *desc;
3067         const short    regOff;        // base+regOff is the starting register of the region
3068         const short    subRegOff;    // sub reg offset related to the regVar in "base"
3069         short          immAddrOff;    // imm addr offset
3070 
// Builds a source region over base b.
// The immediate address offset starts at 0 and the swizzle string starts
// empty. The left bound is computed immediately; right_bound is set to 0
// without marking the right bound as set.
G4_SrcRegRegion(G4_SrcModifier m,
    G4_RegAccess   a,
    G4_VarBase*    b,
    short roff,
    short sroff,
    const RegionDesc* rd,
    G4_Type        ty,
    G4_AccRegSel regSel = ACC_UNDEFINED)
    : G4_Operand(G4_Operand::srcRegRegion, ty, b),
      mod(m),
      acc(a),
      desc(rd),
      regOff(roff),
      subRegOff(sroff),
      immAddrOff(0)
{
    swizzle[0] = '\0';
    accRegSel = regSel;

    computeLeftBound();
    right_bound = 0;
}
3089 
3090         void setSrcBitVec(uint8_t exec_size);
3091 
3092     public:
3093         G4_SrcRegRegion(G4_SrcRegRegion& rgn);
operator new(size_t sz,Mem_Manager & m)3094         void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
3095 
operator ==(const G4_SrcRegRegion & other)3096         bool operator==(const G4_SrcRegRegion &other)
3097         {
3098             if (base != other.base || regOff != other.regOff || subRegOff != other.subRegOff ||
3099                 desc->vertStride != other.desc->vertStride ||
3100                 desc->horzStride != other.desc->horzStride ||
3101                 desc->width != other.desc->width ||
3102                 mod != other.mod || acc != other.acc || type != other.type)
3103             {
3104                 return false;
3105             }
3106 
3107             if (acc == IndirGRF && immAddrOff != other.immAddrOff)
3108             {
3109                 return false;
3110             }
3111 
3112             return true;
3113         }
3114 
        // Recomputes the operand's left bound; defined out of line.
        void computeLeftBound();
        // Register and sub-register offsets as stored by the constructor.
        short getRegOff() const { return regOff; }
        short getSubRegOff() const { return subRegOff; }

        // Plain accessors for the region's fields.
        const char*       getSwizzle() const { return swizzle; }
        G4_SrcModifier    getModifier() const  { return mod; }
        // True when a modifier other than Mod_src_undef is set.
        bool              hasModifier() const  { return mod != Mod_src_undef; }
        const RegionDesc* getRegion() const  { return desc; }
        G4_RegAccess      getRegAccess() const { return acc; }
        short             getAddrImm()  const { return immAddrOff; }
        // Size of one element, as reported by TypeSize() for the operand type.
        unsigned short    getElemSize() const { return TypeSize(type); }

        // Plain setters; neither touches the cached bounds.
        void setImmAddrOff(short off) { immAddrOff = off; }
        void setModifier(G4_SrcModifier m) { mod = m; }
        void setSwizzle(const char* sw);

        // Declared here, defined out of line.
        bool sameSrcRegRegion(G4_SrcRegRegion& rgn);
        bool obeySymbolRegRule() const;
        void emit(std::ostream& output, bool symbolreg = false) override;
        void emitRegVarOff(std::ostream& output, bool symbolreg = false);
3135 
isAreg() const3136         bool isAreg()    const { return base->isAreg(); }
isNullReg() const3137         bool isNullReg() const { return base->isNullReg(); }
isIpReg() const3138         bool isIpReg()   const { return base->isIpReg();}
isFlag() const3139         bool isFlag()    const {return base->isFlag();}
isNReg() const3140         bool isNReg()    const {return base->isNReg();}
isAccReg() const3141         bool isAccReg()  const {return base->isAccReg();}
isMaskReg() const3142         bool isMaskReg() const {return base->isMaskReg();}
isMsReg() const3143         bool isMsReg()   const {return base->isMsReg();}
isSrReg() const3144         bool isSrReg()   const {return base->isSrReg();}
isCrReg() const3145         bool isCrReg()   const {return base->isCrReg();}
isDbgReg() const3146         bool isDbgReg()  const { return base->isDbgReg(); }
isTmReg() const3147         bool isTmReg()   const { return base->isTmReg(); }
isTDRReg() const3148         bool isTDRReg()  const {return base->isTDRReg();}
isA0() const3149         bool isA0()      const {return base->isA0();}
isGreg() const3150         bool isGreg()    const { return base->isGreg(); }
isWithSwizzle() const3151         bool isWithSwizzle() const {return (swizzle[0] != '\0');}
3152         bool isScalar() const;
isAddress() const3153         bool isAddress() const {return base->isAddress();}
isScalarAddr() const3154         bool isScalarAddr() const { return base->isScalarAddr(); }
3155 
        // Declared here, defined out of line. The bool& parameter is an
        // output flag whose meaning is not visible in this header section.
        unsigned short             ExRegNum(bool&) const;
        unsigned short             ExSubRegNum(bool&);
        unsigned short             ExIndSubRegNum(bool&);
        short                      ExIndImmVal(void);

        void                       computePReg();

        // Any access mode other than Direct counts as indirect.
        bool isIndirect() const { return acc != Direct; }

        // G4_Operand overrides; defined out of line.
        unsigned computeRightBound(uint8_t exec_size) override;
        G4_CmpRelation compareOperand(G4_Operand *opnd) override;
3167 
setType(G4_Type ty)3168         void setType(G4_Type ty)
3169         {
3170             // FIXME: we should forbid setType() where ty has a different size than old type
3171             bool recomputeLeftBound = false;
3172 
3173             if (TypeSize(type) != TypeSize(ty))
3174             {
3175                 unsetRightBound();
3176                 recomputeLeftBound = true;
3177             }
3178 
3179             type = ty;
3180 
3181             if (recomputeLeftBound)
3182             {
3183                 computeLeftBound();
3184             }
3185         }
3186 
setRegion(const RegionDesc * rd,bool isInvariant=false)3187         void setRegion(const RegionDesc* rd, bool isInvariant = false)
3188         {
3189             if (!isInvariant && !desc->isEqual(rd))
3190             {
3191                 unsetRightBound();
3192                 desc = rd;
3193                 computeLeftBound();
3194             }
3195             else
3196             {
3197                 desc = rd;
3198             }
3199         }
3200 
        // Region/layout queries. Only the declarations are visible here; the
        // implementations live outside this header section.
        bool isNativeType() const;
        bool isNativePackedRowRegion() const;
        bool isNativePackedRegion() const;
        // Overload with reference parameters that receive additional results.
        bool evenlySplitCrossGRF(uint8_t execSize, bool &sameSubRegOff, bool &vertCrossGRF, bool &contRegion, uint8_t &eleInFirstGRF);
        bool evenlySplitCrossGRF(uint8_t execSize);
        bool coverTwoGRF();
        bool checkGRFAlign();
        // `offset` is an output parameter.
        bool hasFixedSubregOffset(uint32_t& offset);
        bool isNativePackedSrcRegion();
        // `vs`, `wd` and `twoGRFsrc` are passed by reference and may be written.
        uint8_t getMaxExecSize(int pos, uint8_t maxExSize, bool allowCrossGRF, uint16_t &vs, uint16_t &wd, bool &twoGRFsrc);
3211 
isSpilled() const3212         bool isSpilled() const
3213         {
3214             if (getBase() && getBase()->isRegVar())
3215             {
3216                 return getBase()->asRegVar()->isSpilled();
3217             }
3218 
3219             return false;
3220         }
3221 
3222         // return the byte offset from the region start for the element at "pos"
getByteOffset(int pos) const3223         int getByteOffset(int pos) const
3224         {
3225             int rowIdx = pos / desc->width;
3226             int colIdx = pos % desc->width;
3227             return rowIdx * desc->vertStride * getElemSize() + colIdx * desc->horzStride * getElemSize();
3228         }
3229 
3230         void rewriteContiguousRegion(IR_Builder& builder, uint16_t opNum);
3231 
3232     };
3233 }
// Per-channel enable mask: one bit each for X, Y, Z and W. The combined
// enumerators are the bitwise OR of the single-channel bits.
enum ChannelEnable {
    NoChannelEnable    = 0x0,
    ChannelEnable_X    = 0x1,
    ChannelEnable_Y    = 0x2,
    ChannelEnable_XY   = ChannelEnable_X | ChannelEnable_Y,
    ChannelEnable_Z    = 0x4,
    ChannelEnable_W    = 0x8,
    ChannelEnable_ZW   = ChannelEnable_Z | ChannelEnable_W,
    ChannelEnable_XYZW = ChannelEnable_XY | ChannelEnable_ZW
};
3244 
3245 namespace vISA
3246 {
3247 
3248 class G4_DstRegRegion final : public G4_Operand
3249 {
3250     friend class IR_Builder;
3251     ChannelEnable  writeMask;   // this should only be set in binary encoding
3252 
3253     G4_RegAccess   acc;         // direct, indirect GenReg or indirect MsgReg
3254     short          regOff;        // base+regOff is the starting register of the region
3255     short          subRegOff;    // sub reg offset related to the regVar in "base"
3256     short          immAddrOff;    // imm addr offset for indirect dst
3257     unsigned short horzStride;    // <DstRegion> has only horzStride
3258 
G4_DstRegRegion(G4_RegAccess a,G4_VarBase * b,short roff,short sroff,unsigned short hstride,G4_Type ty,G4_AccRegSel regSel=ACC_UNDEFINED)3259     G4_DstRegRegion(G4_RegAccess a,
3260         G4_VarBase* b,
3261         short roff,
3262         short sroff,
3263         unsigned short hstride,
3264         G4_Type ty,
3265         G4_AccRegSel regSel = ACC_UNDEFINED) :
3266         G4_Operand(G4_Operand::dstRegRegion, ty, b), acc(a), horzStride(hstride)
3267     {
3268         immAddrOff = 0;
3269         writeMask = NoChannelEnable;
3270         accRegSel = regSel;
3271 
3272         regOff = (roff == ((short)UNDEFINED_SHORT)) ? 0 : roff;
3273         subRegOff = sroff;
3274 
3275         computeLeftBound();
3276         right_bound = 0;
3277     }
3278 
3279     // DstRegRegion should only be constructed through IR_Builder
operator new(size_t sz,Mem_Manager & m)3280     void *operator new(size_t sz, Mem_Manager& m) { return m.alloc(sz); }
3281 
3282 public:
3283     G4_DstRegRegion(G4_DstRegRegion& rgn);
3284     G4_DstRegRegion(G4_DstRegRegion& rgn, G4_VarBase* new_base);
3285 
3286     void computeLeftBound();
3287 
getRegAccess() const3288     G4_RegAccess   getRegAccess() const { return acc; }
getRegOff() const3289     short          getRegOff() const { return regOff; }
getSubRegOff() const3290     short          getSubRegOff() const { return subRegOff; }
3291 
isCrossGRFDst()3292     bool isCrossGRFDst()
3293     {
3294         if (isNullReg())
3295         {
3296             return inst != NULL &&
3297                 (unsigned)inst->getExecSize() * getTypeSize() * horzStride > numEltPerGRF<Type_UB>();
3298         }
3299         if (isRightBoundSet() == false)
3300         {
3301             // computeRightBound populates crossGRFDst field
3302             getInst()->computeRightBound(this);
3303         }
3304 
3305         return (left_bound / numEltPerGRF<Type_UB>()) != right_bound / numEltPerGRF<Type_UB>();
3306     }
getHorzStride() const3307     unsigned short getHorzStride() const { return horzStride; }
getWriteMask() const3308     ChannelEnable  getWriteMask() const { return writeMask; }
3309     void           setWriteMask(ChannelEnable channels);
getAddrImm() const3310     short          getAddrImm() const { return immAddrOff; }
getElemSize() const3311     unsigned short getElemSize() const { return getTypeSize(); }
getExecTypeSize() const3312     unsigned short getExecTypeSize() const { return horzStride * getElemSize(); }
3313 
setImmAddrOff(short off)3314     void setImmAddrOff(short off) { immAddrOff = off; }
3315     bool obeySymbolRegRule() const;
3316     void emit(std::ostream& output, bool symbolreg = false) override;
3317     void emitRegVarOff(std::ostream& output, bool symbolreg = false);
3318 
isAreg() const3319     bool isAreg()    const { return base->isAreg(); }
isNullReg() const3320     bool isNullReg() const { return base->isNullReg(); }
isIpReg() const3321     bool isIpReg()   const { return base->isIpReg(); }
isFlag() const3322     bool isFlag()    const { return base->isFlag(); }
isNReg() const3323     bool isNReg()    const { return base->isNReg(); }
isAccReg() const3324     bool isAccReg()  const { return base->isAccReg(); }
isMaskReg() const3325     bool isMaskReg() const { return base->isMaskReg(); }
isMsReg() const3326     bool isMsReg()   const { return base->isMsReg(); }
isSrReg() const3327     bool isSrReg()   const { return base->isSrReg(); }
isCrReg() const3328     bool isCrReg()   const { return base->isCrReg(); }
isDbgReg() const3329     bool isDbgReg()  const { return base->isDbgReg(); }
isTmReg() const3330     bool isTmReg()   const { return base->isTmReg(); }
isTDRReg() const3331     bool isTDRReg()  const { return base->isTDRReg(); }
isA0() const3332     bool isA0()      const { return base->isA0(); }
isGreg() const3333     bool isGreg()    const { return base->isGreg(); }
isAddress() const3334     bool isAddress() const { return base->isAddress(); }
isScalarAddr() const3335     bool isScalarAddr() const { return base->isScalarAddr(); }
3336 
3337     unsigned short             ExRegNum(bool&);
3338     unsigned short             ExSubRegNum(bool&);
3339     unsigned short             ExIndSubRegNum(bool&);
3340     short                      ExIndImmVal(void);
3341     void                       computePReg();
3342 
isIndirect() const3343     bool isIndirect() const { return acc != Direct; }
3344 
setType(G4_Type ty)3345     void setType(G4_Type ty)
3346     {
3347         bool recomputeLeftBound = false;
3348 
3349         if (TypeSize(type) != TypeSize(ty))
3350         {
3351             unsetRightBound();
3352             recomputeLeftBound = true;
3353         }
3354 
3355         type = ty;
3356 
3357         if (recomputeLeftBound)
3358         {
3359             computeLeftBound();
3360 
3361             if (getInst())
3362             {
3363                 getInst()->computeLeftBoundForImplAcc(getInst()->getImplAccDst());
3364                 getInst()->computeLeftBoundForImplAcc(getInst()->getImplAccSrc());
3365             }
3366         }
3367     }
3368 
setHorzStride(unsigned short hs)3369     void setHorzStride(unsigned short hs)
3370     {
3371         if (horzStride != hs)
3372         {
3373             unsetRightBound();
3374         }
3375 
3376         horzStride = hs;
3377     }
3378     void setDstBitVec(uint8_t exec_size);
3379     unsigned computeRightBound(uint8_t exec_size) override;
3380     G4_CmpRelation compareOperand(G4_Operand *opnd) override;
3381     bool isNativeType() const;
3382     bool isNativePackedRowRegion() const;
3383     bool isNativePackedRegion() const;
3384     bool coverGRF(uint16_t numGRF, uint8_t execSize);
3385     bool goodOneGRFDst(uint8_t execSize);
3386     bool goodtwoGRFDst(uint8_t execSize);
3387     bool evenlySplitCrossGRF(uint8_t execSize);
3388     bool checkGRFAlign() const;
3389     bool hasFixedSubregOffset(uint32_t& offset);
3390     uint8_t getMaxExecSize(int pos, uint8_t maxExSize, bool twoGRFsrc);
isSpilled() const3391     bool isSpilled() const
3392     {
3393         if (getBase() && getBase()->isRegVar())
3394         {
3395             return getBase()->asRegVar()->isSpilled();
3396         }
3397 
3398         return false;
3399     }
3400 };
3401 }
3402 
// Predicate control kinds. The numeric values are the ones the compiler
// assigns implicitly when enumerators are listed in this order.
typedef enum
{
    PRED_DEFAULT   = 0,
    PRED_ANY2H     = 1,
    PRED_ANY4H     = 2,
    PRED_ANY8H     = 3,
    PRED_ANY16H    = 4,
    PRED_ANY32H    = 5,
    PRED_ALL2H     = 6,
    PRED_ALL4H     = 7,
    PRED_ALL8H     = 8,
    PRED_ALL16H    = 9,
    PRED_ALL32H    = 10,
    PRED_ANYV      = 11,
    PRED_ALLV      = 12,
    PRED_ANY_WHOLE = 13,  // any of the flag-bits
    PRED_ALL_WHOLE = 14   // all of the flag-bits
} G4_Predicate_Control;
3421 
// Align16 predicate-control values. The numbering is explicit and starts at 1
// (there is no enumerator with value 0).
typedef enum
{
    PRED_ALIGN16_DEFAULT = 1,
    PRED_ALIGN16_X = 2,
    PRED_ALIGN16_Y = 3,
    PRED_ALIGN16_Z = 4,
    PRED_ALIGN16_W = 5,
    PRED_ALIGN16_ANY4H = 6,
    PRED_ALIGN16_ALL4H = 7
} G4_Align16_Predicate_Control;
3432 
3433 namespace vISA
3434 {
3435 //
3436 // predicate control for inst
3437 //
3438 class G4_Predicate final : public G4_Operand
3439 {
3440     friend class IR_Builder;
3441 
3442     G4_PredState   state;         // + or -
3443     unsigned short subRegOff;
3444     G4_Predicate_Control control;
3445 
3446     // this is only used at the very end by binary and asm emission, and
3447     // internally the align1 control above is always used instead even for align16 instructions.
3448     // currently this is always PRED_ALIGN16_DEFAULT except for simd1 inst,
3449     // for which it's PRED_ALIGN16_X
3450     G4_Align16_Predicate_Control align16Control;
3451 
3452     // Special predicate : it's equivalent to noMask and used for WA
3453     bool isPredicateSameAsNoMask;
3454 
G4_Predicate(G4_PredState s,G4_VarBase * flag,unsigned short srOff,G4_Predicate_Control ctrl)3455     G4_Predicate(G4_PredState s, G4_VarBase *flag, unsigned short srOff,
3456                  G4_Predicate_Control ctrl)
3457         : G4_Operand(G4_Operand::predicate, flag), state(s), subRegOff(srOff),
3458           control(ctrl), align16Control(PRED_ALIGN16_DEFAULT),
3459           isPredicateSameAsNoMask(false)
3460     {
3461         top_dcl = getBase()->asRegVar()->getDeclare();
3462         MUST_BE_TRUE(flag->isFlag(), ERROR_INTERNAL_ARGUMENT);
3463         if (getBase()->asRegVar()->getPhyReg())
3464         {
3465             left_bound = srOff * 16;
3466 
3467             byteOffset = srOff * 2;
3468 
3469             auto flagNum = getBase()->asRegVar()->getPhyReg()->asAreg()->getFlagNum();
3470             left_bound += flagNum * 32;
3471             byteOffset += flagNum * 4;
3472         }
3473         else
3474         {
3475             left_bound = 0;
3476             byteOffset = 0;
3477         }
3478     }
3479 
3480 public:
3481     G4_Predicate(G4_Predicate& prd);
3482 
operator new(size_t sz,Mem_Manager & m)3483     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
getSubRegOff() const3484     unsigned short getSubRegOff() const { return subRegOff; }
getRegOff() const3485     unsigned short getRegOff() const
3486     {
3487         MUST_BE_TRUE(getBase()->isAreg(), ERROR_INTERNAL_ARGUMENT);
3488         return getBase()->asRegVar()->getPhyReg()->asAreg()->getFlagNum();
3489     }
3490 
getState() const3491     G4_PredState   getState() const { return state; }
setState(G4_PredState s)3492     void   setState(G4_PredState s) { state = s; }
getControl() const3493     G4_Predicate_Control    getControl() const { return control; }
setControl(G4_Predicate_Control PredCtrl)3494     void setControl(G4_Predicate_Control PredCtrl) { control = PredCtrl; }
3495     bool samePredicate(const G4_Predicate& prd) const;
3496     void emit(std::ostream& output, bool symbolreg = false) override;
3497     void emit_body(std::ostream& output, bool symbolreg);
3498 
setAlign16PredicateControl(G4_Align16_Predicate_Control control)3499     void setAlign16PredicateControl(G4_Align16_Predicate_Control control) { align16Control = control; }
getAlign16PredicateControl() const3500     G4_Align16_Predicate_Control getAlign16PredicateControl() const { return align16Control; }
3501 
3502     unsigned computeRightBound(uint8_t exec_size) override;
3503     G4_CmpRelation compareOperand(G4_Operand *opnd) override;
3504     void splitPred();
setSameAsNoMask(bool v)3505     void setSameAsNoMask(bool v) { isPredicateSameAsNoMask = v; };
isSameAsNoMask() const3506     bool isSameAsNoMask() const { return isPredicateSameAsNoMask; }
getPredCtrlGroupSize() const3507     unsigned getPredCtrlGroupSize() const
3508     {
3509         switch (control)
3510         {
3511         case PRED_ANY2H:
3512         case PRED_ALL2H:
3513             return 2;
3514         case PRED_ANY4H:
3515         case PRED_ALL4H:
3516             return 4;
3517         case PRED_ANY8H:
3518         case PRED_ALL8H:
3519             return 8;
3520         case PRED_ANY16H:
3521         case PRED_ALL16H:
3522             return 16;
3523         case PRED_ANY32H:
3524         case PRED_ALL32H:
3525             return 32;
3526         default:
3527             return 1;
3528         }
3529     }
isAnyH(G4_Predicate_Control Ctrl)3530     static bool isAnyH(G4_Predicate_Control Ctrl)
3531     {
3532         switch (Ctrl)
3533         {
3534         default:
3535             break;
3536         case PRED_ANY2H:
3537         case PRED_ANY4H:
3538         case PRED_ANY8H:
3539         case PRED_ANY16H:
3540         case PRED_ANY32H:
3541             return true;
3542         }
3543         return false;
3544     }
isAllH(G4_Predicate_Control Ctrl)3545     static bool isAllH(G4_Predicate_Control Ctrl)
3546     {
3547         switch (Ctrl)
3548         {
3549         default:
3550             break;
3551         case PRED_ALL2H:
3552         case PRED_ALL4H:
3553         case PRED_ALL8H:
3554         case PRED_ALL16H:
3555         case PRED_ALL32H:
3556             return true;
3557         }
3558         return false;
3559     }
3560 };
3561 
3562 //
3563 // condition modifier for inst
3564 //
3565 class G4_CondMod final : public G4_Operand
3566 {
3567     friend class IR_Builder;
3568     G4_CondModifier   mod;
3569     unsigned short subRegOff;
3570 
G4_CondMod(G4_CondModifier m,G4_VarBase * flag,unsigned short off)3571     G4_CondMod(G4_CondModifier m, G4_VarBase *flag, unsigned short off)
3572         : G4_Operand(G4_Operand::condMod, flag), mod(m), subRegOff(off)
3573     {
3574         if (flag != nullptr)
3575         {
3576             top_dcl = getBase()->asRegVar()->getDeclare();
3577             MUST_BE_TRUE(flag->isFlag(), ERROR_INTERNAL_ARGUMENT);
3578             if (getBase()->asRegVar()->getPhyReg())
3579             {
3580                 left_bound = off * 16;
3581                 byteOffset = off * 2;
3582 
3583                 auto flagNum = getBase()->asRegVar()->getPhyReg()->asAreg()->getFlagNum();
3584                 left_bound += flagNum * 32;
3585                 byteOffset += flagNum * 4;
3586             }
3587             else
3588             {
3589                 left_bound = 0;
3590                 byteOffset = 0;
3591             }
3592         }
3593     }
3594 
3595 public:
3596     G4_CondMod(G4_CondMod &cMod);
operator new(size_t sz,Mem_Manager & m)3597     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
getMod() const3598     G4_CondModifier getMod() const { return mod; }
getRegOff() const3599     unsigned short getRegOff() const
3600     {
3601         MUST_BE_TRUE(getBase()->isAreg(), ERROR_INTERNAL_ARGUMENT);
3602         MUST_BE_TRUE(getBase()->asRegVar()->getPhyReg(), "getRegOff is called for non-PhyReg");
3603         return getBase()->asRegVar()->getPhyReg()->asAreg()->getFlagNum();
3604     }
getSubRegOff() const3605     unsigned short getSubRegOff() const { return subRegOff; }
3606     bool sameCondMod(const G4_CondMod& prd) const;
3607     void emit(std::ostream& output, bool symbolreg = false) override;
3608 
3609     // Get condition modifier when operands are reversed.
getReverseCondMod(G4_CondModifier mod)3610     static G4_CondModifier getReverseCondMod(G4_CondModifier mod)
3611     {
3612         switch (mod)
3613         {
3614         default:
3615             break;
3616         case Mod_g:
3617             return Mod_le;
3618         case Mod_ge:
3619             return Mod_l;
3620         case Mod_l:
3621             return Mod_ge;
3622         case Mod_le:
3623             return Mod_g;
3624         }
3625 
3626         return mod;
3627     }
3628 
3629     unsigned computeRightBound(uint8_t exec_size) override;
3630     G4_CmpRelation compareOperand(G4_Operand *opnd) override;
3631     void splitCondMod();
3632 };
3633 
3634 class G4_AddrExp final : public G4_Operand
3635 {
3636     G4_RegVar* const m_addressedReg;
3637     int m_offset;  //current implementation: byte offset
3638 
3639 public:
G4_AddrExp(G4_RegVar * reg,int offset,G4_Type ty)3640     G4_AddrExp(G4_RegVar *reg, int offset, G4_Type ty)
3641       : G4_Operand(G4_Operand::addrExp, ty), m_addressedReg(reg),
3642         m_offset(offset) {}
3643 
operator new(size_t sz,Mem_Manager & m)3644     void *operator new(size_t sz, Mem_Manager& m) {return m.alloc(sz);}
3645 
getRegVar() const3646     const G4_RegVar* getRegVar() const { return m_addressedReg; }
getRegVar()3647           G4_RegVar* getRegVar()       { return m_addressedReg; }
getOffset() const3648     int getOffset() const { return m_offset; }
setOffset(int tOffset)3649     void setOffset(int tOffset) { m_offset = tOffset; }
3650 
3651     int eval();
isRegAllocPartaker() const3652     bool isRegAllocPartaker() const { return m_addressedReg->isRegAllocPartaker(); }
3653 
3654     void emit(std::ostream& output, bool symbolreg = false);
3655 };
3656 
getRegAccess() const3657 inline G4_RegAccess G4_Operand::getRegAccess() const
3658 {
3659     if (isSrcRegRegion())
3660         return asSrcRegRegion()->getRegAccess();
3661     else if (isDstRegRegion())
3662         return asDstRegRegion()->getRegAccess();
3663     return Direct;
3664 }
3665 
isGreg() const3666 inline bool G4_Operand::isGreg() const
3667 {
3668     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isGreg();
3669 }
isAreg() const3670 inline bool G4_Operand::isAreg() const
3671 {
3672     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isAreg();
3673 }
isNullReg() const3674 inline bool G4_Operand::isNullReg() const
3675 {
3676     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isNullReg();
3677 }
isIpReg() const3678 inline bool G4_Operand::isIpReg() const
3679 {
3680     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isIpReg();
3681 }
isNReg() const3682 inline bool G4_Operand::isNReg() const
3683 {
3684     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isNReg();
3685 }
isAccReg() const3686 inline bool G4_Operand::isAccReg() const
3687 {
3688     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isAccReg();
3689 }
isFlag() const3690 inline bool G4_Operand::isFlag() const
3691 {
3692     if (isRegRegion() && const_cast<G4_VarBase *>(getBase())->isFlag())
3693         return true;
3694     return isPredicate() || isCondMod();
3695 }
isMaskReg() const3696 inline bool G4_Operand::isMaskReg() const
3697 {
3698     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isMaskReg();
3699 }
isMsReg() const3700 inline bool G4_Operand::isMsReg() const
3701 {
3702     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isMsReg();
3703 }
isSrReg() const3704 inline bool G4_Operand::isSrReg() const
3705 {
3706     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isSrReg();
3707 }
isCrReg() const3708 inline bool G4_Operand::isCrReg() const
3709 {
3710     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isCrReg();
3711 }
isDbgReg() const3712 inline bool G4_Operand::isDbgReg() const
3713 {
3714     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isDbgReg();
3715 }
isTmReg() const3716 inline bool G4_Operand::isTmReg() const
3717 {
3718     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isTmReg();
3719 }
isTDRReg() const3720 inline bool G4_Operand::isTDRReg() const
3721 {
3722     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isTDRReg();
3723 }
isA0() const3724 inline bool G4_Operand::isA0() const
3725 {
3726     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isA0();
3727 }
isAddress() const3728 inline bool G4_Operand::isAddress() const
3729 {
3730     return isRegRegion() && const_cast<G4_VarBase *>(getBase())->isAddress();
3731 }
isScalarAddr() const3732 inline bool G4_Operand::isScalarAddr() const
3733 {
3734     return isRegRegion() && const_cast<G4_VarBase*>(getBase())->isScalarAddr();
3735 }
3736 
3737 // Inlined members of G4_VarBase
isAreg() const3738 inline bool G4_VarBase::isAreg() const
3739 {
3740     if (isRegVar())
3741         return asRegVar()->isAreg();
3742     return isPhyAreg();
3743 }
isGreg() const3744 inline bool G4_VarBase::isGreg() const
3745 {
3746     if (isRegVar())
3747         return asRegVar()->isGreg();
3748     return isPhyGreg();
3749 }
isNullReg() const3750 inline bool G4_VarBase::isNullReg() const
3751 {
3752     if (isRegVar())
3753         return asRegVar()->isNullReg();
3754     return isPhyAreg() && asAreg()->isNullReg();
3755 }
isIpReg() const3756 inline bool G4_VarBase::isIpReg() const
3757 {
3758     if (isRegVar())
3759         return asRegVar()->isIpReg();
3760     return isPhyAreg() && asAreg()->isIpReg();
3761 }
isFlag() const3762 inline bool G4_VarBase::isFlag() const
3763 {
3764     if (isRegVar())
3765         return asRegVar()->isFlag();
3766     return isPhyAreg() && asAreg()->isFlag();
3767 }
isNReg() const3768 inline bool G4_VarBase::isNReg() const
3769 {
3770     if (isRegVar())
3771         return asRegVar()->isNReg();
3772     return isPhyAreg() && asAreg()->isNReg();
3773 }
isAccReg() const3774 inline bool G4_VarBase::isAccReg() const
3775 {
3776     if (isRegVar())
3777         return asRegVar()->isAccReg();
3778     return isPhyAreg() && asAreg()->isAccReg();
3779 }
isMaskReg() const3780 inline bool G4_VarBase::isMaskReg() const
3781 {
3782     if (isRegVar())
3783         return asRegVar()->isMaskReg();
3784     return isPhyAreg() && asAreg()->isMaskReg();
3785 }
isMsReg() const3786 inline bool G4_VarBase::isMsReg() const
3787 {
3788     if (isRegVar())
3789         return asRegVar()->isMsReg();
3790     return isPhyAreg() && asAreg()->isMsReg();
3791 }
isSrReg() const3792 inline bool G4_VarBase::isSrReg() const
3793 {
3794     if (isRegVar())
3795         return asRegVar()->isSrReg();
3796     return isPhyAreg() && asAreg()->isSrReg();
3797 }
isCrReg() const3798 inline bool G4_VarBase::isCrReg() const
3799 {
3800     if (isRegVar())
3801         return asRegVar()->isCrReg();
3802     return isPhyAreg() && asAreg()->isCrReg();
3803 }
isDbgReg() const3804 inline bool G4_VarBase::isDbgReg() const
3805 {
3806     if (isRegVar())
3807         return asRegVar()->isDbgReg();
3808     return isPhyAreg() && asAreg()->isDbgReg();
3809 }
isTmReg() const3810 inline bool G4_VarBase::isTmReg() const
3811 {
3812     if (isRegVar())
3813         return asRegVar()->isTmReg();
3814     return isPhyAreg() && asAreg()->isTmReg();
3815 }
isTDRReg() const3816 inline bool G4_VarBase::isTDRReg() const
3817 {
3818     if (isRegVar())
3819         return asRegVar()->isTDRReg();
3820     return isPhyAreg() && asAreg()->isTDRReg();
3821 }
isA0() const3822 inline bool G4_VarBase::isA0() const
3823 {
3824     if (isRegVar())
3825         return asRegVar()->isA0();
3826     return isPhyAreg() && asAreg()->isA0();
3827 }
isAddress() const3828 inline bool G4_VarBase::isAddress() const
3829 {
3830     if (isRegVar())
3831         return asRegVar()->isAddress();
3832     return isPhyAreg() && asAreg()->isA0();
3833 }
isScalarAddr() const3834 inline bool G4_VarBase::isScalarAddr() const
3835 {
3836     if (isRegVar())
3837         return asRegVar()->isScalarAddr();
3838     return false;
3839 }
isSpReg() const3840 inline bool G4_VarBase::isSpReg() const
3841 {
3842     if (isRegVar())
3843     {
3844         return asRegVar()->isSpReg();
3845     }
3846     return isPhyAreg() && asAreg()->isSpReg();
3847 }
3848 
3849 /// return the physical AReg associated with this VarBase objkect.
3850 /// This is either the VarBase itself, or if this is a RegVar the phyAReg it is allocated to.
3851 /// return null if VarBase is not a AReg or it's a RegVar that has not been assigned a AReg yet
getAreg() const3852 inline G4_Areg* G4_VarBase::getAreg() const
3853 {
3854     G4_Areg* areg = nullptr;
3855     if (isRegVar())
3856     {
3857         G4_VarBase* phyReg = asRegVar()->getPhyReg();
3858         if (phyReg && phyReg->isAreg())
3859         {
3860             areg = phyReg->asAreg();
3861         }
3862     }
3863     else if (isPhyAreg())
3864     {
3865         areg = asAreg();
3866     }
3867     return areg;
3868 }
3869 
3870 // CR/SR/SP/TM0/IP do not have scoreboard
noScoreBoard() const3871 inline bool G4_VarBase::noScoreBoard() const
3872 {
3873     G4_Areg* areg = getAreg();
3874 
3875     if (areg != nullptr)
3876     {
3877         return areg->isCrReg() || areg->isSrReg() || areg->isSpReg() ||
3878             areg->isTmReg() || areg->isIpReg() || areg->isDbgReg();
3879     }
3880     else
3881     {
3882         return false;
3883     }
3884 }
3885 
3886 
isRegAllocPartaker() const3887 inline bool G4_VarBase::isRegAllocPartaker() const
3888 {
3889     return isRegVar() && asRegVar()->isRegAllocPartaker();
3890 }
3891 
3892 // G4_RegVar methods
getLocId() const3893 inline unsigned G4_RegVar::getLocId() const
3894 {
3895     MUST_BE_TRUE(type == RegVarType::AddrSpillLoc, "Unexpected type in getLocId()");
3896 
3897     G4_RegVarAddrSpillLoc* addrSpillLoc =
3898     static_cast <G4_RegVarAddrSpillLoc*>(const_cast<G4_RegVar*>(this));
3899     return addrSpillLoc->getLocId();
3900 }
3901 
isRegVarSpill() const3902 inline  bool G4_RegVar::isRegVarSpill() const
3903 {
3904     if (isRegVarTransient())
3905     {
3906         G4_RegVarTransient* transientVar =
3907             static_cast<G4_RegVarTransient*>(const_cast<G4_RegVar*>(this));
3908         return transientVar->isRegVarSpill();
3909     }
3910     return false;
3911 }
3912 
isRegVarFill() const3913 inline bool G4_RegVar::isRegVarFill() const
3914 {
3915     if (isRegVarTransient())
3916     {
3917         G4_RegVarTransient* transientVar =
3918             static_cast<G4_RegVarTransient*>(const_cast<G4_RegVar*>(this));
3919         return transientVar->isRegVarFill();
3920     }
3921     return false;
3922 }
3923 
getBaseRegVar()3924 inline G4_RegVar* G4_RegVar::getBaseRegVar()
3925 {
3926     if (type == RegVarType::Transient)
3927     {
3928         G4_RegVarTransient* transient = static_cast<G4_RegVarTransient*>(this);
3929         return transient->getBaseRegVar();
3930     }
3931     else if (type == RegVarType::GRFSpillTmp)
3932     {
3933         G4_RegVarTmp* tmp = static_cast<G4_RegVarTmp*>(this);
3934         return tmp->getBaseRegVar();
3935     }
3936 
3937     // For Default, AddrSpillLoc
3938     return this;
3939 }
3940 
getAbsBaseRegVar()3941 inline G4_RegVar* G4_RegVar::getAbsBaseRegVar()
3942 {
3943     if (type == RegVarType::Transient || type == RegVarType::GRFSpillTmp)
3944     {
3945         G4_RegVar * base;
3946         for (base = getBaseRegVar(); base->getBaseRegVar() != base; base = base->getBaseRegVar());
3947         return base;
3948     }
3949 
3950     return this;
3951 }
3952 
getNonTransientBaseRegVar()3953 inline G4_RegVar* G4_RegVar::getNonTransientBaseRegVar()
3954 {
3955     if (type == RegVarType::Transient)
3956     {
3957         G4_RegVarTransient* transient = static_cast<G4_RegVarTransient*>(this);
3958         return transient->getNonTransientBaseRegVar();
3959     }
3960     else if (type == RegVarType::GRFSpillTmp)
3961     {
3962         G4_RegVarTmp* tmp = static_cast<G4_RegVarTmp*>(this);
3963         return tmp->getNonTransientBaseRegVar();
3964     }
3965 
3966     return this;
3967 }
3968 
3969 //
3970 // place for holding all physical register operands
3971 //
3972 class PhyRegPool
3973 {
3974     unsigned maxGRFNum;
3975     G4_Greg** GRF_Table;
3976     G4_Areg* ARF_Table[AREG_LAST];
3977 public:
3978     PhyRegPool(Mem_Manager&m, unsigned int maxRegisterNumber); // create all physical register operands
3979     void rebuildRegPool(Mem_Manager& m, unsigned int numRegisters);
getGreg(unsigned i)3980     G4_Greg* getGreg(unsigned i)
3981     {
3982         MUST_BE_TRUE(i < maxGRFNum, "invalid GRF");
3983         return GRF_Table[i];
3984     }
3985 
getNullReg()3986     G4_Areg* getNullReg() { return ARF_Table[AREG_NULL]; }
getMask0Reg()3987     G4_Areg* getMask0Reg() { return ARF_Table[AREG_MASK0]; }
getAcc0Reg()3988     G4_Areg* getAcc0Reg() { return ARF_Table[AREG_ACC0]; }
getAcc1Reg()3989     G4_Areg* getAcc1Reg() { return ARF_Table[AREG_ACC1]; }
getDbgReg()3990     G4_Areg* getDbgReg() { return ARF_Table[AREG_DBG]; }
getMs0Reg()3991     G4_Areg* getMs0Reg() { return ARF_Table[AREG_MS0]; }
getSr0Reg()3992     G4_Areg* getSr0Reg() { return ARF_Table[AREG_SR0]; }
getCr0Reg()3993     G4_Areg* getCr0Reg() { return ARF_Table[AREG_CR0]; }
getTm0Reg()3994     G4_Areg* getTm0Reg() { return ARF_Table[AREG_TM0]; }
getAddrReg()3995     G4_Areg* getAddrReg() { return ARF_Table[AREG_A0]; }
getN0Reg()3996     G4_Areg* getN0Reg() { return ARF_Table[AREG_N0]; }
getN1Reg()3997     G4_Areg* getN1Reg() { return ARF_Table[AREG_N1]; }
getIpReg()3998     G4_Areg* getIpReg() { return ARF_Table[AREG_IP]; }
getF0Reg()3999     G4_Areg* getF0Reg() { return ARF_Table[AREG_F0]; }
getF1Reg()4000     G4_Areg* getF1Reg() { return ARF_Table[AREG_F1]; }
getTDRReg()4001     G4_Areg* getTDRReg() { return ARF_Table[AREG_TDR0]; }
getSPReg()4002     G4_Areg* getSPReg() { return ARF_Table[AREG_SP]; }
getF2Reg()4003     G4_Areg* getF2Reg() { return ARF_Table[AREG_F2]; }
getF3Reg()4004     G4_Areg* getF3Reg() { return ARF_Table[AREG_F3]; }
4005 
4006     // map int to flag areg
getFlagAreg(int flagNum)4007     G4_Areg* getFlagAreg(int flagNum)
4008     {
4009         switch (flagNum)
4010         {
4011             case 0:
4012                 return getF0Reg();
4013             case 1:
4014                 return getF1Reg();
4015             case 2:
4016                 return getF2Reg();
4017             case 3:
4018                 return getF3Reg();
4019             default:
4020                 assert(false && "unexpected flag register value");
4021                 return nullptr;
4022         }
4023     }
4024 };
4025 
getOperand(Gen4_Operand_Number opnd_num)4026 inline G4_Operand* G4_INST::getOperand(Gen4_Operand_Number opnd_num)
4027 {
4028     if (isPseudoAddrMovIntrinsic() && isSrcNum(opnd_num))
4029         return asIntrinsicInst()->getOperand(opnd_num);
4030     if (isInstrinsicOnlySrcNum(opnd_num))
4031         return NULL;
4032     return const_cast<G4_Operand*>(((const G4_INST*)this)->getOperand(opnd_num));
4033 }
4034 
getSrc(unsigned i) const4035 inline G4_Operand* G4_INST::getSrc(unsigned i) const
4036 {
4037     if (isPseudoAddrMovIntrinsic())
4038         return asIntrinsicInst()->getIntrinsicSrc(i);
4039     else
4040     {
4041         MUST_BE_TRUE(i < G4_MAX_SRCS, ERROR_INTERNAL_ARGUMENT);
4042         return srcs[i];
4043     }
4044 }
4045 
getNumSrc() const4046 inline int G4_INST::getNumSrc() const
4047 {
4048     return isIntrinsic() ? asIntrinsicInst()->getNumSrc()
4049                          : G4_Inst_Table[op].n_srcs;
4050 }
4051 
getNumDst() const4052 inline int G4_INST::getNumDst() const
4053 {
4054     return isIntrinsic() ? asIntrinsicInst()->getNumDst()
4055         : G4_Inst_Table[op].n_dst;
4056 }
4057 
isPseudoUse() const4058 inline bool G4_INST::isPseudoUse() const
4059 {
4060     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::Use;
4061 }
4062 
isPseudoKill() const4063 inline bool G4_INST::isPseudoKill() const
4064 {
4065     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoKill;
4066 }
4067 
isLifeTimeEnd() const4068 inline bool G4_INST::isLifeTimeEnd() const
4069 {
4070     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoUse;
4071 }
4072 
isSpillIntrinsic() const4073 inline bool G4_INST::isSpillIntrinsic() const
4074 {
4075     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::Spill;
4076 }
4077 
asSpillIntrinsic() const4078 inline G4_SpillIntrinsic* G4_INST::asSpillIntrinsic() const
4079 {
4080     MUST_BE_TRUE(isSpillIntrinsic(), "not a spill intrinsic");
4081     return const_cast<G4_SpillIntrinsic*>(reinterpret_cast<const G4_SpillIntrinsic*>(this));
4082 }
4083 
isFillIntrinsic() const4084 inline bool G4_INST::isFillIntrinsic() const
4085 {
4086     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::Fill;
4087 }
4088 
asFillIntrinsic() const4089 inline G4_FillIntrinsic* G4_INST::asFillIntrinsic() const
4090 {
4091     MUST_BE_TRUE(isFillIntrinsic(), "not a fill intrinsic");
4092     return const_cast<G4_FillIntrinsic*>(reinterpret_cast<const G4_FillIntrinsic*>(this));
4093 }
4094 
isPseudoAddrMovIntrinsic() const4095 inline bool G4_INST::isPseudoAddrMovIntrinsic() const
4096 {
4097     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::PseudoAddrMov;
4098 }
4099 
isSplitIntrinsic() const4100 inline bool G4_INST::isSplitIntrinsic() const
4101 {
4102     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::Split;
4103 }
4104 
isCallerSave() const4105 inline bool G4_INST::isCallerSave() const
4106 {
4107     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::CallerSave;
4108 }
4109 
isCallerRestore() const4110 inline bool G4_INST::isCallerRestore() const
4111 {
4112     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::CallerRestore;
4113 }
4114 
isCalleeSave() const4115 inline bool G4_INST::isCalleeSave() const
4116 {
4117     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::CalleeSave;
4118 }
4119 
isCalleeRestore() const4120 inline bool G4_INST::isCalleeRestore() const
4121 {
4122     return isIntrinsic() && asIntrinsicInst()->getIntrinsicId() == Intrinsic::CalleeRestore;
4123 }
4124 
isRelocationMov() const4125 inline bool G4_INST::isRelocationMov() const
4126 {
4127     return isMov() && srcs[0]->isRelocImm();
4128 }
4129 
getLabelStr() const4130 inline const char* G4_INST::getLabelStr() const
4131 {
4132     MUST_BE_TRUE(srcs[0] && srcs[0]->isLabel(), ERROR_UNKNOWN);
4133     return srcs[0]->asLabel()->getLabel();
4134 }
4135 
isUniformGoto(unsigned KernelSimdSize) const4136 inline bool G4_InstCF::isUniformGoto(unsigned KernelSimdSize) const
4137 {
4138     assert(op == G4_goto);
4139     const G4_Predicate *pred = getPredicate();
4140     if (getExecSize() == g4::SIMD1 || pred == nullptr)
4141         return true;
4142 
4143     // This is uniform if group size equals to the kernel simd size.
4144     return pred->getPredCtrlGroupSize() == KernelSimdSize;
4145 }
4146 
isIndirectJmp() const4147 inline bool G4_InstCF::isIndirectJmp() const
4148 {
4149     return op == G4_jmpi && !srcs[0]->isLabel();
4150 }
4151 
getJipLabelStr() const4152 inline const char* G4_InstCF::getJipLabelStr() const
4153 {
4154     MUST_BE_TRUE(jip != NULL && jip->isLabel(), ERROR_UNKNOWN);
4155     return jip->asLabel()->getLabel();
4156 }
4157 
getUipLabelStr() const4158 inline const char* G4_InstCF::getUipLabelStr() const
4159 {
4160     MUST_BE_TRUE(uip != NULL && uip->isLabel(), ERROR_UNKNOWN);
4161     return uip->asLabel()->getLabel();
4162 }
4163 
isIndirectCall() const4164 inline bool G4_InstCF::isIndirectCall() const
4165 {
4166     return op == G4_pseudo_fcall && !getSrc(0)->isLabel();
4167 }
4168 
computeSpillFillOperandBound(G4_Operand * opnd,unsigned int LB,int numReg)4169 static void computeSpillFillOperandBound(G4_Operand* opnd, unsigned int LB, int numReg)
4170 {
4171     if (numReg == 0)
4172     {
4173         return;
4174     }
4175 
4176     // read/write in units of GRF.
4177     unsigned RB = std::min(opnd->getTopDcl()->getByteSize(),
4178         LB + numReg * numEltPerGRF<Type_UB>()) - 1;
4179 
4180     unsigned NBytes = RB - LB + 1;
4181     opnd->setBitVecFromSize(NBytes);
4182     opnd->setRightBound(RB);
4183 }
4184 
4185 class G4_SpillIntrinsic : public G4_InstIntrinsic
4186 {
4187 public:
G4_SpillIntrinsic(const IR_Builder & builder,G4_Predicate * prd,Intrinsic intrinId,G4_ExecSize execSize,G4_DstRegRegion * d,G4_Operand * header,G4_Operand * payload,G4_Operand * s2,G4_InstOpts opt)4188     G4_SpillIntrinsic(
4189         const IR_Builder& builder,
4190         G4_Predicate* prd,
4191         Intrinsic intrinId,
4192         G4_ExecSize execSize,
4193         G4_DstRegRegion* d,
4194         G4_Operand* header,
4195         G4_Operand* payload,
4196         G4_Operand* s2,
4197         G4_InstOpts opt) :
4198         G4_InstIntrinsic(builder, prd, intrinId, execSize, d, header, payload, s2, opt)
4199     {
4200 
4201     }
4202 
4203     const static unsigned int InvalidOffset = 0xfffffffe;
4204 
isOffBP() const4205     bool isOffBP() const { return getFP() != nullptr; }
4206 
getNumRows() const4207     uint32_t getNumRows() const { return numRows; }
getOffset() const4208     uint32_t getOffset() const { return offset; }
getOffsetInBytes() const4209     uint32_t getOffsetInBytes() const { return offset * getGRFSize(); }
getFP() const4210     G4_Declare* getFP() const { return fp; }
getHeader() const4211     G4_SrcRegRegion* getHeader() const { return getSrc(0)->asSrcRegRegion(); }
getPayload() const4212     G4_SrcRegRegion* getPayload() const { return getSrc(1)->asSrcRegRegion(); }
4213 
setNumRows(uint32_t r)4214     void setNumRows(uint32_t r) { numRows = r; }
setOffset(uint32_t o)4215     void setOffset(uint32_t o) { offset = o; }
setFP(G4_Declare * f)4216     void setFP(G4_Declare* f) { fp = f; }
4217 
isOffsetValid() const4218     bool isOffsetValid() const { return offset != InvalidOffset; }
4219 
computeRightBound(G4_Operand * opnd)4220     void computeRightBound(G4_Operand* opnd)
4221     {
4222         uint16_t numReg = 0;
4223         if (opnd == getSrc(1))
4224         {
4225             numReg = asSpillIntrinsic()->getNumRows();
4226         }
4227         else if (opnd->isSrcRegRegion() && opnd == getSrc(0))
4228         {
4229             numReg = 1;
4230         }
4231         computeSpillFillOperandBound(opnd, opnd->left_bound, numReg);
4232     }
4233 
4234 private:
4235     G4_Declare* fp = nullptr;
4236     uint32_t numRows = 0;
4237     uint32_t offset = InvalidOffset;
4238 };
4239 
4240 class G4_PseudoAddrMovIntrinsic : public G4_InstIntrinsic
4241 {
4242 public:
G4_PseudoAddrMovIntrinsic(const IR_Builder & builder,Intrinsic intrinId,G4_DstRegRegion * d,G4_Operand * s0,G4_Operand * s1,G4_Operand * s2,G4_Operand * s3,G4_Operand * s4,G4_Operand * s5,G4_Operand * s6,G4_Operand * s7)4243     G4_PseudoAddrMovIntrinsic(
4244         const IR_Builder& builder,
4245         Intrinsic intrinId,
4246         G4_DstRegRegion* d,
4247         G4_Operand* s0,
4248         G4_Operand* s1,
4249         G4_Operand* s2,
4250         G4_Operand* s3,
4251         G4_Operand* s4,
4252         G4_Operand* s5,
4253         G4_Operand* s6,
4254         G4_Operand* s7) :
4255         G4_InstIntrinsic(builder, nullptr, intrinId, G4_ExecSize(1), d, s0, s1, s2, s3, s4, s5, s6, s7, InstOpt_NoOpt)
4256     {
4257     }
4258 };
4259 
4260 class G4_FillIntrinsic : public G4_InstIntrinsic
4261 {
4262 public:
G4_FillIntrinsic(const IR_Builder & builder,G4_Predicate * prd,Intrinsic intrinId,G4_ExecSize execSize,G4_DstRegRegion * d,G4_Operand * header,G4_Operand * s1,G4_Operand * s2,G4_InstOpts opt)4263     G4_FillIntrinsic(
4264         const IR_Builder& builder,
4265         G4_Predicate* prd,
4266         Intrinsic intrinId,
4267         G4_ExecSize execSize,
4268         G4_DstRegRegion* d,
4269         G4_Operand* header,
4270         G4_Operand* s1,
4271         G4_Operand* s2,
4272         G4_InstOpts opt) :
4273         G4_InstIntrinsic(builder, prd, intrinId, execSize, d, header, s1, s2, opt)
4274     {
4275 
4276     }
4277 
4278     const static unsigned int InvalidOffset = 0xfffffffe;
4279 
isOffBP() const4280     bool isOffBP() const { return getFP() != nullptr; }
4281 
getNumRows() const4282     uint32_t getNumRows() const { return numRows; }
getOffset() const4283     uint32_t getOffset() const { return offset; }
getOffsetInBytes() const4284     uint32_t getOffsetInBytes() const { return offset * getGRFSize(); }
getFP() const4285     G4_Declare* getFP() const { return fp; }
getHeader() const4286     G4_SrcRegRegion* getHeader() const { return getSrc(0)->asSrcRegRegion(); }
4287 
setNumRows(uint32_t r)4288     void setNumRows(uint32_t r) { numRows = r; }
setOffset(uint32_t o)4289     void setOffset(uint32_t o) { offset = o; }
setFP(G4_Declare * f)4290     void setFP(G4_Declare* f) { fp = f; }
4291 
isOffsetValid()4292     bool isOffsetValid() { return offset != InvalidOffset; }
4293 
computeRightBound(G4_Operand * opnd)4294     void computeRightBound(G4_Operand* opnd)
4295     {
4296         uint16_t numReg = 0;
4297         if (opnd == getDst())
4298         {
4299             numReg = asFillIntrinsic()->getNumRows();
4300         }
4301         else if (opnd->isSrcRegRegion() &&
4302             (opnd == getSrc(0) || opnd == getSrc(1)))
4303         {
4304             numReg = 1;
4305         }
4306         computeSpillFillOperandBound(opnd, opnd->left_bound, numReg);
4307     }
4308 
4309 private:
4310     G4_Declare* fp = nullptr;
4311     uint32_t numRows = 0;
4312     uint32_t offset = InvalidOffset;
4313 };
4314 
isScalarSrc() const4315 inline bool G4_Operand::isScalarSrc() const
4316 {
4317     return isImm() || isAddrExp() || (isSrcRegRegion() && asSrcRegRegion()->isScalar());
4318 }
4319 
getBaseRegVarRootDeclare() const4320 inline const G4_Declare *G4_Operand::getBaseRegVarRootDeclare() const
4321 {
4322     return getBase()->asRegVar()->getDeclare()->getRootDeclare();
4323 }
getBaseRegVarRootDeclare()4324 inline G4_Declare *G4_Operand::getBaseRegVarRootDeclare()
4325 {
4326     return getBase()->asRegVar()->getDeclare()->getRootDeclare();
4327 }
4328 
writesFlag() const4329 inline bool G4_INST::writesFlag() const
4330 {
4331     return (mod && op != G4_sel) || (dst && dst->isFlag());
4332 }
4333 
4334 } // namespace vISA
4335 
4336 #endif
4337