1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #ifndef _IGA_BACKEND_GED_ENCODER_H_
10 #define _IGA_BACKEND_GED_ENCODER_H_
11 
12 #include "GEDBitProcessor.hpp"
13 #include "IGAToGEDTranslation.hpp"
14 #include "../EncoderOpts.hpp"
15 #include "../../ErrorHandler.hpp"
16 #include "../../IR/Instruction.hpp"
17 #include "../../IR/Block.hpp"
18 #include "../../IR/Kernel.hpp"
19 #include "../../Timer/Timer.hpp"
20 
21 #include <list>
22 #include <map>
23 #include <vector>
24 
25 
26 namespace iga
27 {
// Convenience form of GED_ENCODE_TO that targets the encoder's current
// instruction (m_gedInst); only usable where that member is in scope.
#define GED_ENCODE(FUNC, ARG) \
    GED_ENCODE_TO(FUNC, ARG, &m_gedInst)

// Timing of the individual GED setter calls; the hooks expand to nothing
// unless GED_TIMER or _DEBUG is defined.
#if defined(GED_TIMER) || defined(_DEBUG)
#  define START_GED_TIMER() startIGATimer(TIMER_GED)
#  define STOP_GED_TIMER()  stopIGATimer(TIMER_GED)
#else
#  define START_GED_TIMER()
#  define STOP_GED_TIMER()
#endif

// Timing of the encoder as a whole; the hooks expand to nothing unless
// TOTAL_ENCODE_TIMER or _DEBUG is defined.
#if defined(TOTAL_ENCODE_TIMER) || defined(_DEBUG)
#  define START_ENCODER_TIMER() startIGATimer(TIMER_TOTAL)
#  define STOP_ENCODER_TIMER()  stopIGATimer(TIMER_TOTAL)
#else
#  define START_ENCODER_TIMER()
#  define STOP_ENCODER_TIMER()
#endif

// Uncomment the next line to log every GED setter call to std::cout.
// NOTE(review): this header does not include <iostream> itself, so tracing
// relies on it being visible at the point of use -- confirm before enabling.
// #define TRACE_GED_CALLS

#ifdef TRACE_GED_CALLS
#  define TRACE_GED_SETTER(FIELD, ARG, STATUS) \
       std::cout << "Encoder.cpp:" << __LINE__ << ": GED_Set" << #FIELD << \
           "(..," << (ARG) << ") results in " << (STATUS) << "\n"
#else
#  define TRACE_GED_SETTER(FIELD, ARG, STATUS)
#endif


// Calls GED_Set<FIELD>(GED, ARG) between the GED timer hooks, optionally
// traces the call, and forwards any status other than
// GED_RETURN_VALUE_SUCCESS to handleGedError() together with the line number
// and setter name. Note that ARG is evaluated a second time when tracing is
// enabled.
#define GED_ENCODE_TO(FIELD, ARG, GED) \
    do { \
        START_GED_TIMER(); \
        const GED_RETURN_VALUE _status = GED_Set ## FIELD (GED, ARG); \
        STOP_GED_TIMER(); \
        TRACE_GED_SETTER(FIELD, ARG, _status); \
        if (_status != GED_RETURN_VALUE_SUCCESS) { \
            handleGedError(__LINE__, #FIELD, _status); \
        } \
    } while (0)
69 
70 
71     using BlockList = std::list<Block *, std_arena_based_allocator<Block*>>;
72 
73     class Encoder : protected GEDBitProcessor
74     {
75     public:
76         Encoder(const Model& model, ErrorHandler& eh, const EncoderOpts& eos);
77 
78         void encodeKernel(
79             Kernel& k,
80             MemManager &m,
81             void*& bits,
82             uint32_t& bitsLen);
83 
84         size_t getNumInstructionsEncoded() const;
85 
86         ///////////////////////////////////////////////////////////////////////
87         // PROFILING API FOR TESTING (must be compiled into)
88         //
89         // compile with MEASURE_COMPILATION_TIME
90         double getElapsedTimeMicros(unsigned int idx);
91         int64_t getElapsedTimeTicks(unsigned int idx);
92         std::string getTimerName(unsigned int idx);
93 
94     protected:
95 
96         // TODO: phase these out
97         void encodeKernelPreProcess(Kernel &k);
98         void doEncodeKernelPreProcess(Kernel &k);
99         void encodeFC(const Instruction &i);
100         void encodeTernaryInstruction(const Instruction& inst, GED_ACCESS_MODE accessMode);
101         void encodeTernaryAlign1Instruction(const Instruction& inst);
102         void encodeTernaryAlign16Instruction(const Instruction& inst);
103         void encodeSendDirectDestination(const Operand& dst);
104         void encodeSendDestinationDataType(const Operand& dst);
105         void encodeOptionsThreadControl(const Instruction& inst);
106 
107     protected:
108         void encodeDstReg(RegName regName, uint16_t regNum); // just the regnum
109 
110         void encodeImmVal(const ImmVal &val, Type type);
111 
112         template <SourceIndex S> void encodeSrcRegFile(GED_REG_FILE rf);
113         template <SourceIndex S> void encodeSrcRegionVert(Region::Vert v);
114         template <SourceIndex S> void encodeSrcType(Type t);
115         template <SourceIndex S> void encodeSrcAddrMode(GED_ADDR_MODE x);
116         template <SourceIndex S> void encodeSrcModifier(SrcModifier x);
117         template <SourceIndex S> void encodeSrcSubRegNum(uint32_t subRegInByte);
118 
119         template <SourceIndex S> void encodeSrcMathMacroReg(MathMacroExt a);
120 
121         template <SourceIndex S> void encodeSrcReg(RegName regName, uint16_t regNum);
122         template <SourceIndex S> void encodeSrcAddrImm(int32_t addrImm);
123         template <SourceIndex S> void encodeSrcAddrSubRegNum(uint32_t addrSubReg);
124         template <SourceIndex S> void encodeSrcRegion(const Region& r, bool hasRgnWi = true);
125         template <SourceIndex S> void encodeSrcRegionWidth(Region::Width w);
126         template <SourceIndex S> void encodeSrcRepCtrl(GED_REP_CTRL rep);
127         template <SourceIndex S> void encodeSrcChanSel(
128             GED_SWIZZLE chSelX,
129             GED_SWIZZLE chSelY,
130             GED_SWIZZLE chSelZ,
131             GED_SWIZZLE chSelW);
132 
133         template <SourceIndex S>
134         void encodeTernarySourceAlign16(const Instruction& inst);
135         void encodeTernaryDestinationAlign16(const Instruction& inst);
136 
137         template <SourceIndex S>
138         void encodeTernarySourceAlign1(const Instruction& inst);
139         void encodeTernaryDestinationAlign1(const Instruction& inst);
140 
141         void encodeTernarySrcRegionVert(SourceIndex S, Region::Vert v);
142 
143         template <SourceIndex S>
144         void encodeTernaryImmVal(const ImmVal &val, Type type);
145 
146         template <SourceIndex S> void encodeSrcRegionHorz(Region::Horz s);
147 
148 
149         void handleGedError(int line, const char *setter, GED_RETURN_VALUE status);
150 
151         // state that is valid over instance life
152         EncoderOpts                               m_opts;
153 
154         // state that is valid over encodeInst()
155         ged_ins_t                                 m_gedInst;
156         bool                                      m_encodeAlign16 = false;
157         Op                                        m_opcode = Op::INVALID;
158         size_t                                    m_numberInstructionsEncoded;
159 
160     private:
operator delete(void *,MemManager *)161         void operator delete(void*, MemManager*) { };
operator new(size_t sz,MemManager * m)162         void *operator new(size_t sz, MemManager* m) {return m->alloc(sz);};
163 
164         void encodeBlock(Block* blk);
165         void encodeInstruction(Instruction& inst);
166         void patchJumpOffsets();
167 
168         ///////////////////////////////////////////////////////////////////////
169         // BASIC INSTRUCTIONS
170         ///////////////////////////////////////////////////////////////////////
171         void encodeBasicInstruction(const Instruction& inst, GED_ACCESS_MODE accessMode);
172         void encodeBasicDestination(
173             const Instruction& inst,
174             const Operand& dst,
175             GED_ACCESS_MODE accessMode = GED_ACCESS_MODE_Align1);
176         template <SourceIndex S> void encodeBasicSource(
177             const Instruction& inst,
178             const Operand& src,
179             GED_ACCESS_MODE accessMode = GED_ACCESS_MODE_Align1);
180         ///////////////////////////////////////////////////////////////////////
181         // BRANCH INSTRUCTIONS
182         ///////////////////////////////////////////////////////////////////////
183         void encodeBranchingInstruction(const Instruction& inst);
184         void encodeBranchingInstructionSimplified(const Instruction& inst);
185         void encodeBranchDestination(const Operand& dst);
186         void encodeBranchSource(const Operand& src);
187         ///////////////////////////////////////////////////////////////////////
188         // SEND INSTRUCTIONS
189         ///////////////////////////////////////////////////////////////////////
190         void encodeSendInstruction(const Instruction& inst);
191         void encodeSendSource0(const Operand& src);
192         void encodeSendDestination(const Operand& dst);
193         void encodeSendsSource0(const Operand& src);
194         void encodeSendsSource1(const Operand& src);
195         void encodeSendsDestination(const Operand& dst);
196 
197         void encodeSendDescs(const Instruction& inst);
198         void encodeSendDescsPreXe(const Instruction& inst);
199         void encodeSendDescsXe(const Instruction& inst);
200         void encodeSendDescsXeHP(const Instruction& inst);
201         void encodeSendDescsXeHPG(const Instruction& inst);
202 
203         ///////////////////////////////////////////////////////////////////////
204         // SYNC INSTRUCTIONS
205         ///////////////////////////////////////////////////////////////////////
206         void encodeSyncInstruction(const Instruction& inst);
207 
208         ///////////////////////////////////////////////////////////////////////
209         // OTHER HELPER FUNCTIONS
210         ///////////////////////////////////////////////////////////////////////
211         void     setEncodedPC(Instruction *inst, int32_t encodePC);
212         int32_t  getEncodedPC(const Instruction *inst) const;
213 
214         bool getBlockOffset(const Block *b, uint32_t &pc);
215 
216         // handles encoding ARF registers as well as the easy GRF case
217         // caller actually pulls the trigger and encodes the bits, but this
218         // call can raise the encoding error
219         uint32_t         translateRegNum(int opIx, RegName reg, uint16_t regNum);
220         uint32_t         mathMacroRegToBits(int src, MathMacroExt mme); // ChSel and SubReg
221         GED_DST_CHAN_EN  mathMacroRegToChEn(MathMacroExt mme);
222 
223         void applyGedWorkarounds(const Kernel &k, size_t bitsLen);
224         void encodeOptions(const Instruction& inst);
225 
226         //////////////////////////////////////////////////////////////////////
227         // platform specific queries *but sometimes need the instruction too)
228         //
229         // GEN7p5 implicitly scales PC offsets by QW except for a few instructions
230         bool arePcsInQWords(const OpSpec &os) const;
231 
232         // Call need to have src0 region be set to:
233         // SKL and before: <2;2,1>
234         // ICL: <2;4,1>
235         // Later GENs ignore the region completely
236         bool callNeedsSrc0Region221(const Instruction &inst) const;
237         bool callNeedsSrc0Region241(const Instruction &inst) const;
238 
239         /////////////////////////////////////////////////////////////
240         // state valid over encodeKernel()
241         MemManager                               *m_mem;
242         uint8_t                                  *m_instBuf = nullptr; // the output bits
243         struct JumpPatch { // JIP and UIP label patching
244             Instruction    *inst; // the instruction
245             ged_ins_t       gedInst; // the partially constructed GED instruction
246             uint8_t        *bits; // where to encode it in the heap
JumpPatchiga::Encoder::JumpPatch247             JumpPatch(Instruction *i, const ged_ins_t &gi, uint8_t *bs)
248                 : inst(i), gedInst(gi), bits(bs) { }
249         };
250         std::vector<JumpPatch>                    m_needToPatch;
251         std::map<const Block *, int32_t>          m_blockToOffsetMap;
252 
253     public:
254         ////////////////////////////////////////////////////////////////
typeConvesionHelper(const ImmVal & val,Type type)255         static uint64_t typeConvesionHelper(const ImmVal &val, Type type)
256         {
257             uint64_t value = 0;
258             switch (type) {
259             case Type::UD:
260             case Type::F:
261             case Type::V:
262             case Type::UV:
263             case Type::VF:
264                 value = (uint64_t)val.u32;
265                 break;
266             case Type::D:
267                 value = (uint64_t)val.s32;
268                 break;
269             case Type::W:
270                 value = (uint64_t)val.s16;
271                 break;
272             case Type::UW:
273             case Type::HF:
274             case Type::BF:
275                 value = (uint64_t)val.u16;
276                 break;
277             case Type::DF:
278             case Type::UQ:
279             case Type::Q:
280                 value = val.u64;
281                 break;
282             case Type::B:
283             case Type::UB:
284                 // technically not reachable since we don't permit byte moves
285                 // from immediates
286                 value = val.u64;
287                 break;
288             case Type::BF8:
289                 value = (uint64_t)val.u8;
290                 break;
291             case Type::TF32:
292                 value = (uint64_t)val.u32;
293                 break;
294             default:
295                 break;
296             }
297 
298             return value;
299         }
300 
301     protected:
302         ////////////////////////////////////////////////////////////////
303         // allowable types for ternary Align1
isTernaryAlign1Floating(Type t)304         static bool isTernaryAlign1Floating(Type t) {
305             switch (t) {
306             case Type::HF:
307             case Type::BF:
308             case Type::BF8:
309             case Type::TF32:
310             case Type::F:
311             case Type::DF:
312             case Type::NF:
313                 return true;
314             default:
315                 return false;
316             }
317         }
318 
319         // allowable types for ternary Align1
320 
isTernaryAlign1Integral(Type t)321         static bool isTernaryAlign1Integral(Type t) {
322             switch (t) {
323             case Type::UQ: // technically uq not allows today, but maybe in future
324             case Type::Q: // same as :uq
325             case Type::UD:
326             case Type::D:
327             case Type::UW:
328             case Type::W:
329             case Type::UB:
330             case Type::B:
331                 return true;
332             default:
333                 return false;
334             }
335         }
336 
337 
338     }; //end: class definition Encoder
339 
encodeSrcRegFile(GED_REG_FILE rf)340     template <SourceIndex S> void Encoder::encodeSrcRegFile(GED_REG_FILE rf) {
341         if (S == SourceIndex::SRC0) {
342             GED_ENCODE(Src0RegFile, rf);
343         } else if (S == SourceIndex::SRC1) {
344             GED_ENCODE(Src1RegFile, rf);
345         } else {
346             GED_ENCODE(Src2RegFile, rf);
347         }
348     }
349 
encodeSrcRegionVert(Region::Vert v)350     template <SourceIndex S> void Encoder::encodeSrcRegionVert(Region::Vert v) {
351         if (S == SourceIndex::SRC0) {
352             GED_ENCODE(Src0VertStride, lowerRegionVert(v));
353         } else { // (S == SourceIndex::SRC1)
354             GED_ENCODE(Src1VertStride, lowerRegionVert(v));
355         } // S != SRC2 since ternary Align1 doesn't have bits for that
356     }
357 
encodeSrcType(Type t)358     template <SourceIndex S> void Encoder::encodeSrcType(Type t) {
359         if (S == SourceIndex::SRC0) {
360             GED_ENCODE(Src0DataType, lowerDataType(t));
361         } else if (S == SourceIndex::SRC1) {
362             GED_ENCODE(Src1DataType, lowerDataType(t));
363         } else {
364             GED_ENCODE(Src2DataType, lowerDataType(t));
365         }
366     }
367 
encodeSrcAddrMode(GED_ADDR_MODE x)368     template <SourceIndex S> void Encoder::encodeSrcAddrMode(GED_ADDR_MODE x) {
369         if (S == SourceIndex::SRC0) {
370             GED_ENCODE(Src0AddrMode, x);
371         } else {
372             GED_ENCODE(Src1AddrMode, x);
373         }
374     }
375 
encodeSrcModifier(SrcModifier x)376     template <SourceIndex S> void Encoder::encodeSrcModifier(SrcModifier x) {
377         if (S == SourceIndex::SRC0) {
378             GED_ENCODE(Src0SrcMod, lowerSrcMod(x));
379         } else if (S == SourceIndex::SRC1) {
380             GED_ENCODE(Src1SrcMod, lowerSrcMod(x));
381         } else {
382             GED_ENCODE(Src2SrcMod, lowerSrcMod(x));
383         }
384     }
385 
encodeSrcSubRegNum(uint32_t subRegInByte)386     template <SourceIndex S> void Encoder::encodeSrcSubRegNum(uint32_t subRegInByte) {
387         if (S == SourceIndex::SRC0) {
388             GED_ENCODE(Src0SubRegNum, subRegInByte);
389         } else if (S == SourceIndex::SRC1) {
390             GED_ENCODE(Src1SubRegNum, subRegInByte);
391         } else {
392             GED_ENCODE(Src2SubRegNum, subRegInByte);
393         }
394     }
395 
396     template <SourceIndex S>
encodeSrcMathMacroReg(MathMacroExt a)397     void Encoder::encodeSrcMathMacroReg(MathMacroExt a)
398     {
399         if (S == SourceIndex::SRC0) {
400             GED_ENCODE(Src0MathMacroExt, lowerSpecialAcc(a));
401         } else if (S == SourceIndex::SRC1) {
402             GED_ENCODE(Src1MathMacroExt, lowerSpecialAcc(a));
403         } else {
404             GED_ENCODE(Src2MathMacroExt, lowerSpecialAcc(a));
405         }
406     }
encodeSrcReg(RegName regName,uint16_t regNum)407     template <SourceIndex S> void Encoder::encodeSrcReg(
408         RegName regName,
409         uint16_t regNum)
410     {
411         uint32_t regBits = 0;
412         if (regName == RegName::GRF_R) {
413             regBits = regNum; // GRF fast path
414         } else { // ARF slower path
415             const RegInfo *ri = m_model.lookupRegInfoByRegName(regName);
416             if (!ri) {
417                 errorT("src", (int)S, ": unexpected register on this platform");
418             } else {
419                 uint8_t reg8;
420                 ri->encode((int)regNum, reg8);
421                 regBits = reg8; // widen for GED
422             }
423         }
424         if (S == SourceIndex::SRC0) {
425             GED_ENCODE(Src0RegNum, regBits);
426         } else if (S == SourceIndex::SRC1) {
427             GED_ENCODE(Src1RegNum, regBits);
428         } else {
429             GED_ENCODE(Src2RegNum, regBits);
430         }
431     }
432 
encodeSrcAddrImm(int32_t addrImm)433     template <SourceIndex S> void Encoder::encodeSrcAddrImm(int32_t addrImm) {
434         if (S == SourceIndex::SRC0) {
435             GED_ENCODE(Src0AddrImm, addrImm);
436         } else {
437             GED_ENCODE(Src1AddrImm, addrImm);
438         }
439     }
440     template <SourceIndex S>
encodeSrcAddrSubRegNum(uint32_t addrSubReg)441     void Encoder::encodeSrcAddrSubRegNum(uint32_t addrSubReg) {
442         if (S == SourceIndex::SRC0) {
443             GED_ENCODE(Src0AddrSubRegNum, addrSubReg);
444         } else {
445             GED_ENCODE(Src1AddrSubRegNum, addrSubReg);
446         }
447     }
448     template <SourceIndex S>
encodeSrcRegion(const Region & rgn,bool hasRgnWi)449     void Encoder::encodeSrcRegion(const Region &rgn, bool hasRgnWi) {
450         uint32_t v = 0;
451         if (rgn.getVt() == Region::Vert::VT_VxH) {
452             v = 0x3;
453         } else if (rgn.getVt() != Region::Vert::VT_INVALID) {
454             v = static_cast<uint32_t>(rgn.v);
455         } else {
456             errorT(S == SourceIndex::SRC0 ?
457                 "invalid region vertical stride on src0" :
458                 "invalid region vertical stride on src1");
459         }
460 
461         uint32_t w = static_cast<uint32_t>(rgn.getWi());
462         if (rgn.getWi() == Region::Width::WI_INVALID) {
463             errorT(S == SourceIndex::SRC0 ?
464                 "invalid region width on src0" :
465                 "invalid region width on src1");
466         }
467 
468         uint32_t h = static_cast<uint32_t>(rgn.getHz());
469         if (rgn.getHz() == Region::Horz::HZ_INVALID) {
470             h = 1;
471             errorT(S == SourceIndex::SRC0 ?
472                 "invalid region horizontal stride on src0" :
473                 "invalid region horizontal stride on src1");
474         }
475 
476         if (S == SourceIndex::SRC0) {
477             GED_ENCODE(Src0VertStride, v);
478             if (hasRgnWi) {
479                 GED_ENCODE(Src0Width, w);
480             } else {
481                 // some ops have an implicit width region
482                 // (e.g. some specialized instructions poaches Src0.RgnWi)
483                 //
484                 // Within the IR we use 1 so logic that depends on regioning
485                 // gets the correct behavior (hardware assumes w=1).
486                 w = 1;
487             }
488             GED_ENCODE(Src0HorzStride, h);
489         } else if (S == SourceIndex::SRC1) {
490             GED_ENCODE(Src1VertStride, v);
491             GED_ENCODE(Src1Width, w);
492             GED_ENCODE(Src1HorzStride, h);
493         } else {
494             IGA_ASSERT_FALSE(
495                 "Encoder::encodeSrcRegion: only works on src0 and src1");
496         }
497     }
498 
499     template <SourceIndex S>
encodeSrcRegionWidth(Region::Width w)500     void Encoder::encodeSrcRegionWidth(Region::Width w) {
501         if (S == SourceIndex::SRC0) {
502             GED_ENCODE(Src0Width, lowerRegionWidth(w));
503         } else { // (S == SourceIndex::SRC1)
504             GED_ENCODE(Src1Width, lowerRegionWidth(w));
505         } // S != SRC2 since ternary Align1 doesn't have bits for that
506     }
507 
508     template <SourceIndex S>
encodeTernaryImmVal(const ImmVal & val,Type type)509     void Encoder::encodeTernaryImmVal(const ImmVal &val, Type type) {
510         if (S == SourceIndex::SRC0) {
511             GED_ENCODE(Src0TernaryImm, typeConvesionHelper(val, type));
512         } else if (S == SourceIndex::SRC2) {
513             GED_ENCODE(Src2TernaryImm, typeConvesionHelper(val, type));
514         } else {
515             errorT("immediate operands not supported in src1 of ternary formats");
516         }
517     }
518 
519     template <SourceIndex S>
encodeSrcRegionHorz(Region::Horz s)520     void Encoder::encodeSrcRegionHorz(Region::Horz s) {
521         if (S == SourceIndex::SRC0) {
522             GED_ENCODE(Src0HorzStride, lowerRegionHorz(s));
523         } else if (S == SourceIndex::SRC1) {
524             GED_ENCODE(Src1HorzStride, lowerRegionHorz(s));
525         } else {
526             GED_ENCODE(Src2HorzStride, lowerRegionHorz(s));
527         }
528     }
529 } // end: namespace iga*
530 
531 namespace iga
532 {
533     typedef Encoder Encoder;
534 }
535 #endif
536