1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef _IGA_BACKEND_GED_ENCODER_H_ 10 #define _IGA_BACKEND_GED_ENCODER_H_ 11 12 #include "GEDBitProcessor.hpp" 13 #include "IGAToGEDTranslation.hpp" 14 #include "../EncoderOpts.hpp" 15 #include "../../ErrorHandler.hpp" 16 #include "../../IR/Instruction.hpp" 17 #include "../../IR/Block.hpp" 18 #include "../../IR/Kernel.hpp" 19 #include "../../Timer/Timer.hpp" 20 21 #include <list> 22 #include <map> 23 #include <vector> 24 25 26 namespace iga 27 { 28 #define GED_ENCODE(FUNC, ARG) \ 29 GED_ENCODE_TO(FUNC, ARG, &m_gedInst) 30 #if defined(GED_TIMER) || defined(_DEBUG) 31 #define START_GED_TIMER() startIGATimer(TIMER_GED) 32 #define STOP_GED_TIMER() stopIGATimer(TIMER_GED) 33 #else 34 #define START_GED_TIMER() 35 #define STOP_GED_TIMER() 36 #endif 37 38 #if defined(TOTAL_ENCODE_TIMER) || defined(_DEBUG) 39 #define START_ENCODER_TIMER() startIGATimer(TIMER_TOTAL) 40 #define STOP_ENCODER_TIMER() stopIGATimer(TIMER_TOTAL) 41 #else 42 #define START_ENCODER_TIMER() 43 #define STOP_ENCODER_TIMER() 44 #endif 45 46 // uncomment to emit all the GED calls 47 // #define TRACE_GED_CALLS 48 49 #ifdef TRACE_GED_CALLS 50 #define TRACE_GED_SETTER(FIELD, ARG, STATUS) \ 51 std::cout << "Encoder.cpp:" << __LINE__ << ": GED_Set" << #FIELD << \ 52 "(..," << (ARG) << ") results in " << (STATUS) << "\n" 53 #else 54 #define TRACE_GED_SETTER(FIELD, ARG, STATUS) 55 #endif 56 57 58 #define GED_ENCODE_TO(FIELD, ARG, GED) \ 59 do { \ 60 GED_RETURN_VALUE _status; \ 61 START_GED_TIMER(); \ 62 _status = GED_Set ## FIELD (GED, ARG); \ 63 STOP_GED_TIMER(); \ 64 TRACE_GED_SETTER(FIELD, ARG, _status); \ 65 if (_status != GED_RETURN_VALUE_SUCCESS) { \ 66 handleGedError(__LINE__, #FIELD, _status); \ 67 } \ 68 } while (0) 69 70 71 using BlockList = std::list<Block *, std_arena_based_allocator<Block*>>; 72 73 class Encoder : protected GEDBitProcessor 74 { 75 public: 76 Encoder(const Model& model, ErrorHandler& eh, const EncoderOpts& eos); 77 78 void encodeKernel( 79 Kernel& k, 80 MemManager &m, 81 void*& bits, 82 uint32_t& bitsLen); 83 84 size_t getNumInstructionsEncoded() const; 85 86 /////////////////////////////////////////////////////////////////////// 87 // PROFILING API FOR TESTING (must be compiled into) 88 // 89 // compile with MEASURE_COMPILATION_TIME 90 double getElapsedTimeMicros(unsigned int idx); 91 int64_t getElapsedTimeTicks(unsigned int idx); 92 std::string getTimerName(unsigned int idx); 93 94 protected: 95 96 // TODO: phase these out 97 void encodeKernelPreProcess(Kernel &k); 98 void doEncodeKernelPreProcess(Kernel &k); 99 void encodeFC(const Instruction &i); 100 void encodeTernaryInstruction(const Instruction& inst, GED_ACCESS_MODE accessMode); 101 void encodeTernaryAlign1Instruction(const Instruction& inst); 102 void encodeTernaryAlign16Instruction(const Instruction& inst); 103 void encodeSendDirectDestination(const Operand& dst); 104 void encodeSendDestinationDataType(const Operand& dst); 105 void encodeOptionsThreadControl(const Instruction& inst); 106 107 protected: 108 void encodeDstReg(RegName regName, uint16_t regNum); // just the regnum 109 110 void encodeImmVal(const ImmVal &val, Type type); 111 112 template <SourceIndex S> void encodeSrcRegFile(GED_REG_FILE rf); 113 template <SourceIndex S> void encodeSrcRegionVert(Region::Vert v); 114 template <SourceIndex S> void encodeSrcType(Type t); 115 template <SourceIndex S> void encodeSrcAddrMode(GED_ADDR_MODE x); 116 template <SourceIndex S> void encodeSrcModifier(SrcModifier x); 117 template <SourceIndex S> void encodeSrcSubRegNum(uint32_t subRegInByte); 118 119 template <SourceIndex S> void encodeSrcMathMacroReg(MathMacroExt a); 120 121 template <SourceIndex S> void encodeSrcReg(RegName regName, uint16_t regNum); 122 template <SourceIndex S> void encodeSrcAddrImm(int32_t addrImm); 123 template <SourceIndex S> void encodeSrcAddrSubRegNum(uint32_t addrSubReg); 124 template <SourceIndex S> void encodeSrcRegion(const Region& r, bool hasRgnWi = true); 125 template <SourceIndex S> void encodeSrcRegionWidth(Region::Width w); 126 template <SourceIndex S> void encodeSrcRepCtrl(GED_REP_CTRL rep); 127 template <SourceIndex S> void encodeSrcChanSel( 128 GED_SWIZZLE chSelX, 129 GED_SWIZZLE chSelY, 130 GED_SWIZZLE chSelZ, 131 GED_SWIZZLE chSelW); 132 133 template <SourceIndex S> 134 void encodeTernarySourceAlign16(const Instruction& inst); 135 void encodeTernaryDestinationAlign16(const Instruction& inst); 136 137 template <SourceIndex S> 138 void encodeTernarySourceAlign1(const Instruction& inst); 139 void encodeTernaryDestinationAlign1(const Instruction& inst); 140 141 void encodeTernarySrcRegionVert(SourceIndex S, Region::Vert v); 142 143 template <SourceIndex S> 144 void encodeTernaryImmVal(const ImmVal &val, Type type); 145 146 template <SourceIndex S> void encodeSrcRegionHorz(Region::Horz s); 147 148 149 void handleGedError(int line, const char *setter, GED_RETURN_VALUE status); 150 151 // state that is valid over instance life 152 EncoderOpts m_opts; 153 154 // state that is valid over encodeInst() 155 ged_ins_t m_gedInst; 156 bool m_encodeAlign16 = false; 157 Op m_opcode = Op::INVALID; 158 size_t m_numberInstructionsEncoded; 159 160 private: operator delete(void *,MemManager *)161 void operator delete(void*, MemManager*) { }; operator new(size_t sz,MemManager * m)162 void *operator new(size_t sz, MemManager* m) {return m->alloc(sz);}; 163 164 void encodeBlock(Block* blk); 165 void encodeInstruction(Instruction& inst); 166 void patchJumpOffsets(); 167 168 /////////////////////////////////////////////////////////////////////// 169 // BASIC INSTRUCTIONS 170 /////////////////////////////////////////////////////////////////////// 171 void encodeBasicInstruction(const Instruction& inst, GED_ACCESS_MODE accessMode); 172 void encodeBasicDestination( 173 const Instruction& inst, 174 const Operand& dst, 175 GED_ACCESS_MODE accessMode = GED_ACCESS_MODE_Align1); 176 template <SourceIndex S> void encodeBasicSource( 177 const Instruction& inst, 178 const Operand& src, 179 GED_ACCESS_MODE accessMode = GED_ACCESS_MODE_Align1); 180 /////////////////////////////////////////////////////////////////////// 181 // BRANCH INSTRUCTIONS 182 /////////////////////////////////////////////////////////////////////// 183 void encodeBranchingInstruction(const Instruction& inst); 184 void encodeBranchingInstructionSimplified(const Instruction& inst); 185 void encodeBranchDestination(const Operand& dst); 186 void encodeBranchSource(const Operand& src); 187 /////////////////////////////////////////////////////////////////////// 188 // SEND INSTRUCTIONS 189 /////////////////////////////////////////////////////////////////////// 190 void encodeSendInstruction(const Instruction& inst); 191 void encodeSendSource0(const Operand& src); 192 void encodeSendDestination(const Operand& dst); 193 void encodeSendsSource0(const Operand& src); 194 void encodeSendsSource1(const Operand& src); 195 void encodeSendsDestination(const Operand& dst); 196 197 void encodeSendDescs(const Instruction& inst); 198 void encodeSendDescsPreXe(const Instruction& inst); 199 void encodeSendDescsXe(const Instruction& inst); 200 void encodeSendDescsXeHP(const Instruction& inst); 201 void encodeSendDescsXeHPG(const Instruction& inst); 202 203 /////////////////////////////////////////////////////////////////////// 204 // SYNC INSTRUCTIONS 205 /////////////////////////////////////////////////////////////////////// 206 void encodeSyncInstruction(const Instruction& inst); 207 208 /////////////////////////////////////////////////////////////////////// 209 // OTHER HELPER FUNCTIONS 210 /////////////////////////////////////////////////////////////////////// 211 void setEncodedPC(Instruction *inst, int32_t encodePC); 212 int32_t getEncodedPC(const Instruction *inst) const; 213 214 bool getBlockOffset(const Block *b, uint32_t &pc); 215 216 // handles encoding ARF registers as well as the easy GRF case 217 // caller actually pulls the trigger and encodes the bits, but this 218 // call can raise the encoding error 219 uint32_t translateRegNum(int opIx, RegName reg, uint16_t regNum); 220 uint32_t mathMacroRegToBits(int src, MathMacroExt mme); // ChSel and SubReg 221 GED_DST_CHAN_EN mathMacroRegToChEn(MathMacroExt mme); 222 223 void applyGedWorkarounds(const Kernel &k, size_t bitsLen); 224 void encodeOptions(const Instruction& inst); 225 226 ////////////////////////////////////////////////////////////////////// 227 // platform specific queries *but sometimes need the instruction too) 228 // 229 // GEN7p5 implicitly scales PC offsets by QW except for a few instructions 230 bool arePcsInQWords(const OpSpec &os) const; 231 232 // Call need to have src0 region be set to: 233 // SKL and before: <2;2,1> 234 // ICL: <2;4,1> 235 // Later GENs ignore the region completely 236 bool callNeedsSrc0Region221(const Instruction &inst) const; 237 bool callNeedsSrc0Region241(const Instruction &inst) const; 238 239 ///////////////////////////////////////////////////////////// 240 // state valid over encodeKernel() 241 MemManager *m_mem; 242 uint8_t *m_instBuf = nullptr; // the output bits 243 struct JumpPatch { // JIP and UIP label patching 244 Instruction *inst; // the instruction 245 ged_ins_t gedInst; // the partially constructed GED instruction 246 uint8_t *bits; // where to encode it in the heap JumpPatchiga::Encoder::JumpPatch247 JumpPatch(Instruction *i, const ged_ins_t &gi, uint8_t *bs) 248 : inst(i), gedInst(gi), bits(bs) { } 249 }; 250 std::vector<JumpPatch> m_needToPatch; 251 std::map<const Block *, int32_t> m_blockToOffsetMap; 252 253 public: 254 //////////////////////////////////////////////////////////////// typeConvesionHelper(const ImmVal & val,Type type)255 static uint64_t typeConvesionHelper(const ImmVal &val, Type type) 256 { 257 uint64_t value = 0; 258 switch (type) { 259 case Type::UD: 260 case Type::F: 261 case Type::V: 262 case Type::UV: 263 case Type::VF: 264 value = (uint64_t)val.u32; 265 break; 266 case Type::D: 267 value = (uint64_t)val.s32; 268 break; 269 case Type::W: 270 value = (uint64_t)val.s16; 271 break; 272 case Type::UW: 273 case Type::HF: 274 case Type::BF: 275 value = (uint64_t)val.u16; 276 break; 277 case Type::DF: 278 case Type::UQ: 279 case Type::Q: 280 value = val.u64; 281 break; 282 case Type::B: 283 case Type::UB: 284 // technically not reachable since we don't permit byte moves 285 // from immediates 286 value = val.u64; 287 break; 288 case Type::BF8: 289 value = (uint64_t)val.u8; 290 break; 291 case Type::TF32: 292 value = (uint64_t)val.u32; 293 break; 294 default: 295 break; 296 } 297 298 return value; 299 } 300 301 protected: 302 //////////////////////////////////////////////////////////////// 303 // allowable types for ternary Align1 isTernaryAlign1Floating(Type t)304 static bool isTernaryAlign1Floating(Type t) { 305 switch (t) { 306 case Type::HF: 307 case Type::BF: 308 case Type::BF8: 309 case Type::TF32: 310 case Type::F: 311 case Type::DF: 312 case Type::NF: 313 return true; 314 default: 315 return false; 316 } 317 } 318 319 // allowable types for ternary Align1 320 isTernaryAlign1Integral(Type t)321 static bool isTernaryAlign1Integral(Type t) { 322 switch (t) { 323 case Type::UQ: // technically uq not allows today, but maybe in future 324 case Type::Q: // same as :uq 325 case Type::UD: 326 case Type::D: 327 case Type::UW: 328 case Type::W: 329 case Type::UB: 330 case Type::B: 331 return true; 332 default: 333 return false; 334 } 335 } 336 337 338 }; //end: class definition Encoder 339 encodeSrcRegFile(GED_REG_FILE rf)340 template <SourceIndex S> void Encoder::encodeSrcRegFile(GED_REG_FILE rf) { 341 if (S == SourceIndex::SRC0) { 342 GED_ENCODE(Src0RegFile, rf); 343 } else if (S == SourceIndex::SRC1) { 344 GED_ENCODE(Src1RegFile, rf); 345 } else { 346 GED_ENCODE(Src2RegFile, rf); 347 } 348 } 349 encodeSrcRegionVert(Region::Vert v)350 template <SourceIndex S> void Encoder::encodeSrcRegionVert(Region::Vert v) { 351 if (S == SourceIndex::SRC0) { 352 GED_ENCODE(Src0VertStride, lowerRegionVert(v)); 353 } else { // (S == SourceIndex::SRC1) 354 GED_ENCODE(Src1VertStride, lowerRegionVert(v)); 355 } // S != SRC2 since ternary Align1 doesn't have bits for that 356 } 357 encodeSrcType(Type t)358 template <SourceIndex S> void Encoder::encodeSrcType(Type t) { 359 if (S == SourceIndex::SRC0) { 360 GED_ENCODE(Src0DataType, lowerDataType(t)); 361 } else if (S == SourceIndex::SRC1) { 362 GED_ENCODE(Src1DataType, lowerDataType(t)); 363 } else { 364 GED_ENCODE(Src2DataType, lowerDataType(t)); 365 } 366 } 367 encodeSrcAddrMode(GED_ADDR_MODE x)368 template <SourceIndex S> void Encoder::encodeSrcAddrMode(GED_ADDR_MODE x) { 369 if (S == SourceIndex::SRC0) { 370 GED_ENCODE(Src0AddrMode, x); 371 } else { 372 GED_ENCODE(Src1AddrMode, x); 373 } 374 } 375 encodeSrcModifier(SrcModifier x)376 template <SourceIndex S> void Encoder::encodeSrcModifier(SrcModifier x) { 377 if (S == SourceIndex::SRC0) { 378 GED_ENCODE(Src0SrcMod, lowerSrcMod(x)); 379 } else if (S == SourceIndex::SRC1) { 380 GED_ENCODE(Src1SrcMod, lowerSrcMod(x)); 381 } else { 382 GED_ENCODE(Src2SrcMod, lowerSrcMod(x)); 383 } 384 } 385 encodeSrcSubRegNum(uint32_t subRegInByte)386 template <SourceIndex S> void Encoder::encodeSrcSubRegNum(uint32_t subRegInByte) { 387 if (S == SourceIndex::SRC0) { 388 GED_ENCODE(Src0SubRegNum, subRegInByte); 389 } else if (S == SourceIndex::SRC1) { 390 GED_ENCODE(Src1SubRegNum, subRegInByte); 391 } else { 392 GED_ENCODE(Src2SubRegNum, subRegInByte); 393 } 394 } 395 396 template <SourceIndex S> encodeSrcMathMacroReg(MathMacroExt a)397 void Encoder::encodeSrcMathMacroReg(MathMacroExt a) 398 { 399 if (S == SourceIndex::SRC0) { 400 GED_ENCODE(Src0MathMacroExt, lowerSpecialAcc(a)); 401 } else if (S == SourceIndex::SRC1) { 402 GED_ENCODE(Src1MathMacroExt, lowerSpecialAcc(a)); 403 } else { 404 GED_ENCODE(Src2MathMacroExt, lowerSpecialAcc(a)); 405 } 406 } encodeSrcReg(RegName regName,uint16_t regNum)407 template <SourceIndex S> void Encoder::encodeSrcReg( 408 RegName regName, 409 uint16_t regNum) 410 { 411 uint32_t regBits = 0; 412 if (regName == RegName::GRF_R) { 413 regBits = regNum; // GRF fast path 414 } else { // ARF slower path 415 const RegInfo *ri = m_model.lookupRegInfoByRegName(regName); 416 if (!ri) { 417 errorT("src", (int)S, ": unexpected register on this platform"); 418 } else { 419 uint8_t reg8; 420 ri->encode((int)regNum, reg8); 421 regBits = reg8; // widen for GED 422 } 423 } 424 if (S == SourceIndex::SRC0) { 425 GED_ENCODE(Src0RegNum, regBits); 426 } else if (S == SourceIndex::SRC1) { 427 GED_ENCODE(Src1RegNum, regBits); 428 } else { 429 GED_ENCODE(Src2RegNum, regBits); 430 } 431 } 432 encodeSrcAddrImm(int32_t addrImm)433 template <SourceIndex S> void Encoder::encodeSrcAddrImm(int32_t addrImm) { 434 if (S == SourceIndex::SRC0) { 435 GED_ENCODE(Src0AddrImm, addrImm); 436 } else { 437 GED_ENCODE(Src1AddrImm, addrImm); 438 } 439 } 440 template <SourceIndex S> encodeSrcAddrSubRegNum(uint32_t addrSubReg)441 void Encoder::encodeSrcAddrSubRegNum(uint32_t addrSubReg) { 442 if (S == SourceIndex::SRC0) { 443 GED_ENCODE(Src0AddrSubRegNum, addrSubReg); 444 } else { 445 GED_ENCODE(Src1AddrSubRegNum, addrSubReg); 446 } 447 } 448 template <SourceIndex S> encodeSrcRegion(const Region & rgn,bool hasRgnWi)449 void Encoder::encodeSrcRegion(const Region &rgn, bool hasRgnWi) { 450 uint32_t v = 0; 451 if (rgn.getVt() == Region::Vert::VT_VxH) { 452 v = 0x3; 453 } else if (rgn.getVt() != Region::Vert::VT_INVALID) { 454 v = static_cast<uint32_t>(rgn.v); 455 } else { 456 errorT(S == SourceIndex::SRC0 ? 457 "invalid region vertical stride on src0" : 458 "invalid region vertical stride on src1"); 459 } 460 461 uint32_t w = static_cast<uint32_t>(rgn.getWi()); 462 if (rgn.getWi() == Region::Width::WI_INVALID) { 463 errorT(S == SourceIndex::SRC0 ? 464 "invalid region width on src0" : 465 "invalid region width on src1"); 466 } 467 468 uint32_t h = static_cast<uint32_t>(rgn.getHz()); 469 if (rgn.getHz() == Region::Horz::HZ_INVALID) { 470 h = 1; 471 errorT(S == SourceIndex::SRC0 ? 472 "invalid region horizontal stride on src0" : 473 "invalid region horizontal stride on src1"); 474 } 475 476 if (S == SourceIndex::SRC0) { 477 GED_ENCODE(Src0VertStride, v); 478 if (hasRgnWi) { 479 GED_ENCODE(Src0Width, w); 480 } else { 481 // some ops have an implicit width region 482 // (e.g. some specialized instructions poaches Src0.RgnWi) 483 // 484 // Within the IR we use 1 so logic that depends on regioning 485 // gets the correct behavior (hardware assumes w=1). 486 w = 1; 487 } 488 GED_ENCODE(Src0HorzStride, h); 489 } else if (S == SourceIndex::SRC1) { 490 GED_ENCODE(Src1VertStride, v); 491 GED_ENCODE(Src1Width, w); 492 GED_ENCODE(Src1HorzStride, h); 493 } else { 494 IGA_ASSERT_FALSE( 495 "Encoder::encodeSrcRegion: only works on src0 and src1"); 496 } 497 } 498 499 template <SourceIndex S> encodeSrcRegionWidth(Region::Width w)500 void Encoder::encodeSrcRegionWidth(Region::Width w) { 501 if (S == SourceIndex::SRC0) { 502 GED_ENCODE(Src0Width, lowerRegionWidth(w)); 503 } else { // (S == SourceIndex::SRC1) 504 GED_ENCODE(Src1Width, lowerRegionWidth(w)); 505 } // S != SRC2 since ternary Align1 doesn't have bits for that 506 } 507 508 template <SourceIndex S> encodeTernaryImmVal(const ImmVal & val,Type type)509 void Encoder::encodeTernaryImmVal(const ImmVal &val, Type type) { 510 if (S == SourceIndex::SRC0) { 511 GED_ENCODE(Src0TernaryImm, typeConvesionHelper(val, type)); 512 } else if (S == SourceIndex::SRC2) { 513 GED_ENCODE(Src2TernaryImm, typeConvesionHelper(val, type)); 514 } else { 515 errorT("immediate operands not supported in src1 of ternary formats"); 516 } 517 } 518 519 template <SourceIndex S> encodeSrcRegionHorz(Region::Horz s)520 void Encoder::encodeSrcRegionHorz(Region::Horz s) { 521 if (S == SourceIndex::SRC0) { 522 GED_ENCODE(Src0HorzStride, lowerRegionHorz(s)); 523 } else if (S == SourceIndex::SRC1) { 524 GED_ENCODE(Src1HorzStride, lowerRegionHorz(s)); 525 } else { 526 GED_ENCODE(Src2HorzStride, lowerRegionHorz(s)); 527 } 528 } 529 } // end: namespace iga* 530 531 namespace iga 532 { 533 typedef Encoder Encoder; 534 } 535 #endif 536