1 /*========================== begin_copyright_notice ============================ 2 3 Copyright (C) 2017-2021 Intel Corporation 4 5 SPDX-License-Identifier: MIT 6 7 ============================= end_copyright_notice ===========================*/ 8 9 #ifndef _HWCONFORMITY_H_ 10 #define _HWCONFORMITY_H_ 11 12 #include "G4_IR.hpp" 13 #include "BuildIR.h" 14 #include "FlowGraph.h" 15 #include "Common_ISA_util.h" 16 #include "RegAlloc.h" 17 18 #include <map> 19 20 namespace vISA 21 { 22 class HWConformity 23 { 24 IR_Builder& builder; 25 G4_Kernel& kernel; 26 vISA::Mem_Manager& mem; 27 28 // this must be set before calling the individual fix functions 29 G4_BB* curBB = nullptr; 30 31 // This is added for data layout optimization. 32 // Currently it only targets packed-byte pattern. 33 // Can be extended later for other patterns. 34 enum AccessPattern { 35 ACCESS_PATTERN_UNDEF = 0, 36 ACCESS_PATTERN_PACKED_BYTE = 1, 37 ACCESS_PATTERN_INVALID = 2 38 }; 39 std::map<G4_Declare*, AccessPattern> dclAccessPattern; 40 getAccessPattern(G4_Declare * dcl)41 AccessPattern getAccessPattern(G4_Declare* dcl) 42 { 43 auto iter = dclAccessPattern.find(dcl); 44 if (iter == dclAccessPattern.end()) 45 { 46 return ACCESS_PATTERN_UNDEF; 47 } 48 return (*iter).second; 49 } setAccessPattern(G4_Declare * dcl,AccessPattern ap)50 void setAccessPattern(G4_Declare* dcl, AccessPattern ap) 51 { 52 dclAccessPattern[dcl] = ap; 53 } 54 bool markPackedByteReference(G4_Kernel& kernel, G4_Operand* opnd, G4_INST* inst); 55 G4_Operand* fixPackedByteReference(IR_Builder& builder, G4_Operand* opnd); 56 57 // helper functions 58 bool hasBadRegion(G4_INST *inst); 59 bool canSplitInst(G4_INST *inst, G4_INST *use_op); 60 bool splitInstListForByteDst(INST_LIST_ITER it, G4_BB *bb, uint16_t extypesize); 61 62 G4_DstRegRegion* insertMovAfter(INST_LIST_ITER& it, G4_DstRegRegion* dst, G4_Type type, G4_BB *bb, G4_SubReg_Align dstAlign = Any); 63 G4_Operand* insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type, G4_BB *bb, 64 G4_SubReg_Align tmpAlign = Any); 65 G4_Operand* insertMovBefore(INST_LIST_ITER it, uint32_t srcNum, G4_Type type, G4_BB* bb, 66 uint16_t stride, G4_SubReg_Align tmpAlign = Any); 67 68 // replace src <srcNum> for inst <*it> with a temp variable of type <type> 69 // This is used to satisfy various HW restrictions on src type/alignment/region/modifier/etc. 70 void replaceSrc(INST_LIST_ITER it, uint32_t srcNum, G4_Type type, G4_BB* bb, G4_SubReg_Align tmpAlign = Any) 71 { 72 G4_INST* inst = *it; 73 inst->setSrc(insertMovBefore(it, srcNum, type, bb, tmpAlign), srcNum); 74 } 75 // replace dst for inst <*it> with a temp variable of type <type> 76 // the original dst is now the dst of a new move instruction from the temp variable. 77 // This is used to satisfy various HW restrictions on dst type/alignment/etc. 78 void replaceDst(INST_LIST_ITER it, G4_Type type, G4_SubReg_Align dstAlign = Any) 79 { 80 G4_INST* inst = *it; 81 inst->setDest(insertMovAfter(it, inst->getDst(), type, curBB, dstAlign)); 82 } 83 84 G4_SrcRegRegion* insertCopyBefore(INST_LIST_ITER it, uint32_t srcNum, G4_SubReg_Align tmpAlign, G4_BB *bb); 85 G4_SrcRegRegion* insertCopyAtBBEntry(G4_BB* bb, G4_ExecSize newExecSize, G4_Operand* src); 86 void broadcast(G4_BB* bb, INST_LIST_ITER it, int srcPos, G4_SubReg_Align subAlign); 87 88 G4_INST *splitInstWithByteDst(G4_INST *expand_op); 89 G4_SubReg_Align getDclAlignment(int opndBytes, G4_INST *inst, bool isScalar); 90 91 // HW conformity check functions 92 void fixPackedSource(INST_LIST_ITER it, G4_BB *bb); 93 bool fixMathInst(INST_LIST_ITER it, G4_BB *bb); 94 bool fixMULInst(INST_LIST_ITER &it, G4_BB *bb); 95 void fixMULHInst(INST_LIST_ITER &i, G4_BB *bb); 96 void fixOpnds(INST_LIST_ITER it, G4_BB *bb, G4_Type& exType); 97 bool fixLine(INST_LIST_ITER it, G4_BB *bb); 98 bool fixOpndType(INST_LIST_ITER it, G4_BB *bb); 99 void fixPackedHFConversions(INST_LIST_ITER it, G4_BB* bb); 100 101 bool fixIndirectOpnd(INST_LIST_ITER i, G4_BB *bb); 102 void fix3SrcInst(INST_LIST_ITER i, G4_BB* bb); 103 void fixAlign13SrcInst(INST_LIST_ITER i, G4_BB* bb); 104 void fixCompareInst(INST_LIST_ITER i, G4_BB *bb, G4_Type exType, int dst_elsize); 105 bool fixDstAlignment(INST_LIST_ITER i, G4_BB *bb, G4_Type extype, unsigned int dst_elsize); 106 void fixPredicateIndirectInst(INST_LIST_ITER i, G4_BB* bb); 107 bool fixDstAlignmentWithVectorImm(INST_LIST_ITER i, G4_BB *bb); 108 bool fixAcc(INST_LIST_ITER i, G4_BB* bb); 109 void fixDstHstride(INST_LIST_ITER i, int extypesize); 110 void fixMADInst(G4_BB* bb); 111 void fixSrcRegion(G4_INST *inst); 112 void conformBB(G4_BB* bb); 113 void fixSADA2Inst(G4_BB* bb); 114 void fixMixedHFInst(G4_BB* bb); 115 void fixSendInst(G4_BB* bb); 116 void fixsrc1src2Overlap(G4_BB* bb); 117 void fixOverlapInst(G4_BB* bb); 118 bool canSplitByteDst(G4_opcode op); 119 bool fixInstOpndTypeAlign(INST_LIST_ITER i, G4_BB* bb); 120 void fixOpndTypeAlign(G4_BB* bb); 121 void fixInstExecSize(G4_BB* bb); 122 bool reduceExecSize(INST_LIST_ITER iter, G4_BB* bb); 123 bool reduceExecSizeForMath(INST_LIST_ITER iter, G4_BB* bb); 124 bool checkSrcDstOverlap(INST_LIST_ITER iter, G4_BB* bb, bool compOpt); 125 void splitInstruction(INST_LIST_ITER iter, G4_BB* bb, bool compOpt, uint8_t numInFirstMov, bool rule4_11, bool allowSrcCrossGRF); 126 void splitSIMD32Inst(INST_LIST_ITER iter, G4_BB* bb); 127 bool evenlySplitInst(INST_LIST_ITER iter, G4_BB* bb, bool checkOverlap = true); 128 void moveSrcToGRF(INST_LIST_ITER it, uint32_t srcNum, uint16_t numGRF, G4_BB *bb); 129 void saveDst(INST_LIST_ITER& it, uint8_t stride, G4_BB *bb); 130 void restoreDst(INST_LIST_ITER& it, G4_DstRegRegion *origDst, G4_BB *bb); 131 void insertMovAfter(INST_LIST_ITER& it, uint16_t stride, G4_BB* bb); 132 void removeBadSrc(INST_LIST_ITER& it, G4_BB *bb, bool crossGRFDst, bool oneGRFSrc[3], bool badTwoGRFSrc[3]); 133 uint8_t checkMinExecSize(G4_opcode op); 134 void convertMAD2MulAdd(INST_LIST_ITER iter, G4_BB *bb); 135 void maintainDU4TempMov(G4_INST *inst, G4_INST *movInst); 136 void fixImm64(INST_LIST_ITER i, G4_BB* bb); 137 bool checkSrcCrossGRF(INST_LIST_ITER &i, G4_BB* bb); 138 G4_INST* checkSrcDefInst(G4_INST *inst, G4_INST *def_inst, uint32_t srcNum); 139 bool emulate64bMov(INST_LIST_ITER iter, G4_BB* bb); 140 bool fix64bInst(INST_LIST_ITER i, G4_BB* bb); 141 bool fixPlaneInst(INST_LIST_ITER i, G4_BB* bb); 142 void expandPlaneInst(INST_LIST_ITER i, G4_BB* bb); 143 bool fixAddcSubb(G4_BB* bb); 144 void fixDataLayout(); 145 void fixBFMixedMode(); 146 bool fixMov(INST_LIST_ITER i, G4_BB* bb); 147 bool fixRotate(INST_LIST_ITER i, G4_BB* bb); 148 bool fixIntToHFMove(G4_BB* bb); 149 150 bool isFloatOr64b(G4_INST *inst); 151 uint16_t getSrcStride(G4_SrcRegRegion* src); 152 void change64bStride2CopyToUD(INST_LIST_ITER it, G4_BB* bb); 153 bool fixBFMove(INST_LIST_ITER i, G4_BB* bb); 154 void fixUnalignedRegions(INST_LIST_ITER it, G4_BB* bb); 155 bool fixFcvt(INST_LIST_ITER i, G4_BB* bb); 156 void fixByteXBarRestriction(INST_LIST_ITER it, G4_BB* bb); 157 void fixDPAS(INST_LIST_ITER it, G4_BB* bb); 158 bool fixSrnd(INST_LIST_ITER i, G4_BB* bb); 159 void fixShiftInsts(INST_LIST_ITER i, G4_BB* bb); 160 161 void helperGenerateTempDst( 162 G4_BB *bb, 163 INST_LIST_ITER instIter, 164 G4_INST *inst, 165 uint8_t hStride, 166 G4_Type tempDstType, 167 G4_SubReg_Align subAlign = Any); 168 169 bool isGoodAlign16Src(G4_INST* inst, int srcPos); 170 bool isGoodAlign1TernarySrc(G4_INST* inst, int srcPos, bool canBeImm); 171 bool isGoodAlign1TernaryDst(G4_INST* inst) const; 172 void copyDwords(G4_Declare* dst, 173 int dstOffset, 174 G4_Declare* src, 175 int srcOffset, 176 int numDwords, 177 G4_BB* bb, 178 INST_LIST_ITER iter); 179 180 void copyDwordsIndirect(G4_Declare* dst, 181 G4_SrcRegRegion* src, 182 int numDwords, 183 G4_BB* bb, 184 INST_LIST_ITER iter); 185 186 void copyRegs(G4_Declare* dst, 187 int dstOffset, 188 G4_Declare* src, 189 int srcOffset, 190 int numRegs, 191 G4_BB* bb, 192 INST_LIST_ITER iter); 193 194 bool isFpMadPreferred(G4_BB *bb, INST_LIST_ITER iter); 195 bool generateFPMad(G4_BB* bb, INST_LIST_ITER iter); 196 bool generateAlign1Mad(G4_BB* bb, INST_LIST_ITER iter); 197 bool hasSameSubregOffset(G4_INST* inst) const; 198 bool hasSameSubregOffset(G4_INST* inst, uint32_t& byteOffset) const; 199 200 void fixImmAndARFSrc(INST_LIST_ITER it, G4_BB *bb); 201 void generateMacl(INST_LIST_ITER it, G4_BB *bb); 202 void doGenerateMacl(INST_LIST_ITER it, G4_BB *bb); 203 204 void fixSelCsel(INST_LIST_ITER it, G4_BB *bb); 205 206 void avoidDstSrcOverlap(PointsToAnalysis& p); 207 void avoidInstDstSrcOverlap(INST_LIST_ITER it, G4_BB* bb, PointsToAnalysis& p); 208 209 void replaceHFBFwithFloat(INST_LIST_ITER it, G4_BB* bb); 210 new(size_t sz,vISA::Mem_Manager & m)211 void* operator new(size_t sz, vISA::Mem_Manager& m) { return m.alloc(sz); } 212 213 bool checkSrcMod(INST_LIST_ITER it, G4_BB* bb, int srcPos); 214 215 void fixSrc2(INST_LIST_ITER it, G4_BB* bb, bool swapSrc0and2); 216 217 void fixVxHFloat64b(INST_LIST_ITER it, G4_BB* bb); 218 219 void fixPredCtrl(INST_LIST_ITER it, G4_BB* bb); 220 221 // Calla src register must be grf aligned (sub-reg offset must be 0) 222 void fixCalla(INST_LIST_ITER it, G4_BB* bb); 223 224 // If alignment and region of all operands of any instruction are conformed 225 // by a dedicated function, return true. 226 // This is used to skip generic conformity functions, such as fixOpndTypeAlign(). hasDedicateAlignRegionConformity(INST_LIST_ITER it)227 bool hasDedicateAlignRegionConformity(INST_LIST_ITER it) const 228 { 229 return hasDedicateAlignRegionConformity(*it); 230 } 231 bool hasDedicateAlignRegionConformity(const G4_INST *I) const; 232 233 void fixSrc1Region(INST_LIST_ITER it, G4_BB* bb); 234 235 INST_LIST_ITER fixMadwInst(INST_LIST_ITER i, G4_BB* bb); 236 237 void fixFloatARFDst(INST_LIST_ITER it, G4_BB* bb); 238 239 public: HWConformity(IR_Builder & b,G4_Kernel & k,vISA::Mem_Manager & m)240 HWConformity(IR_Builder& b, G4_Kernel &k, vISA::Mem_Manager& m) : 241 builder(b), kernel(k), mem(m) 242 { 243 } 244 void chkHWConformity(); 245 static void tryEliminateMadSrcModifier(IR_Builder &builder, G4_INST *inst); 246 void localizeForAcc(G4_BB* bb); 247 void splitDWMULInst(INST_LIST_ITER& start, INST_LIST_ITER& end, G4_BB* bb); 248 void fixMulSrc1(INST_LIST_ITER i, G4_BB* bb); 249 }; 250 } 251 //single entry point for HW conformity checks 252 extern void HWConformityChk(vISA::IR_Builder& builder, vISA::G4_Kernel& kernel, vISA::Mem_Manager& mem); 253 254 #endif /* _HWCONFORMITY_H_ */ 255