/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#pragma once

#include "BlockCoalescing.hpp"
#include "PatternMatchPass.hpp"
#include "ShaderCodeGen.hpp"
#include "CoalescingEngine.hpp"
#include "Simd32Profitability.hpp"
#include "GenCodeGenModule.h"
#include "VariableReuseAnalysis.hpp"
#include "Compiler/MetaDataUtilsWrapper.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/DataLayout.h>
#include <llvm/IR/InlineAsm.h>
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "common/LLVMWarningsPop.hpp"
#include "Compiler/IGCPassSupport.h"
#include "Probe/Assertion.h"
#include <functional>

namespace llvm
{
    class GenIntrinsicInst;
}

namespace IGC
{
// Forward declaration
class IDebugEmitter;
struct PSSignature;

class EmitPass : public llvm::FunctionPass
{
public:
    EmitPass(CShaderProgram::KernelShaderMap& shaders, SIMDMode mode, bool canAbortOnSpill, ShaderDispatchMode shaderMode, PSSignature* pSignature = nullptr);

    virtual ~EmitPass();

    // Note: all analysis passes should be function passes. If a module analysis pass
    // is used, it would invalidate function analysis passes and therefore cause
    // those analysis passes to be invoked twice, which increases compile time.
    virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
    {
        AU.addRequired<llvm::DominatorTreeWrapperPass>();
        AU.addRequired<WIAnalysis>();
        AU.addRequired<LiveVarsAnalysis>();
        AU.addRequired<CodeGenPatternMatch>();
        AU.addRequired<DeSSA>();
        AU.addRequired<BlockCoalescing>();
        AU.addRequired<CoalescingEngine>();
        AU.addRequired<MetaDataUtilsWrapper>();
        AU.addRequired<Simd32ProfitabilityAnalysis>();
        AU.addRequired<CodeGenContextWrapper>();
        AU.addRequired<VariableReuseAnalysis>();
        AU.setPreservesAll();
    }

    virtual bool runOnFunction(llvm::Function& F) override;
    virtual llvm::StringRef getPassName() const override { return "EmitPass"; }

    void CreateKernelShaderMap(CodeGenContext* ctx, IGC::IGCMD::MetaDataUtils* pMdUtils, llvm::Function& F);

    void Frc(const SSource& source, const DstModifier& modifier);
    void Floor(const SSource& source, const DstModifier& modifier);
    void Mad(const SSource sources[3], const DstModifier& modifier);
    void Lrp(const SSource sources[3], const DstModifier& modifier);
    void Cmp(llvm::CmpInst::Predicate pred, const SSource sources[2], const DstModifier& modifier);
    void Sub(const SSource[2], const DstModifier& modifier);
    void Xor(const SSource[2], const DstModifier& modifier);
    void FDiv(const SSource[2], const DstModifier& modifier);
    void Pow(const SSource sources[2], const DstModifier& modifier);
    void Avg(const SSource sources[2], const DstModifier& modifier);
    void Rsqrt(const SSource& source, const DstModifier& modifier);
    void Sqrt(const SSource& source, const DstModifier& modifier);
    void Select(const SSource sources[3], const DstModifier& modifier);
    void PredAdd(const SSource& pred, bool invert, const SSource sources[2], const DstModifier& modifier);
    void Mul(const SSource[2], const DstModifier& modifier);
    void Mov(const SSource& source, const DstModifier& modifier);
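    // Illustration (added note, not from the original header): the fixed-arity
    // helpers above are invoked by the pattern-match driver with operands
    // already bound into SSource slots and result flags into a DstModifier.
    // A hypothetical call site looks like:
    //
    //   SSource srcs[3];      // filled in by CodeGenPatternMatch
    //   DstModifier mod;      // e.g. saturation/predication of the result
    //   Mad(srcs, mod);       // emits a fused multiply-add for the match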
    void Unary(e_opcode opCode, const SSource sources[1], const DstModifier& modifier);
    void Binary(e_opcode opCode, const SSource sources[2], const DstModifier& modifier);
    void Tenary(e_opcode opCode, const SSource sources[3], const DstModifier& modifier);
    void Bfn(uint8_t booleanFuncCtrl, const SSource sources[3], const DstModifier& modifier);
    void CmpBfn(llvm::CmpInst::Predicate predicate, const SSource cmpSources[2], uint8_t booleanFuncCtrl,
        const SSource bfnSources[3], const DstModifier& modifier);

    void Mul64(CVariable* dst, CVariable* src[2], SIMDMode simdMode, bool noMask = false) const;

    template<int N>
    void Alu(e_opcode opCode, const SSource sources[N], const DstModifier& modifier);

    void BinaryUnary(llvm::Instruction* inst, const SSource source[2], const DstModifier& modifier);
    void CmpBoolOp(llvm::BinaryOperator* inst,
        llvm::CmpInst::Predicate predicate,
        const SSource source[2],
        const SSource& bitSource,
        const DstModifier& modifier);
    void emitAluConditionMod(Pattern* aluPattern, llvm::Instruction* alu, llvm::CmpInst* cmp, int aluOprdNum);

    void EmitAluIntrinsic(llvm::CallInst* I, const SSource source[2], const DstModifier& modifier);
    void EmitSimpleAlu(llvm::Instruction* inst, const SSource source[2], const DstModifier& modifier);
    void EmitSimpleAlu(llvm::Instruction* inst, CVariable* dst, CVariable* src0, CVariable* src1);
    void EmitSimpleAlu(EOPCODE opCode, const SSource source[2], const DstModifier& modifier);
    void EmitSimpleAlu(EOPCODE opCode, CVariable* dst, CVariable* src0, CVariable* src1);
    void EmitMinMax(bool isMin, bool isUnsigned, const SSource source[2], const DstModifier& modifier);
    void EmitUAdd(llvm::BinaryOperator* inst, const DstModifier& modifier);
    void EmitFullMul32(bool isUnsigned, const SSource srcs[2], const DstModifier& dstMod);
    void EmitFPToIntWithSat(bool isUnsigned, bool needBitCast, VISA_Type type, const SSource& source, const DstModifier& modifier);
    void EmitNoModifier(llvm::Instruction* inst);
    void EmitIntrinsicMessage(llvm::IntrinsicInst* inst);
    void EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst);
    void EmitSIToFPZExt(const SSource& source, const DstModifier& dstMod);
    void EmitIntegerTruncWithSat(bool isSignedDst, bool isSignedSrc, const SSource& source, const DstModifier& dstMod);
    void EmitAddPair(llvm::GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod);
    void EmitSubPair(llvm::GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod);
    void EmitMulPair(llvm::GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod);
    void EmitPtrToPair(llvm::GenIntrinsicInst* GII, const SSource Sources[1], const DstModifier& DstMod);
    void EmitInlineAsm(llvm::CallInst* inst);

    void emitPairToPtr(llvm::GenIntrinsicInst* GII);

    void emitMulAdd16(llvm::Instruction* I, const SSource source[2], const DstModifier& dstMod);
    void emitCall(llvm::CallInst* inst);
    void emitReturn(llvm::ReturnInst* inst);
    void EmitInsertValueToStruct(llvm::InsertValueInst* II, bool forceVectorInit, const DstModifier& DstMod);
    void EmitExtractValueFromStruct(llvm::ExtractValueInst* EI, const DstModifier& DstMod);

    /// stack-call code-gen functions
    void emitStackCall(llvm::CallInst* inst);
    void emitStackFuncEntry(llvm::Function* F);
    void emitStackFuncExit(llvm::ReturnInst* inst);
    uint emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool isWrite);
    void InitializeKernelStack(llvm::Function* pKernel);
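    // Sketch of the stack-call flow implemented by the functions above (an
    // assumption drawn from the helper names, not from the implementation):
    // the caller marshals arguments that do not fit in registers, then the
    // callee unmarshals them with the same helper:
    //
    //   emitStackArgumentLoadOrStore(Args, /*isWrite=*/true);   // caller stores args
    //   emitStackCall(callInst);
    //   ...
    //   emitStackFuncEntry(F);                                  // callee side
    //   emitStackArgumentLoadOrStore(Args, /*isWrite=*/false);  // callee loads args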
    // emits the visa relocation instructions for function/global symbols
    void emitSymbolRelocation(llvm::Function& F);

    void emitOutput(llvm::GenIntrinsicInst* inst);
    void emitGS_SGV(llvm::SGVIntrinsic* inst);
    void emitSampleOffset(llvm::GenIntrinsicInst* inst);

    // TODO: unify the functions below and clean up
    void emitStore(llvm::StoreInst* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);
    void emitStore3D(llvm::StoreInst* inst, llvm::Value* elemIdxV);
    void emitStore3DInner(llvm::Value* pllValToStore, llvm::Value* pllDstPtr, llvm::Value* pllElmIdx);

    void emitLoad(llvm::LoadInst* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset); // single load, no pattern
    void emitLoad3DInner(llvm::LdRawIntrinsic* inst, ResourceDescriptor& resource, llvm::Value* elemIdxV);

    // when the resource is dynamically indexed, load/store must use special intrinsics
    void emitLoadRawIndexed(llvm::LdRawIntrinsic* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);
    void emitStoreRawIndexed(llvm::StoreRawIntrinsic* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);
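    // How the (varOffset, immOffset) pair above is meant to be read (a
    // reading of the interface, stated as an assumption): the effective
    // address of a load/store decomposes as
    //
    //   effectiveAddr = base + varOffset + immOffset
    //
    // where varOffset is a (possibly per-lane) llvm::Value and immOffset is a
    // compile-time constant that can be folded into the message's immediate
    // offset field when the target encoding supports it.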
    void emitGetBufferPtr(llvm::GenIntrinsicInst* inst);
    // \todo, remove this function after we lower all GEP to IntToPtr before CodeGen.
    // Only remaining GEPs are for scratch in the GFX path.
    void emitGEP(llvm::Instruction* inst);

    // Emit lifetime start right before instruction I. If ForAllInstance is true, emit
    // lifetime start for both instances; otherwise, just for the current instance set
    // in the calling context.
    void emitLifetimeStart(CVariable* Var, llvm::BasicBlock* BB, llvm::Instruction* I, bool ForAllInstance);

    // set the predicate with current active channels
    void emitPredicateFromChannelIP(CVariable* dst, CVariable* alias = NULL);

    // Helper methods for message emit functions.
    template <typename T>
    void prepareRenderTargetWritePayload(
        T* inst,
        llvm::DenseMap<llvm::Value*, CVariable**>& valueToVariableMap,
        llvm::Value* color[],
        uint8_t colorCnt,
        // output:
        CVariable** src,
        bool* isUndefined,
        CVariable*& source0Alpha,
        CVariable*& oMaskOpnd,
        CVariable*& outputDepthOpnd,
        CVariable*& vStencilOpnd);

    ResourceDescriptor GetSampleResourceHelper(llvm::SampleIntrinsic* inst);

    void interceptSamplePayloadCoalescing(
        llvm::SampleIntrinsic* inst,
        uint numPart,
        llvm::SmallVector<CVariable*, 4>& payload,
        bool& payloadCovered);

    template <typename T>
    bool interceptRenderTargetWritePayloadCoalescing(
        T* inst,
        CVariable** src,
        CVariable*& source0Alpha,
        CVariable*& oMaskOpnd,
        CVariable*& outputDepthOpnd,
        CVariable*& vStencilOpnd,
        llvm::DenseMap<llvm::Value*, CVariable**>& valueToVariableMap);

    // message emit functions
    void emitRenderTargetWrite(llvm::RTWritIntrinsic* inst, bool fromRet);
    void emitDualBlendRT(llvm::RTDualBlendSourceIntrinsic* inst, bool fromRet);
    void emitSimdLaneId(llvm::Instruction* inst);
    void emitPatchInstanceId(llvm::Instruction* inst);
    void emitSimdSize(llvm::Instruction* inst);
    void emitSimdShuffle(llvm::Instruction* inst);
    void emitSimdShuffleDown(llvm::Instruction* inst);
    void emitSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
    void emitSimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
    void emitLegacySimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
    void emitLegacySimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
    void emitSimdMediaBlockRead(llvm::Instruction* inst);
    void emitSimdMediaBlockWrite(llvm::Instruction* inst);
    void emitMediaBlockIO(const llvm::GenIntrinsicInst* inst, bool isRead);
    void emitMediaBlockRectangleRead(llvm::Instruction* inst);
    void emitURBWrite(llvm::GenIntrinsicInst* inst);
    void emitURBReadCommon(llvm::GenIntrinsicInst* inst, const QuadEltUnit globalOffset,
        llvm::Value* const perSlotOffset);
    void emitURBRead(llvm::GenIntrinsicInst* inst);
    void emitSampleInstruction(llvm::SampleIntrinsic* inst);
    void emitLdInstruction(llvm::Instruction* inst);
    void emitInfoInstruction(llvm::InfoIntrinsic* inst);
    void emitGather4Instruction(llvm::SamplerGatherIntrinsic* inst);
    void emitLdmsInstruction(llvm::Instruction* inst);
    void emitTypedRead(llvm::Instruction* inst);
    void emitTypedWrite(llvm::Instruction* inst);
    void emitThreadGroupBarrier(llvm::Instruction* inst);
    void emitMemoryFence(llvm::Instruction* inst);
    void emitMemoryFence(void);
    void emitTypedMemoryFence(llvm::Instruction* inst);
    void emitFlushSamplerCache();
    void emitSurfaceInfo(llvm::GenIntrinsicInst* intrinsic);

    static uint64_t getFPOffset() { return SIZE_OWORD; }
    void emitStackAlloca(llvm::GenIntrinsicInst* intrinsic);
    void emitVLAStackAlloca(llvm::GenIntrinsicInst* intrinsic);

    void emitUAVSerialize();

    void emitScalarAtomics(
        llvm::Instruction* pInst,
        ResourceDescriptor& resource,
        AtomicOp atomic_op,
        CVariable* pDstAddr,
        CVariable* pSrc,
        bool isA64,
        int bitSize);

    void emitScalarAtomicLoad(
        llvm::Instruction* pInst,
        ResourceDescriptor& resource,
        CVariable* pDstAddr,
        CVariable* pSrc,
        bool isA64,
        int bitSize);
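    // Background note on the scalar ("uniform") atomics above (hedged; the
    // exact lowering lives in the .cpp): when every active lane applies the
    // same commutative atomic op to the same uniform address, for example an
    // atomic add to a shared counter, the emitter can first reduce the
    // per-lane operands within the thread and then issue a single SIMD1
    // atomic, turning N memory atomics into one:
    //
    //   // conceptually:
    //   //   sum = reduce(op, pSrc over active lanes)
    //   //   atomic_op(resource[pDstAddr], sum)   // executed by one lane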
    /// reduction and prefix/postfix facilities
    CVariable* ScanReducePrepareSrc(VISA_Type type, uint64_t identityValue, bool negate, bool secondHalf,
        CVariable* src, CVariable* dst, CVariable* flag = nullptr);
    CVariable* ReductionReduceHelper(e_opcode op, VISA_Type type, SIMDMode simd, CVariable* src);
    void ReductionExpandHelper(e_opcode op, VISA_Type type, CVariable* src, CVariable* dst);
    void ReductionClusteredSrcHelper(CVariable* (&pSrc)[2], CVariable* src, uint16_t numLanes,
        VISA_Type type, uint numInst, bool secondHalf);
    CVariable* ReductionClusteredReduceHelper(e_opcode op, VISA_Type type, SIMDMode simd, bool secondHalf,
        CVariable* src, CVariable* dst);
    void ReductionClusteredExpandHelper(e_opcode op, VISA_Type type, SIMDMode simd, const uint clusterSize,
        bool secondHalf, CVariable* src, CVariable* dst);
    /// reduction and prefix/postfix emitters
    void emitReductionAll(
        e_opcode op,
        uint64_t identityValue,
        VISA_Type type,
        bool negate,
        CVariable* src,
        CVariable* dst);
    void emitReductionClustered(
        const e_opcode op,
        const uint64_t identityValue,
        const VISA_Type type,
        const bool negate,
        const unsigned int clusterSize,
        CVariable* const src,
        CVariable* const dst);
    void emitPreOrPostFixOp(
        e_opcode op,
        uint64_t identityValue,
        VISA_Type type,
        bool negateSrc,
        CVariable* src,
        CVariable* result[2],
        CVariable* Flag = nullptr,
        bool isPrefix = false,
        bool isQuad = false);
    void emitPreOrPostFixOpScalar(
        e_opcode op,
        uint64_t identityValue,
        VISA_Type type,
        bool negateSrc,
        CVariable* src,
        CVariable* result[2],
        CVariable* Flag,
        bool isPrefix);
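    // Semantics sketch for the scan emitters above (an interpretation from
    // the signatures, not a guarantee): with op = add and identityValue = 0,
    // a prefix scan over lane values {3, 1, 4} would yield the exclusive
    // result {0, 3, 4}, while a postfix scan would yield the inclusive
    // result {3, 4, 8}. result[2] appears to hold one output per SIMD
    // instance for blocks emitted in two halves (e.g. SIMD32 lowered as two
    // SIMD16 instances).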
    bool IsUniformAtomic(llvm::Instruction* pInst);
    void emitAtomicRaw(llvm::GenIntrinsicInst* pInst);
    void emitAtomicTyped(llvm::GenIntrinsicInst* pInst);
    void emitAtomicCounter(llvm::GenIntrinsicInst* pInst);
    void emitUniformAtomicCounter(llvm::GenIntrinsicInst* pInst);
    void emitRenderTargetRead(llvm::GenIntrinsicInst* inst);

    void emitDiscard(llvm::Instruction* inst);
    void emitInitDiscardMask(llvm::GenIntrinsicInst* inst);
    void emitUpdateDiscardMask(llvm::GenIntrinsicInst* inst);
    void emitGetPixelMask(llvm::GenIntrinsicInst* inst);

    void emitInput(llvm::Instruction* inst);
    void emitcycleCounter(llvm::Instruction* inst);
    void emitSetDebugReg(llvm::Instruction* inst);
    void emitInsert(llvm::Instruction* inst);
    void emitExtract(llvm::Instruction* inst);
    void emitBitCast(llvm::BitCastInst* btCst);
    void emitPtrToInt(llvm::PtrToIntInst* p2iCst);
    void emitIntToPtr(llvm::IntToPtrInst* i2pCst);
    void emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast);
    void emitBranch(llvm::BranchInst* br, const SSource& cond, e_predMode predMode);
    void emitDiscardBranch(llvm::BranchInst* br, const SSource& cond);
    void emitAluNoModifier(llvm::GenIntrinsicInst* inst);

    void emitSGV(llvm::SGVIntrinsic* inst);
    void emitPSSGV(llvm::GenIntrinsicInst* inst);
    void emitCSSGV(llvm::GenIntrinsicInst* inst);
    void getCoarsePixelSize(CVariable* destination, const uint component, bool isCodePatchCandidate = false);
    void getPixelPosition(CVariable* destination, const uint component, bool isCodePatchCandidate = false);
    void emitPixelPosition(llvm::GenIntrinsicInst* inst);
    void emitPhaseOutput(llvm::GenIntrinsicInst* inst);
    void emitPhaseInput(llvm::GenIntrinsicInst* inst);

    void emitPSInput(llvm::Instruction* inst);
    void emitPSInputMADHalf(llvm::Instruction* inst);
    void emitPSInputPln(llvm::Instruction* inst);
    void emitPSInputCst(llvm::Instruction* inst);
    void emitEvalAttribute(llvm::GenIntrinsicInst* inst);
    void emitInterpolate(llvm::GenIntrinsicInst* inst);
    void emitInterpolate2(llvm::GenIntrinsicInst* inst);
    void emitInterpolant(llvm::GenIntrinsicInst* inst);

    void emitGradientX(const SSource& source, const DstModifier& modifier);
    void emitGradientY(const SSource& source, const DstModifier& modifier);
    void emitGradientXFine(const SSource& source, const DstModifier& modifier);
    void emitGradientYFine(const SSource& source, const DstModifier& modifier);

    void emitHSTessFactors(llvm::Instruction* pInst);
    void emitHSSGV(llvm::GenIntrinsicInst* inst);
    void emitf32tof16_rtz(llvm::GenIntrinsicInst* inst);
    void emitfitof(llvm::GenIntrinsicInst* inst);
    void emitFPOrtz(llvm::GenIntrinsicInst* inst);
    void emitFMArtp(llvm::GenIntrinsicInst* inst);
    void emitFMArtn(llvm::GenIntrinsicInst* inst);
    void emitftoi(llvm::GenIntrinsicInst* inst);
    void emitCtlz(const SSource& source);

    void emitDSInput(llvm::Instruction* pInst);
    void emitDSSGV(llvm::GenIntrinsicInst* inst);

    // VME
    void emitVMESendIME(llvm::GenIntrinsicInst* inst);
    void emitVMESendFBR(llvm::GenIntrinsicInst* inst);
    void emitVMESendSIC(llvm::GenIntrinsicInst* inst);
    void emitVMESendIME2(llvm::GenIntrinsicInst* inst);
    void emitVMESendFBR2(llvm::GenIntrinsicInst* inst);
    void emitVMESendSIC2(llvm::GenIntrinsicInst* inst);
    void emitCreateMessagePhases(llvm::GenIntrinsicInst* inst);
    void emitSetMessagePhaseX_legacy(llvm::GenIntrinsicInst* inst);
    void emitSetMessagePhase_legacy(llvm::GenIntrinsicInst* inst);
    void emitGetMessagePhaseX(llvm::GenIntrinsicInst* inst);
    void emitSetMessagePhaseX(llvm::GenIntrinsicInst* inst);
    void emitGetMessagePhase(llvm::GenIntrinsicInst* inst);
    void emitSetMessagePhase(llvm::GenIntrinsicInst* inst);
    void emitSimdGetMessagePhase(llvm::GenIntrinsicInst* inst);
    void emitBroadcastMessagePhase(llvm::GenIntrinsicInst* inst);
    void emitSimdSetMessagePhase(llvm::GenIntrinsicInst* inst);
    void emitSimdMediaRegionCopy(llvm::GenIntrinsicInst* inst);
    void emitExtractMVAndSAD(llvm::GenIntrinsicInst* inst);
    void emitCmpSADs(llvm::GenIntrinsicInst* inst);

    // VA
    void emitVideoAnalyticSLM(llvm::GenIntrinsicInst* inst, const DWORD responseLen);
    // New VA without using SLM and barriers (result is returned in GRF).
    void emitVideoAnalyticGRF(llvm::GenIntrinsicInst* inst, const DWORD responseLen);

    // CrossLane Instructions
    void emitWaveBallot(llvm::GenIntrinsicInst* inst);
    void emitWaveInverseBallot(llvm::GenIntrinsicInst* inst);
    void emitWaveShuffleIndex(llvm::GenIntrinsicInst* inst);
    void emitWavePrefix(llvm::WavePrefixIntrinsic* I);
    void emitQuadPrefix(llvm::QuadPrefixIntrinsic* I);
    void emitWaveAll(llvm::GenIntrinsicInst* inst);
    void emitWaveClustered(llvm::GenIntrinsicInst* inst);
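    // Illustrative note (an assumption based on the D3D12 wave intrinsics
    // these emitters mirror): emitWaveBallot materializes each active lane's
    // predicate into a uniform bitmask, so lanes {0, 2} passing a condition
    // would produce 0b0101; emitWaveInverseBallot performs the reverse,
    // turning a uniform bitmask back into a per-lane predicate.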
    // The three "vector" versions below shall be combined with the
    // non-vector versions.
    bool isUniformStoreOCL(llvm::StoreInst* SI);
    bool isUniformStoreOCL(llvm::Value* ptr, llvm::Value* storeVal);
    void emitVectorBitCast(llvm::BitCastInst* BCI);
    void emitVectorLoad(llvm::LoadInst* LI, llvm::Value* offset, llvm::ConstantInt* immOffset);
    void emitVectorStore(llvm::StoreInst* SI, llvm::Value* offset, llvm::ConstantInt* immOffset);
    void emitGenISACopy(llvm::GenIntrinsicInst* GenCopyInst);
    void emitVectorCopy(CVariable* Dst, CVariable* Src, uint32_t nElts,
        uint32_t DstSubRegOffset = 0, uint32_t SrcSubRegOffset = 0);
    void emitCopyAll(CVariable* Dst, CVariable* Src, llvm::Type* Ty);

    void emitPushFrameToStack(unsigned& pushSize);
    void emitAddPointer(CVariable* Dst, CVariable* Src, CVariable* offset);
    // emitAddPair - emulate 64-bit addition with 32-bit operations.
    // Dst and Src0 must be 64-bit type variables.
    // Src1 must be a 32-bit type variable or immediate.
    void emitAddPair(CVariable* Dst, CVariable* Src0, CVariable* Src1);
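    // Conceptual expansion of emitAddPair above (illustration only; the
    // actual instruction sequence is platform-dependent and chosen in the
    // .cpp):
    //
    //   dstLo = add(src0Lo, src1)     // 32-bit add on the low halves, producing a carry
    //   dstHi = add(src0Hi, carry)    // fold the carry into the high half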
    void emitSqrt(llvm::Instruction* inst);
    void emitCanonicalize(llvm::Instruction* inst, const DstModifier& modifier);
    void emitRsq(llvm::Instruction* inst);
    void emitFrc(llvm::GenIntrinsicInst* inst);

    void emitLLVMbswap(llvm::IntrinsicInst* inst);
    void emitDP4A(llvm::GenIntrinsicInst* GII,
        const SSource* source = nullptr,
        const DstModifier& modifier = DstModifier());

    void emitLLVMStackSave(llvm::IntrinsicInst* inst);
    void emitLLVMStackRestore(llvm::IntrinsicInst* inst);

    void emitUnmaskedRegionBoundary(bool start);
    void emitDpas(llvm::GenIntrinsicInst* GII,
        const SSource* source,
        const DstModifier& modifier);
    void emitfcvt(llvm::GenIntrinsicInst* GII);
    void emitStaticConstantPatchValue(
        llvm::StaticConstantPatchIntrinsic* staticConstantPatch32);
    // Debug Built-Ins
    void emitStateRegID(uint32_t BitStart, uint32_t BitEnd);
    void emitThreadPause(llvm::GenIntrinsicInst* inst);

    void MovPhiSources(llvm::BasicBlock* bb);

    void InitConstant(llvm::BasicBlock* BB);
    void emitLifetimeStartAtEndOfBB(llvm::BasicBlock* BB);
    void emitDebugPlaceholder(llvm::GenIntrinsicInst* I);
    void emitDummyInst(llvm::GenIntrinsicInst* GII);
    void emitImplicitArgIntrinsic(llvm::GenIntrinsicInst* I);
    void emitStoreImplBufferPtr(llvm::GenIntrinsicInst* I);
    void emitStoreLocalIdBufferPtr(llvm::GenIntrinsicInst* I);
    void emitLoadImplBufferPtr(llvm::GenIntrinsicInst* I);
    void emitLoadLocalIdBufferPtr(llvm::GenIntrinsicInst* I);

    std::pair<llvm::Value*, llvm::Value*> getPairOutput(llvm::Value*) const;

    // helper functions
    void SplitSIMD(llvm::Instruction* inst, uint numSources, uint headerSize, CVariable* payload, SIMDMode mode, uint half);
    template<size_t N>
    void JoinSIMD(CVariable* (&tempdst)[N], uint responseLength, SIMDMode mode);
    CVariable* BroadcastIfUniform(CVariable* pVar, bool nomask = false);
    uint DecideInstanceAndSlice(const llvm::BasicBlock& blk, SDAG& sdag, bool& slicing);
    bool IsUndefOrZeroImmediate(const llvm::Value* value);
    inline bool isUndefOrConstInt0(const llvm::Value* val)
    {
        if (val == nullptr ||
            llvm::isa<llvm::UndefValue>(val) ||
            (llvm::isa<llvm::ConstantInt>(val) &&
                llvm::cast<llvm::ConstantInt>(val)->getZExtValue() == 0))
        {
            return true;
        }
        return false;
    }
    inline llvm::Value* getOperandIfExist(llvm::Instruction* pInst, unsigned op)
    {
        if (llvm::CallInst* pCall = llvm::dyn_cast<llvm::CallInst>(pInst))
        {
            if (op < pCall->getNumArgOperands())
            {
                return pInst->getOperand(op);
            }
        }
        return nullptr;
    }

    bool IsGRFAligned(CVariable* pVar, e_alignment requiredAlign) const
    {
        e_alignment align = pVar->GetAlign();
        if (requiredAlign == EALIGN_BYTE)
        {
            // trivial
            return true;
        }
        if (requiredAlign == EALIGN_AUTO || align == EALIGN_AUTO)
        {
            // Can only assume that AUTO only matches AUTO (?)
            // (keep the previous behavior unchanged.)
            return align == requiredAlign;
        }
        return align >= requiredAlign;
    }

    CVariable* ExtendVariable(CVariable* pVar, e_alignment uniformAlign);
    CVariable* BroadcastAndExtend(CVariable* pVar);
    CVariable* TruncatePointer(CVariable* pVar);
    CVariable* ReAlignUniformVariable(CVariable* pVar, e_alignment align);
    CVariable* BroadcastAndTruncPointer(CVariable* pVar);
    CVariable* IndexableResourceIndex(CVariable* indexVar, uint btiIndex);
    ResourceDescriptor GetResourceVariable(llvm::Value* resourcePtr);
    SamplerDescriptor GetSamplerVariable(llvm::Value* samplerPtr);
    CVariable* ComputeSampleIntOffset(llvm::Instruction* sample, uint sourceIndex);
    void emitPlnInterpolation(CVariable* bary, CVariable* inputvar);

    CVariable* GetExecutionMask();
    CVariable* GetExecutionMask(CVariable*& vecMaskVar);
    CVariable* GetHalfExecutionMask();
    CVariable* GetDispatchMask();
    CVariable* UniformCopy(CVariable* var);
    CVariable* UniformCopy(CVariable* var, CVariable*& LaneOffset, CVariable* eMask = nullptr, bool doSub = false);

    // generate a loop header to process a sample instruction with a varying resource/sampler
    bool ResourceLoopHeader(
        ResourceDescriptor& resource,
        SamplerDescriptor& sampler,
        CVariable*& flag,
        uint& label);
    bool ResourceLoopHeader(
        ResourceDescriptor& resource,
        CVariable*& flag,
        uint& label);
    void ResourceLoopBackEdge(bool needLoop, CVariable* flag, uint label);
    template<typename Func>
    void ResourceLoop(ResourceDescriptor& resource, Func Fn)
    {
        uint label = 0;
        CVariable* flag = nullptr;
        bool needLoop = ResourceLoopHeader(resource, flag, label);

        Fn(flag);

        ResourceLoopBackEdge(needLoop, flag, label);
    }
    template<typename Func>
    void ResourceLoop(ResourceDescriptor& resource, SamplerDescriptor& sampler, Func Fn)
    {
        uint label = 0;
        CVariable* flag = nullptr;
        bool needLoop = ResourceLoopHeader(resource, sampler, flag, label);

        Fn(flag);

        ResourceLoopBackEdge(needLoop, flag, label);
    }
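    // Usage sketch for the ResourceLoop templates above (hypothetical call
    // site): when a resource or sampler descriptor is not uniform across
    // lanes, the emit function wraps its send in a "waterfall" loop; each
    // iteration handles the subset of lanes that share one descriptor value,
    // selected by the flag predicate passed to the functor:
    //
    //   ResourceLoop(resource, sampler, [&](CVariable* flag) {
    //       // emit the sample/load here, predicated by `flag`
    //   });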
    void ForceDMask(bool createJmpForDiscard = true);
    void ResetVMask(bool createJmpForDiscard = true);
    void setPredicateForDiscard(CVariable* pPredicate = nullptr);

    void PackSIMD8HFRet(CVariable* dst);
    unsigned int GetPrimitiveTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
    unsigned int GetPrimitiveTypeSizeInRegister(const llvm::Type* Ty) const;
    unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
    unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const;

    /// return true if it succeeds, false otherwise.
    bool setCurrentShader(llvm::Function* F);

    /// check if the dummy kernel requires compilation
    bool compileSymbolTableKernel(llvm::Function* F);

    // Arithmetic operations with constant folding.
    // Src0 and Src1 are the input operands.
    // DstPrototype is a prototype of the operation's result and may be used for cloning to a new variable.
    // Return a variable with the result of the computation, which may be one of the sources, an immediate, or a variable.
    CVariable* Mul(CVariable* Src0, CVariable* Src1, const CVariable* DstPrototype);
    CVariable* Add(CVariable* Src0, CVariable* Src1, const CVariable* DstPrototype);

    // temporary helper function
    CVariable* GetSymbol(llvm::Value* v) const;

    // Check if stateless indirect access is available.
    // If yes, increase the counter; otherwise do nothing.
    void CountStatelessIndirectAccess(llvm::Value* pointer, ResourceDescriptor resource);

    // An indirect access happens when the GPU loads from an address that was not directly
    // given as one of the kernel arguments. It is usually a pointer loaded from memory
    // pointed to by a kernel argument. Otherwise the access is direct.
    bool IsIndirectAccess(llvm::Value* value);

    CVariable* GetSrcVariable(const SSource& source, bool fromConstPool = false);
    void SetSourceModifiers(unsigned int sourceIndex, const SSource& source);

    SBasicBlock& getCurrentBlock() const { return m_pattern->m_blocks[m_currentBlock]; }

    CodeGenContext* m_pCtx = nullptr;
    CVariable* m_destination = nullptr;
    GenXFunctionGroupAnalysis* m_FGA = nullptr;
    CodeGenPatternMatch* m_pattern = nullptr;
    DeSSA* m_deSSA = nullptr;
    BlockCoalescing* m_blockCoalescing = nullptr;
    const SIMDMode m_SimdMode;
    const ShaderDispatchMode m_ShaderDispatchMode;
    CShaderProgram::KernelShaderMap& m_shaders;
    CShader* m_currShader;
    CEncoder* m_encoder;
    const llvm::DataLayout* m_DL = nullptr;
    CoalescingEngine* m_CE = nullptr;
    VariableReuseAnalysis* m_VRA = nullptr;
    ModuleMetaData* m_moduleMD = nullptr;
    bool m_canAbortOnSpill;
    PSSignature* const m_pSignature;

    // Debug info emitter
    IDebugEmitter* m_pDebugEmitter = nullptr;

    llvm::DominatorTree* m_pDT = nullptr;
    static char ID;
    inline void ContextSwitchPayloadSection(bool first = true);
    inline void ContextSwitchShaderBody(bool last = true);

private:
    uint m_labelForDMaskJmp;

    llvm::DenseMap<llvm::Instruction*, bool> instrMap;

    // Current rounding modes.
    // As the RMs of FPCvtInt and FP operations can differ, two fields keep
    // track of their current values.
    //
    // Default rounding modes:
    //   the rounding modes that are pre-defined by each API or by the
    //   shaders/kernels.
    //
    // Not all combinations of the FP RM and the FPCvtInt RM can be used as
    // defaults. Currently, the default RMs have the following restrictions:
    //   1. If FPCvtInt's RM = ROUND_TO_ZERO, FP's RM can be any;
    //   2. Otherwise, FPCvtInt's RM must be the same as FP's RM.
    //
    // The defaults remain unchanged throughout the entire shader/kernel;
    // dynamically setting a different default rounding mode in the middle of
    // a shader/kernel is not supported for now.
    //
    // However, each instruction's RM can be set dynamically, for example via
    // intrinsics. If an instruction needs to set RMs, its RMs must follow the
    // above restrictions. So far, an instruction relies on either FP's RM or
    // FPCvtInt's RM, but not both, so setting an instruction's RM dynamically
    // cannot violate the above restrictions.
    ERoundingMode m_roundingMode_FP;
    ERoundingMode m_roundingMode_FPCvtInt;
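    // Worked example of the restriction above (illustrative): with
    // FPCvtInt = ROUND_TO_ZERO (the common API default for float-to-int
    // conversion), FP may default to, say, round-to-nearest-even by rule 1.
    // But defaulting FPCvtInt to round-to-nearest while FP is ROUND_TO_ZERO
    // would violate rule 2, since the two modes differ and FPCvtInt is not
    // ROUND_TO_ZERO.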
    uint m_currentBlock = (uint)-1;

    bool m_currFuncHasSubroutine = false;

    // Used to relocate a phi-mov to a different BB. phiMovToBB maps "fromBB"
    // to "toBB" (meaning: move the phi-mov from "fromBB" to "toBB"). See MovPhiSources.
    llvm::DenseMap<llvm::BasicBlock*, llvm::BasicBlock*> phiMovToBB;
    bool canRelocatePhiMov(
        llvm::BasicBlock* otherBB, llvm::BasicBlock* phiMovBB, llvm::BasicBlock* phiBB);
    bool isCandidateIfStmt(
        llvm::BasicBlock* ifBB, llvm::BasicBlock*& otherBB, llvm::BasicBlock*& emptyBB);

    // Used to check the constraint types against the actual llvm IR parameters of inline asm instructions
    bool validateInlineAsmConstraints(llvm::CallInst* inst, llvm::SmallVector<llvm::StringRef, 8>& constraints);

    void emitGetMessagePhaseType(llvm::GenIntrinsicInst* inst, VISA_Type type, uint32_t width);
    void emitSetMessagePhaseType(llvm::GenIntrinsicInst* inst, VISA_Type type);
    void emitSetMessagePhaseType_legacy(llvm::GenIntrinsicInst* inst, VISA_Type type);

    void emitScan(llvm::Value* Src, IGC::WaveOps Op,
        bool isInclusiveScan, llvm::Value* Mask, bool isQuad);

    // Cached per-lane offset variables. This is a per-basic-block data
    // structure. For each entry, the first item is the scalar type size in
    // bytes, the second item is the corresponding symbol.
    llvm::SmallVector<std::pair<unsigned, CVariable*>, 4> PerLaneOffsetVars;

    // Helper function to reduce common code for emitting indirect address
    // computation.
    CVariable* getOrCreatePerLaneOffsetVariable(unsigned TypeSizeInBytes)
    {
        for (auto Item : PerLaneOffsetVars)
        {
            if (Item.first == TypeSizeInBytes)
            {
                IGC_ASSERT_MESSAGE(Item.second, "null variable");
                return Item.second;
            }
        }
        CVariable* Var = m_currShader->GetPerLaneOffsetsReg(TypeSizeInBytes);
        PerLaneOffsetVars.push_back(std::make_pair(TypeSizeInBytes, Var));
        return Var;
    }

    // Emit code in a slice starting from (reverse) iterator I. Return the
    // iterator to the next pattern to emit.
    SBasicBlock::reverse_iterator emitInSlice(SBasicBlock& block,
        SBasicBlock::reverse_iterator I);

    /**
     * Reuse SamplerDescriptor for the sample ID, so that we can pass it to
     * ResourceLoop to generate a loop for non-uniform values.
     */
    inline SamplerDescriptor getSampleIDVariable(llvm::Value* sampleIdVar)
    {
        SamplerDescriptor sampler;
        sampler.m_sampler = GetSymbol(sampleIdVar);
        return sampler;
    }

    CVariable* UnpackOrBroadcastIfUniform(CVariable* pVar);

    int getGRFSize() const { return m_currShader->getGRFSize(); }

    void initDefaultRoundingMode();
    ERoundingMode GetRoundingMode_FPCvtInt(llvm::Instruction* pInst);
    ERoundingMode GetRoundingMode_FP(llvm::Instruction* inst);
    void SetRoundingMode_FP(ERoundingMode RM_FP);
    void SetRoundingMode_FPCvtInt(ERoundingMode RM_FPCvtInt);
    bool setRMExplicitly(llvm::Instruction* inst);
    void ResetRoundingMode(llvm::Instruction* inst);
    // returns true if the instruction does not care about the rounding mode settings
    bool ignoreRoundingMode(llvm::Instruction* inst) const;

    // A64 load/store with a HW workaround (WA) that makes sure the high part of the
    // offset is the same for each load/store call.
    // addrUniform: if the load/store address is uniform, we can skip the A64 WA.
    void emitGatherA64(llvm::Value* loadInst, CVariable* dst, CVariable* offset, unsigned elemSize, unsigned numElems, bool addrUniform);
    void emitGather4A64(llvm::Value* loadInst, CVariable* dst, CVariable* offset, bool addrUniform);
    void emitScatterA64(CVariable* val, CVariable* offset, unsigned elementSize, unsigned numElems, bool addrUniform);
    void emitScatter4A64(CVariable* src, CVariable* offset, bool addrUniform);

    // Helper functions that create the loop for the above WA
    void A64LSLoopHead(CVariable* addr, CVariable*& curMask, CVariable*& lsPred, uint& label);
    void A64LSLoopTail(CVariable* curMask, CVariable* lsPred, uint label);

    // Helper function to check if the A64 WA is required
    bool hasA64WAEnable() const;

    bool isHalfGRFReturn(CVariable* dst, SIMDMode simdMode);

    void emitFeedbackEnable();

    void emitAddrSpaceToGenericCast(llvm::AddrSpaceCastInst* addrSpaceCast, CVariable* srcV, unsigned tag);

    // used for loading/storing a uniform value using scatter/gather messages
    CVariable* prepareAddressForUniform(
        CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t ExecSz, e_alignment Align);
    CVariable* prepareDataForUniform(CVariable* DataVar, uint32_t ExecSz, e_alignment Align);

    bool m_isDuplicate;
    CVariable* m_tmpDest = nullptr;
};

} // namespace IGC