1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 #pragma once
10 
11 #include "BlockCoalescing.hpp"
12 #include "PatternMatchPass.hpp"
13 #include "ShaderCodeGen.hpp"
14 #include "CoalescingEngine.hpp"
15 #include "Simd32Profitability.hpp"
16 #include "GenCodeGenModule.h"
17 #include "VariableReuseAnalysis.hpp"
18 #include "Compiler/MetaDataUtilsWrapper.h"
19 #include "common/LLVMWarningsPush.hpp"
20 #include <llvm/IR/DataLayout.h>
21 #include <llvm/IR/InlineAsm.h>
22 #include "llvm/IR/GetElementPtrTypeIterator.h"
23 #include "llvm/Analysis/CallGraph.h"
24 #include "common/LLVMWarningsPop.hpp"
25 #include "Compiler/IGCPassSupport.h"
26 #include "Probe/Assertion.h"
27 #include <functional>
28 
29 namespace llvm
30 {
31     class GenIntrinsicInst;
32 }
33 
34 namespace IGC
35 {
36 // Forward declaration
37 class IDebugEmitter;
38 struct PSSignature;
39 
40 class EmitPass : public llvm::FunctionPass
41 {
42 public:
43     EmitPass(CShaderProgram::KernelShaderMap& shaders, SIMDMode mode, bool canAbortOnSpill, ShaderDispatchMode shaderMode, PSSignature* pSignature = nullptr);
44 
45     virtual ~EmitPass();
46 
47     // Note:  all analysis passes should be function passes. If a module analysis pass
48     //        is used, it would invalidate function analysis passes and therefore cause
49     //        those analysis passes to be invoked twice, which increases compiling time.
getAnalysisUsage(llvm::AnalysisUsage & AU) const50     virtual void getAnalysisUsage(llvm::AnalysisUsage& AU) const override
51     {
52         AU.addRequired<llvm::DominatorTreeWrapperPass>();
53         AU.addRequired<WIAnalysis>();
54         AU.addRequired<LiveVarsAnalysis>();
55         AU.addRequired<CodeGenPatternMatch>();
56         AU.addRequired<DeSSA>();
57         AU.addRequired<BlockCoalescing>();
58         AU.addRequired<CoalescingEngine>();
59         AU.addRequired<MetaDataUtilsWrapper>();
60         AU.addRequired<Simd32ProfitabilityAnalysis>();
61         AU.addRequired<CodeGenContextWrapper>();
62         AU.addRequired<VariableReuseAnalysis>();
63         AU.setPreservesAll();
64     }
65 
66     virtual bool runOnFunction(llvm::Function& F) override;
getPassName() const67     virtual llvm::StringRef getPassName() const  override { return "EmitPass"; }
68 
69     void CreateKernelShaderMap(CodeGenContext* ctx, IGC::IGCMD::MetaDataUtils* pMdUtils, llvm::Function& F);
70 
71     void Frc(const SSource& source, const DstModifier& modifier);
72     void Floor(const SSource& source, const DstModifier& modifier);
73     void Mad(const SSource sources[3], const DstModifier& modifier);
74     void Lrp(const SSource sources[3], const DstModifier& modifier);
75     void Cmp(llvm::CmpInst::Predicate pred, const SSource sources[2], const DstModifier& modifier);
76     void Sub(const SSource[2], const DstModifier& mofidier);
77     void Xor(const SSource[2], const DstModifier& modifier);
78     void FDiv(const SSource[2], const DstModifier& modifier);
79     void Pow(const SSource sources[2], const DstModifier& modifier);
80     void Avg(const SSource sources[2], const DstModifier& modifier);
81     void Rsqrt(const SSource& source, const DstModifier& modifier);
82     void Sqrt(const SSource& source, const DstModifier& modifier);
83     void Select(const SSource sources[3], const DstModifier& modifier);
84     void PredAdd(const SSource& pred, bool invert, const SSource sources[2], const DstModifier& modifier);
85     void Mul(const SSource[2], const DstModifier& modifier);
86     void Mov(const SSource& source, const DstModifier& modifier);
87     void Unary(e_opcode opCode, const SSource sources[1], const DstModifier& modifier);
88     void Binary(e_opcode opCode, const SSource sources[2], const DstModifier& modifier);
89     void Tenary(e_opcode opCode, const SSource sources[3], const DstModifier& modifier);
90     void Bfn(uint8_t booleanFuncCtrl, const SSource sources[3], const DstModifier& modifier);
91     void CmpBfn(llvm::CmpInst::Predicate predicate, const SSource cmpSources[2], uint8_t booleanFuncCtrl,
92         const SSource bfnSources[3], const DstModifier& modifier);
93 
94     void Mul64(CVariable* dst, CVariable* src[2], SIMDMode simdMode, bool noMask = false) const;
95 
96     template<int N>
97     void Alu(e_opcode opCode, const SSource sources[N], const DstModifier& modifier);
98 
99     void BinaryUnary(llvm::Instruction* inst, const  SSource source[2], const DstModifier& modifier);
100     void CmpBoolOp(llvm::BinaryOperator* inst,
101         llvm::CmpInst::Predicate predicate,
102         const  SSource source[2],
103         const SSource& bitSource,
104         const DstModifier& modifier);
105     void emitAluConditionMod(Pattern* aluPattern, llvm::Instruction* alu, llvm::CmpInst* cmp, int aluOprdNum);
106 
107     void EmitAluIntrinsic(llvm::CallInst* I, const SSource source[2], const DstModifier& modifier);
108     void EmitSimpleAlu(llvm::Instruction* inst, const SSource source[2], const DstModifier& modifier);
109     void EmitSimpleAlu(llvm::Instruction* inst, CVariable* dst, CVariable* src0, CVariable* src1);
110     void EmitSimpleAlu(EOPCODE opCode, const SSource source[2], const DstModifier& modifier);
111     void EmitSimpleAlu(EOPCODE opCode, CVariable* dst, CVariable* src0, CVariable* src1);
112     void EmitMinMax(bool isMin, bool isUnsigned, const SSource source[2], const DstModifier& modifier);
113     void EmitUAdd(llvm::BinaryOperator* inst, const DstModifier& modifier);
114     void EmitFullMul32(bool isUnsigned, const SSource srcs[2], const DstModifier& dstMod);
115     void EmitFPToIntWithSat(bool isUnsigned, bool needBitCast, VISA_Type type, const SSource& source, const DstModifier& modifier);
116     void EmitNoModifier(llvm::Instruction* inst);
117     void EmitIntrinsicMessage(llvm::IntrinsicInst* inst);
118     void EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst);
119     void EmitSIToFPZExt(const SSource& source, const DstModifier& dstMod);
120     void EmitIntegerTruncWithSat(bool isSignedDst, bool isSignedSrc, const SSource& source, const DstModifier& dstMod);
121     void EmitAddPair(llvm::GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod);
122     void EmitSubPair(llvm::GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod);
123     void EmitMulPair(llvm::GenIntrinsicInst* GII, const SSource Sources[4], const DstModifier& DstMod);
124     void EmitPtrToPair(llvm::GenIntrinsicInst* GII, const SSource Sources[1], const DstModifier& DstMod);
125     void EmitInlineAsm(llvm::CallInst* inst);
126 
127     void emitPairToPtr(llvm::GenIntrinsicInst* GII);
128 
129     void emitMulAdd16(llvm::Instruction* I, const SSource source[2], const DstModifier& dstMod);
130     void emitCall(llvm::CallInst* inst);
131     void emitReturn(llvm::ReturnInst* inst);
132     void EmitInsertValueToStruct(llvm::InsertValueInst* II, bool forceVectorInit, const DstModifier& DstMod);
133     void EmitExtractValueFromStruct(llvm::ExtractValueInst* EI, const DstModifier& DstMod);
134 
135     /// stack-call code-gen functions
136     void emitStackCall(llvm::CallInst* inst);
137     void emitStackFuncEntry(llvm::Function* F);
138     void emitStackFuncExit(llvm::ReturnInst* inst);
139     uint emitStackArgumentLoadOrStore(std::vector<CVariable*>& Args, bool isWrite);
140     void InitializeKernelStack(llvm::Function* pKernel);
141 
142     // emits the visa relocation instructions for function/global symbols
143     void emitSymbolRelocation(llvm::Function& F);
144 
145     void emitOutput(llvm::GenIntrinsicInst* inst);
146     void emitGS_SGV(llvm::SGVIntrinsic* inst);
147     void emitSampleOffset(llvm::GenIntrinsicInst* inst);
148 
149     // TODO: unify the functions below and clean up
150     void emitStore(llvm::StoreInst* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);
151     void emitStore3D(llvm::StoreInst* inst, llvm::Value* elemIdxV);
152     void emitStore3DInner(llvm::Value* pllValToStore, llvm::Value* pllDstPtr, llvm::Value* pllElmIdx);
153 
154     void emitLoad(llvm::LoadInst* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);   // single load, no pattern
155     void emitLoad3DInner(llvm::LdRawIntrinsic* inst, ResourceDescriptor& resource, llvm::Value* elemIdxV);
156 
157     // when resource is dynamically indexed, load/store must use special intrinsics
158     void emitLoadRawIndexed(llvm::LdRawIntrinsic* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);
159     void emitStoreRawIndexed(llvm::StoreRawIntrinsic* inst, llvm::Value* varOffset, llvm::ConstantInt* immOffset);
160     void emitGetBufferPtr(llvm::GenIntrinsicInst* inst);
161     // \todo, remove this function after we lower all GEP to IntToPtr before CodeGen.
162     // Only remaining GEPs are for scratch in GFX path
163     void emitGEP(llvm::Instruction* inst);
164 
165     // Emit lifetime start right before inst V. If ForAllInstance is true, emit lifestart
166     // for both instances; otherwise, just the current instance set in the calling context.
167     void emitLifetimeStart(CVariable* Var, llvm::BasicBlock* BB, llvm::Instruction* I, bool ForAllInstance);
168 
169     // set the predicate with current active channels
170     void emitPredicateFromChannelIP(CVariable* dst, CVariable* alias = NULL);
171 
172     // Helper methods for message emit functions.
173     template <typename T>
174     void prepareRenderTargetWritePayload(
175         T* inst,
176         llvm::DenseMap<llvm::Value*, CVariable**>& valueToVariableMap,
177         llvm::Value* color[],
178         uint8_t colorCnt,
179         //output:
180         CVariable** src,
181         bool* isUndefined,
182         CVariable*& source0Alpha,
183         CVariable*& oMaskOpnd,
184         CVariable*& outputDepthOpnd,
185         CVariable*& vStencilOpnd);
186 
187 
188     ResourceDescriptor GetSampleResourceHelper(llvm::SampleIntrinsic* inst);
189 
190     void interceptSamplePayloadCoalescing(
191         llvm::SampleIntrinsic* inst,
192         uint numPart,
193         llvm::SmallVector<CVariable*, 4> & payload,
194         bool& payloadCovered
195     );
196 
197     template <typename T>
198     bool interceptRenderTargetWritePayloadCoalescing(
199         T* inst,
200         CVariable** src,
201         CVariable*& source0Alpha,
202         CVariable*& oMaskOpnd,
203         CVariable*& outputDepthOpnd,
204         CVariable*& vStencilOpnd,
205         llvm::DenseMap<llvm::Value*, CVariable**>& valueToVariableMap);
206 
207     // message emit functions
208     void emitRenderTargetWrite(llvm::RTWritIntrinsic* inst, bool fromRet);
209     void emitDualBlendRT(llvm::RTDualBlendSourceIntrinsic* inst, bool fromRet);
210     void emitSimdLaneId(llvm::Instruction* inst);
211     void emitPatchInstanceId(llvm::Instruction* inst);
212     void emitSimdSize(llvm::Instruction* inst);
213     void emitSimdShuffle(llvm::Instruction* inst);
214     void emitSimdShuffleDown(llvm::Instruction* inst);
215     void emitSimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
216     void emitSimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
217     void emitLegacySimdBlockWrite(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
218     void emitLegacySimdBlockRead(llvm::Instruction* inst, llvm::Value* ptrVal = nullptr);
219     void emitSimdMediaBlockRead(llvm::Instruction* inst);
220     void emitSimdMediaBlockWrite(llvm::Instruction* inst);
221     void emitMediaBlockIO(const llvm::GenIntrinsicInst* inst, bool isRead);
222     void emitMediaBlockRectangleRead(llvm::Instruction* inst);
223     void emitURBWrite(llvm::GenIntrinsicInst* inst);
224     void emitURBReadCommon(llvm::GenIntrinsicInst* inst, const QuadEltUnit globalOffset,
225         llvm::Value* const perSlotOffset);
226     void emitURBRead(llvm::GenIntrinsicInst* inst);
227     void emitSampleInstruction(llvm::SampleIntrinsic* inst);
228     void emitLdInstruction(llvm::Instruction* inst);
229     void emitInfoInstruction(llvm::InfoIntrinsic* inst);
230     void emitGather4Instruction(llvm::SamplerGatherIntrinsic* inst);
231     void emitLdmsInstruction(llvm::Instruction* inst);
232     void emitTypedRead(llvm::Instruction* inst);
233     void emitTypedWrite(llvm::Instruction* inst);
234     void emitThreadGroupBarrier(llvm::Instruction* inst);
235     void emitMemoryFence(llvm::Instruction* inst);
236     void emitMemoryFence(void);
237     void emitTypedMemoryFence(llvm::Instruction* inst);
238     void emitFlushSamplerCache();
239     void emitSurfaceInfo(llvm::GenIntrinsicInst* intrinsic);
240 
getFPOffset()241     static uint64_t getFPOffset() { return SIZE_OWORD; }
242     void emitStackAlloca(llvm::GenIntrinsicInst* intrinsic);
243     void emitVLAStackAlloca(llvm::GenIntrinsicInst* intrinsic);
244 
245     void emitUAVSerialize();
246 
247     void emitScalarAtomics(
248         llvm::Instruction* pInst,
249         ResourceDescriptor& resource,
250         AtomicOp atomic_op,
251         CVariable* pDstAddr,
252         CVariable* pSrc,
253         bool isA64,
254         int bitSize);
255 
256     void emitScalarAtomicLoad(
257         llvm::Instruction* pInst,
258         ResourceDescriptor& resource,
259         CVariable* pDstAddr,
260         CVariable* pSrc,
261         bool isA64,
262         int bitSize);
263 
264     /// reduction and prefix/postfix facilities
265     CVariable* ScanReducePrepareSrc(VISA_Type type, uint64_t identityValue, bool negate, bool secondHalf,
266         CVariable* src, CVariable* dst, CVariable* flag = nullptr);
267     CVariable* ReductionReduceHelper(e_opcode op, VISA_Type type, SIMDMode simd, CVariable* src);
268     void ReductionExpandHelper(e_opcode op, VISA_Type type, CVariable* src, CVariable* dst);
269     void ReductionClusteredSrcHelper(CVariable* (&pSrc)[2], CVariable* src, uint16_t numLanes,
270         VISA_Type type, uint numInst, bool secondHalf);
271     CVariable* ReductionClusteredReduceHelper(e_opcode op, VISA_Type type, SIMDMode simd, bool secondHalf,
272         CVariable* src, CVariable* dst);
273     void ReductionClusteredExpandHelper(e_opcode op, VISA_Type type, SIMDMode simd, const uint clusterSize,
274         bool secondHalf, CVariable* src, CVariable* dst);
275     /// reduction and prefix/postfix emitters
276     void emitReductionAll(
277         e_opcode op,
278         uint64_t identityValue,
279         VISA_Type type,
280         bool negate,
281         CVariable* src,
282         CVariable* dst);
283     void emitReductionClustered(
284         const e_opcode op,
285         const uint64_t identityValue,
286         const VISA_Type type,
287         const bool negate,
288         const unsigned int clusterSize,
289         CVariable* const src,
290         CVariable* const dst);
291     void emitPreOrPostFixOp(
292         e_opcode op,
293         uint64_t identityValue,
294         VISA_Type type,
295         bool negateSrc,
296         CVariable* src,
297         CVariable* result[2],
298         CVariable* Flag = nullptr,
299         bool isPrefix = false,
300         bool isQuad = false);
301     void emitPreOrPostFixOpScalar(
302         e_opcode op,
303         uint64_t identityValue,
304         VISA_Type type,
305         bool negateSrc,
306         CVariable* src,
307         CVariable* result[2],
308         CVariable* Flag,
309         bool isPrefix);
310 
311     bool IsUniformAtomic(llvm::Instruction* pInst);
312     void emitAtomicRaw(llvm::GenIntrinsicInst* pInst);
313     void emitAtomicTyped(llvm::GenIntrinsicInst* pInst);
314     void emitAtomicCounter(llvm::GenIntrinsicInst* pInst);
315     void emitUniformAtomicCounter(llvm::GenIntrinsicInst* pInst);
316     void emitRenderTargetRead(llvm::GenIntrinsicInst* inst);
317 
318     void emitDiscard(llvm::Instruction* inst);
319     void emitInitDiscardMask(llvm::GenIntrinsicInst* inst);
320     void emitUpdateDiscardMask(llvm::GenIntrinsicInst* inst);
321     void emitGetPixelMask(llvm::GenIntrinsicInst* inst);
322 
323     void emitInput(llvm::Instruction* inst);
324     void emitcycleCounter(llvm::Instruction* inst);
325     void emitSetDebugReg(llvm::Instruction* inst);
326     void emitInsert(llvm::Instruction* inst);
327     void emitExtract(llvm::Instruction* inst);
328     void emitBitCast(llvm::BitCastInst* btCst);
329     void emitPtrToInt(llvm::PtrToIntInst* p2iCst);
330     void emitIntToPtr(llvm::IntToPtrInst* i2pCst);
331     void emitAddrSpaceCast(llvm::AddrSpaceCastInst* addrSpaceCast);
332     void emitBranch(llvm::BranchInst* br, const SSource& cond, e_predMode predMode);
333     void emitDiscardBranch(llvm::BranchInst* br, const SSource& cond);
334     void emitAluNoModifier(llvm::GenIntrinsicInst* inst);
335 
336     void emitSGV(llvm::SGVIntrinsic* inst);
337     void emitPSSGV(llvm::GenIntrinsicInst* inst);
338     void emitCSSGV(llvm::GenIntrinsicInst* inst);
339     void getCoarsePixelSize(CVariable* destination, const uint component, bool isCodePatchCandidate = false);
340     void getPixelPosition(CVariable* destination, const uint component, bool isCodePatchCandidate = false);
341     void emitPixelPosition(llvm::GenIntrinsicInst* inst);
342     void emitPhaseOutput(llvm::GenIntrinsicInst* inst);
343     void emitPhaseInput(llvm::GenIntrinsicInst* inst);
344 
345     void emitPSInput(llvm::Instruction* inst);
346     void emitPSInputMADHalf(llvm::Instruction* inst);
347     void emitPSInputPln(llvm::Instruction* inst);
348     void emitPSInputCst(llvm::Instruction* inst);
349     void emitEvalAttribute(llvm::GenIntrinsicInst* inst);
350     void emitInterpolate(llvm::GenIntrinsicInst* inst);
351     void emitInterpolate2(llvm::GenIntrinsicInst* inst);
352     void emitInterpolant(llvm::GenIntrinsicInst* inst);
353 
354     void emitGradientX(const SSource& source, const DstModifier& modifier);
355     void emitGradientY(const SSource& source, const DstModifier& modifier);
356     void emitGradientXFine(const SSource& source, const DstModifier& modifier);
357     void emitGradientYFine(const SSource& source, const DstModifier& modifier);
358 
359     void emitHSTessFactors(llvm::Instruction* pInst);
360     void emitHSSGV(llvm::GenIntrinsicInst* inst);
361     void emitf32tof16_rtz(llvm::GenIntrinsicInst* inst);
362     void emitfitof(llvm::GenIntrinsicInst* inst);
363     void emitFPOrtz(llvm::GenIntrinsicInst* inst);
364     void emitFMArtp(llvm::GenIntrinsicInst* inst);
365     void emitFMArtn(llvm::GenIntrinsicInst* inst);
366     void emitftoi(llvm::GenIntrinsicInst* inst);
367     void emitCtlz(const SSource& source);
368 
369     void emitDSInput(llvm::Instruction* pInst);
370     void emitDSSGV(llvm::GenIntrinsicInst* inst);
371 
372     // VME
373     void emitVMESendIME(llvm::GenIntrinsicInst* inst);
374     void emitVMESendFBR(llvm::GenIntrinsicInst* inst);
375     void emitVMESendSIC(llvm::GenIntrinsicInst* inst);
376     void emitVMESendIME2(llvm::GenIntrinsicInst* inst);
377     void emitVMESendFBR2(llvm::GenIntrinsicInst* inst);
378     void emitVMESendSIC2(llvm::GenIntrinsicInst* inst);
379     void emitCreateMessagePhases(llvm::GenIntrinsicInst* inst);
380     void emitSetMessagePhaseX_legacy(llvm::GenIntrinsicInst* inst);
381     void emitSetMessagePhase_legacy(llvm::GenIntrinsicInst* inst);
382     void emitGetMessagePhaseX(llvm::GenIntrinsicInst* inst);
383     void emitSetMessagePhaseX(llvm::GenIntrinsicInst* inst);
384     void emitGetMessagePhase(llvm::GenIntrinsicInst* inst);
385     void emitSetMessagePhase(llvm::GenIntrinsicInst* inst);
386     void emitSimdGetMessagePhase(llvm::GenIntrinsicInst* inst);
387     void emitBroadcastMessagePhase(llvm::GenIntrinsicInst* inst);
388     void emitSimdSetMessagePhase(llvm::GenIntrinsicInst* inst);
389     void emitSimdMediaRegionCopy(llvm::GenIntrinsicInst* inst);
390     void emitExtractMVAndSAD(llvm::GenIntrinsicInst* inst);
391     void emitCmpSADs(llvm::GenIntrinsicInst* inst);
392 
393     // VA
394     void emitVideoAnalyticSLM(llvm::GenIntrinsicInst* inst, const DWORD responseLen);
395     // New VA without using SLM and barriers (result is returned in GRF).
396     void emitVideoAnalyticGRF(llvm::GenIntrinsicInst* inst, const DWORD responseLen);
397 
398     // CrossLane Instructions
399     void emitWaveBallot(llvm::GenIntrinsicInst* inst);
400     void emitWaveInverseBallot(llvm::GenIntrinsicInst* inst);
401     void emitWaveShuffleIndex(llvm::GenIntrinsicInst* inst);
402     void emitWavePrefix(llvm::WavePrefixIntrinsic* I);
403     void emitQuadPrefix(llvm::QuadPrefixIntrinsic* I);
404     void emitWaveAll(llvm::GenIntrinsicInst* inst);
405     void emitWaveClustered(llvm::GenIntrinsicInst* inst);
406 
407     // Those three "vector" version shall be combined with
408     // non-vector version.
409     bool isUniformStoreOCL(llvm::StoreInst* SI);
410     bool isUniformStoreOCL(llvm::Value* ptr, llvm::Value* storeVal);
411     void emitVectorBitCast(llvm::BitCastInst* BCI);
412     void emitVectorLoad(llvm::LoadInst* LI, llvm::Value* offset, llvm::ConstantInt* immOffset);
413     void emitVectorStore(llvm::StoreInst* SI, llvm::Value* offset, llvm::ConstantInt* immOffset);
414     void emitGenISACopy(llvm::GenIntrinsicInst* GenCopyInst);
415     void emitVectorCopy(CVariable* Dst, CVariable* Src, uint32_t nElts,
416         uint32_t DstSubRegOffset = 0, uint32_t SrcSubRegOffset = 0);
417     void emitCopyAll(CVariable* Dst, CVariable* Src, llvm::Type* Ty);
418 
419     void emitPushFrameToStack(unsigned& pushSize);
420     void emitAddPointer(CVariable* Dst, CVariable* Src, CVariable* offset);
421     // emitAddPair - emulate 64bit addtition by 32-bit operations.
422     // Dst and Src0 must be a 64-bit type variable.
423     // Src1 mist be in 32-bit type variable/immediate
424     void emitAddPair(CVariable* Dst, CVariable* Src0, CVariable* Src1);
425 
426     void emitSqrt(llvm::Instruction* inst);
427     void emitCanonicalize(llvm::Instruction* inst, const DstModifier& modifier);
428     void emitRsq(llvm::Instruction* inst);
429     void emitFrc(llvm::GenIntrinsicInst* inst);
430 
431     void emitLLVMbswap(llvm::IntrinsicInst* inst);
432     void emitDP4A(llvm::GenIntrinsicInst* GII,
433         const SSource* source = nullptr,
434         const DstModifier& modifier = DstModifier());
435 
436     void emitLLVMStackSave(llvm::IntrinsicInst* inst);
437     void emitLLVMStackRestore(llvm::IntrinsicInst* inst);
438 
439     void emitUnmaskedRegionBoundary(bool start);
440     void emitDpas(llvm::GenIntrinsicInst *GII,
441                   const SSource* source,
442                   const DstModifier& modifier);
443     void emitfcvt(llvm::GenIntrinsicInst *GII);
444     void emitStaticConstantPatchValue(
445         llvm::StaticConstantPatchIntrinsic* staticConstantPatch32);
446     // Debug Built-Ins
447     void emitStateRegID(uint32_t BitStart, uint32_t BitEnd);
448     void emitThreadPause(llvm::GenIntrinsicInst* inst);
449 
450     void MovPhiSources(llvm::BasicBlock* bb);
451 
452     void InitConstant(llvm::BasicBlock* BB);
453     void emitLifetimeStartAtEndOfBB(llvm::BasicBlock* BB);
454     void emitDebugPlaceholder(llvm::GenIntrinsicInst* I);
455     void emitDummyInst(llvm::GenIntrinsicInst* GII);
456     void emitImplicitArgIntrinsic(llvm::GenIntrinsicInst* I);
457     void emitStoreImplBufferPtr(llvm::GenIntrinsicInst* I);
458     void emitStoreLocalIdBufferPtr(llvm::GenIntrinsicInst* I);
459     void emitLoadImplBufferPtr(llvm::GenIntrinsicInst* I);
460     void emitLoadLocalIdBufferPtr(llvm::GenIntrinsicInst* I);
461 
462 
463     std::pair<llvm::Value*, llvm::Value*> getPairOutput(llvm::Value*) const;
464 
465     //helper function
466     void SplitSIMD(llvm::Instruction* inst, uint numSources, uint headerSize, CVariable* payload, SIMDMode mode, uint half);
467     template<size_t N>
468     void JoinSIMD(CVariable* (&tempdst)[N], uint responseLength, SIMDMode mode);
469     CVariable* BroadcastIfUniform(CVariable* pVar, bool nomask = false);
470     uint DecideInstanceAndSlice(const llvm::BasicBlock& blk, SDAG& sdag, bool& slicing);
471     bool IsUndefOrZeroImmediate(const llvm::Value* value);
isUndefOrConstInt0(const llvm::Value * val)472     inline bool isUndefOrConstInt0(const llvm::Value* val)
473     {
474         if (val == nullptr ||
475             llvm::isa<llvm::UndefValue>(val) ||
476             (llvm::isa<llvm::ConstantInt>(val) &&
477                 llvm::cast<llvm::ConstantInt>(val)->getZExtValue() == 0))
478         {
479             return true;
480         }
481         return false;
482     }
getOperandIfExist(llvm::Instruction * pInst,unsigned op)483     inline llvm::Value* getOperandIfExist(llvm::Instruction* pInst, unsigned op)
484     {
485         if (llvm::CallInst * pCall = llvm::dyn_cast<llvm::CallInst>(pInst))
486         {
487             if (op < pCall->getNumArgOperands())
488             {
489                 return pInst->getOperand(op);
490             }
491         }
492         return nullptr;
493     }
494 
IsGRFAligned(CVariable * pVar,e_alignment requiredAlign) const495     bool IsGRFAligned(CVariable* pVar, e_alignment requiredAlign) const
496     {
497         e_alignment align = pVar->GetAlign();
498         if (requiredAlign == EALIGN_BYTE)
499         {
500             // trivial
501             return true;
502         }
503         if (requiredAlign == EALIGN_AUTO || align == EALIGN_AUTO)
504         {
505             // Can only assume that AUTO only matches AUTO (?)
506             // (keep the previous behavior unchanged.)
507             return align == requiredAlign;
508         }
509         return align >= requiredAlign;
510     }
511 
512     CVariable* ExtendVariable(CVariable* pVar, e_alignment uniformAlign);
513     CVariable* BroadcastAndExtend(CVariable* pVar);
514     CVariable* TruncatePointer(CVariable* pVar);
515     CVariable* ReAlignUniformVariable(CVariable* pVar, e_alignment align);
516     CVariable* BroadcastAndTruncPointer(CVariable* pVar);
517     CVariable* IndexableResourceIndex(CVariable* indexVar, uint btiIndex);
518     ResourceDescriptor GetResourceVariable(llvm::Value* resourcePtr);
519     SamplerDescriptor GetSamplerVariable(llvm::Value* samplerPtr);
520     CVariable* ComputeSampleIntOffset(llvm::Instruction* sample, uint sourceIndex);
521     void emitPlnInterpolation(CVariable* bary, CVariable* inputvar);
522 
523     CVariable* GetExecutionMask();
524     CVariable* GetExecutionMask(CVariable* &vecMaskVar);
525     CVariable* GetHalfExecutionMask();
526     CVariable* GetDispatchMask();
527     CVariable* UniformCopy(CVariable* var);
528     CVariable* UniformCopy(CVariable* var, CVariable*& LaneOffset, CVariable* eMask = nullptr, bool doSub = false);
529 
530     // generate loop header to process sample instruction with varying resource/sampler
531     bool ResourceLoopHeader(
532         ResourceDescriptor& resource,
533         SamplerDescriptor& sampler,
534         CVariable*& flag,
535         uint& label);
536     bool ResourceLoopHeader(
537         ResourceDescriptor& resource,
538         CVariable*& flag,
539         uint& label);
540     void ResourceLoopBackEdge(bool needLoop, CVariable* flag, uint label);
541     template<typename Func>
ResourceLoop(ResourceDescriptor & resource,Func Fn)542     void ResourceLoop(ResourceDescriptor& resource, Func Fn)
543     {
544         uint label = 0;
545         CVariable* flag = nullptr;
546         bool needLoop = ResourceLoopHeader(resource, flag, label);
547 
548         Fn(flag);
549 
550         ResourceLoopBackEdge(needLoop, flag, label);
551     }
552     template<typename Func>
ResourceLoop(ResourceDescriptor & resource,SamplerDescriptor & sampler,Func Fn)553     void ResourceLoop(ResourceDescriptor& resource, SamplerDescriptor& sampler, Func Fn)
554     {
555         uint label = 0;
556         CVariable* flag = nullptr;
557         bool needLoop = ResourceLoopHeader(resource, sampler, flag, label);
558 
559         Fn(flag);
560 
561         ResourceLoopBackEdge(needLoop, flag, label);
562     }
563 
564     void ForceDMask(bool createJmpForDiscard = true);
565     void ResetVMask(bool createJmpForDiscard = true);
566     void setPredicateForDiscard(CVariable* pPredicate = nullptr);
567 
568     void PackSIMD8HFRet(CVariable* dst);
569     unsigned int GetPrimitiveTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
570     unsigned int GetPrimitiveTypeSizeInRegister(const llvm::Type* Ty) const;
571     unsigned int GetScalarTypeSizeInRegisterInBits(const llvm::Type* Ty) const;
572     unsigned int GetScalarTypeSizeInRegister(const llvm::Type* Ty) const;
573 
574     /// return true if succeeds, false otherwise.
575     bool setCurrentShader(llvm::Function* F);
576 
577     /// check if the dummy kernel requires compilation
578     bool compileSymbolTableKernel(llvm::Function* F);
579 
580     // Arithmetic operations with constant folding
581     // Src0 and Src1 are the input operands
582     // DstPrototype is a prototype of the result of operation and may be used for cloning to a new variable
583     // Return a variable with the result of the compute which may be one the the sources, an immediate or a variable
584     CVariable* Mul(CVariable* Src0, CVariable* Src1, const CVariable* DstPrototype);
585     CVariable* Add(CVariable* Src0, CVariable* Src1, const CVariable* DstPrototype);
586 
587     // temporary helper function
588     CVariable* GetSymbol(llvm::Value* v) const;
589 
590     // Check if stateless indirect access is available
591     // If yes, increase the counter, otherwise do nothing
592     void CountStatelessIndirectAccess(llvm::Value* pointer, ResourceDescriptor resource);
593 
594     // An indirect access happens when GPU loads from an address that was not directly given as one of the kernel arguments.
595     // It's usually a pointer loaded from memory pointed by a kernel argument.
596     // Otherwise the access is direct.
597     bool IsIndirectAccess(llvm::Value* value);
598 
599     CVariable* GetSrcVariable(const SSource& source, bool fromConstPool = false);
600     void SetSourceModifiers(unsigned int sourceIndex, const SSource& source);
601 
getCurrentBlock() const602     SBasicBlock& getCurrentBlock() const { return m_pattern->m_blocks[m_currentBlock]; }
603 
604     CodeGenContext* m_pCtx = nullptr;
605     CVariable* m_destination = nullptr;
606     GenXFunctionGroupAnalysis* m_FGA = nullptr;
607     CodeGenPatternMatch* m_pattern = nullptr;
608     DeSSA* m_deSSA = nullptr;
609     BlockCoalescing* m_blockCoalescing = nullptr;
610     const SIMDMode m_SimdMode;
611     const ShaderDispatchMode m_ShaderDispatchMode;
612     CShaderProgram::KernelShaderMap& m_shaders;
613     CShader* m_currShader;
614     CEncoder* m_encoder;
615     const llvm::DataLayout* m_DL = nullptr;
616     CoalescingEngine* m_CE = nullptr;
617     VariableReuseAnalysis* m_VRA = nullptr;
618     ModuleMetaData* m_moduleMD = nullptr;
619     bool m_canAbortOnSpill;
620     PSSignature* const m_pSignature;
621 
622     // Debug info emitter
623     IDebugEmitter* m_pDebugEmitter = nullptr;
624 
625     llvm::DominatorTree* m_pDT = nullptr;
626     static char ID;
627     inline void ContextSwitchPayloadSection(bool first = true);
628     inline void ContextSwitchShaderBody(bool last = true);
629 
630 private:
631     uint m_labelForDMaskJmp;
632 
633     llvm::DenseMap<llvm::Instruction*, bool> instrMap;
634 
    // Current rounding Mode
    //   As RM of FPCvtInt and FP could be different, there
    //   are two fields to keep track of their current values.
    //
    // Default rounding modes:
    //   the rounding modes that are pre-defined by each API or
    //   shaders/kernels.
    //
    //   Not all combinations of FP's RM and FPCvtInt's RM can be
    //   used as default. Currently, the default RMs have the
    //   following restrictions:
    //      1. If FPCvtInt's RM = ROUND_TO_ZERO, FP's RM can be any;
    //      2. Otherwise, FPCvtInt's RM must be the same as FP's RM
    //
    //   The default remains unchanged throughout the entire
    //   shaders/kernels. Dynamically setting a different default
    //   rounding mode in the middle of a shader/kernel is not
    //   supported for now. And the default remains unchanged
    //   throughout the entire shaders/kernels.
    //
    //   However, each instruction's RM can be set dynamically,
    //   such as via intrinsics. If an instruction needs setting RMs,
    //   its RMs must follow the above restrictions. So far, an
    //   instruction either relies on FP's RM or FPCvtInt's RM, but
    //   not both, thus setting an instruction's RM dynamically
    //   cannot violate the above restrictions.
    //
    ERoundingMode m_roundingMode_FP;
    ERoundingMode m_roundingMode_FPCvtInt;

    // Index of the block currently being emitted (see getCurrentBlock);
    // (uint)-1 until emission of the first block begins.
    uint m_currentBlock = (uint) -1;

    bool m_currFuncHasSubroutine = false;

    // Used to relocate phi-mov to different BB. phiMovToBB is the map from "fromBB"
    // to "toBB" (meaning to move phi-mov from "fromBB" to "toBB"). See MovPhiSources.
    llvm::DenseMap<llvm::BasicBlock*, llvm::BasicBlock*>  phiMovToBB;
    bool canRelocatePhiMov(
        llvm::BasicBlock* otherBB, llvm::BasicBlock* phiMovBB, llvm::BasicBlock* phiBB);
    bool isCandidateIfStmt(
        llvm::BasicBlock* ifBB, llvm::BasicBlock*& otherBB, llvm::BasicBlock*& emptyBB);

    // Used to check for the constraint types with the actual llvmIR params for inlineASM instructions
    bool validateInlineAsmConstraints(llvm::CallInst* inst, llvm::SmallVector<llvm::StringRef, 8> & constraints);

    // Message-phase accessors for the given VISA type -- the "_legacy"
    // variant presumably targets older hardware paths; confirm in the .cpp.
    void emitGetMessagePhaseType(llvm::GenIntrinsicInst* inst, VISA_Type type, uint32_t width);
    void emitSetMessagePhaseType(llvm::GenIntrinsicInst* inst, VISA_Type type);
    void emitSetMessagePhaseType_legacy(llvm::GenIntrinsicInst* inst, VISA_Type type);

    // Emits a wave scan of 'Src' with operation 'Op'; isInclusiveScan picks
    // inclusive vs. exclusive, 'Mask' limits participating lanes, and
    // isQuad selects quad-granularity -- TODO confirm quad semantics.
    void emitScan(llvm::Value* Src, IGC::WaveOps Op,
        bool isInclusiveScan, llvm::Value* Mask, bool isQuad);

    // Cached per lane offset variables. This is a per basic block data
    // structure. For each entry, the first item is the scalar type size in
    // bytes, the second item is the corresponding symbol.
    llvm::SmallVector<std::pair<unsigned, CVariable*>, 4> PerLaneOffsetVars;
691 
692     // Helper function to reduce common code for emitting indirect address
693     // computation.
getOrCreatePerLaneOffsetVariable(unsigned TypeSizeInBytes)694     CVariable* getOrCreatePerLaneOffsetVariable(unsigned TypeSizeInBytes)
695     {
696         for (auto Item : PerLaneOffsetVars)
697         {
698             if (Item.first == TypeSizeInBytes)
699             {
700                 IGC_ASSERT_MESSAGE(Item.second, "null variable");
701                 return Item.second;
702             }
703         }
704         CVariable* Var = m_currShader->GetPerLaneOffsetsReg(TypeSizeInBytes);
705         PerLaneOffsetVars.push_back(std::make_pair(TypeSizeInBytes, Var));
706         return Var;
707     }
708 
    // Emit code in slice starting from (reverse) iterator I. Return the
    // iterator to the next pattern to emit. Patterns are walked in reverse
    // within 'block'.
    SBasicBlock::reverse_iterator emitInSlice(SBasicBlock& block,
        SBasicBlock::reverse_iterator I);

714     /**
715         * Reuse SampleDescriptor for sampleID, so that we can pass it to
716         * ResourceLoop to generate loop for non-uniform values.
717         */
getSampleIDVariable(llvm::Value * sampleIdVar)718     inline SamplerDescriptor getSampleIDVariable(llvm::Value* sampleIdVar)
719     {
720         SamplerDescriptor sampler;
721         sampler.m_sampler = GetSymbol(sampleIdVar);
722         return sampler;
723     }
724 
    // If pVar is uniform, produce a variable valid across all lanes --
    // whether by unpacking or broadcasting is decided in the implementation;
    // confirm in the .cpp.
    CVariable* UnpackOrBroadcastIfUniform(CVariable* pVar);

getGRFSize() const727     int getGRFSize() const { return m_currShader->getGRFSize(); }
728 
    // --- Rounding-mode management (see the comment block above the
    // m_roundingMode_* members for the default-RM restrictions) ---
    void initDefaultRoundingMode();
    ERoundingMode GetRoundingMode_FPCvtInt(llvm::Instruction* pInst);
    ERoundingMode GetRoundingMode_FP(llvm::Instruction* inst);
    void SetRoundingMode_FP(ERoundingMode RM_FP);
    void SetRoundingMode_FPCvtInt(ERoundingMode RM_FPCvtInt);
    // Whether 'inst' sets its rounding mode explicitly -- TODO confirm
    // exact criteria in the implementation.
    bool setRMExplicitly(llvm::Instruction* inst);
    void ResetRoundingMode(llvm::Instruction* inst);
    // returns true if the instruction does not care about the rounding mode settings
    bool ignoreRoundingMode(llvm::Instruction* inst) const;

    // A64 load/store with HWA that make sure the offset hi part is the same per LS call
    // addrUnifrom: if the load/store address is uniform, we can skip A64 WA
    void emitGatherA64(llvm::Value* loadInst, CVariable* dst, CVariable* offset, unsigned elemSize, unsigned numElems, bool addrUniform);
    void emitGather4A64(llvm::Value* loadInst, CVariable* dst, CVariable* offset, bool addrUniform);
    void emitScatterA64(CVariable* val, CVariable* offset, unsigned elementSize, unsigned numElems, bool addrUniform);
    void emitScatter4A64(CVariable* src, CVariable* offset, bool addrUniform);

    // Helper functions that create loop for above WA
    void A64LSLoopHead(CVariable* addr, CVariable*& curMask, CVariable*& lsPred, uint& label);
    void A64LSLoopTail(CVariable* curMask, CVariable* lsPred, uint label);

    // Helper function to check if A64 WA is required
    bool hasA64WAEnable() const;

    // True if 'dst' occupies only half a GRF at the given SIMD mode --
    // presumably used when sizing message returns; confirm in the .cpp.
    bool isHalfGRFReturn(CVariable* dst, SIMDMode simdMode);

    void emitFeedbackEnable();

    // Emits the cast of 'addrSpaceCast' into the generic address space,
    // tagging the pointer with 'tag'.
    void emitAddrSpaceToGenericCast(llvm::AddrSpaceCastInst* addrSpaceCast, CVariable* srcV, unsigned tag);

    // used for loading/storing uniform value using scatter/gather messages.
    CVariable* prepareAddressForUniform(
        CVariable* AddrVar, uint32_t EltBytes, uint32_t NElts, uint32_t ExecSz, e_alignment Align);
    CVariable* prepareDataForUniform(CVariable* DataVar, uint32_t ExecSz, e_alignment Align);
    // NOTE(review): these two appear related (temporary destination used when
    // an emit is a duplicate) -- confirm their interplay in the .cpp.
    bool m_isDuplicate;
    CVariable* m_tmpDest = nullptr;
765 
766 };
767 
768 } // namespace IGC
769