/*========================== begin_copyright_notice ============================

Copyright (C) 2020-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef GENX_UTIL_H
#define GENX_UTIL_H

#include "FunctionGroup.h"
#include "GenXRegionUtils.h"
#include "GenXSubtarget.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"

#include "llvmWrapper/IR/DerivedTypes.h"

#include "Probe/Assertion.h"

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <unordered_map>
#include <vector>

namespace llvm {
namespace genx {
// Utility function to get the integral log base 2 of an integer, or -1 if
// the input is not a power of 2.
inline int exactLog2(unsigned Val)
{
  unsigned CLZ = countLeadingZeros(Val, ZB_Width);
  if (CLZ != 32 && 1U << (31 - CLZ) == Val)
    return 31 - CLZ;
  return -1;
}
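// Illustrative values (a sketch derived from the code above, not part of the
// interface):
//   exactLog2(16) == 4   // 16 == 1 << 4
//   exactLog2(24) == -1  // 24 is not a power of 2
//   exactLog2(0)  == -1  // CLZ == 32, so the check fails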

// Utility function to get the log base 2 of an integer, truncated to an
// integer, or -1 if the number is 0 or negative.
template<typename T>
inline int log2(T Val)
{
  if (Val <= 0)
    return -1;
  unsigned CLZ = countLeadingZeros((uint32_t)Val, ZB_Width);
  return 31 - CLZ;
}
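// Illustrative values (a sketch derived from the code above):
//   log2(16) == 4   // exact power of 2
//   log2(24) == 4   // truncated: 2^4 <= 24 < 2^5
//   log2(0)  == -1  // non-positive input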

// Common functionality for media ld/st lowering and CISA builder
template <typename T> inline T roundedVal(T Val, T RoundUp) {
  T RoundedVal = static_cast<T>(1) << genx::log2(Val);
  if (RoundedVal < Val)
    RoundedVal *= 2;
  if (RoundedVal < RoundUp)
    RoundedVal = RoundUp;
  return RoundedVal;
}
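// Illustrative values (a sketch): the result is Val rounded up to a power of
// 2, but never below RoundUp:
//   roundedVal(24u, 4u) == 32  // 2^log2(24) == 16 < 24, so doubled to 32
//   roundedVal(16u, 4u) == 16  // already a power of 2
//   roundedVal(2u, 8u)  == 8   // clamped up to RoundUp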

// createConvert : create a genx_convert intrinsic call
CallInst *createConvert(Value *In, const Twine &Name, Instruction *InsertBefore,
                        Module *M = nullptr);

// createConvertAddr : create a genx_convert_addr intrinsic call
CallInst *createConvertAddr(Value *In, int Offset, const Twine &Name,
                            Instruction *InsertBefore, Module *M = nullptr);

// createAddAddr : create a genx_add_addr intrinsic call
CallInst *createAddAddr(Value *Lhs, Value *Rhs, const Twine &Name,
                        Instruction *InsertBefore, Module *M = nullptr);

CallInst *createUnifiedRet(Type *Ty, const Twine &Name, Module *M);

// getPredicateConstantAsInt : get a vXi1 constant's value as a single integer
unsigned getPredicateConstantAsInt(const Constant *C);

// getConstantSubvector : get a contiguous region from a vector constant
Constant *getConstantSubvector(const Constant *V, unsigned StartIdx,
                               unsigned Size);

// concatConstants : concatenate two possibly vector constants, giving a vector
// constant
Constant *concatConstants(Constant *C1, Constant *C2);

// findClosestCommonDominator : find latest common dominator of some
// instructions
Instruction *findClosestCommonDominator(DominatorTree *DT,
                                        ArrayRef<Instruction *> Insts);

// convertShlShr : convert Shl followed by AShr/LShr by the same amount into
// trunc+sext/zext
Instruction *convertShlShr(Instruction *Inst);
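// For example (a sketch in IR terms): "%s = shl i32 %x, 24" followed by
// "ashr i32 %s, 24" becomes "%t = trunc i32 %x to i8" plus
// "sext i8 %t to i32"; with lshr the extension is a zext instead.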

// splitStructPhis : find struct phi nodes and split them
//
// Return:  whether code modified
//
// Each struct phi node is split into a separate phi node for each struct
// element. This is needed because the GenX backend's liveness and coalescing
// code cannot cope with a struct phi.
//
// This is run in two places: firstly in GenXLowering, so that pass can then
// simplify any InsertElement and ExtractElement instructions added by the
// struct phi splitting. But then it needs to be run again in GenXLiveness,
// because other passes can re-insert a struct phi. The case I saw in
// hevc_speed was something commoning up the struct return from two calls in an
// if..else..endif.
//
// BTW there's also the GenXAggregatePseudoLowering pass that does the same.
bool splitStructPhis(Function *F);
bool splitStructPhi(PHINode *Phi);

// normalizeGloads : normalize a g_load with respect to bitcasts.
//
// When a single g_load is bitcast to different types, clone the g_load.
bool normalizeGloads(Instruction *Inst);

// Fold a bitcast instruction into the pointer operand of a store/load if
// possible. Return the new instruction, or nullptr.
Instruction *foldBitCastInst(Instruction *Inst);

// Return the underlying global variable. Return nullptr if it does not exist.
GlobalVariable *getUnderlyingGlobalVariable(Value *V);
const GlobalVariable *getUnderlyingGlobalVariable(const Value *V);
GlobalVariable *getUnderlyingGlobalVariable(LoadInst *LI);
const GlobalVariable *getUnderlyingGlobalVariable(const LoadInst *LI);

class Bale;

bool isGlobalStore(Instruction *I);
bool isGlobalStore(StoreInst *ST);

bool isGlobalLoad(Instruction *I);
bool isGlobalLoad(LoadInst *LI);

// Check that V is a legal value for a global store to StorePtr.
// This implies:
// 1) V is a wrregion W;
// 2) the old value of W is the result of a gload L;
// 3) the pointer operand of L is derived from the same global variable as
//    StorePtr.
bool isLegalValueForGlobalStore(Value *V, Value *StorePtr);

// Check that global store ST operands meet the condition of
// isLegalValueForGlobalStore.
bool isGlobalStoreLegal(StoreInst *ST);

bool isIdentityBale(const Bale &B);

// Check if the region of a value is OK for baling in to a raw operand
//
// Enter:   V = value that is possibly rdregion/wrregion
//          IsWrite = true if caller wants to see wrregion, false for rdregion
//
// The region must be constant indexed, contiguous, and start on a GRF
// boundary.
bool isValueRegionOKForRaw(Value *V, bool IsWrite, const GenXSubtarget *ST);

// Check if a region is OK for baling in to a raw operand
//
// The region must be constant indexed, contiguous, and start on a GRF
// boundary.
bool isRegionOKForRaw(const genx::Region &R, const GenXSubtarget *ST);

// Skip optimizations on functions with large blocks.
inline bool skipOptWithLargeBlock(const Function &F) {
  return std::any_of(F.begin(), F.end(),
                     [](const BasicBlock &BB) { return BB.size() >= 5000; });
}

bool skipOptWithLargeBlock(FunctionGroup &FG);

// getTwoAddressOperandNum : get the operand number of a two-address operand
llvm::Optional<unsigned> getTwoAddressOperandNum(CallInst *II);

// isNot : test whether an instruction is a "not" instruction (an xor with
//    constant all ones)
bool isNot(Instruction *Inst);

// isPredNot : test whether an instruction is a "not" instruction (an xor
//    with constant all ones) with predicate (i1 or vector of i1) type
bool isPredNot(Instruction *Inst);

// isIntNot : test whether an instruction is a "not" instruction (an xor
//    with constant all ones) with non-predicate type
bool isIntNot(Instruction *Inst);

// getMaskOperand : get the i1 vector operand of a genx intrinsic; return
//    null if there is no operand of such type or the instruction is not a
//    genx intrinsic. If there are multiple i1 vector operands, return the
//    first one.
Value *getMaskOperand(const Instruction *Inst);

// invertCondition : invert the given predicate value, possibly reusing
//    an existing copy.
Value *invertCondition(Value *Condition);

// If V is a function pointer, return the function it points to,
//    nullptr otherwise.
Function *getFunctionPointerFunc(Value *V);

// Return true if V is a constant vector of function pointers,
// looking through any casts and extractelements within.
bool isFuncPointerVec(Value *V);

// isNoopCast : test if a cast operation doesn't modify the bitwise
// representation of a value (in other words, it can be copy-coalesced).
bool isNoopCast(const CastInst *CI);

// ShuffleVectorAnalyzer : class to analyze a shufflevector
class ShuffleVectorAnalyzer {
  ShuffleVectorInst *SI;

public:
  ShuffleVectorAnalyzer(ShuffleVectorInst *SI) : SI(SI) {}
  // getAsSlice : return the start index of the slice, or -1 if the
  //  shufflevector is not a slice
  int getAsSlice();

  // Replicated slice descriptor.
  // A replicated slice (e.g. 1 2 3 1 2 3) can be parametrized by
  // initial offset (1), slice size (3) and replication count (2).
  struct ReplicatedSlice {
    unsigned InitialOffset;
    unsigned SliceSize;
    unsigned ReplicationCount;
    ReplicatedSlice(unsigned Offset, unsigned Size, unsigned Count)
        : InitialOffset(Offset), SliceSize(Size), ReplicationCount(Count) {}
  };

  // isReplicatedSlice : check whether the shufflevector is a replicated slice.
  // Example of a replicated slice:
  // shufflevector <3 x T> x, undef, <6 x i32> <1, 2, 1, 2, 1, 2>.
  bool isReplicatedSlice() const;

  static bool isReplicatedSlice(ShuffleVectorInst *SI) {
    return ShuffleVectorAnalyzer(SI).isReplicatedSlice();
  }

  // When we have a replicated slice, its parameters are easily deduced
  // from the first and last elements of the mask. This function decomposes
  // a replicated slice into its parameters.
  ReplicatedSlice getReplicatedSliceDescriptor() const {
    IGC_ASSERT_MESSAGE(isReplicatedSlice(), "Expected replicated slice");
    const unsigned TotalSize =
        cast<IGCLLVM::FixedVectorType>(SI->getType())->getNumElements();
    const unsigned SliceStart = SI->getMaskValue(0);
    const unsigned SliceEnd = SI->getMaskValue(TotalSize - 1);
    const unsigned SliceSize = SliceEnd - SliceStart + 1;
    const unsigned ReplicationCount = TotalSize / SliceSize;
    return ReplicatedSlice(SliceStart, SliceSize, ReplicationCount);
  }
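  // Worked example (derived from the code above): for the mask
  // <1, 2, 1, 2, 1, 2>, TotalSize is 6, the first and last mask elements are
  // 1 and 2, so the descriptor is {InitialOffset = 1, SliceSize = 2,
  // ReplicationCount = 3}.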

  static ReplicatedSlice getReplicatedSliceDescriptor(ShuffleVectorInst *SI) {
    return ShuffleVectorAnalyzer(SI).getReplicatedSliceDescriptor();
  }

  // getAsUnslice : see if the shufflevector is an unslice, where the
  //     "old value" is operand 0, operand 1 is another shufflevector, and
  //     operand 0 of that is the "new value". Returns the start index, or
  //     -1 if it is not an unslice.
  int getAsUnslice();
  // getAsSplat : if the shufflevector is a splat, get the splatted input,
  //  with the element's vector index if the input is a vector
  struct SplatInfo {
    Value *Input;
    unsigned Index;
    SplatInfo(Value *Input, unsigned Index) : Input(Input), Index(Index) {}
  };
  SplatInfo getAsSplat();

  // Serialize this shufflevector instruction.
  Value *serialize();

  // Compute the cost in terms of the number of insertelement instructions
  // needed.
  unsigned getSerializeCost(unsigned i);

  // Describes the region of one of the two shufflevector instruction
  // operands.
  struct OperandRegionInfo {
    Value *Op;
    Region R;
  };
  OperandRegionInfo getMaskRegionPrefix(int StartIdx);
};

// Class for splitting i64 (both vector and scalar) to subregions of i32
// vectors. Used in GenXLowering and emulation routines.
class IVSplitter {
  Instruction &Inst;

  Type *ETy = nullptr;
  Type *VI32Ty = nullptr;
  size_t Len = 0;

  enum class RegionType { LoRegion, HiRegion, FirstHalf, SecondHalf };

  // Description of a RegionType in terms of initial offset and stride.
  // Both ElOffset and ElStride are in elements.
  struct RegionTrait {
    size_t ElOffset = 0;
    size_t ElStride = 0;
  };

  // describeSplit: given a requested RegionType and a number of source
  // elements, returns the detailed description of how to form such a split
  // (in terms of an initial offset and stride).
  // Example:
  //    describeSplit(SecondHalf, 10) should return RegionTrait{ 5, 1 }
  static RegionTrait describeSplit(RegionType RT, size_t ElNum);

  // splitConstantVector: given a vector of constant values, create
  // a new constant vector containing only the values corresponding to the
  // desired RegionType.
  // Example:
  //    splitConstantVector({1, 2, 3, 4}, HiRegion) -> {2, 4}
  // Note: since every RegionType needs half of the original elements, the
  // size of the input vector is expected to be even.
  static Constant *splitConstantVector(const SmallVectorImpl<Constant *> &KV,
                                       RegionType RT);
  // createSplitRegion: given the type of the source vector (expected to be
  // a vector of i32 with an even number of elements) and the desired
  // RegionType, returns a genx::Region that can be used to construct an
  // equivalent rdregion intrinsic.
  static genx::Region createSplitRegion(Type *SrcTy, RegionType RT);

  std::pair<Value *, Value *> splitValue(Value &Val, RegionType RT1,
                                         const Twine &Name1, RegionType RT2,
                                         const Twine &Name2,
                                         bool FoldConstants);
  Value *combineSplit(Value &V1, Value &V2, RegionType RT1, RegionType RT2,
                      const Twine &Name, bool Scalarize);

public:

  struct LoHiSplit {
    Value *Lo;
    Value *Hi;
  };
  struct HalfSplit {
    Value *Left;
    Value *Right;
  };

  // The instruction is used as an insertion point, a debug location source
  // and a source of operands to split.
  // If BaseOpIdx indexes a scalar/vector operand of i64 type, then
  // IsI64Operation shall return true; otherwise the value type of the
  // instruction is used.
  IVSplitter(Instruction &Inst, const unsigned *BaseOpIdx = nullptr);

  // The split operand is expected to be a scalar/vector of i64 type.
  LoHiSplit splitOperandLoHi(unsigned SourceIdx, bool FoldConstants = true);
  HalfSplit splitOperandHalf(unsigned SourceIdx, bool FoldConstants = true);

  LoHiSplit splitValueLoHi(Value &V, bool FoldConstants = true);
  HalfSplit splitValueHalf(Value &V, bool FoldConstants = true);

  // Combined values are expected to be vectors of i32 of the same size.
  Value *combineLoHiSplit(const LoHiSplit &Split, const Twine &Name,
                          bool Scalarize);
  Value *combineHalfSplit(const HalfSplit &Split, const Twine &Name,
                          bool Scalarize);

  // Convenience method for quick sanity checking.
  bool IsI64Operation() const { return ETy->isIntegerTy(64); }
};
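// Typical usage (an illustrative sketch; I64Inst, LoRes and HiRes are made-up
// names, and the Scalarize argument is assumed to mean "the original value
// was scalar"):
//   IVSplitter SplitBuilder(*I64Inst);
//   IVSplitter::LoHiSplit Ops = SplitBuilder.splitOperandLoHi(0);
//   // ... emulate the operation on the i32 halves Ops.Lo/Ops.Hi,
//   //     producing LoRes and HiRes ...
//   Value *Result = SplitBuilder.combineLoHiSplit(
//       {LoRes, HiRes}, "emulated", I64Inst->getType()->isIntegerTy());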

// adjustPhiNodesForBlockRemoval : adjust phi nodes when removing a block
void adjustPhiNodesForBlockRemoval(BasicBlock *Succ, BasicBlock *BB);

/***********************************************************************
 * sinkAdd : sink add(s) in address calculation
 *
 * Enter:   IdxVal = the original index value
 *
 * Return:  the new calculation for the index value
 *
 * This detects the case when a variable index in a region or element access
 * is one or more constant add/subs, then some mul/shl/truncs. It sinks
 * the add/subs into a single add after the mul/shl/truncs, so the add
 * stands a chance of being baled in as a constant offset in the region.
 *
 * If add sinking is successfully applied, it may leave now unused
 * instructions behind, which need tidying by a later dead code removal
 * pass.
 */
Value *sinkAdd(Value *V);
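// For example (a sketch): an index computed as (x + 1) << 2 is rewritten as
// (x << 2) + 4, so the constant 4 stands a chance of being baled in as an
// immediate region offset.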

// Check if this is a mask packing operation, i.e. a bitcast from Vxi1 to
// integer i8, i16 or i32.
static inline bool isMaskPacking(const Value *V) {
  if (auto BC = dyn_cast<BitCastInst>(V)) {
    auto SrcTy = dyn_cast<IGCLLVM::FixedVectorType>(BC->getSrcTy());
    if (!SrcTy || !SrcTy->getScalarType()->isIntegerTy(1))
      return false;
    unsigned NElts = SrcTy->getNumElements();
    if (NElts != 8 && NElts != 16 && NElts != 32)
      return false;
    return V->getType()->getScalarType()->isIntegerTy(NElts);
  }
  return false;
}
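// For instance, "bitcast <16 x i1> %mask to i16" is a mask packing
// operation, while a bitcast from <4 x i1> to i4 is not (only 8, 16 and 32
// element masks qualify).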

void LayoutBlocks(Function &func, LoopInfo &LI);
void LayoutBlocks(Function &func);

// Metadata name for inline assembly instructions
constexpr const char *MD_genx_inline_asm_info = "genx.inlasm.constraints.info";

// Available inline assembly constraints
enum class ConstraintType : uint32_t {
  Constraint_r,
  Constraint_rw,
  Constraint_i,
  Constraint_n,
  Constraint_F,
  Constraint_a,
  Constraint_cr,
  Constraint_unknown
};

// Represents info about an inline assembly operand
class GenXInlineAsmInfo {
  genx::ConstraintType CTy = ConstraintType::Constraint_unknown;
  int MatchingInput = -1;
  bool IsOutput = false;

public:
  GenXInlineAsmInfo(genx::ConstraintType Ty, int MatchingInput, bool IsOutput)
      : CTy(Ty), MatchingInput(MatchingInput), IsOutput(IsOutput) {}
  bool hasMatchingInput() const { return MatchingInput != -1; }
  int getMatchingInput() const { return MatchingInput; }
  bool isOutput() const { return IsOutput; }
  genx::ConstraintType getConstraintType() const { return CTy; }
};

// Check whether an input constraint has a matching output operand with the
// same constraint
bool isInlineAsmMatchingInputConstraint(const InlineAsm::ConstraintInfo &Info);

// Get the matched output operand number for an input operand
unsigned getInlineAsmMatchedOperand(const InlineAsm::ConstraintInfo &Info);

// Get a joined string representation of constraints
std::string getInlineAsmCodes(const InlineAsm::ConstraintInfo &Info);

// Get the constraint type
genx::ConstraintType getInlineAsmConstraintType(StringRef Codes);

// Get a vector of inline asm info for an inline assembly instruction.
// Returns an empty vector if there is no constraint string in the inline asm
// or if called before the GenXInlineAsmLowering pass.
std::vector<GenXInlineAsmInfo> getGenXInlineAsmInfo(CallInst *CI);

// Get a vector of inline asm info from an MDNode
std::vector<GenXInlineAsmInfo> getGenXInlineAsmInfo(MDNode *MD);

bool hasConstraintOfType(const std::vector<GenXInlineAsmInfo> &ConstraintsInfo,
                         genx::ConstraintType CTy);

// Get the number of outputs for an inline assembly instruction
unsigned getInlineAsmNumOutputs(CallInst *CI);

Type *getCorrespondingVectorOrScalar(Type *Ty);

/* scalarizeOrVectorizeIfNeeded: scalarize or vectorize \p Inst if required
 *
 * The result of some instructions can be both a Ty and a <1 x Ty> value,
 * e.g. rdregion. It is sometimes required to replace uses of instructions
 * with types [\p FirstType, \p LastType) with \p Inst. If the types don't
 * correspond, this function places a BitCastInst <1 x Ty> to Ty, or Ty to
 * <1 x Ty>, after \p Inst and returns a pointer to the cast instruction.
 * If no cast is required, nullptr is returned.
 */
template <
    typename ConstIter,
    typename std::enable_if<
        std::is_base_of<
            Type, typename std::remove_pointer<typename std::iterator_traits<
                      ConstIter>::value_type>::type>::value,
        int>::type = 0>
CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst, ConstIter FirstType,
                                       ConstIter LastType) {
  IGC_ASSERT_MESSAGE(Inst, "wrong argument");
  IGC_ASSERT_MESSAGE(std::all_of(FirstType, LastType,
                     [Inst](Type *Ty) {
                       return Ty == Inst->getType() ||
                              Ty == getCorrespondingVectorOrScalar(
                                        Inst->getType());
                     }),
         "wrong arguments: type of instructions must correspond");

  if (Inst->getType()->isVectorTy() &&
      cast<IGCLLVM::FixedVectorType>(Inst->getType())->getNumElements() > 1)
    return nullptr;
  bool needBitCast = std::any_of(
      FirstType, LastType, [Inst](Type *Ty) { return Ty != Inst->getType(); });
  if (!needBitCast)
    return nullptr;
  auto *CorrespondingTy = getCorrespondingVectorOrScalar(Inst->getType());
  auto *BC = CastInst::Create(Instruction::BitCast, Inst, CorrespondingTy);
  BC->insertAfter(Inst);
  return BC;
}
/* scalarizeOrVectorizeIfNeeded: scalarize or vectorize \p Inst if required
 *
 * The result of some instructions can be both a Ty and a <1 x Ty> value,
 * e.g. rdregion. It is sometimes required to replace uses of instructions of
 * [\p FirstInstToReplace, \p LastInstToReplace) with \p Inst. If the types
 * don't correspond, this function places a BitCastInst <1 x Ty> to Ty, or Ty
 * to <1 x Ty>, after \p Inst and returns a pointer to the cast instruction.
 * If no cast is required, nullptr is returned.
 */
template <typename ConstIter,
          typename std::enable_if<
              std::is_base_of<
                  Instruction,
                  typename std::remove_pointer<typename std::iterator_traits<
                      ConstIter>::value_type>::type>::value,
              int>::type = 0>
CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst,
                                       ConstIter FirstInstToReplace,
                                       ConstIter LastInstToReplace) {
  std::vector<Type *> Types;
  std::transform(FirstInstToReplace, LastInstToReplace,
                 std::back_inserter(Types),
                 [](Instruction *Inst) { return Inst->getType(); });
  return scalarizeOrVectorizeIfNeeded(Inst, Types.begin(), Types.end());
}

CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst, Type *RefType);

CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst,
                                       Instruction *InstToReplace);
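// Illustrative example (a sketch based on the doc comment above): if Inst
// produces <1 x i32> while some of the types to match are i32, the call
// inserts "bitcast <1 x i32> ... to i32" right after Inst and returns that
// cast; if all types already match, it returns nullptr.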

// Returns the log alignment for an align type and target GRF width, because
// ALIGN_GRF must be target-dependent.
unsigned getLogAlignment(VISA_Align Align, unsigned GRFWidth);
// The opposite of getLogAlignment.
VISA_Align getVISA_Align(unsigned LogAlignment, unsigned GRFWidth);
// Some log alignments cannot be transparently transformed to VISA_Align. This
// chooses a suitable log alignment which is convertible to VISA_Align.
unsigned ceilLogAlignment(unsigned LogAlignment, unsigned GRFWidth);

// Checks whether the provided wrpredregion intrinsic can be encoded
// as a legal SETP instruction.
bool isWrPredRegionLegalSetP(const CallInst &WrPredRegion);

// Checks if V is a CallInst representing a direct call to F.
// Many of our analyses do not check whether a function F's user
// which is a CallInst calls exactly F. This may not be true
// when a function pointer is passed as an argument of a call to
// another function, e.g. the genx.faddr intrinsic.
// Returns V cast to CallInst if the check succeeds,
// nullptr otherwise.
CallInst *checkFunctionCall(Value *V, Function *F);

// Get the possible number of GRFs for an indirect region
unsigned getNumGRFsPerIndirectForRegion(const genx::Region &R,
                                        const GenXSubtarget *ST, bool Allow2D);
// Controls the behavior of the emulateI64Operation function.
enum class EmulationFlag {
  RAUW,
  // RAUW and EraseFromParent; always returns a valid instruction,
  // either the original one or the last one from the resulting emulation
  // sequence
  RAUWE,
  None,
};
// Transforms an operation on i64 type to an equivalent sequence that does not
// operate on i64 (but rather on i32).
// The implementation is contained in the GenXEmulate pass sources.
// Note: ideally, i64 emulation should be handled by the GenXEmulate pass;
// however, some of our late passes like GenXPostLegalization or GenXCategory
// may introduce additional instructions which violate Subtarget restrictions -
// this function is intended to cope with such cases.
Instruction *emulateI64Operation(const GenXSubtarget *ST, Instruction *In,
                                 EmulationFlag AuxAction);
// BinaryDataAccumulator: a helper class to accumulate binary data
// in one buffer.
// Information about each stored section can be accessed via the key with
// which it was stored. The key must be unique.
// The accumulated/consolidated binary data can be accessed.
template <typename KeyT, typename DataT = uint8_t> class BinaryDataAccumulator {
public:
  struct SectionInfoT {
    int Offset = 0;
    ArrayRef<DataT> Data;

    SectionInfoT() = default;
    SectionInfoT(const DataT *BasePtr, int First, int Last)
        : Offset{First}, Data{BasePtr + First, BasePtr + Last} {}

    int getSize() const { return Data.size(); }
  };

  struct SectionT {
    KeyT Key;
    SectionInfoT Info;
  };

private:
  std::vector<DataT> Data;
  using SectionSeq = std::vector<SectionT>;
  SectionSeq Sections;

public:
  using value_type = typename SectionSeq::value_type;
  using reference = typename SectionSeq::reference;
  using const_reference = typename SectionSeq::const_reference;
  using iterator = typename SectionSeq::iterator;
  using const_iterator = typename SectionSeq::const_iterator;

  iterator begin() { return Sections.begin(); }
  const_iterator begin() const { return Sections.begin(); }
  const_iterator cbegin() const { return Sections.cbegin(); }
  iterator end() { return Sections.end(); }
  const_iterator end() const { return Sections.end(); }
  const_iterator cend() const { return Sections.cend(); }
  reference front() { return *begin(); }
  const_reference front() const { return *begin(); }
  reference back() { return *std::prev(end()); }
  const_reference back() const { return *std::prev(end()); }

  // Append the data that is referenced by a \p Key and represented
  // in the range [\p First, \p Last) to the buffer.
  // The range must consist of DataT elements.
  template <typename InputIter>
  void append(KeyT Key, InputIter First, InputIter Last) {
    IGC_ASSERT_MESSAGE(
        std::none_of(Sections.begin(), Sections.end(),
                     [&Key](const SectionT &S) { return S.Key == Key; }),
        "There's already a section with such key");
    SectionT Section;
    Section.Key = std::move(Key);
    int Offset = Data.size();
    std::copy(First, Last, std::back_inserter(Data));
    Section.Info =
        SectionInfoT{Data.data(), Offset, static_cast<int>(Data.size())};
    Sections.push_back(std::move(Section));
  }

  void append(KeyT Key, ArrayRef<DataT> SectionBin) {
    append(std::move(Key), SectionBin.begin(), SectionBin.end());
  }

  // Get information about the section referenced by \p Key.
  SectionInfoT getSectionInfo(const KeyT &Key) const {
    auto SectionIt =
        std::find_if(Sections.begin(), Sections.end(),
                     [&Key](const SectionT &S) { return S.Key == Key; });
    IGC_ASSERT_MESSAGE(SectionIt != Sections.end(),
                       "There must be a section with such key");
    return SectionIt->Info;
  }

  // Get the offset of the section referenced by \p Key.
  int getSectionOffset(const KeyT &Key) const {
    return getSectionInfo(Key).Offset;
  }
  // Get the size of the section referenced by \p Key.
  int getSectionSize(const KeyT &Key) const {
    return getSectionInfo(Key).getSize();
  }
  // Get the size of the whole collected data.
  int getFullSize() const { return Data.size(); }
  int getNumSections() const { return Sections.size(); }
  // Check whether the data buffer is empty.
  bool empty() const { return Data.empty(); }
  // Emit the whole consolidated data.
  std::vector<DataT> emitConsolidatedData() const & { return Data; }
  std::vector<DataT> emitConsolidatedData() && { return std::move(Data); }
};
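// Usage sketch (illustrative; the keys and buffers are made up):
//   BinaryDataAccumulator<std::string> Acc;
//   Acc.append("kernel_a", KernelABinary);       // ArrayRef<uint8_t>
//   Acc.append("kernel_b", KernelBBinary);
//   int Off = Acc.getSectionOffset("kernel_b");  // == KernelABinary.size()
//   std::vector<uint8_t> Blob = std::move(Acc).emitConsolidatedData();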

// Not every global variable is a real global variable that should eventually
// be encoded as a global variable.
// GenX volatile and printf strings are exclusions for now.
// Printf strings must already be legalized to make it possible to use this
// function, which is done in the middle-end, so there is no problem with
// calling it in codegen.
bool isRealGlobalVariable(const GlobalVariable &GV);

// Get the size of a struct field, including the size of padding for the next
// field, or the trailing padding.
// For example, for the 1st element of { i8, i32 }, 4 bytes will be returned
// (likely in most layouts).
//
// Arguments:
//    \p ElemIdx - index of the struct field
//    \p NumOperands - the number of fields in the struct
//                     (StructLayout doesn't expose it)
//    \p StructLayout - the struct layout
//
// Returns the size in bytes.
std::size_t getStructElementPaddedSize(unsigned ElemIdx, unsigned NumOperands,
                                       const StructLayout &Layout);

// Determine if there is a store to global variable Addr between L1 and
// L2. L1 and L2 can be either vloads or regular stores.
bool hasMemoryDeps(Instruction *L1, Instruction *L2, Value *Addr,
                   DominatorTree *DT);

// Return true if V is a rdregion from a load result.
bool isRdRFromGlobalLoad(Value *V);

// Return true if a wrregion has the result of a load as its old value.
bool isWrRToGlobalLoad(Value *V);

} // namespace genx
} // namespace llvm

#endif // GENX_UTIL_H