/*========================== begin_copyright_notice ============================

Copyright (C) 2020-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#ifndef GENX_UTIL_H
#define GENX_UTIL_H

#include "FunctionGroup.h"
#include "GenXRegionUtils.h"
#include "GenXSubtarget.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"

#include "llvmWrapper/IR/DerivedTypes.h"

#include "Probe/Assertion.h"

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <unordered_map>
#include <vector>

namespace llvm {
namespace genx {

// Utility function to get the integral log base 2 of an integer, or -1 if
// the input is not a power of 2.
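// Example: exactLog2(16) returns 4 (16 == 1 << 4), while exactLog2(24)
// returns -1 because 24 is not a power of 2.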
inline int exactLog2(unsigned Val)
{
  unsigned CLZ = countLeadingZeros(Val, ZB_Width);
  if (CLZ != 32 && 1U << (31 - CLZ) == Val)
    return 31 - CLZ;
  return -1;
}

// Utility function to get the log base 2 of an integer, truncated to an
// integer, or -1 if the number is 0 or negative.
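// Example: log2(17) returns 4 (log2 of 17 truncated); log2(0) returns -1.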
template<typename T>
inline int log2(T Val)
{
  if (Val <= 0)
    return -1;
  unsigned CLZ = countLeadingZeros((uint32_t)Val, ZB_Width);
  return 31 - CLZ;
}

// Common functionality for media ld/st lowering and CISA builder
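// Example: roundedVal(5, 2) returns 8 (5 rounded up to the next power of 2);
// roundedVal(4, 16) returns 16 (the rounded value is clamped up to RoundUp).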
template <typename T> inline T roundedVal(T Val, T RoundUp) {
  T RoundedVal = static_cast<T>(1) << genx::log2(Val);
  if (RoundedVal < Val)
    RoundedVal *= 2;
  if (RoundedVal < RoundUp)
    RoundedVal = RoundUp;
  return RoundedVal;
}

// createConvert : create a genx_convert intrinsic call
CallInst *createConvert(Value *In, const Twine &Name, Instruction *InsertBefore,
                        Module *M = nullptr);

// createConvertAddr : create a genx_convert_addr intrinsic call
CallInst *createConvertAddr(Value *In, int Offset, const Twine &Name,
                            Instruction *InsertBefore, Module *M = nullptr);

// createAddAddr : create a genx_add_addr intrinsic call
CallInst *createAddAddr(Value *Lhs, Value *Rhs, const Twine &Name,
                        Instruction *InsertBefore, Module *M = nullptr);

CallInst *createUnifiedRet(Type *Ty, const Twine &Name, Module *M);

// getPredicateConstantAsInt : get a vXi1 constant's value as a single integer
unsigned getPredicateConstantAsInt(const Constant *C);

// getConstantSubvector : get a contiguous region from a vector constant
Constant *getConstantSubvector(const Constant *V, unsigned StartIdx,
                               unsigned Size);

// concatConstants : concatenate two possibly vector constants, giving a vector
// constant
Constant *concatConstants(Constant *C1, Constant *C2);

// findClosestCommonDominator : find latest common dominator of some
// instructions
Instruction *findClosestCommonDominator(DominatorTree *DT,
                                        ArrayRef<Instruction *> Insts);

// convertShlShr : convert Shl followed by AShr/LShr by the same amount into
// trunc+sext/zext
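// Example: on i32, (ashr (shl X, 24), 24) becomes sext(trunc X to i8) to i32;
// with lshr instead of ashr, the zext form is produced instead.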
Instruction *convertShlShr(Instruction *Inst);

// splitStructPhis : find struct phi nodes and split them
//
// Return: whether code modified
//
// Each struct phi node is split into a separate phi node for each struct
// element. This is needed because the GenX backend's liveness and coalescing
// code cannot cope with a struct phi.
//
// This is run in two places: firstly in GenXLowering, so that pass can then
// simplify any InsertElement and ExtractElement instructions added by the
// struct phi splitting. But then it needs to be run again in GenXLiveness,
// because other passes can re-insert a struct phi. The case I saw in
// hevc_speed was something commoning up the struct return from two calls in an
// if..else..endif.
//
// BTW There's also the GenXAggregatePseudoLowering pass that does the same.
bool splitStructPhis(Function *F);
bool splitStructPhi(PHINode *Phi);

// normalizeGloads : normalize a g_load used via bitcasts.
//
// When a single g_load is bitcast to different types, clone the g_load so
// that each type gets its own copy.
bool normalizeGloads(Instruction *Inst);

// foldBitCastInst : fold a bitcast instruction into the pointer operand of a
// store/load if possible. Return the new instruction, or nullptr.
Instruction *foldBitCastInst(Instruction *Inst);

// Return the underlying global variable. Return nullptr if it does not exist.
GlobalVariable *getUnderlyingGlobalVariable(Value *V);
const GlobalVariable *getUnderlyingGlobalVariable(const Value *V);
GlobalVariable *getUnderlyingGlobalVariable(LoadInst *LI);
const GlobalVariable *getUnderlyingGlobalVariable(const LoadInst *LI);

class Bale;

bool isGlobalStore(Instruction *I);
bool isGlobalStore(StoreInst *ST);

bool isGlobalLoad(Instruction *I);
bool isGlobalLoad(LoadInst *LI);

// Check that V is a legal value for a global store to StorePtr.
// This implies:
// 1) V is a wrregion W;
// 2) the old value of W is the result of a gload L;
// 3) the pointer operand of L is derived from the same global variable as
//    StorePtr.
bool isLegalValueForGlobalStore(Value *V, Value *StorePtr);

// Check that the operands of global store ST meet the conditions of
// isLegalValueForGlobalStore.
bool isGlobalStoreLegal(StoreInst *ST);

bool isIdentityBale(const Bale &B);

// Check if the region of a value is OK for baling in to a raw operand
//
// Enter:   V = value that is possibly rdregion/wrregion
//          IsWrite = true if caller wants to see wrregion, false for rdregion
//
// The region must be constant indexed, contiguous, and start on a GRF
// boundary.
bool isValueRegionOKForRaw(Value *V, bool IsWrite, const GenXSubtarget *ST);

// Check if a region is OK for baling in to a raw operand
//
// The region must be constant indexed, contiguous, and start on a GRF
// boundary.
bool isRegionOKForRaw(const genx::Region &R, const GenXSubtarget *ST);

// Skip optimizations on functions with large blocks.
inline bool skipOptWithLargeBlock(const Function &F) {
  return std::any_of(F.begin(), F.end(),
                     [](const BasicBlock &BB) { return BB.size() >= 5000; });
}

bool skipOptWithLargeBlock(FunctionGroup &FG);

// getTwoAddressOperandNum : get the operand number of the two-address operand
llvm::Optional<unsigned> getTwoAddressOperandNum(CallInst *II);

// isNot : test whether an instruction is a "not" instruction (an xor with
// constant all ones)
bool isNot(Instruction *Inst);

// isPredNot : test whether an instruction is a "not" instruction (an xor
// with constant all ones) with predicate (i1 or vector of i1) type
bool isPredNot(Instruction *Inst);

// isIntNot : test whether an instruction is a "not" instruction (an xor
// with constant all ones) with non-predicate type
bool isIntNot(Instruction *Inst);
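
// For example, xor i1 %p, true satisfies isPredNot, while xor i32 %v, -1
// satisfies isIntNot; both satisfy isNot.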

// getMaskOperand : get the i1 vector mask operand of a genx intrinsic; return
// null if there is no operand of such type or this is not a genx intrinsic.
// If there are multiple operands of i1 vector type, return the first
// operand.
Value *getMaskOperand(const Instruction *Inst);

// invertCondition : invert the given predicate value, possibly reusing
// an existing copy.
Value *invertCondition(Value *Condition);

// If V is a function pointer, return the function it points to,
// nullptr otherwise.
Function *getFunctionPointerFunc(Value *V);

// Return true if V is a const vector of function pointers,
// looking through any casts and extractelems within.
bool isFuncPointerVec(Value *V);

// isNoopCast : test if a cast operation doesn't modify the bitwise
// representation of its value (in other words, it can be copy-coalesced).
bool isNoopCast(const CastInst *CI);

// ShuffleVectorAnalyzer : class to analyze a shufflevector
class ShuffleVectorAnalyzer {
  ShuffleVectorInst *SI;

public:
  ShuffleVectorAnalyzer(ShuffleVectorInst *SI) : SI(SI) {}
  // getAsSlice : return the start index of the slice, or -1 if the
  // shufflevector is not a slice
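  // Example: shufflevector <8 x T> x, undef, <4 x i32> <2, 3, 4, 5> reads the
  // contiguous slice starting at index 2, so getAsSlice returns 2.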
  int getAsSlice();

  // Replicated slice descriptor.
  // A replicated slice (e.g. 1 2 3 1 2 3) can be parametrized by
  // initial offset (1), slice size (3) and replication count (2).
  struct ReplicatedSlice {
    unsigned InitialOffset;
    unsigned SliceSize;
    unsigned ReplicationCount;
    ReplicatedSlice(unsigned Offset, unsigned Size, unsigned Count)
        : InitialOffset(Offset), SliceSize(Size), ReplicationCount(Count) {}
  };

  // isReplicatedSlice : check whether the shufflevector is a replicated slice.
  // Example of a replicated slice:
  // shufflevector <3 x T> x, undef, <6 x i32> <1, 2, 1, 2, 1, 2>.
  bool isReplicatedSlice() const;

  static bool isReplicatedSlice(ShuffleVectorInst *SI) {
    return ShuffleVectorAnalyzer(SI).isReplicatedSlice();
  }

  // When we have a replicated slice, its parameters are easily deduced
  // from the first and last elements of the mask. This function decomposes
  // a replicated slice to its parameters.
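  // Example: for mask <1, 2, 1, 2, 1, 2> (TotalSize 6), SliceStart = 1 and
  // SliceEnd = 2, so SliceSize = 2 and ReplicationCount = 3, giving
  // ReplicatedSlice(1, 2, 3).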
  ReplicatedSlice getReplicatedSliceDescriptor() const {
    IGC_ASSERT_MESSAGE(isReplicatedSlice(), "Expected replicated slice");
    const unsigned TotalSize =
        cast<IGCLLVM::FixedVectorType>(SI->getType())->getNumElements();
    const unsigned SliceStart = SI->getMaskValue(0);
    const unsigned SliceEnd = SI->getMaskValue(TotalSize - 1);
    const unsigned SliceSize = SliceEnd - SliceStart + 1;
    const unsigned ReplicationCount = TotalSize / SliceSize;
    return ReplicatedSlice(SliceStart, SliceSize, ReplicationCount);
  }

  static ReplicatedSlice getReplicatedSliceDescriptor(ShuffleVectorInst *SI) {
    return ShuffleVectorAnalyzer(SI).getReplicatedSliceDescriptor();
  }

  // getAsUnslice : see if the shufflevector is an
  // unslice where the "old value" is operand 0 and operand 1 is another
  // shufflevector and operand 0 of that is the "new value". Returns the start
  // index, or -1 if it is not an unslice
  int getAsUnslice();
  // getAsSplat : if the shufflevector is a splat, get the splatted input, with
  // the element's vector index if the input is a vector
  struct SplatInfo {
    Value *Input;
    unsigned Index;
    SplatInfo(Value *Input, unsigned Index) : Input(Input), Index(Index) {}
  };
  SplatInfo getAsSplat();

  // Serialize this shufflevector instruction.
  Value *serialize();

  // Compute the cost in terms of the number of insertelement instructions
  // needed.
  unsigned getSerializeCost(unsigned i);

  // Describes the region of one of the two shufflevector instruction operands.
  struct OperandRegionInfo {
    Value *Op;
    Region R;
  };
  OperandRegionInfo getMaskRegionPrefix(int StartIdx);
};

// Class for splitting i64 (both vector and scalar) to subregions of i32
// vectors. Used in GenXLowering and emulation routines.
class IVSplitter {
  Instruction &Inst;

  Type *ETy = nullptr;
  Type *VI32Ty = nullptr;
  size_t Len = 0;

  enum class RegionType { LoRegion, HiRegion, FirstHalf, SecondHalf };

  // Description of a RegionType in terms of initial offset and stride.
  // Both ElOffset and ElStride are in elements.
  struct RegionTrait {
    size_t ElOffset = 0;
    size_t ElStride = 0;
  };

  // describeSplit: given a requested RegionType and a number of source
  // elements, returns the detailed description of how to form such a split
  // (in terms of an initial offset and stride).
  // Example:
  //   describeSplit(SecondHalf, 10) should return RegionTrait{ 5, 1 }
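  //   Presumably (an assumption from the lo/hi i32 layout of an i64, not
  //   spelled out here), describeSplit(LoRegion, 10) would return
  //   RegionTrait{ 0, 2 } and describeSplit(HiRegion, 10) RegionTrait{ 1, 2 }.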
  static RegionTrait describeSplit(RegionType RT, size_t ElNum);

  // splitConstantVector: given a vector of constant values, create
  // a new constant vector containing only the values corresponding to the
  // desired RegionType.
  // Example:
  //   splitConstantVector({1, 2, 3, 4}, HiRegion) -> {2, 4}
  // Note: since every RegionType needs half of the original elements, the
  // size of the input vector is expected to be even.
  static Constant *splitConstantVector(const SmallVectorImpl<Constant *> &KV,
                                       RegionType RT);
  // createSplitRegion: given the type of the source vector (expected to be
  // a vector of i32 with an even number of elements) and the desired
  // RegionType, returns a genx::Region that can be used to construct an
  // equivalent rdregion intrinsic.
  static genx::Region createSplitRegion(Type *SrcTy, RegionType RT);

  std::pair<Value *, Value *> splitValue(Value &Val, RegionType RT1,
                                         const Twine &Name1, RegionType RT2,
                                         const Twine &Name2,
                                         bool FoldConstants);
  Value *combineSplit(Value &V1, Value &V2, RegionType RT1, RegionType RT2,
                      const Twine &Name, bool Scalarize);

public:

  struct LoHiSplit {
    Value *Lo;
    Value *Hi;
  };
  struct HalfSplit {
    Value *Left;
    Value *Right;
  };

  // The instruction is used as an insertion point, a debug location source
  // and as a source of operands to split.
  // If BaseOpIdx indexes a scalar/vector operand of i64 type, then
  // IsI64Operation returns true; otherwise the value type of the
  // instruction is used.
  IVSplitter(Instruction &Inst, const unsigned *BaseOpIdx = nullptr);

  // The operand to split is expected to be a scalar or vector of i64 type
  LoHiSplit splitOperandLoHi(unsigned SourceIdx, bool FoldConstants = true);
  HalfSplit splitOperandHalf(unsigned SourceIdx, bool FoldConstants = true);

  LoHiSplit splitValueLoHi(Value &V, bool FoldConstants = true);
  HalfSplit splitValueHalf(Value &V, bool FoldConstants = true);

  // Combined values are expected to be vectors of i32 of the same size
  Value *combineLoHiSplit(const LoHiSplit &Split, const Twine &Name,
                          bool Scalarize);
  Value *combineHalfSplit(const HalfSplit &Split, const Twine &Name,
                          bool Scalarize);

  // Convenience method for quick sanity checking
  bool IsI64Operation() const { return ETy->isIntegerTy(64); }
};
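
// Typical IVSplitter usage (an illustrative sketch only; I64Inst, LoPart and
// HiPart are hypothetical values, not names from this codebase):
//   IVSplitter Splitter(*I64Inst);
//   IVSplitter::LoHiSplit Ops = Splitter.splitOperandLoHi(0);
//   ... emit i32 arithmetic producing LoPart/HiPart from Ops.Lo/Ops.Hi ...
//   Value *Res = Splitter.combineLoHiSplit({LoPart, HiPart}, "res",
//                                          I64Inst->getType()->isIntegerTy());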

// adjustPhiNodesForBlockRemoval : adjust phi nodes when removing a block
void adjustPhiNodesForBlockRemoval(BasicBlock *Succ, BasicBlock *BB);

/***********************************************************************
 * sinkAdd : sink add(s) in address calculation
 *
 * Enter:   IdxVal = the original index value
 *
 * Return:  the new calculation for the index value
 *
 * This detects the case when a variable index in a region or element access
 * is one or more constant add/subs then some mul/shl/truncs. It sinks
 * the add/subs into a single add after the mul/shl/truncs, so the add
 * stands a chance of being baled in as a constant offset in the region.
 *
 * If add sinking is successfully applied, it may leave now unused
 * instructions behind, which need tidying by a later dead code removal
 * pass.
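 *
 * Illustrative example: ((A + 3) << 4) is rewritten to (A << 4) + 48, so
 * the +48 can later be baled in as a constant region offset.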
 */
Value *sinkAdd(Value *V);

// Check if this is a mask packing operation, i.e. a bitcast from vXi1 to an
// integer i8, i16 or i32.
static inline bool isMaskPacking(const Value *V) {
  if (auto BC = dyn_cast<BitCastInst>(V)) {
    auto SrcTy = dyn_cast<IGCLLVM::FixedVectorType>(BC->getSrcTy());
    if (!SrcTy || !SrcTy->getScalarType()->isIntegerTy(1))
      return false;
    unsigned NElts = SrcTy->getNumElements();
    if (NElts != 8 && NElts != 16 && NElts != 32)
      return false;
    return V->getType()->getScalarType()->isIntegerTy(NElts);
  }
  return false;
}
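
// For example, bitcast <16 x i1> %m to i16 is a mask packing; a bitcast of
// <4 x i1>, or to an integer width other than the element count, is not.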

void LayoutBlocks(Function &func, LoopInfo &LI);
void LayoutBlocks(Function &func);

// Metadata name for inline assembly instructions
constexpr const char *MD_genx_inline_asm_info = "genx.inlasm.constraints.info";

// Available inline assembly constraints
enum class ConstraintType : uint32_t {
  Constraint_r,
  Constraint_rw,
  Constraint_i,
  Constraint_n,
  Constraint_F,
  Constraint_a,
  Constraint_cr,
  Constraint_unknown
};

// Represents info about an inline assembly operand
class GenXInlineAsmInfo {
  genx::ConstraintType CTy = ConstraintType::Constraint_unknown;
  int MatchingInput = -1;
  bool IsOutput = false;

public:
  GenXInlineAsmInfo(genx::ConstraintType Ty, int MatchingInput, bool IsOutput)
      : CTy(Ty), MatchingInput(MatchingInput), IsOutput(IsOutput) {}
  bool hasMatchingInput() const { return MatchingInput != -1; }
  int getMatchingInput() const { return MatchingInput; }
  bool isOutput() const { return IsOutput; }
  genx::ConstraintType getConstraintType() const { return CTy; }
};

// Check whether an input constraint has a matching output operand with the
// same constraint
bool isInlineAsmMatchingInputConstraint(const InlineAsm::ConstraintInfo &Info);

// Get the matched output operand number for an input operand
unsigned getInlineAsmMatchedOperand(const InlineAsm::ConstraintInfo &Info);

// Get a joined string representation of constraints
std::string getInlineAsmCodes(const InlineAsm::ConstraintInfo &Info);

// Get constraint type
genx::ConstraintType getInlineAsmConstraintType(StringRef Codes);

// Get a vector of inline asm info for an inline assembly instruction.
// Returns an empty vector if there is no constraint string in the inline asm
// or if called before the GenXInlineAsmLowering pass.
std::vector<GenXInlineAsmInfo> getGenXInlineAsmInfo(CallInst *CI);

// Get a vector of inline asm info from an MDNode
std::vector<GenXInlineAsmInfo> getGenXInlineAsmInfo(MDNode *MD);

bool hasConstraintOfType(const std::vector<GenXInlineAsmInfo> &ConstraintsInfo,
                         genx::ConstraintType CTy);

// Get the number of outputs for an inline assembly instruction
unsigned getInlineAsmNumOutputs(CallInst *CI);

Type *getCorrespondingVectorOrScalar(Type *Ty);

/* scalarizeOrVectorizeIfNeeded: scalarize or vectorize \p Inst if required
 *
 * The result of some instructions can be both a Ty and a <1 x Ty> value,
 * e.g. rdregion. It is sometimes required to replace uses of instructions
 * with types [\p FirstType, \p LastType) with \p Inst. If the types don't
 * correspond, this function places a BitCastInst from <1 x Ty> to Ty, or from
 * Ty to <1 x Ty>, after \p Inst and returns a pointer to that instruction.
 * If no cast is required, nullptr is returned.
 */
template <
    typename ConstIter,
    typename std::enable_if<
        std::is_base_of<
            Type, typename std::remove_pointer<typename std::iterator_traits<
                      ConstIter>::value_type>::type>::value,
        int>::type = 0>
CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst, ConstIter FirstType,
                                       ConstIter LastType) {
  IGC_ASSERT_MESSAGE(Inst, "wrong argument");
  IGC_ASSERT_MESSAGE(std::all_of(FirstType, LastType,
                                 [Inst](Type *Ty) {
                                   return Ty == Inst->getType() ||
                                          Ty == getCorrespondingVectorOrScalar(
                                                    Inst->getType());
                                 }),
                     "wrong arguments: type of instructions must correspond");

  if (Inst->getType()->isVectorTy() &&
      cast<IGCLLVM::FixedVectorType>(Inst->getType())->getNumElements() > 1)
    return nullptr;
  bool needBitCast = std::any_of(
      FirstType, LastType, [Inst](Type *Ty) { return Ty != Inst->getType(); });
  if (!needBitCast)
    return nullptr;
  auto *CorrespondingTy = getCorrespondingVectorOrScalar(Inst->getType());
  auto *BC = CastInst::Create(Instruction::BitCast, Inst, CorrespondingTy);
  BC->insertAfter(Inst);
  return BC;
}
/* scalarizeOrVectorizeIfNeeded: scalarize or vectorize \p Inst if required
 *
 * The result of some instructions can be both a Ty and a <1 x Ty> value,
 * e.g. rdregion. It is sometimes required to replace uses of instructions of
 * [\p FirstInstToReplace, \p LastInstToReplace) with \p Inst. If the types
 * don't correspond, this function places a BitCastInst from <1 x Ty> to Ty,
 * or from Ty to <1 x Ty>, after \p Inst and returns a pointer to that
 * instruction. If no cast is required, nullptr is returned.
 */
template <typename ConstIter,
          typename std::enable_if<
              std::is_base_of<
                  Instruction,
                  typename std::remove_pointer<typename std::iterator_traits<
                      ConstIter>::value_type>::type>::value,
              int>::type = 0>
CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst,
                                       ConstIter FirstInstToReplace,
                                       ConstIter LastInstToReplace) {
  std::vector<Type *> Types;
  std::transform(FirstInstToReplace, LastInstToReplace,
                 std::back_inserter(Types),
                 [](Instruction *Inst) { return Inst->getType(); });
  return scalarizeOrVectorizeIfNeeded(Inst, Types.begin(), Types.end());
}

CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst, Type *RefType);

CastInst *scalarizeOrVectorizeIfNeeded(Instruction *Inst,
                                       Instruction *InstToReplace);

// Returns the log alignment for an align type and target grf width, because
// ALIGN_GRF must be target-dependent.
unsigned getLogAlignment(VISA_Align Align, unsigned GRFWidth);
// The opposite of getLogAlignment.
VISA_Align getVISA_Align(unsigned LogAlignment, unsigned GRFWidth);
// Some log alignments cannot be transparently transformed to VISA_Align. This
// chooses a suitable log alignment which is convertible to VISA_Align.
unsigned ceilLogAlignment(unsigned LogAlignment, unsigned GRFWidth);

// Checks whether the provided wrpredregion intrinsic can be encoded
// as a legal SETP instruction.
bool isWrPredRegionLegalSetP(const CallInst &WrPredRegion);

// Checks if V is a CallInst representing a direct call to F.
// Many of our analyses do not check whether a function F's user
// which is a CallInst calls exactly F. This may not be true
// when a function pointer is passed as an argument of a call to
// another function, e.g. the genx.faddr intrinsic.
// Returns V cast to CallInst if the check succeeds,
// nullptr otherwise.
CallInst *checkFunctionCall(Value *V, Function *F);

// Get the possible number of GRFs for an indirect region
unsigned getNumGRFsPerIndirectForRegion(const genx::Region &R,
                                        const GenXSubtarget *ST, bool Allow2D);
// Controls the behavior of the emulateI64Operation function
enum class EmulationFlag {
  RAUW,
  // RAUW and EraseFromParent; always returns a valid instruction,
  // either the original one or the last one from the resulting emulation
  // sequence
  RAUWE,
  None,
};
// Transforms an operation on i64 type to an equivalent sequence that does not
// operate on i64 (but rather on i32).
// The implementation is contained in the GenXEmulate pass sources.
// Note: ideally, i64 emulation should be handled by the GenXEmulate pass;
// however, some of our late passes like GenXPostLegalization or GenXCategory
// may introduce additional instructions which violate Subtarget restrictions.
// This function is intended to cope with such cases.
Instruction *emulateI64Operation(const GenXSubtarget *ST, Instruction *In,
                                 EmulationFlag AuxAction);
// BinaryDataAccumulator : a helper class to accumulate binary data
// in one buffer.
// Information about each stored section can be accessed via the key with
// which it was stored. The key must be unique.
// The accumulated/consolidated binary data can also be accessed.
template <typename KeyT, typename DataT = uint8_t> class BinaryDataAccumulator {
public:
  struct SectionInfoT {
    int Offset = 0;
    ArrayRef<DataT> Data;

    SectionInfoT() = default;
    SectionInfoT(const DataT *BasePtr, int First, int Last)
        : Offset{First}, Data{BasePtr + First, BasePtr + Last} {}

    int getSize() const { return Data.size(); }
  };

  struct SectionT {
    KeyT Key;
    SectionInfoT Info;
  };

private:
  std::vector<DataT> Data;
  using SectionSeq = std::vector<SectionT>;
  SectionSeq Sections;

public:
  using value_type = typename SectionSeq::value_type;
  using reference = typename SectionSeq::reference;
  using const_reference = typename SectionSeq::const_reference;
  using iterator = typename SectionSeq::iterator;
  using const_iterator = typename SectionSeq::const_iterator;

  iterator begin() { return Sections.begin(); }
  const_iterator begin() const { return Sections.begin(); }
  const_iterator cbegin() const { return Sections.cbegin(); }
  iterator end() { return Sections.end(); }
  const_iterator end() const { return Sections.end(); }
  const_iterator cend() const { return Sections.cend(); }
  reference front() { return *begin(); }
  const_reference front() const { return *begin(); }
  reference back() { return *std::prev(end()); }
  const_reference back() const { return *std::prev(end()); }

  // Append the data that is referenced by \p Key and represented
  // by the range [\p First, \p Last) to the buffer.
  // The range must consist of DataT elements.
  template <typename InputIter>
  void append(KeyT Key, InputIter First, InputIter Last) {
    IGC_ASSERT_MESSAGE(
        std::none_of(Sections.begin(), Sections.end(),
                     [&Key](const SectionT &S) { return S.Key == Key; }),
        "There's already a section with such key");
    SectionT Section;
    Section.Key = std::move(Key);
    int Offset = Data.size();
    std::copy(First, Last, std::back_inserter(Data));
    Section.Info =
        SectionInfoT{Data.data(), Offset, static_cast<int>(Data.size())};
    Sections.push_back(std::move(Section));
  }

  void append(KeyT Key, ArrayRef<DataT> SectionBin) {
    append(std::move(Key), SectionBin.begin(), SectionBin.end());
  }

  // Get information about the section referenced by \p Key.
  SectionInfoT getSectionInfo(const KeyT &Key) const {
    auto SectionIt =
        std::find_if(Sections.begin(), Sections.end(),
                     [&Key](const SectionT &S) { return S.Key == Key; });
    IGC_ASSERT_MESSAGE(SectionIt != Sections.end(),
                       "There must be a section with such key");
    return SectionIt->Info;
  }

  // Get the offset of the section referenced by \p Key.
  int getSectionOffset(const KeyT &Key) const {
    return getSectionInfo(Key).Offset;
  }
  // Get the size of the section referenced by \p Key.
  int getSectionSize(const KeyT &Key) const {
    return getSectionInfo(Key).getSize();
  }
  // Get the size of the whole collected data.
  int getFullSize() const { return Data.size(); }
  int getNumSections() const { return Sections.size(); }
  // Whether the data buffer is empty.
  bool empty() const { return Data.empty(); }
  // Emit the whole consolidated data.
  std::vector<DataT> emitConsolidatedData() const & { return Data; }
  std::vector<DataT> emitConsolidatedData() && { return std::move(Data); }
};
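
// Typical BinaryDataAccumulator usage (an illustrative sketch; the keys and
// the KernelABin/KernelBBin buffers are hypothetical):
//   BinaryDataAccumulator<std::string> Buf;
//   Buf.append("kernel_a", KernelABin); // KernelABin is an ArrayRef<uint8_t>
//   Buf.append("kernel_b", KernelBBin);
//   int Off = Buf.getSectionOffset("kernel_b"); // == KernelABin.size()
//   std::vector<uint8_t> Image = std::move(Buf).emitConsolidatedData();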

// Not every global variable is a real global variable and should eventually
// be encoded as a global variable.
// GenX volatile variables and printf strings are the exclusions for now.
// Printf strings must already be legalized for this function to be usable;
// that is already done in the middle-end, so there is no problem calling it
// in codegen.
bool isRealGlobalVariable(const GlobalVariable &GV);

// Get the size of a struct field, including the size of padding for the next
// field, or the trailing padding.
// For example, for the 1st element of { i8, i32 }, 4 bytes will be returned
// (in most layouts).
//
// Arguments:
//    \p ElemIdx - index of the struct field
//    \p NumOperands - the number of fields in the struct
//                     (StructLayout doesn't expose it)
//    \p StructLayout - the struct layout
//
// Returns the size in bytes.
std::size_t getStructElementPaddedSize(unsigned ElemIdx, unsigned NumOperands,
                                       const StructLayout &Layout);

// Determine if there is a store to global variable Addr in between L1 and
// L2. L1 and L2 can be either vloads or regular stores.
bool hasMemoryDeps(Instruction *L1, Instruction *L2, Value *Addr,
                   DominatorTree *DT);

// Return true if V is a rdregion from a load result.
bool isRdRFromGlobalLoad(Value *V);

// Return true if the wrregion has the result of a load as its old value.
bool isWrRToGlobalLoad(Value *V);

} // namespace genx
} // namespace llvm

#endif // GENX_UTIL_H