/*========================== begin_copyright_notice ============================

Copyright (C) 2018-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//
/// GenXEmulate
/// -----------
///
/// GenXEmulate is a module pass that emulates certain LLVM IR instructions.
///
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "GENX_EMULATION"

#include "GenX.h"
#include "GenXSubtarget.h"
#include "GenXTargetMachine.h"
#include "GenXUtil.h"
#include "IGC/common/StringMacros.hpp"
#include "Probe/Assertion.h"

#include "llvmWrapper/IR/DerivedTypes.h"
#include "llvmWrapper/IR/Function.h"

#include "vc/BiF/Tools.h"
#include "vc/GenXOpts/Utils/InternalMetadata.h"
#include "vc/Support/BackendConfig.h"
#include "vc/Support/GenXDiagnostic.h"
#include "vc/Utils/General/BiF.h"

#include "llvm/GenXIntrinsics/GenXIntrinsics.h"
#include "llvm/GenXIntrinsics/GenXMetadata.h"

#include <llvm/Analysis/TargetFolder.h>
#include <llvm/CodeGen/TargetPassConfig.h>
#include <llvm/IR/Function.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/InstVisitor.h>
#include <llvm/IR/Module.h>
#include <llvm/Linker/Linker.h>
#include <llvm/Pass.h>
#include <llvm/Support/Process.h>
#include <llvm/Support/raw_ostream.h>

#include <array>
#include <limits>
#include <string>

using namespace llvm;
using namespace genx;

static constexpr const char *LibraryFunctionPrefix = "__cm_intrinsic_impl_";
static constexpr const char *EmuLibSDivPrefix = "__cm_intrinsic_impl_sdiv";
static constexpr const char *EmuLibSRemPrefix = "__cm_intrinsic_impl_srem";
static constexpr const char *EmuLibUDivPrefix = "__cm_intrinsic_impl_udiv";
static constexpr const char *EmuLibURemPrefix = "__cm_intrinsic_impl_urem";
static constexpr const char *EmuLibFP2UIPrefix = "__cm_intrinsic_impl_fp2ui";
static constexpr const char *EmuLibFP2SIPrefix = "__cm_intrinsic_impl_fp2si";
static constexpr const char *EmuLibUI2FPPrefix = "__cm_intrinsic_impl_ui2fp";
static constexpr const char *EmuLibSI2FPPrefix = "__cm_intrinsic_impl_si2fp";

struct PrefixOpcode {
  const char *Prefix;
  const unsigned Opcode;
};
constexpr std::array<PrefixOpcode, 4> DivRemPrefixes = {
    {{EmuLibSDivPrefix, BinaryOperator::SDiv},
     {EmuLibSRemPrefix, BinaryOperator::SRem},
     {EmuLibUDivPrefix, BinaryOperator::UDiv},
     {EmuLibURemPrefix, BinaryOperator::URem}}};

constexpr std::array<PrefixOpcode, 4> EmulationFPConvertsPrefixes = {
    {{EmuLibFP2UIPrefix, Instruction::FPToUI},
     {EmuLibFP2SIPrefix, Instruction::FPToSI},
     {EmuLibUI2FPPrefix, Instruction::UIToFP},
     {EmuLibSI2FPPrefix, Instruction::SIToFP}}};

static constexpr const char *RoundingRtzSuffix = "__rtz_";
static constexpr const char *RoundingRteSuffix = "__rte_";
static constexpr const char *RoundingRtpSuffix = "__rtp_";
static constexpr const char *RoundingRtnSuffix = "__rtn_";

// TODO: move this to vc-intrinsics
static constexpr int VCRoundingRTE = 0;
static constexpr int VCRoundingRTP = 1 << 4;
static constexpr int VCRoundingRTN = 2 << 4;
static constexpr int VCRoundingRTZ = 3 << 4;
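// Note: the nonzero values place the rounding mode in bits 5:4; this
// presumably mirrors the rounding-mode field of the target's control
// register, which would explain the "<< 4" in the constants above.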

namespace {

static cl::opt<bool> OptDbgOnlyDisableDivremEmulation(
    "vc-dbgonly-emu-disable-divrem", cl::init(false), cl::Hidden,
    cl::desc("do not load divrem emulation functions"));
// Currently, we have no guarantee that each and every genx intrinsic
// is emulated. Only the most frequently encountered ones are.
// This flag is to help find such undetected cases.
static cl::opt<bool> OptStrictChecksEnable("vc-i64emu-strict-checks",
                                           cl::init(false), cl::Hidden,
                                           cl::desc("enables strict checks"));
static cl::opt<bool>
    OptStricterSVM("vc-i64emu-strict-report-svm", cl::init(false), cl::Hidden,
                   cl::desc("strict check will break on svm* operations"));
// NOTE: probably should be true by default
static cl::opt<bool>
    OptStricterAtomic("vc-i64emu-strict-report-atomic", cl::init(false),
                      cl::Hidden,
                      cl::desc("strict check will break on 64-bit atomics"));
static cl::opt<bool> OptStricterOword(
    "vc-i64emu-strict-report-oword", cl::init(false), cl::Hidden,
    cl::desc("strict check will break on 64-bit oword reads/writes"));
static cl::opt<bool> OptStricterAlloc(
    "vc-i64emu-strict-report-alloc", cl::init(false), cl::Hidden,
    cl::desc("strict check will break on 64-bit alloc"));
static cl::opt<bool> OptStricterFaddr(
    "vc-i64emu-strict-report-faddr", cl::init(false), cl::Hidden,
    cl::desc("strict check will break on 64-bit faddr"));
static cl::opt<bool>
    OptStricterConst("vc-i64emu-strict-const", cl::init(false), cl::Hidden,
                     cl::desc("strict check will break on 64-bit constants"));
static cl::opt<bool> OptStricterRegions(
    "vc-i64emu-strict-regions", cl::init(false), cl::Hidden,
    cl::desc("strict check will break on 64-bit rdregion/wrregion"));
static cl::opt<bool> OptStricterConverts(
    "vc-i64emu-strict-converts", cl::init(false), cl::Hidden,
    cl::desc("strict check will break on 64-bit converts which are NOT noop"));
// TODO: we expect this to be turned on by default
static cl::opt<bool> OptStrictEmulationRequests(
    "vc-i64emu-strict-requests", cl::init(false), cl::Hidden,
    cl::desc("Explicit emulation requests are subject to stricter checks"));
static cl::opt<bool> OptIcmpEnable("vc-i64emu-icmp-enable", cl::init(true),
                                   cl::Hidden,
                                   cl::desc("enable icmp emulation"));
static cl::opt<bool>
    OptProcessPtrs("vc-i64emu-ptrs-enable", cl::init(true), cl::Hidden,
                   cl::desc("enable 64-bit pointer operation emulation"));
static cl::opt<bool> OptConvertPartialPredicates(
    "vc-i64emu-icmp-ppred-lowering", cl::init(true), cl::Hidden,
    cl::desc("if \"partial predicates\" shall be converted to icmp"));

using IRBuilder = IRBuilder<TargetFolder>;
struct OpType {
  unsigned Opcode;
  Type *ResType;
  Type *FirstArgType;
};
static std::function<bool(const OpType &, const OpType &)> OpTypeComparator =
    [](const OpType &ot1, const OpType &ot2) -> bool {
  if (ot1.Opcode < ot2.Opcode)
    return true;
  if (ot2.Opcode < ot1.Opcode)
    return false;
  if (ot1.ResType < ot2.ResType)
    return true;
  if (ot2.ResType < ot1.ResType)
    return false;
  return ot1.FirstArgType < ot2.FirstArgType;
};
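// OpTypeComparator compares the <Opcode, ResType, FirstArgType> triples
// lexicographically (with pointer comparison on the types), providing the
// strict weak ordering std::map needs to use OpType as a key (see
// EmulationFuns below).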

template <typename T> static void processToEraseList(T &EraseList) {
  std::for_each(EraseList.begin(), EraseList.end(),
                [](auto *Item) { Item->eraseFromParent(); });
  EraseList.clear();
}

class GenXEmulate : public ModulePass {

  friend Instruction *llvm::genx::emulateI64Operation(const GenXSubtarget *ST,
                                                      Instruction *In,
                                                      EmulationFlag AuxAction);
  std::vector<Instruction *> DiscracedList;
  // Maps <opcode, type> to its corresponding emulation function.
  std::map<OpType, Function *, decltype(OpTypeComparator)> EmulationFuns{
      OpTypeComparator};

  std::vector<Instruction *> ToErase;
  const GenXSubtarget *ST = nullptr;

  class Emu64Expander : public InstVisitor<Emu64Expander, Value *> {

    friend InstVisitor<Emu64Expander, Value *>;

    const GenXSubtarget &ST;
    std::map<OpType, Function *, decltype(OpTypeComparator)> *EmulationFuns;

    IVSplitter SplitBuilder;
    Instruction &Inst;

    Value *expandBitwiseOp(BinaryOperator &);
    Value *expandBitLogicOp(BinaryOperator &);

    Value *visitAdd(BinaryOperator &);
    Value *visitSub(BinaryOperator &);
    Value *visitAnd(BinaryOperator &);
    Value *visitOr(BinaryOperator &);
    Value *visitXor(BinaryOperator &);
    Value *visitSelectInst(SelectInst &I);
    Value *visitICmp(ICmpInst &);

    Value *visitShl(BinaryOperator &);
    Value *visitLShr(BinaryOperator &);
    Value *visitAShr(BinaryOperator &);

    Value *buildRightShift(IVSplitter &SplitBuilder, BinaryOperator &Op);

    Value *visitZExtInst(ZExtInst &I);
    Value *visitSExtInst(SExtInst &I);

    Value *visitPtrToInt(PtrToIntInst &I);
    Value *visitIntToPtr(IntToPtrInst &I);

    Value *visitGenxTrunc(CallInst &CI);
    Value *visitGenxMinMax(CallInst &CI);
    // genx_absi
    Value *visitGenxAbsi(CallInst &CI);
    // handles genx_{XX}add_sat cases
    Value *visitGenxAddSat(CallInst &CI);
    // handles genx_fpto{X}i_sat cases
    Value *visitGenxFPToISat(CallInst &CI);

    // [+] bitcast
    // [-] genx.constanti ?
    // [-] genx.scatter ?
    // [-] genx.gather ?
    Value *visitCallInst(CallInst &CI);
    Value *visitInstruction(Instruction &I) { return nullptr; }

    // If the value is not an Instruction (like a ConstantExpr), return the
    // original value. Return the emulated sequence otherwise.
    Value *ensureEmulated(Value *Val);

    static bool isI64PointerOp(const Instruction &I);
    static bool isConvertOfI64(const Instruction &I);
    static bool isI64ToFP(const Instruction &I);
    static bool isI64Cmp(const Instruction &I);
    static bool isI64AddSat(const Instruction &I);
    static Value *detectBitwiseNot(BinaryOperator &);
    static Type *changeScalarType(Type *T, Type *NewTy);

    struct VectorInfo {
      Value *V;
      IGCLLVM::FixedVectorType *VTy;
    };
    static VectorInfo toVector(IRBuilder &Builder, Value *In);
    static bool getConstantUI32Values(Value *V,
                                      SmallVectorImpl<uint32_t> &Result);

    // functors to help with shift emulation
    struct LessThan32 {
      bool operator()(uint64_t Val) const { return Val < 32u; }
    };
    struct GreaterThan32 {
      bool operator()(uint64_t Val) const { return Val > 32u; }
    };
    struct Equals32 {
      bool operator()(uint64_t Val) const { return Val == 32u; }
    };

    bool needsEmulation() const {
      return (SplitBuilder.IsI64Operation() || isI64Cmp(Inst) ||
              isConvertOfI64(Inst) || isI64PointerOp(Inst) ||
              isI64AddSat(Inst));
    }

    IRBuilder getIRBuilder() {
      return IRBuilder(Inst.getParent(), BasicBlock::iterator(&Inst),
                       TargetFolder(Inst.getModule()->getDataLayout()));
    }

    class ConstantEmitter {
    public:
      ConstantEmitter(Value *V)
          : ElNum(
                cast<IGCLLVM::FixedVectorType>(V->getType())->getNumElements()),
            Ty32(Type::getInt32Ty(V->getContext())) {}
      Constant *getSplat(unsigned Val) const {
        auto *KV = Constant::getIntegerValue(Ty32, APInt(32, Val));
        return ConstantDataVector::getSplat(ElNum, KV);
      }
      Constant *getZero() const { return Constant::getNullValue(getVTy()); }
      Constant *getOnes() const { return Constant::getAllOnesValue(getVTy()); }
      Type *getVTy() const {
        return IGCLLVM::FixedVectorType::get(Ty32, ElNum);
      }

    private:
      unsigned ElNum = 0;
      Type *Ty32 = nullptr;
    };

  public:
    Emu64Expander(
        const GenXSubtarget &ST, Instruction &I,
        std::map<OpType, Function *, decltype(OpTypeComparator)> *EF = nullptr)
        : ST(ST), EmulationFuns(EF), SplitBuilder(I), Inst(I) {}

    const GenXSubtarget &getSubtarget() const { return ST; }
    Value *tryExpand() {
      if (!needsEmulation())
        return nullptr;
      LLVM_DEBUG(dbgs() << "i64-emu: trying " << Inst << "\n");
      auto *Result = visit(Inst);

      if (Result)
        LLVM_DEBUG(dbgs() << "i64-emu: emulated with " << *Result << "\n");

      return Result;
    }
    using LHSplit = IVSplitter::LoHiSplit;
    Value *buildTernaryAddition(IRBuilder &Builder, Value &A, Value &B,
                                Value &C, const Twine &Name) const;
    struct AddSubExtResult {
      Value *Val; // Main Value
      Value *CB;  // Carry/Borrow
    };
    static AddSubExtResult buildAddc(Module *M, IRBuilder &B, Value &R,
                                     Value &L, const Twine &Prefix);
    static AddSubExtResult buildSubb(Module *M, IRBuilder &B, Value &L,
                                     Value &R, const Twine &Prefix);
    static Value *buildGeneralICmp(IRBuilder &B, CmpInst::Predicate P,
                                   bool IsPartialPredicate, const LHSplit &L,
                                   const LHSplit &R);
    static Value *tryOptimizedShr(IRBuilder &B, IVSplitter &SplitBuilder,
                                  BinaryOperator &Op, ArrayRef<uint32_t> Sa);
    static Value *tryOptimizedShl(IRBuilder &B, IVSplitter &SplitBuilder,
                                  BinaryOperator &Op, ArrayRef<uint32_t> Sa);
    static Value *buildGenericRShift(IRBuilder &B, IVSplitter &SplitBuilder,
                                     BinaryOperator &Op);

    enum Rounding {
      // Not used currently
    };
    struct ShiftInfo {
      ShiftInfo(Value *ShaIn, Value *Sh32In, Value *Mask1In, Value *Mask0In)
          : Sha{ShaIn}, Sh32{Sh32In}, Mask1{Mask1In}, Mask0{Mask0In} {}
      // Masked Shift Amount
      Value *Sha = nullptr;
      // 32 - Sha
      Value *Sh32 = nullptr;
      // To zero-out the high part (shift >= 32)
      Value *Mask1 = nullptr;
      // To negate results if Sha = 0
      Value *Mask0 = nullptr;
    };
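    // Illustration (per lane, assuming Sha is the shift amount masked to
    // [0, 31]): a 64-bit shift by 8 gives Sha = 8, Sh32 = 24, Mask1 = ~0,
    // Mask0 = ~0; a shift by 40 gives Mask1 = 0, zeroing the small-shift
    // terms; a shift by 0 gives Mask0 = 0, cancelling the cross-part terms.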
    static Value *buildPartialRShift(IRBuilder &B, Value *SrcLo, Value *SrcHi,
                                     const ShiftInfo &SI);
    static ShiftInfo constructShiftInfo(IRBuilder &B, Value *Base);

    static bool hasStrictEmulationRequirement(Instruction *Inst);
  };

public:
  static char ID;
  explicit GenXEmulate() : ModulePass(ID) {}
  StringRef getPassName() const override { return "GenX emulation"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  void runOnFunction(Function &F);

private:
  Value *emulateInst(Instruction *Inst);
  Function *getEmulationFunction(const Instruction *Inst) const;
  void buildEmuFunCache(Module &M);

  // Checks if a function is an emulation routine.
  static bool isEmulationFunction(const Function *F) {
    return F->hasFnAttribute(genx::FunctionMD::VCEmulationRoutine);
  }
};

} // end namespace

bool GenXEmulate::Emu64Expander::isI64PointerOp(const Instruction &I) {
  auto Opcode = I.getOpcode();
  const DataLayout &DL = I.getModule()->getDataLayout();
  if (Opcode == Instruction::ICmp) {
    auto *OpSTy = I.getOperand(0)->getType()->getScalarType();
    if (!OpSTy->isPointerTy())
      return false;
    if (DL.getTypeSizeInBits(OpSTy) < 64)
      return false;
    return true;
  }
  if (Opcode == Instruction::PtrToInt || Opcode == Instruction::IntToPtr) {
    auto *PtrType = I.getType()->getScalarType();
    auto *IntType = I.getOperand(0)->getType()->getScalarType();
    if (Opcode == Instruction::PtrToInt)
      std::swap(PtrType, IntType);
    if (cast<CastInst>(&I)->isNoopCast(DL))
      return false;
    return (DL.getTypeSizeInBits(PtrType) == 64 ||
            DL.getTypeSizeInBits(IntType) == 64);
  }
  return false;
}
bool GenXEmulate::Emu64Expander::isConvertOfI64(const Instruction &I) {

  if (GenXEmulate::Emu64Expander::isI64ToFP(I))
    return true;

  auto IID = GenXIntrinsic::getAnyIntrinsicID(&I);
  switch (IID) {
  case GenXIntrinsic::genx_uutrunc_sat:
  case GenXIntrinsic::genx_sstrunc_sat:
  case GenXIntrinsic::genx_ustrunc_sat:
  case GenXIntrinsic::genx_sutrunc_sat:
    return I.getOperand(0)->getType()->getScalarType()->isIntegerTy(64);
  }
  return false;
}
bool GenXEmulate::Emu64Expander::isI64ToFP(const Instruction &I) {
  if (Instruction::UIToFP != I.getOpcode() &&
      Instruction::SIToFP != I.getOpcode()) {
    return false;
  }
  return I.getOperand(0)->getType()->getScalarType()->isIntegerTy(64);
}
bool GenXEmulate::Emu64Expander::isI64Cmp(const Instruction &I) {
  if (Instruction::ICmp != I.getOpcode())
    return false;
  return I.getOperand(0)->getType()->getScalarType()->isIntegerTy(64);
}
bool GenXEmulate::Emu64Expander::isI64AddSat(const Instruction &I) {
  if (auto *CI = dyn_cast<CallInst>(&I)) {
    switch (GenXIntrinsic::getAnyIntrinsicID(CI)) {
    case GenXIntrinsic::genx_suadd_sat:
    case GenXIntrinsic::genx_usadd_sat:
    case GenXIntrinsic::genx_uuadd_sat:
    case GenXIntrinsic::genx_ssadd_sat: {
      Value *Arg0 = I.getOperand(0);
      Value *Arg1 = I.getOperand(1);
      return Arg0->getType()->isIntOrIntVectorTy(64) &&
             Arg1->getType()->isIntOrIntVectorTy(64);
    }
    default:
      return false;
    }
  }
  return false;
}

Value *GenXEmulate::Emu64Expander::detectBitwiseNot(BinaryOperator &Op) {
  if (Instruction::Xor != Op.getOpcode())
    return nullptr;

  auto isAllOnes = [](const Value *V) {
    if (auto *C = dyn_cast<Constant>(V))
      return C->isAllOnesValue();
    return false;
  };

  if (isAllOnes(Op.getOperand(1)))
    return Op.getOperand(0);
  if (isAllOnes(Op.getOperand(0)))
    return Op.getOperand(1);

  return nullptr;
}

// Changes scalar to scalar, vector to vector
Type *GenXEmulate::Emu64Expander::changeScalarType(Type *T, Type *NewTy) {
  IGC_ASSERT_MESSAGE(NewTy == NewTy->getScalarType(), "NewTy must be scalar");
  return (T->isVectorTy())
             ? IGCLLVM::FixedVectorType::get(
                   NewTy, cast<IGCLLVM::FixedVectorType>(T)->getNumElements())
             : NewTy;
}

// changes vector/scalar i64 type so it now uses scalar type i32
// <2 x i64> -> <4 x i32>
// i64 -> <2 x i32>
static Type *convertI64TypeToI32(const Type *OldType) {
  IGC_ASSERT_MESSAGE(OldType, "Error: nullptr input");
  IGC_ASSERT_MESSAGE(OldType->isIntOrIntVectorTy(),
                     "Error: OldType not int or int vector type");
  IGC_ASSERT_MESSAGE(OldType->getScalarType()->isIntegerTy(64),
                     "Error: OldType Scalar type not i64");

  bool OldTypeIsVec = isa<IGCLLVM::FixedVectorType>(OldType);

  Type *Int32Ty = Type::getInt32Ty(OldType->getContext());

  unsigned OldWidth =
      OldTypeIsVec ? cast<IGCLLVM::FixedVectorType>(OldType)->getNumElements()
                   : 1;

  constexpr unsigned Multiplier = 2;
  unsigned NewWidth = OldWidth * Multiplier;
  return IGCLLVM::FixedVectorType::get(Int32Ty, NewWidth);
}

// Change type and exec size, like
// or <2 x i64> -> or <4 x i32>
// or i64 -> or <2 x i32>
//
// The resulting LLVM IR:
// From:
//   %res = or <2 x i64> %val1, %val2
// To:
//   %val1.cast = bitcast %val1 to <4 x i32>
//   %val2.cast = bitcast %val2 to <4 x i32>
//   %res.tmp = or <4 x i32> %val1.cast, %val2.cast
//   %res = bitcast %res.tmp to <2 x i64>
Value *GenXEmulate::Emu64Expander::expandBitLogicOp(BinaryOperator &Op) {
  auto Builder = getIRBuilder();

  Type *PrevBinOpTy = Op.getType();
  Type *NextBinOpTy = convertI64TypeToI32(PrevBinOpTy);
  IGC_ASSERT(NextBinOpTy);

  Value *Op0 = Op.getOperand(0);
  Value *Op1 = Op.getOperand(1);

  Value *Op0Cast =
      Builder.CreateBitCast(Op0, NextBinOpTy, Op0->getName() + ".cast");
  Value *Op1Cast =
      Builder.CreateBitCast(Op1, NextBinOpTy, Op1->getName() + ".cast");

  Value *BinOp = Builder.CreateBinOp(Op.getOpcode(), Op0Cast, Op1Cast,
                                     Twine("int_emu.") + Inst.getName());

  return Builder.CreateBitCast(BinOp, PrevBinOpTy, Op.getName() + ".cast");
}

Value *GenXEmulate::Emu64Expander::expandBitwiseOp(BinaryOperator &Op) {
  auto Src0 = SplitBuilder.splitOperandHalf(0);
  auto Src1 = SplitBuilder.splitOperandHalf(1);

  auto Builder = getIRBuilder();

  Value *Part1 = Builder.CreateBinOp(Op.getOpcode(), Src0.Left, Src1.Left,
                                     Inst.getName() + ".part1");
  Value *Part2 = Builder.CreateBinOp(Op.getOpcode(), Src0.Right, Src1.Right,
                                     Inst.getName() + ".part2");
  return SplitBuilder.combineHalfSplit(
      {Part1, Part2}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Inst.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitAdd(BinaryOperator &Op) {
  auto Src0 = SplitBuilder.splitOperandLoHi(0);
  auto Src1 = SplitBuilder.splitOperandLoHi(1);

  auto Builder = getIRBuilder();
  // add64 transforms as:
  //   [add_lo, carry] = genx_addc(src0.lo, src1.lo)
  //   add_hi = add(carry, add(src0.hi, src1.hi))
  //   add64 = combine(add_lo, add_hi)
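  // For example, 0x00000001_FFFFFFFF + 0x00000000_00000001:
  //   addc(0xFFFFFFFF, 0x00000001) -> {lo = 0x00000000, carry = 1}
  //   add_hi = 1 + (1 + 0) = 2, giving 0x00000002_00000000.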
  auto AddcRes = buildAddc(Inst.getModule(), Builder, *Src0.Lo, *Src1.Lo,
                           "int_emu.add64.lo.");
  auto *AddLo = AddcRes.Val;
  auto *AddHi =
      buildTernaryAddition(Builder, *AddcRes.CB, *Src0.Hi, *Src1.Hi, "add_hi");
  return SplitBuilder.combineLoHiSplit(
      {AddLo, AddHi}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Inst.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitSub(BinaryOperator &Op) {
  auto Src0 = SplitBuilder.splitOperandLoHi(0);
  auto Src1 = SplitBuilder.splitOperandLoHi(1);

  auto *SubbFunct = GenXIntrinsic::getGenXDeclaration(
      Inst.getModule(), GenXIntrinsic::genx_subb,
      {Src0.Lo->getType(), Src1.Lo->getType()});

  auto Builder = getIRBuilder();
  // sub64 transforms as:
  //   [sub_lo, borrow] = genx_subb(src0.lo, src1.lo)
  //   sub_hi = add(src0.hi, add(-borrow, -src1.hi))
  //   sub64 = combine(sub_lo, sub_hi)
  using namespace GenXIntrinsic::GenXResult;
  auto *SubbVal = Builder.CreateCall(SubbFunct, {Src0.Lo, Src1.Lo}, "subb");
  auto *SubLo = Builder.CreateExtractValue(SubbVal, {IdxSubb_Sub}, "subb.sub");
  auto *Borrow =
      Builder.CreateExtractValue(SubbVal, {IdxSubb_Borrow}, "subb.borrow");
  auto *MinusBorrow = Builder.CreateNeg(Borrow, "borrow.negate");
  auto *MinusS1Hi = Builder.CreateNeg(Src1.Hi, "negative.src1_hi");
  auto *SubHi = buildTernaryAddition(Builder, *Src0.Hi, *MinusBorrow,
                                     *MinusS1Hi, "sub_hi");
  return SplitBuilder.combineLoHiSplit(
      {SubLo, SubHi}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Inst.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitAnd(BinaryOperator &Op) {
  return expandBitLogicOp(Op);
}
Value *GenXEmulate::Emu64Expander::visitOr(BinaryOperator &Op) {
  return expandBitLogicOp(Op);
}
Value *GenXEmulate::Emu64Expander::visitXor(BinaryOperator &Op) {
  if (auto *NotOperand = detectBitwiseNot(Op)) {
    unsigned OperandIdx = NotOperand == Op.getOperand(0) ? 0 : 1;
    auto Src0 = SplitBuilder.splitOperandHalf(OperandIdx);
    auto *Part1 = BinaryOperator::CreateNot(Src0.Left, ".part1_not", &Inst);
    auto *Part2 = BinaryOperator::CreateNot(Src0.Right, ".part2_not", &Inst);
    Part1->setDebugLoc(Inst.getDebugLoc());
    Part2->setDebugLoc(Inst.getDebugLoc());
    return SplitBuilder.combineHalfSplit({Part1, Part2}, "int_emu.not.",
                                         Op.getType()->isIntegerTy());
  }
  return expandBitLogicOp(Op);
}
GenXEmulate::Emu64Expander::VectorInfo
GenXEmulate::Emu64Expander::toVector(IRBuilder &Builder, Value *In) {
  if (In->getType()->isVectorTy())
    return {In, cast<IGCLLVM::FixedVectorType>(In->getType())};

  if (auto *CIn = dyn_cast<ConstantInt>(In)) {
    uint64_t CVals[] = {CIn->getZExtValue()};
    auto *VectorValue = ConstantDataVector::get(In->getContext(), CVals);
    return {VectorValue,
            cast<IGCLLVM::FixedVectorType>(VectorValue->getType())};
  }
  auto *VTy = IGCLLVM::FixedVectorType::get(In->getType(), 1);
  auto *VectorValue = Builder.CreateBitCast(In, VTy);
  return {VectorValue, VTy};
  // Note: alternatively, we could do something like this:
  // Value *UndefVector = UndefValue::get(VTy);
  // return Builder.CreateInsertElement(UndefVector, In, (uint64_t)0, ...
}
bool GenXEmulate::Emu64Expander::getConstantUI32Values(
    Value *V, SmallVectorImpl<uint32_t> &Result) {

  auto FitsUint32 = [](uint64_t V) {
    return V <= std::numeric_limits<uint32_t>::max();
  };
  Result.clear();
  if (auto *Scalar = dyn_cast<ConstantInt>(V)) {
    uint64_t Value = Scalar->getZExtValue();
    if (!FitsUint32(Value))
      return false;
    Result.push_back(Value);
    return true;
  }
  auto *SeqVal = dyn_cast<ConstantDataSequential>(V);
  if (!SeqVal)
    return false;

  Result.reserve(SeqVal->getNumElements());
  for (unsigned i = 0; i < SeqVal->getNumElements(); ++i) {
    auto *CV = dyn_cast_or_null<ConstantInt>(SeqVal->getAggregateElement(i));
    if (!CV)
      return false;
    uint64_t Value = CV->getZExtValue();
    if (!FitsUint32(Value))
      return false;
    Result.push_back(Value);
  }
  return true;
}
Value *GenXEmulate::Emu64Expander::visitSelectInst(SelectInst &I) {
  auto SrcTrue = SplitBuilder.splitOperandLoHi(1);
  auto SrcFalse = SplitBuilder.splitOperandLoHi(2);
  auto *Cond = I.getCondition();

  auto Builder = getIRBuilder();
  // select of 64-bit values transforms as:
  //   split TrueVal and FalseVal into lo/hi parts
  //   lo_part = select(cond, src0.lo, src1.lo)
  //   hi_part = select(cond, src0.hi, src1.hi)
  //   result = combine(lo_part, hi_part)
  auto *SelLo = Builder.CreateSelect(Cond, SrcTrue.Lo, SrcFalse.Lo, "sel.lo");
  auto *SelHi = Builder.CreateSelect(Cond, SrcTrue.Hi, SrcFalse.Hi, "sel.hi");
  return SplitBuilder.combineLoHiSplit(
      {SelLo, SelHi}, Twine("int_emu.") + I.getOpcodeName() + ".",
      I.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitICmp(ICmpInst &Cmp) {
  if (!OptIcmpEnable)
    return nullptr;

  auto Builder = getIRBuilder();

  if (isI64PointerOp(Cmp)) {

    if (!OptProcessPtrs) {
      LLVM_DEBUG(dbgs() << "i64-emu::WARNING: " << Cmp
                        << " won't be emulated\n");
      return nullptr;
    }

    Type *Ty64 = Builder.getInt64Ty();
    if (Cmp.getType()->isVectorTy()) {
      auto NumElements =
          cast<IGCLLVM::FixedVectorType>(Cmp.getType())->getNumElements();
      Ty64 = IGCLLVM::FixedVectorType::get(Ty64, NumElements);
    }
    auto *IL = Builder.CreatePtrToInt(Cmp.getOperand(0), Ty64);
    auto *IR = Builder.CreatePtrToInt(Cmp.getOperand(1), Ty64);
    // Create new 64-bit compare
    auto *NewICMP = Builder.CreateICmp(Cmp.getPredicate(), IL, IR);
    return ensureEmulated(NewICMP);
  }

  const bool PartialPredicate =
      std::any_of(Cmp.user_begin(), Cmp.user_end(), [](const User *U) {
        auto IID = GenXIntrinsic::getAnyIntrinsicID(U);
        return IID == GenXIntrinsic::genx_wrpredregion ||
               IID == GenXIntrinsic::genx_wrpredpredregion;
      });

  unsigned BaseOperand = 0;
  const bool FoldConstants =
      !(PartialPredicate && OptConvertPartialPredicates);
  IVSplitter Splitter(Cmp, &BaseOperand);
  auto Src0 = Splitter.splitOperandLoHi(0, FoldConstants);
  auto Src1 = Splitter.splitOperandLoHi(1, FoldConstants);

  Value *Result = buildGeneralICmp(Builder, Cmp.getPredicate(),
                                   PartialPredicate, Src0, Src1);

  if (Cmp.getType()->isIntegerTy() && !Result->getType()->isIntegerTy()) {
    // we expect this cast to be possible
    IGC_ASSERT(Cmp.getType() == Result->getType()->getScalarType());
    Result = Builder.CreateBitCast(Result, Cmp.getType(),
                                   Result->getName() + ".toi");
  }
  return Result;
}
Value *GenXEmulate::Emu64Expander::visitShl(BinaryOperator &Op) {

  auto Builder = getIRBuilder();

  llvm::SmallVector<uint32_t, 8> ShaVals;
  if (getConstantUI32Values(Op.getOperand(1), ShaVals)) {
    auto *Result = tryOptimizedShl(Builder, SplitBuilder, Op, ShaVals);
    if (Result)
      return Result;
  }

  auto L = SplitBuilder.splitOperandLoHi(0);
  auto R = SplitBuilder.splitOperandLoHi(1);

  auto SI = constructShiftInfo(Builder, R.Lo);
  ConstantEmitter K(L.Lo);

  // Shift Left
  // 1. Calculate MASK1. MASK1 is 0 when the shift is >= 32 (large shift)
  // 2. Calculate MASK0. MASK0 is 0 iff the shift is 0
  // 3. Calculate Lo part:
  //    [(L.Lo *SHL* SHA) *AND* MASK1] | MASK1 ensures zero if large shift
  auto *Lo = Builder.CreateAnd(Builder.CreateShl(L.Lo, SI.Sha), SI.Mask1);
  // 4. Calculate Hi part:
  //    Hl1: [L.Lo *SHL* (SHA - 32)] *AND* ~MASK1 | shifted-out values, large shift
  //    Hl2: [(L.Lo *AND* MASK0) *LSR* (32 - SHA)] *AND* MASK1 | nz for small shift
  //    Hh:  [(L.Hi *SHL* Sha)] *AND* MASK1 | MASK1 discards result if large shift
  //    Hi:  *OR* the above
  // NOTE: SI.Sh32 == (32 - SHA)
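  // e.g. for SHA = 40 (a large shift): MASK1 = 0, so Lo = 0 and only Hl1
  // survives, giving Hi = L.Lo << (40 - 32) = L.Lo << 8.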
  auto *Hl1 = Builder.CreateShl(L.Lo, Builder.CreateNeg(SI.Sh32));
  Hl1 = Builder.CreateAnd(Hl1, Builder.CreateNot(SI.Mask1));

  auto *Hl2 = Builder.CreateLShr(Builder.CreateAnd(L.Lo, SI.Mask0), SI.Sh32);
  Hl2 = Builder.CreateAnd(Hl2, SI.Mask1);

  auto *Hh = Builder.CreateAnd(Builder.CreateShl(L.Hi, SI.Sha), SI.Mask1);

  auto *Hi = Builder.CreateOr(Hh, Builder.CreateOr(Hl1, Hl2));
  return SplitBuilder.combineLoHiSplit(
      {Lo, Hi}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Op.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitLShr(BinaryOperator &Op) {
  return buildRightShift(SplitBuilder, Op);
}
Value *GenXEmulate::Emu64Expander::visitAShr(BinaryOperator &Op) {
  return buildRightShift(SplitBuilder, Op);
}

Value *GenXEmulate::Emu64Expander::visitZExtInst(ZExtInst &I) {
  auto Builder = getIRBuilder();
  auto VOp = toVector(Builder, I.getOperand(0));
  Value *LoPart = VOp.V;
  if (VOp.VTy->getScalarType()->getPrimitiveSizeInBits() < 32) {
    auto *ExtendedType = IGCLLVM::FixedVectorType::get(
        Builder.getInt32Ty(), VOp.VTy->getNumElements());
    LoPart = Builder.CreateZExt(LoPart, ExtendedType, ".zext32");
  }
  auto *ZeroValue = Constant::getNullValue(LoPart->getType());
  return SplitBuilder.combineLoHiSplit({LoPart, ZeroValue}, "int_emu.zext64.",
                                       Inst.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitSExtInst(SExtInst &I) {
  auto Builder = getIRBuilder();
  auto VOp = toVector(Builder, I.getOperand(0));
  auto *LoPart = VOp.V;
  if (VOp.VTy->getScalarType()->getPrimitiveSizeInBits() < 32) {
    auto *ExtendedType = IGCLLVM::FixedVectorType::get(
        Builder.getInt32Ty(), VOp.VTy->getNumElements());
    LoPart = Builder.CreateSExt(LoPart, ExtendedType, ".sext32");
  }
  auto *HiPart = Builder.CreateAShr(LoPart, 31u, ".sign_hi");
  return SplitBuilder.combineLoHiSplit({LoPart, HiPart}, "int_emu.sext64.",
                                       Inst.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitPtrToInt(PtrToIntInst &I) {

  const DataLayout &DL = I.getModule()->getDataLayout();
  // do not emulate noop
  if (cast<CastInst>(&I)->isNoopCast(DL))
    return nullptr;

  if (!OptProcessPtrs) {
    LLVM_DEBUG(dbgs() << "i64-emu::WARNING: " << I << " won't be emulated\n");
    return nullptr;
  }
  // ptr32 -> i64 conversions are not supported
  if (DL.getTypeSizeInBits(I.getOperand(0)->getType()->getScalarType()) <
      DL.getTypeSizeInBits(I.getType()->getScalarType())) {
    LLVM_DEBUG(dbgs() << "i64-emu::ERROR: " << I << " can't be emulated\n");
    vc::diagnose(I.getContext(), "GenXEmulate", &I,
                 "ptr32->i64 extensions are not supported");
  }

  auto Builder = getIRBuilder();
  auto VOp = toVector(Builder, I.getOperand(0));

  auto *VTy64 = IGCLLVM::FixedVectorType::get(Builder.getInt64Ty(),
                                              VOp.VTy->getNumElements());
  auto *Cast = Builder.CreatePtrToInt(VOp.V, VTy64);

  auto *ResTy = I.getType();
  unsigned Stride =
      VTy64->getPrimitiveSizeInBits() / ResTy->getPrimitiveSizeInBits();
  unsigned NumElements = VOp.VTy->getNumElements();

  auto *VElTy = IGCLLVM::FixedVectorType::get(ResTy->getScalarType(),
                                              Stride * NumElements);
  auto *ElCast = Builder.CreateBitCast(Cast, VElTy, "int_emu.ptr2int.elcast.");
  genx::Region R(ElCast);
  R.NumElements = NumElements;
  R.Stride = Stride;
  R.Width = NumElements;
  R.VStride = R.Stride * R.Width;
  auto *Result = (Value *)R.createRdRegion(
      ElCast, "int_emu.trunc." + I.getName() + ".", &I, I.getDebugLoc());
  if (Result->getType() != ResTy) {
    Result = Builder.CreateBitCast(
        Result, ResTy, Twine("int_emu.trunc.") + I.getName() + ".to_s.");
  }
  return Result;
}
Value *GenXEmulate::Emu64Expander::visitIntToPtr(IntToPtrInst &I) {

  const DataLayout &DL = I.getModule()->getDataLayout();
  // do not emulate noop
  if (cast<CastInst>(&I)->isNoopCast(DL))
    return nullptr;

  if (!OptProcessPtrs) {
    LLVM_DEBUG(dbgs() << "i64-emu::WARNING: " << I << " won't be emulated\n");
    return nullptr;
  }
  // i64 -> ptr32 truncations are not supported
  if (DL.getTypeSizeInBits(I.getOperand(0)->getType()->getScalarType()) >
      DL.getTypeSizeInBits(I.getType()->getScalarType())) {
    LLVM_DEBUG(dbgs() << "i64-emu::ERROR: " << I << " can't be emulated\n");
    vc::diagnose(I.getContext(), "GenXEmulate", &I,
                 "i64->ptr32 truncations are not supported");
  }

  auto Builder = getIRBuilder();
  auto VOp = toVector(Builder, I.getOperand(0));

  auto *VTy32 = IGCLLVM::FixedVectorType::get(Builder.getInt32Ty(),
                                              VOp.VTy->getNumElements());
  auto *VTy64 = IGCLLVM::FixedVectorType::get(Builder.getInt64Ty(),
                                              VOp.VTy->getNumElements());
  Value *VI32 = VOp.V;
  if (VOp.VTy != VTy32)
    VI32 = Builder.CreateZExt(VOp.V, VTy32);

  auto *Zext64 = Builder.CreateZExt(VI32, VTy64);
  auto *Zext = ensureEmulated(Zext64);

  Type *ResType = I.getType();
  Type *CnvType = ResType;
  if (!ResType->isVectorTy()) {
    CnvType = IGCLLVM::FixedVectorType::get(ResType, 1);
  }
  auto *Result = Builder.CreateIntToPtr(Zext, CnvType);
  if (ResType != CnvType) {
    Result = Builder.CreateBitCast(
        Result, ResType, Twine("int_emu.") + I.getOpcodeName() + ".");
  }
  return Result;
}
Value *GenXEmulate::Emu64Expander::visitGenxTrunc(CallInst &CI) {

  auto IID = GenXIntrinsic::getAnyIntrinsicID(&Inst);
  unsigned DstSize = CI.getType()->getScalarType()->getPrimitiveSizeInBits();
  IGC_ASSERT(DstSize == 8 || DstSize == 16 || DstSize == 32 || DstSize == 64);

  // early exit
  if (IID == GenXIntrinsic::genx_uutrunc_sat ||
      IID == GenXIntrinsic::genx_sstrunc_sat) {
    if (DstSize == 64)
      return CI.getOperand(0);
  }

  auto Builder = getIRBuilder();
  auto VOp = toVector(Builder, CI.getOperand(0));

  auto MakeConstantSplat64 = [](IRBuilder &B, IGCLLVM::FixedVectorType *VTy,
                                uint64_t Value) {
    auto *KV = Constant::getIntegerValue(B.getInt64Ty(), APInt(64, Value));
    return ConstantDataVector::getSplat(VTy->getNumElements(), KV);
  };
  auto MaxDstSigned = [&](unsigned DstSize) {
    uint64_t MaxVal = (1ull << (DstSize - 1)) - 1;
    return MakeConstantSplat64(Builder, VOp.VTy, MaxVal);
  };
  auto MinDstSigned = [&](unsigned DstSize) {
    uint64_t Ones = ~0ull;
    uint64_t MinVal = Ones << (DstSize - 1);
    return MakeConstantSplat64(Builder, VOp.VTy, MinVal);
  };
  auto MaxDstUnsigned = [&](unsigned DstSize) {
    uint64_t MaxVal = ~0ull;
    MaxVal = MaxVal >> (64 - DstSize);
    return MakeConstantSplat64(Builder, VOp.VTy, MaxVal);
  };
  auto MinDstUnsigned = [&](unsigned DstSize) {
    return MakeConstantSplat64(Builder, VOp.VTy, 0);
  };
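  // e.g. for DstSize == 16: MaxDstSigned = 0x7FFF, MinDstSigned =
  // 0xFFFFFFFFFFFF8000 (sign-extended to 64 bits), MaxDstUnsigned = 0xFFFF,
  // MinDstUnsigned = 0.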

  Value *Cond1 = nullptr;
  Value *Limit1 = nullptr;
  // optional
  Value *Cond2 = nullptr;
  Value *Limit2 = nullptr;

  switch (IID) {
  case GenXIntrinsic::genx_uutrunc_sat:
    // UGT (maxDstUnsigned) -> maxDstUnsigned
    Limit1 = MaxDstUnsigned(DstSize);
    Cond1 = ensureEmulated(Builder.CreateICmpUGT(VOp.V, Limit1));
    break;
  case GenXIntrinsic::genx_sstrunc_sat:
    // Result = Operand
    // SGT (maxDstSigned) -> maxDstSigned
    // SLT (minDstSigned) -> minDstSigned
    // trunc
    Limit1 = MaxDstSigned(DstSize);
    Cond1 = ensureEmulated(Builder.CreateICmpSGT(VOp.V, Limit1));
    Limit2 = MinDstSigned(DstSize);
    Cond2 = ensureEmulated(Builder.CreateICmpSLT(VOp.V, Limit2));
    break;
  case GenXIntrinsic::genx_ustrunc_sat: // unsigned result, signed operand
    // UGE (maxDstUnsigned) -> maxDstUnsigned
    // Operand < 0 -> 0
    // trunc
    Limit1 = MaxDstUnsigned(DstSize);
    Cond1 = ensureEmulated(Builder.CreateICmpUGE(VOp.V, Limit1));
    Limit2 = MinDstUnsigned(DstSize);
    Cond2 = ensureEmulated(Builder.CreateICmpSLT(VOp.V, Limit2));
    break;
  case GenXIntrinsic::genx_sutrunc_sat: // signed result, unsigned operand
    // UGT (maxDstSigned) -> maxDstSigned
    // trunc
    Limit1 = MaxDstSigned(DstSize);
    Cond1 = ensureEmulated(Builder.CreateICmpUGT(VOp.V, Limit1));
    break;
  }
  IGC_ASSERT(Cond1 && Limit1);
  auto *Result = ensureEmulated(Builder.CreateSelect(Cond1, Limit1, VOp.V));
  if (Cond2) {
    Result = ensureEmulated(Builder.CreateSelect(Cond2, Limit2, Result));
  }
  if (DstSize <= 32) {
    auto Splitted = SplitBuilder.splitValueLoHi(*Result);
    if (DstSize == 32) {
      Result = Splitted.Lo;
    } else {
      // DIRTY HACK: since currently our backend does not support
      // the llvm trunc instruction, we just build a 32-bit trunc.sat instead
      unsigned ElNum = VOp.VTy->getNumElements();
      auto *CnvType =
          IGCLLVM::FixedVectorType::get(CI.getType()->getScalarType(), ElNum);
      // Result = Builder.CreateTrunc(Result, CnvType);
      Function *TrSatF = GenXIntrinsic::getAnyDeclaration(
          CI.getModule(), IID, {CnvType, Splitted.Lo->getType()});
      Result =
          Builder.CreateCall(TrSatF, Splitted.Lo, "int_emu.trunc.sat.small.");
    }
  }
  if (Result->getType() == CI.getType())
    return Result;

  return Builder.CreateBitCast(Result, CI.getType());
}
Value *GenXEmulate::Emu64Expander::visitGenxMinMax(CallInst &CI) {

  auto Builder = getIRBuilder();
  Value *Lhs = CI.getOperand(0);
  Value *Rhs = CI.getOperand(1);

  Value *CondVal = nullptr;
  // We create two 64-bit operations: compare and select.
  // Then we replace those with yet another expander instance
  auto IID = GenXIntrinsic::getAnyIntrinsicID(&Inst);
  switch (IID) {
  case GenXIntrinsic::genx_umax:
    CondVal = Builder.CreateICmpUGT(Lhs, Rhs);
    break;
  case GenXIntrinsic::genx_smax:
    CondVal = Builder.CreateICmpSGT(Lhs, Rhs);
    break;
  case GenXIntrinsic::genx_umin:
    CondVal = Builder.CreateICmpULT(Lhs, Rhs);
    break;
  case GenXIntrinsic::genx_smin:
    CondVal = Builder.CreateICmpSLT(Lhs, Rhs);
    break;
  }
  IGC_ASSERT(CondVal);
  CondVal = ensureEmulated(CondVal);
  return ensureEmulated(Builder.CreateSelect(CondVal, Lhs, Rhs));
}

Value *GenXEmulate::Emu64Expander::visitGenxAbsi(CallInst &CI) {
  auto Builder = getIRBuilder();
  auto Src = SplitBuilder.splitOperandLoHi(0);
  // we check the sign and, if it is set, select the negated operand
  ConstantEmitter K(Src.Hi);
  auto *VOprnd = toVector(Builder, CI.getOperand(0)).V;
  // This would be a 64-bit operation on vector types
  auto *NegatedOpnd = Builder.CreateNeg(VOprnd);
  NegatedOpnd = ensureEmulated(NegatedOpnd);

  auto NegSplit = SplitBuilder.splitValueLoHi(*NegatedOpnd);

  auto *FlagSignSet = Builder.CreateICmpSLT(Src.Hi, K.getZero());
  auto *Lo = Builder.CreateSelect(FlagSignSet, NegSplit.Lo, Src.Lo);
  auto *Hi = Builder.CreateSelect(FlagSignSet, NegSplit.Hi, Src.Hi);

  return SplitBuilder.combineLoHiSplit({Lo, Hi}, "int_emu.genxabsi.",
                                       CI.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::visitGenxAddSat(CallInst &CI) {

  auto Src0 = SplitBuilder.splitOperandLoHi(0);
  auto Src1 = SplitBuilder.splitOperandLoHi(1);

  auto *M = CI.getModule();

  auto Builder = getIRBuilder();
  ConstantEmitter K(Src0.Lo);

  Value *Result = nullptr;
  auto IID = GenXIntrinsic::getAnyIntrinsicID(&Inst);
  switch (IID) {
  case GenXIntrinsic::genx_uuadd_sat: {
    if (!SplitBuilder.IsI64Operation()) {
      auto LoAdd =
          buildAddc(M, Builder, *Src0.Lo, *Src1.Lo, "int_emu.uuadd.lo");
      // if there are any non-zero bits in the hi parts of the srcs,
      // then positive saturation is produced
      auto *PosSat =
          Builder.CreateOr(Builder.CreateOr(Src0.Hi, Src1.Hi), LoAdd.CB);
      auto *Saturated =
          Builder.CreateICmpNE(PosSat, K.getZero(), "int_emu.uuadd.sat");
      Result = Builder.CreateSelect(Saturated, K.getOnes(), LoAdd.Val);
    } else {
      auto LoAdd =
          buildAddc(M, Builder, *Src0.Lo, *Src1.Lo, "int_emu.uuadd.lo");
      auto HiAdd1 =
          buildAddc(M, Builder, *Src0.Hi, *Src1.Hi, "int_emu.uuadd.hi1.");
      // add carry from low part
      auto HiAdd2 =
          buildAddc(M, Builder, *HiAdd1.Val, *LoAdd.CB, "int_emu.uuadd.h2.");

      auto *HiResult = HiAdd2.Val;
      auto *Saturated =
          Builder.CreateICmpNE(Builder.CreateOr(HiAdd1.CB, HiAdd2.CB),
                               K.getZero(), "int_emu.uuadd.sat.");
      auto *Lo = Builder.CreateSelect(Saturated, K.getOnes(), LoAdd.Val);
      auto *Hi = Builder.CreateSelect(Saturated, K.getOnes(), HiResult);
      Result = SplitBuilder.combineLoHiSplit({Lo, Hi}, "int_emu.uuadd.",
                                             CI.getType()->isIntegerTy());
    }
  } break;
  case GenXIntrinsic::genx_ssadd_sat: {
    auto LoAdd = buildAddc(M, Builder, *Src0.Lo, *Src1.Lo, "int_emu.ssadd.lo");
    auto HiAdd1 =
        buildAddc(M, Builder, *Src0.Hi, *Src1.Hi, "int_emu.ssadd.hi1.");
    // add carry from low part
    auto HiAdd2 =
        buildAddc(M, Builder, *HiAdd1.Val, *LoAdd.CB, "int_emu.ssadd.h2.");
    auto *MaskBit31 = K.getSplat(1u << 31);
    auto *MaxSigned32 = K.getSplat((1u << 31u) - 1u);
    // Overflow = (x >> (os - 1)) == (y >> (os - 1)) &&
    //            (x >> (os - 1)) != (result >> (os - 1)) ? 1 : 0;
    auto *SignOp0 = Builder.CreateAnd(Src0.Hi, MaskBit31);
    auto *SignOp1 = Builder.CreateAnd(Src1.Hi, MaskBit31);
    auto *SignRes = Builder.CreateAnd(HiAdd2.Val, MaskBit31);

    auto *FlagSignOpMatch = Builder.CreateICmpEQ(SignOp0, SignOp1);
    auto *FlagSignResMismatch = Builder.CreateICmpNE(SignOp0, SignRes);
    auto *FlagOverflow =
        Builder.CreateAnd(FlagSignOpMatch, FlagSignResMismatch);

    // by default we assume that we have positive saturation
    auto *Lo = Builder.CreateSelect(FlagOverflow, K.getOnes(), LoAdd.Val);
    auto *Hi = Builder.CreateSelect(FlagOverflow, MaxSigned32, HiAdd2.Val);
    // if negative, change the saturation value
    auto *FlagNegativeSat = Builder.CreateAnd(
        FlagOverflow, Builder.CreateICmpSLT(SignOp0, K.getZero()));
    Lo = Builder.CreateSelect(FlagNegativeSat, K.getZero(), Lo);
    Hi = Builder.CreateSelect(FlagNegativeSat, K.getSplat(1u << 31), Hi);

    Result = SplitBuilder.combineLoHiSplit({Lo, Hi}, "int_emu.ssadd.",
                                           CI.getType()->isIntegerTy());
  } break;
  case GenXIntrinsic::genx_suadd_sat:
    report_fatal_error(
        "int_emu: genx_suadd_sat is not supported by VC backend");
    break;
  case GenXIntrinsic::genx_usadd_sat:
    report_fatal_error(
        "int_emu: genx_usadd_sat is not supported by VC backend");
    break;
  default:
    IGC_ASSERT_MESSAGE(0, "unknown intrinsic passed to saturation add emu");
  }

  if (Result->getType() != CI.getType()) {
    auto TruncID = (IID == GenXIntrinsic::genx_uuadd_sat)
                       ? GenXIntrinsic::genx_uutrunc_sat
                       : GenXIntrinsic::genx_sstrunc_sat;
    auto *TruncFunct = GenXIntrinsic::getGenXDeclaration(
        M, TruncID, {CI.getType(), Result->getType()});
    Result = Builder.CreateCall(TruncFunct, {Result}, "int_emu.trunc.sat");
    Result = ensureEmulated(Result);
  }

  return Result;
}

Value *GenXEmulate::Emu64Expander::visitGenxFPToISat(CallInst &CI) {
  if (CI.getType()->getScalarType()->isDoubleTy())
    vc::diagnose(CI.getContext(), "GenXEmulate", &CI,
                 "double->UI conversions are not supported");

  auto IID = GenXIntrinsic::getAnyIntrinsicID(&Inst);
  IGC_ASSERT_MESSAGE(IID == GenXIntrinsic::genx_fptosi_sat ||
                         IID == GenXIntrinsic::genx_fptoui_sat,
                     "unknown intrinsic passed to fptoi_sat emu");
  const bool IsSigned = IID == GenXIntrinsic::genx_fptosi_sat;

  auto Builder = getIRBuilder();
  unsigned Opcode = IsSigned ? Instruction::FPToSI : Instruction::FPToUI;

  Type *Ty = CI.getType();
  auto *F = CI.getCalledFunction();
  IGC_ASSERT(F);
  Type *Ty2 = IGCLLVM::getArg(*F, 0)->getType();
  OpType OpAndType{Opcode, Ty, Ty2};
  if (!EmulationFuns)
    vc::diagnose(CI.getContext(), "GenXEmulate", &CI,
                 "Emulation was called without initialization");

  auto Iter = EmulationFuns->find(OpAndType);
  if (Iter == EmulationFuns->end())
    vc::diagnose(CI.getContext(), "GenXEmulate", &CI,
                 "Unsupported instruction for emulation");

  SmallVector<Value *, 8> Args(CI.arg_operands());

  return Builder.CreateCall(Iter->second, Args);
}

Value *GenXEmulate::Emu64Expander::visitCallInst(CallInst &CI) {
  switch (GenXIntrinsic::getAnyIntrinsicID(&Inst)) {
  case GenXIntrinsic::genx_uutrunc_sat:
  case GenXIntrinsic::genx_sstrunc_sat:
  case GenXIntrinsic::genx_ustrunc_sat:
  case GenXIntrinsic::genx_sutrunc_sat:
    return visitGenxTrunc(CI);
  case GenXIntrinsic::genx_umin:
  case GenXIntrinsic::genx_umax:
  case GenXIntrinsic::genx_smin:
  case GenXIntrinsic::genx_smax:
    return visitGenxMinMax(CI);
  case GenXIntrinsic::genx_absi:
    return visitGenxAbsi(CI);
  case GenXIntrinsic::genx_suadd_sat:
  case GenXIntrinsic::genx_usadd_sat:
  case GenXIntrinsic::genx_uuadd_sat:
  case GenXIntrinsic::genx_ssadd_sat:
    return visitGenxAddSat(CI);
  case GenXIntrinsic::genx_fptosi_sat:
  case GenXIntrinsic::genx_fptoui_sat:
    return visitGenxFPToISat(CI);
  }
  return nullptr;
}
Value *GenXEmulate::Emu64Expander::ensureEmulated(Value *Val) {
  Instruction *Inst = dyn_cast<Instruction>(Val);
  if (!Inst)
    return Val;
  auto *Emulated = Emu64Expander(ST, *Inst, EmulationFuns).tryExpand();
  if (!Emulated)
    return Val;
  Inst->eraseFromParent();
  return Emulated;
}
Value *GenXEmulate::Emu64Expander::buildTernaryAddition(
    IRBuilder &Builder, Value &A, Value &B, Value &C,
    const Twine &Name) const {
  if (ST.hasAdd3Bfn()) {
    auto *Add3Funct = GenXIntrinsic::getGenXDeclaration(
        Inst.getModule(), GenXIntrinsic::genx_add3,
        {A.getType(), B.getType()});
    return Builder.CreateCall(Add3Funct, {&A, &B, &C}, "add3." + Name);
  }
  auto *SubH = Builder.CreateAdd(&A, &B, Name + ".part");
  return Builder.CreateAdd(SubH, &C, Name);
}
GenXEmulate::Emu64Expander::AddSubExtResult
GenXEmulate::Emu64Expander::buildAddc(Module *M, IRBuilder &Builder, Value &L,
                                      Value &R, const Twine &Prefix) {
  IGC_ASSERT(L.getType() == R.getType());

  auto *AddcFunct = GenXIntrinsic::getGenXDeclaration(
      M, GenXIntrinsic::genx_addc, {L.getType(), R.getType()});

  using namespace GenXIntrinsic::GenXResult;
  auto *AddcVal =
      Builder.CreateCall(AddcFunct, {&L, &R}, Prefix + "aggregate.");
  auto *Add =
      Builder.CreateExtractValue(AddcVal, {IdxAddc_Add}, Prefix + "add.");
  auto *Carry =
      Builder.CreateExtractValue(AddcVal, {IdxAddc_Carry}, Prefix + "carry.");
  return {Add, Carry};
}
GenXEmulate::Emu64Expander::AddSubExtResult
GenXEmulate::Emu64Expander::buildSubb(Module *M, IRBuilder &Builder, Value &L,
                                      Value &R, const Twine &Prefix) {

  IGC_ASSERT(L.getType() == R.getType());

  auto *SubbFunct = GenXIntrinsic::getGenXDeclaration(
      M, GenXIntrinsic::genx_subb, {L.getType(), R.getType()});

  using namespace GenXIntrinsic::GenXResult;
  auto *SubbVal =
      Builder.CreateCall(SubbFunct, {&L, &R}, Prefix + "aggregate.");
  auto *Sub =
      Builder.CreateExtractValue(SubbVal, {IdxSubb_Sub}, Prefix + "sub.");
  auto *Borrow = Builder.CreateExtractValue(SubbVal, {IdxSubb_Borrow},
                                            Prefix + "borrow.");
  return {Sub, Borrow};
}
Value *GenXEmulate::Emu64Expander::buildGeneralICmp(IRBuilder &Builder,
                                                    CmpInst::Predicate P,
                                                    bool IsPartialPredicate,
                                                    const LHSplit &Src0,
                                                    const LHSplit &Src1) {

  auto getEmulateCond1 = [](const CmpInst::Predicate P) {
    // For the unsigned predicates the first condition stays the same
    if (CmpInst::isUnsigned(P))
      return P;
    switch (P) {
    // transform signed predicate to an unsigned one
    case CmpInst::ICMP_SGT:
      return CmpInst::ICMP_UGT;
    case CmpInst::ICMP_SGE:
      return CmpInst::ICMP_UGE;
    case CmpInst::ICMP_SLT:
      return CmpInst::ICMP_ULT;
    case CmpInst::ICMP_SLE:
      return CmpInst::ICMP_ULE;
    default:
      llvm_unreachable("unexpected ICMP predicate for first condition");
    }
  };
  auto getEmulateCond2 = [](const CmpInst::Predicate P) {
    // discard EQ part
    switch (P) {
    case CmpInst::ICMP_SGT:
    case CmpInst::ICMP_SGE:
      return CmpInst::ICMP_SGT;
    case CmpInst::ICMP_SLT:
    case CmpInst::ICMP_SLE:
      return CmpInst::ICMP_SLT;
    case CmpInst::ICMP_UGT:
    case CmpInst::ICMP_UGE:
      return CmpInst::ICMP_UGT;
    case CmpInst::ICMP_ULT:
    case CmpInst::ICMP_ULE:
      return CmpInst::ICMP_ULT;
    default:
      llvm_unreachable("unexpected ICMP predicate for second condition");
    }
  };

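  // The relational predicates below are assembled from 32-bit half
  // compares, e.g.:
  //   src0 <u src1  <=>  (src0.hi == src1.hi && src0.lo <u src1.lo) ||
  //                      (src0.hi <u src1.hi)
  // Only the high-part compare (the second condition) keeps the signedness
  // of the original predicate; the low halves always compare as unsigned.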
  std::pair<Value *, Value *> ResultParts = {};
  switch (P) {
  case CmpInst::ICMP_EQ: {
    auto *T0 = Builder.CreateICmpEQ(Src0.Lo, Src1.Lo);
    auto *T1 = Builder.CreateICmpEQ(Src0.Hi, Src1.Hi);
    ResultParts = {T0, T1};
    break;
  }
  case CmpInst::ICMP_NE: {
    auto *T0 = Builder.CreateICmpNE(Src0.Lo, Src1.Lo);
    auto *T1 = Builder.CreateICmpNE(Src0.Hi, Src1.Hi);
    ResultParts = {T0, T1};
    break;
  }
  default: {
    CmpInst::Predicate EmuP1 = getEmulateCond1(P);
    CmpInst::Predicate EmuP2 = getEmulateCond2(P);
    auto *T0 = Builder.CreateICmp(EmuP1, Src0.Lo, Src1.Lo);
    auto *T1 = Builder.CreateICmpEQ(Src0.Hi, Src1.Hi);
    auto *T2 = Builder.CreateAnd(T1, T0);
    auto *T3 = Builder.CreateICmp(EmuP2, Src0.Hi, Src1.Hi);
    ResultParts = {T2, T3};
    break;
  }
  }
  auto ResultCond = (P == CmpInst::ICMP_EQ) ? Instruction::BinaryOps::And
                                            : Instruction::BinaryOps::Or;
  if (!IsPartialPredicate || !OptConvertPartialPredicates) {
    return Builder.CreateBinOp(
        ResultCond, ResultParts.first, ResultParts.second,
        "int_emu.cmp." + CmpInst::getPredicateName(P) + ".");
  }
  // Note:
  // The reason for doing this conversion is that our backend has no
  // convenient way to represent partial updates of predicates with anything
  // except for icmp instructions. In the current codebase we are unable to
  // create a proper visa for the following case ("pseudo" IR):
  //   bale {
  //     %ne1 = or <8 x i1> %a, %b
  //     %j = call <16 x i1> wrpredregion(<16 x i1> undef, <8 x i1> %ne1, i32 0)
  //   }
  //   bale {
  //     %ne2 = or <8 x i1> %c, %d
  //     %joined = call <16 x i1> wrpredregion(<16 x i1> %j, <8 x i1> %ne2, i32 8)
  //   }
  // As such, we convert such cases to the following sequence: 2xsel->or->cmp
  ConstantEmitter K(Src0.Lo);
  auto *L = Builder.CreateSelect(ResultParts.first, K.getOnes(), K.getZero());
  auto *R = Builder.CreateSelect(ResultParts.second, K.getOnes(), K.getZero());
  auto *IPred = Builder.CreateBinOp(ResultCond, L, R,
                                    "int_emu.cmp.part.int." +
                                        CmpInst::getPredicateName(P) + ".");
  return Builder.CreateICmpEQ(IPred, K.getOnes(),
                              "int_emu.cmp.part.i1" +
                                  CmpInst::getPredicateName(P) + ".");
}
Value *GenXEmulate::Emu64Expander::buildRightShift(IVSplitter &SplitBuilder,
                                                   BinaryOperator &Op) {
  auto Builder = getIRBuilder();

  llvm::SmallVector<uint32_t, 8> ShaVals;
  if (getConstantUI32Values(Op.getOperand(1), ShaVals)) {
    auto *Result = tryOptimizedShr(Builder, SplitBuilder, Op, ShaVals);
    if (Result)
      return Result;
  }
  return buildGenericRShift(Builder, SplitBuilder, Op);
}
Value *GenXEmulate::Emu64Expander::tryOptimizedShr(IRBuilder &Builder,
                                                   IVSplitter &SplitBuilder,
                                                   BinaryOperator &Op,
                                                   ArrayRef<uint32_t> Sa) {
  auto Operand = SplitBuilder.splitOperandLoHi(0);
  Value *LoPart{};
  Value *HiPart{};

  ConstantEmitter K(Operand.Lo);

  bool IsLogical = Op.getOpcode() == Instruction::LShr;

  if (std::all_of(Sa.begin(), Sa.end(), LessThan32())) {
    if (std::find(Sa.begin(), Sa.end(), 0) != Sa.end()) {
      // TODO: for now, we bail out if zero is encountered. Theoretically
      // we could mask out potentially poisoned values by inserting a
      // [cmp/select] pair at the end of the if branch, but for now bailing
      // out is the safer choice
      return nullptr;
    }
    auto *ShiftA = ConstantDataVector::get(Builder.getContext(), Sa);
    auto *Lo1 = Builder.CreateLShr(Operand.Lo, ShiftA);
    auto *Hi = (IsLogical) ? Builder.CreateLShr(Operand.Hi, ShiftA)
                           : Builder.CreateAShr(Operand.Hi, ShiftA);
    auto *C32 = K.getSplat(32);
    auto *CShift = ConstantExpr::getSub(C32, ShiftA);
    auto *Lo2 = Builder.CreateShl(Operand.Hi, CShift);
    LoPart = Builder.CreateOr(Lo1, Lo2);
    HiPart = Hi;
  } else if (std::all_of(Sa.begin(), Sa.end(), Equals32())) {
    LoPart = Operand.Hi;
    if (IsLogical) {
      HiPart = K.getZero();
    } else {
      auto *C31 = K.getSplat(31);
      HiPart = Builder.CreateAShr(Operand.Hi, C31);
    }
  } else if (std::all_of(Sa.begin(), Sa.end(), GreaterThan32())) {
    auto *C32 = K.getSplat(32);
    auto *CRawShift = ConstantDataVector::get(Builder.getContext(), Sa);
    auto *CShift = ConstantExpr::getSub(CRawShift, C32);
    if (IsLogical) {
      LoPart = Builder.CreateLShr(Operand.Hi, CShift);
      HiPart = K.getZero();
    } else {
      auto *C31 = K.getSplat(31);
      LoPart = Builder.CreateAShr(Operand.Hi, CShift);
      HiPart = Builder.CreateAShr(Operand.Hi, C31);
    }
  } else {
    return nullptr;
  }
  IGC_ASSERT_MESSAGE(LoPart && HiPart, "could not construct optimized shr");
  return SplitBuilder.combineLoHiSplit(
      {LoPart, HiPart}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Op.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::tryOptimizedShl(IRBuilder &Builder,
                                                   IVSplitter &SplitBuilder,
                                                   BinaryOperator &Op,
                                                   ArrayRef<uint32_t> Sa) {
  auto Operand = SplitBuilder.splitOperandLoHi(0);
  Value *LoPart{};
  Value *HiPart{};

  ConstantEmitter K(Operand.Lo);

  if (std::all_of(Sa.begin(), Sa.end(), LessThan32())) {
    if (std::find(Sa.begin(), Sa.end(), 0) != Sa.end()) {
      // TODO: for now we bail out if a zero shift amount is encountered.
      // Theoretically we could mask out the potentially poisoned values by
      // inserting a [cmp/select] pair at the end of this branch, but bailing
      // out is the safer choice for now.
      return nullptr;
    }
    auto *CRawShift = ConstantDataVector::get(Builder.getContext(), Sa);
    LoPart = Builder.CreateShl(Operand.Lo, CRawShift);
    auto *C32 = K.getSplat(32);
    auto *CShift = ConstantExpr::getSub(C32, CRawShift);
    auto *Hi1 = Builder.CreateShl(Operand.Hi, CRawShift);
    auto *Hi2 = Builder.CreateLShr(Operand.Lo, CShift);
    HiPart = Builder.CreateOr(Hi1, Hi2);
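    // Example (all shift amounts equal 8): the result is
    //   Hi' = (Hi << 8) | (Lo >> 24),  Lo' = Lo << 8
    // i.e. the top 8 bits of Lo move into the bottom of Hi.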
  } else if (std::all_of(Sa.begin(), Sa.end(), Equals32())) {
    LoPart = K.getZero();
    HiPart = Operand.Lo;
  } else if (std::all_of(Sa.begin(), Sa.end(), GreaterThan32())) {
    LoPart = K.getZero();
    auto *C32 = K.getSplat(32);
    auto *CRawShift = ConstantDataVector::get(Builder.getContext(), Sa);
    auto *CShift = ConstantExpr::getSub(CRawShift, C32);
    HiPart = Builder.CreateShl(Operand.Lo, CShift);
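    // Example (all shift amounts equal 40): Lo' = 0 and Hi' = Lo << 8; every
    // surviving bit originates from the low half of the input.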
  } else {
    return nullptr;
  }
  IGC_ASSERT_MESSAGE(LoPart && HiPart, "could not construct optimized shl");
  return SplitBuilder.combineLoHiSplit(
      {LoPart, HiPart}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Op.getType()->isIntegerTy());
}
Value *GenXEmulate::Emu64Expander::buildGenericRShift(IRBuilder &Builder,
                                                      IVSplitter &SplitBuilder,
                                                      BinaryOperator &Op) {

  auto L = SplitBuilder.splitOperandLoHi(0);
  auto R = SplitBuilder.splitOperandLoHi(1);

  auto SI = constructShiftInfo(Builder, R.Lo);
  ConstantEmitter K(L.Lo);

  // Logical Shift Right
  // 1. Calculate MASK1. MASK1 is 0 when the shift is >= 32 (large shift)
  // 2. Calculate MASK0. MASK0 is 0 iff the shift is 0
  // 3. Calculate the high part:
  //    [(L.Hi *LSR* SHA) *AND* MASK1], "&" discards the result for a large
  //    shift
  // 4. Calculate the low part as the *OR* of:
  //    [(L.Hi & MASK0) *SHL* (32 - SHA)] & MASK1, bits shifted out of the HI
  //    part into LOW
  //    [L.Hi *LSR* (SHA - 32)] & ~MASK1, for a large shift all bits land in
  //    LOW
  //    [(L.Lo *LSR* SHA) *AND* MASK1], "&" discards the result for a large
  //    shift
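  // Example (SHA = 8, logical): MASK1 = ~0 and MASK0 = ~0, giving
  //   Lo = (L.Lo >> 8) | (L.Hi << 24) and Hi = L.Hi >> 8.
  // Example (SHA = 40): MASK1 = 0, so Lo = L.Hi >> 8 and Hi = 0.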
  auto *Lo = buildPartialRShift(Builder, L.Lo, L.Hi, SI);
  auto *Hi = Builder.CreateAnd(Builder.CreateLShr(L.Hi, SI.Sha), SI.Mask1);

  bool IsLogical = Op.getOpcode() == Instruction::LShr;
  if (!IsLogical) {
    // Arithmetic Shift Right
    // Do all the steps from the Logical Shift, then:
    // 5. SignedMask = L.Hi *ASR* 31
    //    HIPART |= (SignedMask *SHL* (SH32 & MASK1)) & MASK0
    //      => the full SignedMask is applied for large shifts
    //    LOPART |= (SignedMask *SHL* (63 - SHA)) & ~MASK1
    //      => not applied for small shifts
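    // Example (SHA = 40): MASK1 = 0, so HIPART |= SignedMask (the high half
    // becomes all sign bits) and LOPART |= SignedMask << 23, sign-filling
    // the bits vacated by [L.Hi >> 8] in the low half.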
    auto *SignedMask =
        Builder.CreateAShr(L.Hi, K.getSplat(31), "int_emu.asr.sign.");

    auto *AuxHi =
        Builder.CreateShl(SignedMask, Builder.CreateAnd(SI.Sh32, SI.Mask1));
    AuxHi = Builder.CreateAnd(AuxHi, SI.Mask0);

    auto *AuxLo = Builder.CreateShl(SignedMask,
                                    Builder.CreateSub(K.getSplat(63), SI.Sha));
    AuxLo = Builder.CreateAnd(AuxLo, Builder.CreateNot(SI.Mask1));

    Lo = Builder.CreateOr(Lo, AuxLo);
    Hi = Builder.CreateOr(Hi, AuxHi);
  }
  return SplitBuilder.combineLoHiSplit(
      {Lo, Hi}, Twine("int_emu.") + Op.getOpcodeName() + ".",
      Op.getType()->isIntegerTy());
}

Value *GenXEmulate::Emu64Expander::buildPartialRShift(IRBuilder &B,
                                                      Value *SrcLo,
                                                      Value *SrcHi,
                                                      const ShiftInfo &SI) {
  ConstantEmitter K(SrcLo);
  // Calculate the part that moves from the hi half into the low one
  auto *TmpH1 = B.CreateShl(B.CreateAnd(SrcHi, SI.Mask0), SI.Sh32);
  TmpH1 = B.CreateAnd(TmpH1, SI.Mask1);
  // TmpH2 covers the case when the shift amount is greater than 32
  auto *TmpH2 = B.CreateLShr(SrcHi, B.CreateSub(SI.Sha, K.getSplat(32)));
  // Mask out TmpH2 if the shift is less than 32
  TmpH2 = B.CreateAnd(TmpH2, B.CreateNot(SI.Mask1));
  // Mask1 ensures that the result is discarded if the shift is large
  auto *TmpL = B.CreateAnd(B.CreateLShr(SrcLo, SI.Sha), SI.Mask1);

  return B.CreateOr(B.CreateOr(TmpL, TmpH1), TmpH2, "int_emu.shif.r.lo.");
}
GenXEmulate::Emu64Expander::ShiftInfo
GenXEmulate::Emu64Expander::constructShiftInfo(IRBuilder &B, Value *RawSha) {
  ConstantEmitter K(RawSha);

  auto *Sha = B.CreateAnd(RawSha, K.getSplat(0x3f), "int_emu.shift.sha.");
  auto *Sh32 = B.CreateSub(K.getSplat(32), Sha, "int_emu.shift.sh32.");
  auto *FlagLargeShift = B.CreateICmpUGE(Sha, K.getSplat(32));
  auto *FlagZeroShift = B.CreateICmpEQ(Sha, K.getSplat(0));

  auto *Mask1 = B.CreateSelect(FlagLargeShift, K.getZero(), K.getOnes());
  auto *Mask0 = B.CreateSelect(FlagZeroShift, K.getZero(), K.getOnes());

  return ShiftInfo{Sha, Sh32, Mask1, Mask0};
}
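// Example: a raw shift amount of 8 yields Sha = 8, Sh32 = 24, Mask1 = ~0 and
// Mask0 = ~0; a raw amount of 40 yields Mask1 = 0 (large shift), while a
// zero amount yields Mask0 = 0.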
bool GenXEmulate::Emu64Expander::hasStrictEmulationRequirement(
    Instruction *Inst) {
  auto isI64Type = [](Type *T) {
    if (T->isVectorTy())
      T = cast<VectorType>(T)->getElementType();
    return T->isIntegerTy(64);
  };
  bool Ret64 = isI64Type(Inst->getType());
  bool Uses64 = false;
  for (unsigned i = 0; i < Inst->getNumOperands(); ++i)
    Uses64 |= isI64Type(Inst->getOperand(i)->getType());
  // If the instruction does not touch i64 at all, it is free to go
  if (!Ret64 && !Uses64 && !isI64PointerOp(*Inst))
    return false;

  // Now things become (a little) complicated. Currently we ignore some
  // instruction/intrinsic types, since the finalizer accepts them. More
  // specifically, everything that is lowered to a plain (non-converting)
  // mov is fine. Sends with i64 addresses seem to be fine too.

  // skip moves
  if (GenXIntrinsic::isWrRegion(Inst) || GenXIntrinsic::isRdRegion(Inst))
    return OptStricterRegions;

  // skip constants
  if (GenXIntrinsic::getAnyIntrinsicID(Inst) == GenXIntrinsic::genx_constanti)
    return OptStricterConst;

  switch (GenXIntrinsic::getAnyIntrinsicID(Inst)) {
  case GenXIntrinsic::genx_svm_scatter:
  case GenXIntrinsic::genx_svm_gather:
  case GenXIntrinsic::genx_svm_scatter4_scaled:
  case GenXIntrinsic::genx_svm_gather4_scaled:
  case GenXIntrinsic::genx_svm_block_st:
  case GenXIntrinsic::genx_svm_block_ld:
  case GenXIntrinsic::genx_svm_block_ld_unaligned:
    return OptStricterSVM;

  // TODO: not every atomic is covered here, more need to be added
  case GenXIntrinsic::genx_svm_atomic_add:
  case GenXIntrinsic::genx_svm_atomic_and:
  case GenXIntrinsic::genx_svm_atomic_cmpxchg:
  case GenXIntrinsic::genx_svm_atomic_dec:
  case GenXIntrinsic::genx_svm_atomic_fcmpwr:
  case GenXIntrinsic::genx_svm_atomic_fmax:
  case GenXIntrinsic::genx_svm_atomic_fmin:
  case GenXIntrinsic::genx_svm_atomic_imax:
  case GenXIntrinsic::genx_svm_atomic_imin:
  case GenXIntrinsic::genx_svm_atomic_inc:
  case GenXIntrinsic::genx_svm_atomic_max:
  case GenXIntrinsic::genx_svm_atomic_min:
  case GenXIntrinsic::genx_svm_atomic_or:
  case GenXIntrinsic::genx_svm_atomic_sub:
  case GenXIntrinsic::genx_svm_atomic_xchg:
  case GenXIntrinsic::genx_svm_atomic_xor:
    return OptStricterAtomic;

  case GenXIntrinsic::genx_oword_st:
  case GenXIntrinsic::genx_oword_ld:
  case GenXIntrinsic::genx_oword_ld_unaligned:
    return OptStricterOword;
  case GenXIntrinsic::genx_alloca:
    return OptStricterAlloc;
  case GenXIntrinsic::genx_faddr:
    return OptStricterFaddr;
  }

  switch (Inst->getOpcode()) {
  case Instruction::PtrToInt:
  case Instruction::IntToPtr: {
    const DataLayout &DL = Inst->getModule()->getDataLayout();
    if (!cast<CastInst>(Inst)->isNoopCast(DL))
      return OptStricterConverts;
    return false;
  }
  case Instruction::ICmp:
    return OptStrictChecksEnable;
  // skip bitcast and phi
  case Instruction::BitCast:
  case Instruction::PHI:
    return false;
  }
  return true;
}

void GenXEmulate::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
}

bool GenXEmulate::runOnModule(Module &M) {
  bool Changed = false;
  ST = &getAnalysis<TargetPassConfig>()
            .getTM<GenXTargetMachine>()
            .getGenXSubtarget();
  buildEmuFunCache(M);

  for (auto &F : M.getFunctionList())
    runOnFunction(F);

  Changed |= !ToErase.empty();
  processToEraseList(ToErase);

  auto IsOldEmulationFunction = [](const Function *F) {
    return F->getName().contains("__cm_intrinsic_impl_");
  };
  // Delete unused builtins, make used ones internal.
  for (auto I = M.begin(); I != M.end();) {
    Function &F = *I++;
    if (isEmulationFunction(&F) || IsOldEmulationFunction(&F)) {
      Changed = true;
      if (F.use_empty())
        F.eraseFromParent();
      else
        F.setLinkage(GlobalValue::InternalLinkage);
    }
  }

  if (!DiscracedList.empty()) {
    for (const auto *Insn : DiscracedList) {
      llvm::errs() << "I64EMU-FAILURE: " << *Insn << "\n";
    }
    report_fatal_error("int_emu: strict emulation requirements failure", false);
  }
  return Changed;
}

void GenXEmulate::runOnFunction(Function &F) {
  for (auto &BB : F.getBasicBlockList()) {
    for (auto I = BB.begin(); I != BB.end(); ++I) {
      Instruction *Inst = &*I;
      auto *NewVal = emulateInst(Inst);
      if (NewVal) {
        Inst->replaceAllUsesWith(NewVal);
        ToErase.push_back(Inst);
      }
    }
  }
}

Function *GenXEmulate::getEmulationFunction(const Instruction *Inst) const {
  unsigned Opcode = Inst->getOpcode();
  Type *Ty = Inst->getType();

  Type *Ty2 = nullptr;
  if (Inst->getNumOperands() > 0)
    Ty2 = Inst->getOperand(0)->getType();
  OpType OpAndType{Opcode, Ty, Ty2};

  auto Iter = EmulationFuns.find(OpAndType);
  if (Iter != EmulationFuns.end()) {
    LLVM_DEBUG(dbgs() << "Emulation function: " << Iter->second->getName()
                      << " shall be used for: " << *Inst << "\n");
    return Iter->second;
  }

  return nullptr;
}
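// For instance, a 64-bit sdiv is looked up under the key
// {Instruction::SDiv, i64 result type, i64 operand type}, which maps to the
// matching "__cm_intrinsic_impl_sdiv*" routine registered by
// buildEmuFunCache below (the exact name suffix depends on the BiF library).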

void GenXEmulate::buildEmuFunCache(Module &M) {
  EmulationFuns.clear();

  auto UpdateCacheIfMatch = [this](Function &F, StringRef PrefixToMatch,
                                   unsigned OpCode) {
    const auto &Name = F.getName();
    if (!Name.startswith(PrefixToMatch))
      return false;

    Type *Ty = F.getReturnType();
    Type *Ty2 = nullptr;
    if (F.arg_size() > 0)
      Ty2 = IGCLLVM::getArg(F, 0)->getType();
    IGC_ASSERT(EmulationFuns.find({OpCode, Ty, Ty2}) == EmulationFuns.end());
    EmulationFuns.insert({{OpCode, Ty, Ty2}, &F});
    return true;
  };

  for (Function &F : M.getFunctionList()) {
    if (!isEmulationFunction(&F))
      continue;
    for (auto &PrOp : DivRemPrefixes)
      UpdateCacheIfMatch(F, PrOp.Prefix, PrOp.Opcode);
    if (ST->emulateLongLong()) {
      for (auto &PrOp : EmulationFPConvertsPrefixes)
        UpdateCacheIfMatch(F, PrOp.Prefix, PrOp.Opcode);
    }
  }
}

Value *GenXEmulate::emulateInst(Instruction *Inst) {
  Function *EmuFn = getEmulationFunction(Inst);
  if (EmuFn) {
    IGC_ASSERT(isEmulationFunction(EmuFn));
    IGC_ASSERT_MESSAGE(!isa<CallInst>(Inst),
                       "call emulation not supported yet");
    llvm::IRBuilder<> Builder(Inst);
    SmallVector<Value *, 8> Args(Inst->operands());
    return Builder.CreateCall(EmuFn, Args);
  }
  IGC_ASSERT(ST);
  if (ST->emulateLongLong()) {
    Value *NewInst = Emu64Expander(*ST, *Inst, &EmulationFuns).tryExpand();
    if (!NewInst) {
#ifndef NDEBUG
      if (Emu64Expander::hasStrictEmulationRequirement(Inst)) {
        LLVM_DEBUG(dbgs() << "i64-emu::WARNING: instruction may require "
                          << "emulation: " << *Inst << "\n");
      }
#endif // NDEBUG
      if (OptStrictChecksEnable &&
          Emu64Expander::hasStrictEmulationRequirement(Inst)) {
        DiscracedList.push_back(Inst);
      }
    }

    return NewInst;
  }
  return nullptr;
}

Instruction *llvm::genx::emulateI64Operation(const GenXSubtarget *ST,
                                             Instruction *Inst,
                                             EmulationFlag AuxAction) {
  LLVM_DEBUG(dbgs() << "i64-emu::WARNING: direct emulation routine was "
                    << "called for " << *Inst << "\n");
  Instruction *NewInst = nullptr;

  if (!ST->hasLongLong()) {
    Value *EmulatedResult = GenXEmulate::Emu64Expander(*ST, *Inst).tryExpand();
    NewInst = cast_or_null<Instruction>(EmulatedResult);
    // If there was no explicit request to enable i64 emulation, report an
    // error
    if (NewInst && !ST->emulateLongLong() && OptStrictEmulationRequests) {
      report_fatal_error("int_emu: target does not support i64 types", false);
    }
  }

  // NewInst can be nullptr if the instruction does not need emulation
  // (like various casts)
  if (!NewInst) {
    // If EmulationFlag::RAUWE was requested, the caller expects that the
    // returned instruction can be safely used.
    if (AuxAction == EmulationFlag::RAUWE)
      return Inst; // return the original instruction
    return nullptr;
  }

  switch (AuxAction) {
  case EmulationFlag::RAUW:
    Inst->replaceAllUsesWith(NewInst);
    break;
  case EmulationFlag::RAUWE:
    Inst->replaceAllUsesWith(NewInst);
    Inst->eraseFromParent();
    break;
  case EmulationFlag::None:
    // do nothing
    break;
  }
  return NewInst;
}
char GenXEmulate::ID = 0;

namespace llvm {
void initializeGenXEmulatePass(PassRegistry &);
}
INITIALIZE_PASS_BEGIN(GenXEmulate, "GenXEmulate", "GenXEmulate", false, false)
INITIALIZE_PASS_END(GenXEmulate, "GenXEmulate", "GenXEmulate", false, false)

ModulePass *llvm::createGenXEmulatePass() {
  initializeGenXEmulatePass(*PassRegistry::getPassRegistry());
  return new GenXEmulate;
}

namespace {
class GenXEmulationImport : public ModulePass {
public:
  static char ID;

  explicit GenXEmulationImport() : ModulePass(ID) {}
  StringRef getPassName() const override { return "GenX Emulation BiF Import"; }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<GenXBackendConfig>();
  }
  bool runOnModule(Module &M) override {
    if (OptDbgOnlyDisableDivremEmulation)
      return false;
    const GenXSubtarget &ST = getAnalysis<TargetPassConfig>()
                                  .getTM<GenXTargetMachine>()
                                  .getGenXSubtarget();

    auto ModEmuFun =
        LoadEmuFunLib(M.getContext(), M.getDataLayout(), M.getTargetTriple());
    if (!ModEmuFun)
      return false;

    PurgeUnneededEmulationFunctions(*ModEmuFun, ST);

    if (Linker::linkModules(M, std::move(ModEmuFun)))
      report_fatal_error("Error linking emulation routines");

    return true;
  }

private:
  static bool IsLibraryFunction(const Function &F) {
    const auto &Name = F.getName();
    return Name.startswith(LibraryFunctionPrefix);
  }

  template <typename FilterFunction>
  static std::vector<Function *> selectEmulationFunctions(Module &M,
                                                          FilterFunction Flt) {
    std::vector<Function *> Result;
    auto &&Selected = make_filter_range(M.functions(), [&Flt](Function &F) {
      if (!IsLibraryFunction(F))
        return false;
      return Flt(F);
    });
    llvm::transform(Selected, std::back_inserter(Result),
                    [](Function &Fn) { return &Fn; });
    return Result;
  }

  static void PurgeNon64BitDivRemFunctions(Module &M) {
    auto ToErase = selectEmulationFunctions(M, [](Function &F) {
      if (F.getReturnType()->getScalarType()->isIntegerTy(64))
        return false;
      return std::any_of(DivRemPrefixes.begin(), DivRemPrefixes.end(),
                         [&F](const auto &PrOp) {
                           return F.getName().startswith(PrOp.Prefix);
                         });
    });
    processToEraseList(ToErase);
  }

  static void PurgeFPConversionFunctions(Module &M, bool TargetHasFP64,
                                         bool TargetHasI64) {
    auto ToErase = selectEmulationFunctions(M, [=](Function &F) {
      // Skip non-converts
      if (std::none_of(EmulationFPConvertsPrefixes.begin(),
                       EmulationFPConvertsPrefixes.end(),
                       [&F](const auto &PrOp) {
                         return F.getName().startswith(PrOp.Prefix);
                       }))
        return false;

      bool IsFP64Operation =
          std::any_of(F.arg_begin(), F.arg_end(),
                      [](const auto &Arg) {
                        return Arg.getType()->getScalarType()->isDoubleTy();
                      }) ||
          F.getReturnType()->getScalarType()->isDoubleTy();

      // If the target has no I64 support but does have FP64, the fp64
      // converts must be preserved
      if (!TargetHasI64 && TargetHasFP64 && IsFP64Operation) {
        return false;
      }

      // If the target supports neither I64 nor FP64, the fp64 converts must
      // be removed
      if (!TargetHasI64 && !TargetHasFP64 && IsFP64Operation) {
        return true;
      }

      return TargetHasI64;
    });
    processToEraseList(ToErase);
  }
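  // In summary: when the target natively supports I64, every convert routine
  // is erased; when it does not, the routines are kept, except that the fp64
  // flavors are also erased if the target lacks FP64 support.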

  static void PurgeUnneededEmulationFunctions(Module &ModEmuFun,
                                              const GenXSubtarget &ST) {
    if (ST.hasIntDivRem32())
      PurgeNon64BitDivRemFunctions(ModEmuFun);

    PurgeFPConversionFunctions(ModEmuFun, ST.hasFP64(), !ST.emulateLongLong());
  }

  static void DeriveRoundingAttributes(Function &F) {
    IGC_ASSERT(IsLibraryFunction(F));

    const auto &Name = F.getName();
    if (Name.contains(RoundingRtzSuffix)) {
      F.addFnAttr(genx::FunctionMD::CMFloatControl,
                  std::to_string(VCRoundingRTZ));
      return;
    }
    if (Name.contains(RoundingRteSuffix)) {
      F.addFnAttr(genx::FunctionMD::CMFloatControl,
                  std::to_string(VCRoundingRTE));
      return;
    }
    if (Name.contains(RoundingRtpSuffix)) {
      F.addFnAttr(genx::FunctionMD::CMFloatControl,
                  std::to_string(VCRoundingRTP));
      return;
    }
    if (Name.contains(RoundingRtnSuffix)) {
      F.addFnAttr(genx::FunctionMD::CMFloatControl,
                  std::to_string(VCRoundingRTN));
      return;
    }
  }
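  // For example, a routine whose name contains "__rtz_" gets the
  // CMFloatControl attribute set to "48" (VCRoundingRTZ == 3 << 4); the
  // backend is expected to translate this into round-toward-zero control
  // bits when emitting the kernel.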

  std::unique_ptr<Module> LoadEmuFunLib(LLVMContext &Ctx, const DataLayout &DL,
                                        const std::string &Triple) {
    MemoryBufferRef EmulationBiFBuffer =
        getAnalysis<GenXBackendConfig>().getBiFModule(BiFKind::VCEmulation);

    // NOTE: to simplify LIT testing it is legal to have an empty buffer
    if (!EmulationBiFBuffer.getBufferSize())
      return nullptr;

    auto BiFModule = vc::getBiFModuleOrReportError(EmulationBiFBuffer, Ctx);

    BiFModule->setDataLayout(DL);
    BiFModule->setTargetTriple(Triple);

    for (Function &F : *BiFModule) {
      if (!IsLibraryFunction(F))
        continue;

      F.addFnAttr(genx::FunctionMD::VCEmulationRoutine);
      DeriveRoundingAttributes(F);
    }

    return BiFModule;
  }
};
} // namespace

char GenXEmulationImport::ID = 0;

namespace llvm {
void initializeGenXEmulationImportPass(PassRegistry &);
}
INITIALIZE_PASS_BEGIN(GenXEmulationImport, "GenXEmulationImport",
                      "GenXEmulationImport", false, false)
INITIALIZE_PASS_END(GenXEmulationImport, "GenXEmulationImport",
                    "GenXEmulationImport", false, false)
ModulePass *llvm::createGenXEmulationImportPass() {
  initializeGenXEmulationImportPass(*PassRegistry::getPassRegistry());
  return new GenXEmulationImport;
}