10b57cec5SDimitry Andric //===- AMDGPULibCalls.cpp -------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This file does AMD library function optimizations.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "AMDGPU.h"
150b57cec5SDimitry Andric #include "AMDGPULibFunc.h"
16e8d8bef9SDimitry Andric #include "GCNSubtarget.h"
175f757f3fSDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
185f757f3fSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
195f757f3fSDimitry Andric #include "llvm/Analysis/ValueTracking.h"
205f757f3fSDimitry Andric #include "llvm/IR/AttributeMask.h"
215f757f3fSDimitry Andric #include "llvm/IR/Dominators.h"
22fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h"
231fd87a68SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
241fd87a68SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
255f757f3fSDimitry Andric #include "llvm/IR/PatternMatch.h"
26480093f4SDimitry Andric #include "llvm/InitializePasses.h"
27bdd1243dSDimitry Andric #include <cmath>
28480093f4SDimitry Andric 
29480093f4SDimitry Andric #define DEBUG_TYPE "amdgpu-simplifylib"
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric using namespace llvm;
325f757f3fSDimitry Andric using namespace llvm::PatternMatch;
330b57cec5SDimitry Andric 
340b57cec5SDimitry Andric static cl::opt<bool> EnablePreLink("amdgpu-prelink",
350b57cec5SDimitry Andric   cl::desc("Enable pre-link mode optimizations"),
360b57cec5SDimitry Andric   cl::init(false),
370b57cec5SDimitry Andric   cl::Hidden);
380b57cec5SDimitry Andric 
390b57cec5SDimitry Andric static cl::list<std::string> UseNative("amdgpu-use-native",
400b57cec5SDimitry Andric   cl::desc("Comma separated list of functions to replace with native, or all"),
410b57cec5SDimitry Andric   cl::CommaSeparated, cl::ValueOptional,
420b57cec5SDimitry Andric   cl::Hidden);
430b57cec5SDimitry Andric 
// Short spellings of the numbers:: constants that appear in the
// constant-folding tables further down in this file.
#define MATH_PI numbers::pi
#define MATH_E numbers::e
#define MATH_SQRT2 numbers::sqrt2
#define MATH_SQRT1_2 numbers::inv_sqrt2
480b57cec5SDimitry Andric 
490b57cec5SDimitry Andric namespace llvm {
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric class AMDGPULibCalls {
520b57cec5SDimitry Andric private:
535f757f3fSDimitry Andric   const TargetLibraryInfo *TLInfo = nullptr;
545f757f3fSDimitry Andric   AssumptionCache *AC = nullptr;
555f757f3fSDimitry Andric   DominatorTree *DT = nullptr;
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric   typedef llvm::AMDGPULibFunc FuncInfo;
580b57cec5SDimitry Andric 
595f757f3fSDimitry Andric   bool UnsafeFPMath = false;
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric   // -fuse-native.
620b57cec5SDimitry Andric   bool AllNative = false;
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric   bool useNativeFunc(const StringRef F) const;
650b57cec5SDimitry Andric 
66349cc55cSDimitry Andric   // Return a pointer (pointer expr) to the function if function definition with
670b57cec5SDimitry Andric   // "FuncName" exists. It may create a new function prototype in pre-link mode.
680b57cec5SDimitry Andric   FunctionCallee getFunction(Module *M, const FuncInfo &fInfo);
690b57cec5SDimitry Andric 
70349cc55cSDimitry Andric   bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo);
710b57cec5SDimitry Andric 
720b57cec5SDimitry Andric   bool TDOFold(CallInst *CI, const FuncInfo &FInfo);
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric   /* Specialized optimizations */
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric   // pow/powr/pown
775f757f3fSDimitry Andric   bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric   // rootn
805f757f3fSDimitry Andric   bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
810b57cec5SDimitry Andric 
820b57cec5SDimitry Andric   // -fuse-native for sincos
830b57cec5SDimitry Andric   bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo);
840b57cec5SDimitry Andric 
850b57cec5SDimitry Andric   // evaluate calls if calls' arguments are constants.
865f757f3fSDimitry Andric   bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1,
875f757f3fSDimitry Andric                               Constant *copr0, Constant *copr1);
88349cc55cSDimitry Andric   bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo);
890b57cec5SDimitry Andric 
905f757f3fSDimitry Andric   /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value
915f757f3fSDimitry Andric   /// of cos, sincos call).
925f757f3fSDimitry Andric   std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg,
935f757f3fSDimitry Andric                                                      FastMathFlags FMF,
945f757f3fSDimitry Andric                                                      IRBuilder<> &B,
955f757f3fSDimitry Andric                                                      FunctionCallee Fsincos);
960b57cec5SDimitry Andric 
970b57cec5SDimitry Andric   // sin/cos
985f757f3fSDimitry Andric   bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo);
990b57cec5SDimitry Andric 
1000b57cec5SDimitry Andric   // __read_pipe/__write_pipe
101349cc55cSDimitry Andric   bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
102349cc55cSDimitry Andric                             const FuncInfo &FInfo);
1030b57cec5SDimitry Andric 
104349cc55cSDimitry Andric   // Get a scalar native builtin single argument FP function
1050b57cec5SDimitry Andric   FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo);
1060b57cec5SDimitry Andric 
1075f757f3fSDimitry Andric   /// Substitute a call to a known libcall with an intrinsic call. If \p
1085f757f3fSDimitry Andric   /// AllowMinSize is true, allow the replacement in a minsize function.
1095f757f3fSDimitry Andric   bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
1105f757f3fSDimitry Andric                                          bool AllowMinSizeF32 = false,
1115f757f3fSDimitry Andric                                          bool AllowF64 = false,
1125f757f3fSDimitry Andric                                          bool AllowStrictFP = false);
1135f757f3fSDimitry Andric   void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
1145f757f3fSDimitry Andric                                          Intrinsic::ID IntrID);
1155f757f3fSDimitry Andric 
1165f757f3fSDimitry Andric   bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI,
1175f757f3fSDimitry Andric                                             Intrinsic::ID IntrID,
1185f757f3fSDimitry Andric                                             bool AllowMinSizeF32 = false,
1195f757f3fSDimitry Andric                                             bool AllowF64 = false,
1205f757f3fSDimitry Andric                                             bool AllowStrictFP = false);
1215f757f3fSDimitry Andric 
1220b57cec5SDimitry Andric protected:
1235f757f3fSDimitry Andric   bool isUnsafeMath(const FPMathOperator *FPOp) const;
1245f757f3fSDimitry Andric   bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const;
1250b57cec5SDimitry Andric 
1265f757f3fSDimitry Andric   bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const;
1270b57cec5SDimitry Andric 
replaceCall(Instruction * I,Value * With)1285f757f3fSDimitry Andric   static void replaceCall(Instruction *I, Value *With) {
1295f757f3fSDimitry Andric     I->replaceAllUsesWith(With);
1305f757f3fSDimitry Andric     I->eraseFromParent();
1315f757f3fSDimitry Andric   }
1325f757f3fSDimitry Andric 
replaceCall(FPMathOperator * I,Value * With)1335f757f3fSDimitry Andric   static void replaceCall(FPMathOperator *I, Value *With) {
1345f757f3fSDimitry Andric     replaceCall(cast<Instruction>(I), With);
1350b57cec5SDimitry Andric   }
1360b57cec5SDimitry Andric 
1370b57cec5SDimitry Andric public:
AMDGPULibCalls()1385f757f3fSDimitry Andric   AMDGPULibCalls() {}
1390b57cec5SDimitry Andric 
1405f757f3fSDimitry Andric   bool fold(CallInst *CI);
1410b57cec5SDimitry Andric 
1425f757f3fSDimitry Andric   void initFunction(Function &F, FunctionAnalysisManager &FAM);
1430b57cec5SDimitry Andric   void initNativeFuncs();
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric   // Replace a normal math function call with that native version
1460b57cec5SDimitry Andric   bool useNative(CallInst *CI);
1470b57cec5SDimitry Andric };
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric } // end llvm namespace
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric template <typename IRB>
CreateCallEx(IRB & B,FunctionCallee Callee,Value * Arg,const Twine & Name="")1520b57cec5SDimitry Andric static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg,
1530b57cec5SDimitry Andric                               const Twine &Name = "") {
1540b57cec5SDimitry Andric   CallInst *R = B.CreateCall(Callee, Arg, Name);
1550b57cec5SDimitry Andric   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
1560b57cec5SDimitry Andric     R->setCallingConv(F->getCallingConv());
1570b57cec5SDimitry Andric   return R;
1580b57cec5SDimitry Andric }
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric template <typename IRB>
CreateCallEx2(IRB & B,FunctionCallee Callee,Value * Arg1,Value * Arg2,const Twine & Name="")1610b57cec5SDimitry Andric static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1,
1620b57cec5SDimitry Andric                                Value *Arg2, const Twine &Name = "") {
1630b57cec5SDimitry Andric   CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name);
1640b57cec5SDimitry Andric   if (Function *F = dyn_cast<Function>(Callee.getCallee()))
1650b57cec5SDimitry Andric     R->setCallingConv(F->getCallingConv());
1660b57cec5SDimitry Andric   return R;
1670b57cec5SDimitry Andric }
1680b57cec5SDimitry Andric 
getPownType(FunctionType * FT)1695f757f3fSDimitry Andric static FunctionType *getPownType(FunctionType *FT) {
1705f757f3fSDimitry Andric   Type *PowNExpTy = Type::getInt32Ty(FT->getContext());
1715f757f3fSDimitry Andric   if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType()))
1725f757f3fSDimitry Andric     PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount());
1735f757f3fSDimitry Andric 
1745f757f3fSDimitry Andric   return FunctionType::get(FT->getReturnType(),
1755f757f3fSDimitry Andric                            {FT->getParamType(0), PowNExpTy}, false);
1765f757f3fSDimitry Andric }
1775f757f3fSDimitry Andric 
//  Data structures for table-driven optimizations.
//  FuncTbl works for both f32 and f64 functions with 1 input argument

/// One known point of a single-argument function, stored as {result, input}.
/// Field order matters: the tables below rely on aggregate initialization.
struct TableEntry {
  double result; // function value at `input`
  double input;  // argument the entry applies to
};
1850b57cec5SDimitry Andric 
1860b57cec5SDimitry Andric /* a list of {result, input} */
1870b57cec5SDimitry Andric static const TableEntry tbl_acos[] = {
1880b57cec5SDimitry Andric   {MATH_PI / 2.0, 0.0},
1890b57cec5SDimitry Andric   {MATH_PI / 2.0, -0.0},
1900b57cec5SDimitry Andric   {0.0, 1.0},
1910b57cec5SDimitry Andric   {MATH_PI, -1.0}
1920b57cec5SDimitry Andric };
1930b57cec5SDimitry Andric static const TableEntry tbl_acosh[] = {
1940b57cec5SDimitry Andric   {0.0, 1.0}
1950b57cec5SDimitry Andric };
1960b57cec5SDimitry Andric static const TableEntry tbl_acospi[] = {
1970b57cec5SDimitry Andric   {0.5, 0.0},
1980b57cec5SDimitry Andric   {0.5, -0.0},
1990b57cec5SDimitry Andric   {0.0, 1.0},
2000b57cec5SDimitry Andric   {1.0, -1.0}
2010b57cec5SDimitry Andric };
2020b57cec5SDimitry Andric static const TableEntry tbl_asin[] = {
2030b57cec5SDimitry Andric   {0.0, 0.0},
2040b57cec5SDimitry Andric   {-0.0, -0.0},
2050b57cec5SDimitry Andric   {MATH_PI / 2.0, 1.0},
2060b57cec5SDimitry Andric   {-MATH_PI / 2.0, -1.0}
2070b57cec5SDimitry Andric };
2080b57cec5SDimitry Andric static const TableEntry tbl_asinh[] = {
2090b57cec5SDimitry Andric   {0.0, 0.0},
2100b57cec5SDimitry Andric   {-0.0, -0.0}
2110b57cec5SDimitry Andric };
2120b57cec5SDimitry Andric static const TableEntry tbl_asinpi[] = {
2130b57cec5SDimitry Andric   {0.0, 0.0},
2140b57cec5SDimitry Andric   {-0.0, -0.0},
2150b57cec5SDimitry Andric   {0.5, 1.0},
2160b57cec5SDimitry Andric   {-0.5, -1.0}
2170b57cec5SDimitry Andric };
2180b57cec5SDimitry Andric static const TableEntry tbl_atan[] = {
2190b57cec5SDimitry Andric   {0.0, 0.0},
2200b57cec5SDimitry Andric   {-0.0, -0.0},
2210b57cec5SDimitry Andric   {MATH_PI / 4.0, 1.0},
2220b57cec5SDimitry Andric   {-MATH_PI / 4.0, -1.0}
2230b57cec5SDimitry Andric };
2240b57cec5SDimitry Andric static const TableEntry tbl_atanh[] = {
2250b57cec5SDimitry Andric   {0.0, 0.0},
2260b57cec5SDimitry Andric   {-0.0, -0.0}
2270b57cec5SDimitry Andric };
2280b57cec5SDimitry Andric static const TableEntry tbl_atanpi[] = {
2290b57cec5SDimitry Andric   {0.0, 0.0},
2300b57cec5SDimitry Andric   {-0.0, -0.0},
2310b57cec5SDimitry Andric   {0.25, 1.0},
2320b57cec5SDimitry Andric   {-0.25, -1.0}
2330b57cec5SDimitry Andric };
2340b57cec5SDimitry Andric static const TableEntry tbl_cbrt[] = {
2350b57cec5SDimitry Andric   {0.0, 0.0},
2360b57cec5SDimitry Andric   {-0.0, -0.0},
2370b57cec5SDimitry Andric   {1.0, 1.0},
2380b57cec5SDimitry Andric   {-1.0, -1.0},
2390b57cec5SDimitry Andric };
2400b57cec5SDimitry Andric static const TableEntry tbl_cos[] = {
2410b57cec5SDimitry Andric   {1.0, 0.0},
2420b57cec5SDimitry Andric   {1.0, -0.0}
2430b57cec5SDimitry Andric };
2440b57cec5SDimitry Andric static const TableEntry tbl_cosh[] = {
2450b57cec5SDimitry Andric   {1.0, 0.0},
2460b57cec5SDimitry Andric   {1.0, -0.0}
2470b57cec5SDimitry Andric };
2480b57cec5SDimitry Andric static const TableEntry tbl_cospi[] = {
2490b57cec5SDimitry Andric   {1.0, 0.0},
2500b57cec5SDimitry Andric   {1.0, -0.0}
2510b57cec5SDimitry Andric };
2520b57cec5SDimitry Andric static const TableEntry tbl_erfc[] = {
2530b57cec5SDimitry Andric   {1.0, 0.0},
2540b57cec5SDimitry Andric   {1.0, -0.0}
2550b57cec5SDimitry Andric };
2560b57cec5SDimitry Andric static const TableEntry tbl_erf[] = {
2570b57cec5SDimitry Andric   {0.0, 0.0},
2580b57cec5SDimitry Andric   {-0.0, -0.0}
2590b57cec5SDimitry Andric };
2600b57cec5SDimitry Andric static const TableEntry tbl_exp[] = {
2610b57cec5SDimitry Andric   {1.0, 0.0},
2620b57cec5SDimitry Andric   {1.0, -0.0},
2630b57cec5SDimitry Andric   {MATH_E, 1.0}
2640b57cec5SDimitry Andric };
2650b57cec5SDimitry Andric static const TableEntry tbl_exp2[] = {
2660b57cec5SDimitry Andric   {1.0, 0.0},
2670b57cec5SDimitry Andric   {1.0, -0.0},
2680b57cec5SDimitry Andric   {2.0, 1.0}
2690b57cec5SDimitry Andric };
2700b57cec5SDimitry Andric static const TableEntry tbl_exp10[] = {
2710b57cec5SDimitry Andric   {1.0, 0.0},
2720b57cec5SDimitry Andric   {1.0, -0.0},
2730b57cec5SDimitry Andric   {10.0, 1.0}
2740b57cec5SDimitry Andric };
2750b57cec5SDimitry Andric static const TableEntry tbl_expm1[] = {
2760b57cec5SDimitry Andric   {0.0, 0.0},
2770b57cec5SDimitry Andric   {-0.0, -0.0}
2780b57cec5SDimitry Andric };
2790b57cec5SDimitry Andric static const TableEntry tbl_log[] = {
2800b57cec5SDimitry Andric   {0.0, 1.0},
2810b57cec5SDimitry Andric   {1.0, MATH_E}
2820b57cec5SDimitry Andric };
2830b57cec5SDimitry Andric static const TableEntry tbl_log2[] = {
2840b57cec5SDimitry Andric   {0.0, 1.0},
2850b57cec5SDimitry Andric   {1.0, 2.0}
2860b57cec5SDimitry Andric };
2870b57cec5SDimitry Andric static const TableEntry tbl_log10[] = {
2880b57cec5SDimitry Andric   {0.0, 1.0},
2890b57cec5SDimitry Andric   {1.0, 10.0}
2900b57cec5SDimitry Andric };
2910b57cec5SDimitry Andric static const TableEntry tbl_rsqrt[] = {
2920b57cec5SDimitry Andric   {1.0, 1.0},
2938bcb0991SDimitry Andric   {MATH_SQRT1_2, 2.0}
2940b57cec5SDimitry Andric };
2950b57cec5SDimitry Andric static const TableEntry tbl_sin[] = {
2960b57cec5SDimitry Andric   {0.0, 0.0},
2970b57cec5SDimitry Andric   {-0.0, -0.0}
2980b57cec5SDimitry Andric };
2990b57cec5SDimitry Andric static const TableEntry tbl_sinh[] = {
3000b57cec5SDimitry Andric   {0.0, 0.0},
3010b57cec5SDimitry Andric   {-0.0, -0.0}
3020b57cec5SDimitry Andric };
3030b57cec5SDimitry Andric static const TableEntry tbl_sinpi[] = {
3040b57cec5SDimitry Andric   {0.0, 0.0},
3050b57cec5SDimitry Andric   {-0.0, -0.0}
3060b57cec5SDimitry Andric };
3070b57cec5SDimitry Andric static const TableEntry tbl_sqrt[] = {
3080b57cec5SDimitry Andric   {0.0, 0.0},
3090b57cec5SDimitry Andric   {1.0, 1.0},
3100b57cec5SDimitry Andric   {MATH_SQRT2, 2.0}
3110b57cec5SDimitry Andric };
3120b57cec5SDimitry Andric static const TableEntry tbl_tan[] = {
3130b57cec5SDimitry Andric   {0.0, 0.0},
3140b57cec5SDimitry Andric   {-0.0, -0.0}
3150b57cec5SDimitry Andric };
3160b57cec5SDimitry Andric static const TableEntry tbl_tanh[] = {
3170b57cec5SDimitry Andric   {0.0, 0.0},
3180b57cec5SDimitry Andric   {-0.0, -0.0}
3190b57cec5SDimitry Andric };
3200b57cec5SDimitry Andric static const TableEntry tbl_tanpi[] = {
3210b57cec5SDimitry Andric   {0.0, 0.0},
3220b57cec5SDimitry Andric   {-0.0, -0.0}
3230b57cec5SDimitry Andric };
3240b57cec5SDimitry Andric static const TableEntry tbl_tgamma[] = {
3250b57cec5SDimitry Andric   {1.0, 1.0},
3260b57cec5SDimitry Andric   {1.0, 2.0},
3270b57cec5SDimitry Andric   {2.0, 3.0},
3280b57cec5SDimitry Andric   {6.0, 4.0}
3290b57cec5SDimitry Andric };
3300b57cec5SDimitry Andric 
HasNative(AMDGPULibFunc::EFuncId id)3310b57cec5SDimitry Andric static bool HasNative(AMDGPULibFunc::EFuncId id) {
3320b57cec5SDimitry Andric   switch(id) {
3330b57cec5SDimitry Andric   case AMDGPULibFunc::EI_DIVIDE:
3340b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COS:
3350b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP:
3360b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP2:
3370b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP10:
3380b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG:
3390b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG2:
3400b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG10:
3410b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POWR:
3420b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RECIP:
3430b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RSQRT:
3440b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SIN:
3450b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINCOS:
3460b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SQRT:
3470b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TAN:
3480b57cec5SDimitry Andric     return true;
3490b57cec5SDimitry Andric   default:;
3500b57cec5SDimitry Andric   }
3510b57cec5SDimitry Andric   return false;
3520b57cec5SDimitry Andric }
3530b57cec5SDimitry Andric 
354fcaf7f86SDimitry Andric using TableRef = ArrayRef<TableEntry>;
3550b57cec5SDimitry Andric 
getOptTable(AMDGPULibFunc::EFuncId id)3560b57cec5SDimitry Andric static TableRef getOptTable(AMDGPULibFunc::EFuncId id) {
3570b57cec5SDimitry Andric   switch(id) {
3580b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOS:    return TableRef(tbl_acos);
3590b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSH:   return TableRef(tbl_acosh);
3600b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSPI:  return TableRef(tbl_acospi);
3610b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASIN:    return TableRef(tbl_asin);
3620b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINH:   return TableRef(tbl_asinh);
3630b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINPI:  return TableRef(tbl_asinpi);
3640b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATAN:    return TableRef(tbl_atan);
3650b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANH:   return TableRef(tbl_atanh);
3660b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANPI:  return TableRef(tbl_atanpi);
3670b57cec5SDimitry Andric   case AMDGPULibFunc::EI_CBRT:    return TableRef(tbl_cbrt);
3680b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NCOS:
3690b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COS:     return TableRef(tbl_cos);
3700b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSH:    return TableRef(tbl_cosh);
3710b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSPI:   return TableRef(tbl_cospi);
3720b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ERFC:    return TableRef(tbl_erfc);
3730b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ERF:     return TableRef(tbl_erf);
3740b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP:     return TableRef(tbl_exp);
3750b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NEXP2:
3760b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP2:    return TableRef(tbl_exp2);
3770b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP10:   return TableRef(tbl_exp10);
3780b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXPM1:   return TableRef(tbl_expm1);
3790b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG:     return TableRef(tbl_log);
3800b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NLOG2:
3810b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG2:    return TableRef(tbl_log2);
3820b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG10:   return TableRef(tbl_log10);
3830b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NRSQRT:
3840b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RSQRT:   return TableRef(tbl_rsqrt);
3850b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NSIN:
3860b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SIN:     return TableRef(tbl_sin);
3870b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINH:    return TableRef(tbl_sinh);
3880b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINPI:   return TableRef(tbl_sinpi);
3890b57cec5SDimitry Andric   case AMDGPULibFunc::EI_NSQRT:
3900b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SQRT:    return TableRef(tbl_sqrt);
3910b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TAN:     return TableRef(tbl_tan);
3920b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANH:    return TableRef(tbl_tanh);
3930b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANPI:   return TableRef(tbl_tanpi);
3940b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TGAMMA:  return TableRef(tbl_tgamma);
3950b57cec5SDimitry Andric   default:;
3960b57cec5SDimitry Andric   }
3970b57cec5SDimitry Andric   return TableRef();
3980b57cec5SDimitry Andric }
3990b57cec5SDimitry Andric 
getVecSize(const AMDGPULibFunc & FInfo)4000b57cec5SDimitry Andric static inline int getVecSize(const AMDGPULibFunc& FInfo) {
4010b57cec5SDimitry Andric   return FInfo.getLeads()[0].VectorSize;
4020b57cec5SDimitry Andric }
4030b57cec5SDimitry Andric 
/// Argument type recorded for the first (leading) parameter of \p FInfo,
/// converted to AMDGPULibFunc::EType.
static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) {
  const auto &Lead0 = FInfo.getLeads()[0];
  return static_cast<AMDGPULibFunc::EType>(Lead0.ArgType);
}
4070b57cec5SDimitry Andric 
getFunction(Module * M,const FuncInfo & fInfo)4080b57cec5SDimitry Andric FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) {
4090b57cec5SDimitry Andric   // If we are doing PreLinkOpt, the function is external. So it is safe to
4100b57cec5SDimitry Andric   // use getOrInsertFunction() at this stage.
4110b57cec5SDimitry Andric 
4120b57cec5SDimitry Andric   return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo)
4130b57cec5SDimitry Andric                        : AMDGPULibFunc::getFunction(M, fInfo);
4140b57cec5SDimitry Andric }
4150b57cec5SDimitry Andric 
parseFunctionName(const StringRef & FMangledName,FuncInfo & FInfo)4160b57cec5SDimitry Andric bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName,
417349cc55cSDimitry Andric                                        FuncInfo &FInfo) {
418349cc55cSDimitry Andric   return AMDGPULibFunc::parse(FMangledName, FInfo);
4190b57cec5SDimitry Andric }
4200b57cec5SDimitry Andric 
isUnsafeMath(const FPMathOperator * FPOp) const4215f757f3fSDimitry Andric bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const {
4225f757f3fSDimitry Andric   return UnsafeFPMath || FPOp->isFast();
4235f757f3fSDimitry Andric }
4245f757f3fSDimitry Andric 
isUnsafeFiniteOnlyMath(const FPMathOperator * FPOp) const4255f757f3fSDimitry Andric bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const {
4265f757f3fSDimitry Andric   return UnsafeFPMath ||
4275f757f3fSDimitry Andric          (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs());
4285f757f3fSDimitry Andric }
4295f757f3fSDimitry Andric 
canIncreasePrecisionOfConstantFold(const FPMathOperator * FPOp) const4305f757f3fSDimitry Andric bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold(
4315f757f3fSDimitry Andric     const FPMathOperator *FPOp) const {
4325f757f3fSDimitry Andric   // TODO: Refine to approxFunc or contract
4335f757f3fSDimitry Andric   return isUnsafeMath(FPOp);
4345f757f3fSDimitry Andric }
4355f757f3fSDimitry Andric 
initFunction(Function & F,FunctionAnalysisManager & FAM)4365f757f3fSDimitry Andric void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) {
4375f757f3fSDimitry Andric   UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool();
4385f757f3fSDimitry Andric   AC = &FAM.getResult<AssumptionAnalysis>(F);
4395f757f3fSDimitry Andric   TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
4405f757f3fSDimitry Andric   DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
4410b57cec5SDimitry Andric }
4420b57cec5SDimitry Andric 
useNativeFunc(const StringRef F) const4430b57cec5SDimitry Andric bool AMDGPULibCalls::useNativeFunc(const StringRef F) const {
444e8d8bef9SDimitry Andric   return AllNative || llvm::is_contained(UseNative, F);
4450b57cec5SDimitry Andric }
4460b57cec5SDimitry Andric 
initNativeFuncs()4470b57cec5SDimitry Andric void AMDGPULibCalls::initNativeFuncs() {
4480b57cec5SDimitry Andric   AllNative = useNativeFunc("all") ||
4490b57cec5SDimitry Andric               (UseNative.getNumOccurrences() && UseNative.size() == 1 &&
4500b57cec5SDimitry Andric                UseNative.begin()->empty());
4510b57cec5SDimitry Andric }
4520b57cec5SDimitry Andric 
sincosUseNative(CallInst * aCI,const FuncInfo & FInfo)4530b57cec5SDimitry Andric bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
4540b57cec5SDimitry Andric   bool native_sin = useNativeFunc("sin");
4550b57cec5SDimitry Andric   bool native_cos = useNativeFunc("cos");
4560b57cec5SDimitry Andric 
4570b57cec5SDimitry Andric   if (native_sin && native_cos) {
4580b57cec5SDimitry Andric     Module *M = aCI->getModule();
4590b57cec5SDimitry Andric     Value *opr0 = aCI->getArgOperand(0);
4600b57cec5SDimitry Andric 
4610b57cec5SDimitry Andric     AMDGPULibFunc nf;
4620b57cec5SDimitry Andric     nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType;
4630b57cec5SDimitry Andric     nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize;
4640b57cec5SDimitry Andric 
4650b57cec5SDimitry Andric     nf.setPrefix(AMDGPULibFunc::NATIVE);
4660b57cec5SDimitry Andric     nf.setId(AMDGPULibFunc::EI_SIN);
4670b57cec5SDimitry Andric     FunctionCallee sinExpr = getFunction(M, nf);
4680b57cec5SDimitry Andric 
4690b57cec5SDimitry Andric     nf.setPrefix(AMDGPULibFunc::NATIVE);
4700b57cec5SDimitry Andric     nf.setId(AMDGPULibFunc::EI_COS);
4710b57cec5SDimitry Andric     FunctionCallee cosExpr = getFunction(M, nf);
4720b57cec5SDimitry Andric     if (sinExpr && cosExpr) {
4730b57cec5SDimitry Andric       Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI);
4740b57cec5SDimitry Andric       Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI);
4750b57cec5SDimitry Andric       new StoreInst(cosval, aCI->getArgOperand(1), aCI);
4760b57cec5SDimitry Andric 
4770b57cec5SDimitry Andric       DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
4780b57cec5SDimitry Andric                                           << " with native version of sin/cos");
4790b57cec5SDimitry Andric 
4805f757f3fSDimitry Andric       replaceCall(aCI, sinval);
4810b57cec5SDimitry Andric       return true;
4820b57cec5SDimitry Andric     }
4830b57cec5SDimitry Andric   }
4840b57cec5SDimitry Andric   return false;
4850b57cec5SDimitry Andric }
4860b57cec5SDimitry Andric 
useNative(CallInst * aCI)4870b57cec5SDimitry Andric bool AMDGPULibCalls::useNative(CallInst *aCI) {
4880b57cec5SDimitry Andric   Function *Callee = aCI->getCalledFunction();
4895f757f3fSDimitry Andric   if (!Callee || aCI->isNoBuiltin())
4905f757f3fSDimitry Andric     return false;
4910b57cec5SDimitry Andric 
4920b57cec5SDimitry Andric   FuncInfo FInfo;
493349cc55cSDimitry Andric   if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() ||
4940b57cec5SDimitry Andric       FInfo.getPrefix() != AMDGPULibFunc::NOPFX ||
4950b57cec5SDimitry Andric       getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) ||
4960b57cec5SDimitry Andric       !(AllNative || useNativeFunc(FInfo.getName()))) {
4970b57cec5SDimitry Andric     return false;
4980b57cec5SDimitry Andric   }
4990b57cec5SDimitry Andric 
5000b57cec5SDimitry Andric   if (FInfo.getId() == AMDGPULibFunc::EI_SINCOS)
5010b57cec5SDimitry Andric     return sincosUseNative(aCI, FInfo);
5020b57cec5SDimitry Andric 
5030b57cec5SDimitry Andric   FInfo.setPrefix(AMDGPULibFunc::NATIVE);
5040b57cec5SDimitry Andric   FunctionCallee F = getFunction(aCI->getModule(), FInfo);
5050b57cec5SDimitry Andric   if (!F)
5060b57cec5SDimitry Andric     return false;
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric   aCI->setCalledFunction(F);
5090b57cec5SDimitry Andric   DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI
5100b57cec5SDimitry Andric                                       << " with native version");
5110b57cec5SDimitry Andric   return true;
5120b57cec5SDimitry Andric }
5130b57cec5SDimitry Andric 
// Clang emits a call to __read_pipe_2 or __read_pipe_4 for the OpenCL
// read_pipe builtin, with appended type size and alignment arguments, where 2
// or 4 indicates the original number of arguments. The library has optimized
// versions of __read_pipe_2/__read_pipe_4 for the case where the type size and
// alignment are the same power-of-2 value. This function transforms
// __read_pipe_2 to __read_pipe_2_N for such cases, where N is the size in bytes
// of the type (N = 1, 2, 4, 8, ..., 128). The same applies to __read_pipe_4,
// __write_pipe_2, and __write_pipe_4.
fold_read_write_pipe(CallInst * CI,IRBuilder<> & B,const FuncInfo & FInfo)5210b57cec5SDimitry Andric bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B,
522349cc55cSDimitry Andric                                           const FuncInfo &FInfo) {
5230b57cec5SDimitry Andric   auto *Callee = CI->getCalledFunction();
5240b57cec5SDimitry Andric   if (!Callee->isDeclaration())
5250b57cec5SDimitry Andric     return false;
5260b57cec5SDimitry Andric 
5270b57cec5SDimitry Andric   assert(Callee->hasName() && "Invalid read_pipe/write_pipe function");
5280b57cec5SDimitry Andric   auto *M = Callee->getParent();
5295ffd83dbSDimitry Andric   std::string Name = std::string(Callee->getName());
530349cc55cSDimitry Andric   auto NumArg = CI->arg_size();
5310b57cec5SDimitry Andric   if (NumArg != 4 && NumArg != 6)
5320b57cec5SDimitry Andric     return false;
5335f757f3fSDimitry Andric   ConstantInt *PacketSize =
5345f757f3fSDimitry Andric       dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2));
5355f757f3fSDimitry Andric   ConstantInt *PacketAlign =
5365f757f3fSDimitry Andric       dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1));
5375f757f3fSDimitry Andric   if (!PacketSize || !PacketAlign)
5380b57cec5SDimitry Andric     return false;
5395f757f3fSDimitry Andric 
5405f757f3fSDimitry Andric   unsigned Size = PacketSize->getZExtValue();
5415f757f3fSDimitry Andric   Align Alignment = PacketAlign->getAlignValue();
5425ffd83dbSDimitry Andric   if (Alignment != Size)
5430b57cec5SDimitry Andric     return false;
5440b57cec5SDimitry Andric 
545349cc55cSDimitry Andric   unsigned PtrArgLoc = CI->arg_size() - 3;
5465f757f3fSDimitry Andric   Value *PtrArg = CI->getArgOperand(PtrArgLoc);
5475f757f3fSDimitry Andric   Type *PtrTy = PtrArg->getType();
5480b57cec5SDimitry Andric 
5490b57cec5SDimitry Andric   SmallVector<llvm::Type *, 6> ArgTys;
5500b57cec5SDimitry Andric   for (unsigned I = 0; I != PtrArgLoc; ++I)
5510b57cec5SDimitry Andric     ArgTys.push_back(CI->getArgOperand(I)->getType());
5520b57cec5SDimitry Andric   ArgTys.push_back(PtrTy);
5530b57cec5SDimitry Andric 
5540b57cec5SDimitry Andric   Name = Name + "_" + std::to_string(Size);
5550b57cec5SDimitry Andric   auto *FTy = FunctionType::get(Callee->getReturnType(),
5560b57cec5SDimitry Andric                                 ArrayRef<Type *>(ArgTys), false);
5570b57cec5SDimitry Andric   AMDGPULibFunc NewLibFunc(Name, FTy);
5580b57cec5SDimitry Andric   FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc);
5590b57cec5SDimitry Andric   if (!F)
5600b57cec5SDimitry Andric     return false;
5610b57cec5SDimitry Andric 
5620b57cec5SDimitry Andric   SmallVector<Value *, 6> Args;
5630b57cec5SDimitry Andric   for (unsigned I = 0; I != PtrArgLoc; ++I)
5640b57cec5SDimitry Andric     Args.push_back(CI->getArgOperand(I));
5655f757f3fSDimitry Andric   Args.push_back(PtrArg);
5660b57cec5SDimitry Andric 
5670b57cec5SDimitry Andric   auto *NCI = B.CreateCall(F, Args);
5680b57cec5SDimitry Andric   NCI->setAttributes(CI->getAttributes());
5690b57cec5SDimitry Andric   CI->replaceAllUsesWith(NCI);
5700b57cec5SDimitry Andric   CI->dropAllReferences();
5710b57cec5SDimitry Andric   CI->eraseFromParent();
5720b57cec5SDimitry Andric 
5730b57cec5SDimitry Andric   return true;
5740b57cec5SDimitry Andric }
5750b57cec5SDimitry Andric 
isKnownIntegral(const Value * V,const DataLayout & DL,FastMathFlags FMF)5765f757f3fSDimitry Andric static bool isKnownIntegral(const Value *V, const DataLayout &DL,
5775f757f3fSDimitry Andric                             FastMathFlags FMF) {
5785f757f3fSDimitry Andric   if (isa<UndefValue>(V))
5795f757f3fSDimitry Andric     return true;
5800b57cec5SDimitry Andric 
5815f757f3fSDimitry Andric   if (const ConstantFP *CF = dyn_cast<ConstantFP>(V))
5825f757f3fSDimitry Andric     return CF->getValueAPF().isInteger();
5835f757f3fSDimitry Andric 
5845f757f3fSDimitry Andric   if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) {
5855f757f3fSDimitry Andric     for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) {
5865f757f3fSDimitry Andric       Constant *ConstElt = CDV->getElementAsConstant(i);
5875f757f3fSDimitry Andric       if (isa<UndefValue>(ConstElt))
5885f757f3fSDimitry Andric         continue;
5895f757f3fSDimitry Andric       const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt);
5905f757f3fSDimitry Andric       if (!CFP || !CFP->getValue().isInteger())
5915f757f3fSDimitry Andric         return false;
5925f757f3fSDimitry Andric     }
5935f757f3fSDimitry Andric 
5945f757f3fSDimitry Andric     return true;
5955f757f3fSDimitry Andric   }
5965f757f3fSDimitry Andric 
5975f757f3fSDimitry Andric   const Instruction *I = dyn_cast<Instruction>(V);
5985f757f3fSDimitry Andric   if (!I)
59904eeddc0SDimitry Andric     return false;
6000b57cec5SDimitry Andric 
6015f757f3fSDimitry Andric   switch (I->getOpcode()) {
6025f757f3fSDimitry Andric   case Instruction::SIToFP:
6035f757f3fSDimitry Andric   case Instruction::UIToFP:
6045f757f3fSDimitry Andric     // TODO: Could check nofpclass(inf) on incoming argument
6055f757f3fSDimitry Andric     if (FMF.noInfs())
6065f757f3fSDimitry Andric       return true;
6070b57cec5SDimitry Andric 
6085f757f3fSDimitry Andric     // Need to check int size cannot produce infinity, which computeKnownFPClass
6095f757f3fSDimitry Andric     // knows how to do already.
6105f757f3fSDimitry Andric     return isKnownNeverInfinity(I, DL);
6115f757f3fSDimitry Andric   case Instruction::Call: {
6125f757f3fSDimitry Andric     const CallInst *CI = cast<CallInst>(I);
6135f757f3fSDimitry Andric     switch (CI->getIntrinsicID()) {
6145f757f3fSDimitry Andric     case Intrinsic::trunc:
6155f757f3fSDimitry Andric     case Intrinsic::floor:
6165f757f3fSDimitry Andric     case Intrinsic::ceil:
6175f757f3fSDimitry Andric     case Intrinsic::rint:
6185f757f3fSDimitry Andric     case Intrinsic::nearbyint:
6195f757f3fSDimitry Andric     case Intrinsic::round:
6205f757f3fSDimitry Andric     case Intrinsic::roundeven:
6215f757f3fSDimitry Andric       return (FMF.noInfs() && FMF.noNaNs()) ||
6225f757f3fSDimitry Andric              isKnownNeverInfOrNaN(I, DL, nullptr);
6230b57cec5SDimitry Andric     default:
6240b57cec5SDimitry Andric       break;
6250b57cec5SDimitry Andric     }
6260b57cec5SDimitry Andric 
6275f757f3fSDimitry Andric     break;
6285f757f3fSDimitry Andric   }
6295f757f3fSDimitry Andric   default:
6305f757f3fSDimitry Andric     break;
6315f757f3fSDimitry Andric   }
6325f757f3fSDimitry Andric 
6335f757f3fSDimitry Andric   return false;
6345f757f3fSDimitry Andric }
6355f757f3fSDimitry Andric 
6365f757f3fSDimitry Andric // This function returns false if no change; return true otherwise.
fold(CallInst * CI)6375f757f3fSDimitry Andric bool AMDGPULibCalls::fold(CallInst *CI) {
6385f757f3fSDimitry Andric   Function *Callee = CI->getCalledFunction();
6395f757f3fSDimitry Andric   // Ignore indirect calls.
6405f757f3fSDimitry Andric   if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin())
6415f757f3fSDimitry Andric     return false;
6425f757f3fSDimitry Andric 
6430b57cec5SDimitry Andric   FuncInfo FInfo;
644349cc55cSDimitry Andric   if (!parseFunctionName(Callee->getName(), FInfo))
6450b57cec5SDimitry Andric     return false;
6460b57cec5SDimitry Andric 
6470b57cec5SDimitry Andric   // Further check the number of arguments to see if they match.
6485f757f3fSDimitry Andric   // TODO: Check calling convention matches too
6495f757f3fSDimitry Andric   if (!FInfo.isCompatibleSignature(CI->getFunctionType()))
6500b57cec5SDimitry Andric     return false;
6510b57cec5SDimitry Andric 
6525f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n');
6535f757f3fSDimitry Andric 
6540b57cec5SDimitry Andric   if (TDOFold(CI, FInfo))
6550b57cec5SDimitry Andric     return true;
6560b57cec5SDimitry Andric 
6575f757f3fSDimitry Andric   IRBuilder<> B(CI);
6585f757f3fSDimitry Andric 
6595f757f3fSDimitry Andric   if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) {
6600b57cec5SDimitry Andric     // Under unsafe-math, evaluate calls if possible.
6610b57cec5SDimitry Andric     // According to Brian Sumner, we can do this for all f32 function calls
6620b57cec5SDimitry Andric     // using host's double function calls.
6635f757f3fSDimitry Andric     if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo))
6640b57cec5SDimitry Andric       return true;
6650b57cec5SDimitry Andric 
6665f757f3fSDimitry Andric     // Copy fast flags from the original call.
6675f757f3fSDimitry Andric     FastMathFlags FMF = FPOp->getFastMathFlags();
6685f757f3fSDimitry Andric     B.setFastMathFlags(FMF);
6695f757f3fSDimitry Andric 
6705f757f3fSDimitry Andric     // Specialized optimizations for each function call.
6715f757f3fSDimitry Andric     //
6725f757f3fSDimitry Andric     // TODO: Handle native functions
6730b57cec5SDimitry Andric     switch (FInfo.getId()) {
6745f757f3fSDimitry Andric     case AMDGPULibFunc::EI_EXP:
6755f757f3fSDimitry Andric       if (FMF.none())
6765f757f3fSDimitry Andric         return false;
6775f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp,
6785f757f3fSDimitry Andric                                                   FMF.approxFunc());
6795f757f3fSDimitry Andric     case AMDGPULibFunc::EI_EXP2:
6805f757f3fSDimitry Andric       if (FMF.none())
6815f757f3fSDimitry Andric         return false;
6825f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2,
6835f757f3fSDimitry Andric                                                   FMF.approxFunc());
6845f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LOG:
6855f757f3fSDimitry Andric       if (FMF.none())
6865f757f3fSDimitry Andric         return false;
6875f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log,
6885f757f3fSDimitry Andric                                                   FMF.approxFunc());
6895f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LOG2:
6905f757f3fSDimitry Andric       if (FMF.none())
6915f757f3fSDimitry Andric         return false;
6925f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2,
6935f757f3fSDimitry Andric                                                   FMF.approxFunc());
6945f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LOG10:
6955f757f3fSDimitry Andric       if (FMF.none())
6965f757f3fSDimitry Andric         return false;
6975f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10,
6985f757f3fSDimitry Andric                                                   FMF.approxFunc());
6995f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FMIN:
7005f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum,
7015f757f3fSDimitry Andric                                                   true, true);
7025f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FMAX:
7035f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum,
7045f757f3fSDimitry Andric                                                   true, true);
7055f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FMA:
7065f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true,
7075f757f3fSDimitry Andric                                                   true);
7085f757f3fSDimitry Andric     case AMDGPULibFunc::EI_MAD:
7095f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd,
7105f757f3fSDimitry Andric                                                   true, true);
7115f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FABS:
7125f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true,
7135f757f3fSDimitry Andric                                                   true, true);
7145f757f3fSDimitry Andric     case AMDGPULibFunc::EI_COPYSIGN:
7155f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign,
7165f757f3fSDimitry Andric                                                   true, true, true);
7175f757f3fSDimitry Andric     case AMDGPULibFunc::EI_FLOOR:
7185f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true,
7195f757f3fSDimitry Andric                                                   true);
7205f757f3fSDimitry Andric     case AMDGPULibFunc::EI_CEIL:
7215f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true,
7225f757f3fSDimitry Andric                                                   true);
7235f757f3fSDimitry Andric     case AMDGPULibFunc::EI_TRUNC:
7245f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::trunc, true,
7255f757f3fSDimitry Andric                                                   true);
7265f757f3fSDimitry Andric     case AMDGPULibFunc::EI_RINT:
7275f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true,
7285f757f3fSDimitry Andric                                                   true);
7295f757f3fSDimitry Andric     case AMDGPULibFunc::EI_ROUND:
7305f757f3fSDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true,
7315f757f3fSDimitry Andric                                                   true);
7325f757f3fSDimitry Andric     case AMDGPULibFunc::EI_LDEXP: {
7335f757f3fSDimitry Andric       if (!shouldReplaceLibcallWithIntrinsic(CI, true, true))
7345f757f3fSDimitry Andric         return false;
7350b57cec5SDimitry Andric 
7365f757f3fSDimitry Andric       Value *Arg1 = CI->getArgOperand(1);
7375f757f3fSDimitry Andric       if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType());
7385f757f3fSDimitry Andric           VecTy && !isa<VectorType>(Arg1->getType())) {
7395f757f3fSDimitry Andric         Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1);
7405f757f3fSDimitry Andric         CI->setArgOperand(1, SplatArg1);
7415f757f3fSDimitry Andric       }
7420b57cec5SDimitry Andric 
7435f757f3fSDimitry Andric       CI->setCalledFunction(Intrinsic::getDeclaration(
7445f757f3fSDimitry Andric           CI->getModule(), Intrinsic::ldexp,
7455f757f3fSDimitry Andric           {CI->getType(), CI->getArgOperand(1)->getType()}));
7465f757f3fSDimitry Andric       return true;
7475f757f3fSDimitry Andric     }
7485f757f3fSDimitry Andric     case AMDGPULibFunc::EI_POW: {
7495f757f3fSDimitry Andric       Module *M = Callee->getParent();
7505f757f3fSDimitry Andric       AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo);
7515f757f3fSDimitry Andric       FunctionCallee PowrFunc = getFunction(M, PowrInfo);
7525f757f3fSDimitry Andric       CallInst *Call = cast<CallInst>(FPOp);
7535f757f3fSDimitry Andric 
7545f757f3fSDimitry Andric       // pow(x, y) -> powr(x, y) for x >= -0.0
7555f757f3fSDimitry Andric       // TODO: Account for flags on current call
7565f757f3fSDimitry Andric       if (PowrFunc &&
7575f757f3fSDimitry Andric           cannotBeOrderedLessThanZero(FPOp->getOperand(0), M->getDataLayout(),
7585f757f3fSDimitry Andric                                       TLInfo, 0, AC, Call, DT)) {
7595f757f3fSDimitry Andric         Call->setCalledFunction(PowrFunc);
7605f757f3fSDimitry Andric         return fold_pow(FPOp, B, PowrInfo) || true;
7615f757f3fSDimitry Andric       }
7625f757f3fSDimitry Andric 
7635f757f3fSDimitry Andric       // pow(x, y) -> pown(x, y) for known integral y
7645f757f3fSDimitry Andric       if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(),
7655f757f3fSDimitry Andric                           FPOp->getFastMathFlags())) {
7665f757f3fSDimitry Andric         FunctionType *PownType = getPownType(CI->getFunctionType());
7675f757f3fSDimitry Andric         AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true);
7685f757f3fSDimitry Andric         FunctionCallee PownFunc = getFunction(M, PownInfo);
7695f757f3fSDimitry Andric         if (PownFunc) {
7705f757f3fSDimitry Andric           // TODO: If the incoming integral value is an sitofp/uitofp, it won't
7715f757f3fSDimitry Andric           // fold out without a known range. We can probably take the source
7725f757f3fSDimitry Andric           // value directly.
7735f757f3fSDimitry Andric           Value *CastedArg =
7745f757f3fSDimitry Andric               B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1));
7755f757f3fSDimitry Andric           // Have to drop any nofpclass attributes on the original call site.
7765f757f3fSDimitry Andric           Call->removeParamAttrs(
7775f757f3fSDimitry Andric               1, AttributeFuncs::typeIncompatible(CastedArg->getType()));
7785f757f3fSDimitry Andric           Call->setCalledFunction(PownFunc);
7795f757f3fSDimitry Andric           Call->setArgOperand(1, CastedArg);
7805f757f3fSDimitry Andric           return fold_pow(FPOp, B, PownInfo) || true;
7815f757f3fSDimitry Andric         }
7825f757f3fSDimitry Andric       }
7835f757f3fSDimitry Andric 
7845f757f3fSDimitry Andric       return fold_pow(FPOp, B, FInfo);
7855f757f3fSDimitry Andric     }
7860b57cec5SDimitry Andric     case AMDGPULibFunc::EI_POWR:
7870b57cec5SDimitry Andric     case AMDGPULibFunc::EI_POWN:
7885f757f3fSDimitry Andric       return fold_pow(FPOp, B, FInfo);
7890b57cec5SDimitry Andric     case AMDGPULibFunc::EI_ROOTN:
7905f757f3fSDimitry Andric       return fold_rootn(FPOp, B, FInfo);
7910b57cec5SDimitry Andric     case AMDGPULibFunc::EI_SQRT:
7921db9f3b2SDimitry Andric       // TODO: Allow with strictfp + constrained intrinsic
7931db9f3b2SDimitry Andric       return tryReplaceLibcallWithSimpleIntrinsic(
7941db9f3b2SDimitry Andric           B, CI, Intrinsic::sqrt, true, true, /*AllowStrictFP=*/false);
7950b57cec5SDimitry Andric     case AMDGPULibFunc::EI_COS:
7960b57cec5SDimitry Andric     case AMDGPULibFunc::EI_SIN:
7975f757f3fSDimitry Andric       return fold_sincos(FPOp, B, FInfo);
7985f757f3fSDimitry Andric     default:
7990b57cec5SDimitry Andric       break;
8005f757f3fSDimitry Andric     }
8015f757f3fSDimitry Andric   } else {
8025f757f3fSDimitry Andric     // Specialized optimizations for each function call
8035f757f3fSDimitry Andric     switch (FInfo.getId()) {
8040b57cec5SDimitry Andric     case AMDGPULibFunc::EI_READ_PIPE_2:
8050b57cec5SDimitry Andric     case AMDGPULibFunc::EI_READ_PIPE_4:
8060b57cec5SDimitry Andric     case AMDGPULibFunc::EI_WRITE_PIPE_2:
8070b57cec5SDimitry Andric     case AMDGPULibFunc::EI_WRITE_PIPE_4:
8080b57cec5SDimitry Andric       return fold_read_write_pipe(CI, B, FInfo);
8090b57cec5SDimitry Andric     default:
8100b57cec5SDimitry Andric       break;
8110b57cec5SDimitry Andric     }
8125f757f3fSDimitry Andric   }
8130b57cec5SDimitry Andric 
8140b57cec5SDimitry Andric   return false;
8150b57cec5SDimitry Andric }
8160b57cec5SDimitry Andric 
TDOFold(CallInst * CI,const FuncInfo & FInfo)8170b57cec5SDimitry Andric bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) {
8180b57cec5SDimitry Andric   // Table-Driven optimization
8190b57cec5SDimitry Andric   const TableRef tr = getOptTable(FInfo.getId());
820fcaf7f86SDimitry Andric   if (tr.empty())
8210b57cec5SDimitry Andric     return false;
8220b57cec5SDimitry Andric 
823fcaf7f86SDimitry Andric   int const sz = (int)tr.size();
8240b57cec5SDimitry Andric   Value *opr0 = CI->getArgOperand(0);
8250b57cec5SDimitry Andric 
8260b57cec5SDimitry Andric   if (getVecSize(FInfo) > 1) {
8270b57cec5SDimitry Andric     if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) {
8280b57cec5SDimitry Andric       SmallVector<double, 0> DVal;
8290b57cec5SDimitry Andric       for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) {
8300b57cec5SDimitry Andric         ConstantFP *eltval = dyn_cast<ConstantFP>(
8310b57cec5SDimitry Andric                                CV->getElementAsConstant((unsigned)eltNo));
8320b57cec5SDimitry Andric         assert(eltval && "Non-FP arguments in math function!");
8330b57cec5SDimitry Andric         bool found = false;
8340b57cec5SDimitry Andric         for (int i=0; i < sz; ++i) {
835fcaf7f86SDimitry Andric           if (eltval->isExactlyValue(tr[i].input)) {
836fcaf7f86SDimitry Andric             DVal.push_back(tr[i].result);
8370b57cec5SDimitry Andric             found = true;
8380b57cec5SDimitry Andric             break;
8390b57cec5SDimitry Andric           }
8400b57cec5SDimitry Andric         }
8410b57cec5SDimitry Andric         if (!found) {
8420b57cec5SDimitry Andric           // This vector constants not handled yet.
8430b57cec5SDimitry Andric           return false;
8440b57cec5SDimitry Andric         }
8450b57cec5SDimitry Andric       }
8460b57cec5SDimitry Andric       LLVMContext &context = CI->getParent()->getParent()->getContext();
8470b57cec5SDimitry Andric       Constant *nval;
8480b57cec5SDimitry Andric       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
8490b57cec5SDimitry Andric         SmallVector<float, 0> FVal;
8500b57cec5SDimitry Andric         for (unsigned i = 0; i < DVal.size(); ++i) {
8510b57cec5SDimitry Andric           FVal.push_back((float)DVal[i]);
8520b57cec5SDimitry Andric         }
8530b57cec5SDimitry Andric         ArrayRef<float> tmp(FVal);
8540b57cec5SDimitry Andric         nval = ConstantDataVector::get(context, tmp);
8550b57cec5SDimitry Andric       } else { // F64
8560b57cec5SDimitry Andric         ArrayRef<double> tmp(DVal);
8570b57cec5SDimitry Andric         nval = ConstantDataVector::get(context, tmp);
8580b57cec5SDimitry Andric       }
8590b57cec5SDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8605f757f3fSDimitry Andric       replaceCall(CI, nval);
8610b57cec5SDimitry Andric       return true;
8620b57cec5SDimitry Andric     }
8630b57cec5SDimitry Andric   } else {
8640b57cec5SDimitry Andric     // Scalar version
8650b57cec5SDimitry Andric     if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) {
8660b57cec5SDimitry Andric       for (int i = 0; i < sz; ++i) {
867fcaf7f86SDimitry Andric         if (CF->isExactlyValue(tr[i].input)) {
868fcaf7f86SDimitry Andric           Value *nval = ConstantFP::get(CF->getType(), tr[i].result);
8690b57cec5SDimitry Andric           LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n");
8705f757f3fSDimitry Andric           replaceCall(CI, nval);
8710b57cec5SDimitry Andric           return true;
8720b57cec5SDimitry Andric         }
8730b57cec5SDimitry Andric       }
8740b57cec5SDimitry Andric     }
8750b57cec5SDimitry Andric   }
8760b57cec5SDimitry Andric 
8770b57cec5SDimitry Andric   return false;
8780b57cec5SDimitry Andric }
8790b57cec5SDimitry Andric 
namespace llvm {
// Base-2 logarithm of V. Uses the C library's log2 when the feature-test
// macros advertise it; otherwise computes it as log(V) / ln(2).
static double log2(double V) {
#if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L
  const double Result = ::log2(V);
#else
  const double Result = log(V) / numbers::ln2;
#endif
  return Result;
}
} // namespace llvm
8890b57cec5SDimitry Andric 
fold_pow(FPMathOperator * FPOp,IRBuilder<> & B,const FuncInfo & FInfo)8905f757f3fSDimitry Andric bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B,
8910b57cec5SDimitry Andric                               const FuncInfo &FInfo) {
8920b57cec5SDimitry Andric   assert((FInfo.getId() == AMDGPULibFunc::EI_POW ||
8930b57cec5SDimitry Andric           FInfo.getId() == AMDGPULibFunc::EI_POWR ||
8940b57cec5SDimitry Andric           FInfo.getId() == AMDGPULibFunc::EI_POWN) &&
8950b57cec5SDimitry Andric          "fold_pow: encounter a wrong function call");
8960b57cec5SDimitry Andric 
8975f757f3fSDimitry Andric   Module *M = B.GetInsertBlock()->getModule();
8985f757f3fSDimitry Andric   Type *eltType = FPOp->getType()->getScalarType();
8995f757f3fSDimitry Andric   Value *opr0 = FPOp->getOperand(0);
9005f757f3fSDimitry Andric   Value *opr1 = FPOp->getOperand(1);
9010b57cec5SDimitry Andric 
9025f757f3fSDimitry Andric   const APFloat *CF = nullptr;
9035f757f3fSDimitry Andric   const APInt *CINT = nullptr;
9045f757f3fSDimitry Andric   if (!match(opr1, m_APFloatAllowUndef(CF)))
9055f757f3fSDimitry Andric     match(opr1, m_APIntAllowUndef(CINT));
9060b57cec5SDimitry Andric 
9070b57cec5SDimitry Andric   // 0x1111111 means that we don't do anything for this call.
9080b57cec5SDimitry Andric   int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);
9090b57cec5SDimitry Andric 
9105f757f3fSDimitry Andric   if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
9110b57cec5SDimitry Andric     //  pow/powr/pown(x, 0) == 1
9125f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
9130b57cec5SDimitry Andric     Constant *cnval = ConstantFP::get(eltType, 1.0);
9140b57cec5SDimitry Andric     if (getVecSize(FInfo) > 1) {
9150b57cec5SDimitry Andric       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9160b57cec5SDimitry Andric     }
9175f757f3fSDimitry Andric     replaceCall(FPOp, cnval);
9180b57cec5SDimitry Andric     return true;
9190b57cec5SDimitry Andric   }
9200b57cec5SDimitry Andric   if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
9210b57cec5SDimitry Andric     // pow/powr/pown(x, 1.0) = x
9225f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
9235f757f3fSDimitry Andric     replaceCall(FPOp, opr0);
9240b57cec5SDimitry Andric     return true;
9250b57cec5SDimitry Andric   }
9260b57cec5SDimitry Andric   if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
9270b57cec5SDimitry Andric     // pow/powr/pown(x, 2.0) = x*x
9285f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
9295f757f3fSDimitry Andric                       << *opr0 << "\n");
9300b57cec5SDimitry Andric     Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
9315f757f3fSDimitry Andric     replaceCall(FPOp, nval);
9320b57cec5SDimitry Andric     return true;
9330b57cec5SDimitry Andric   }
9340b57cec5SDimitry Andric   if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
9350b57cec5SDimitry Andric     // pow/powr/pown(x, -1.0) = 1.0/x
9365f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
9370b57cec5SDimitry Andric     Constant *cnval = ConstantFP::get(eltType, 1.0);
9380b57cec5SDimitry Andric     if (getVecSize(FInfo) > 1) {
9390b57cec5SDimitry Andric       cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9400b57cec5SDimitry Andric     }
9410b57cec5SDimitry Andric     Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
9425f757f3fSDimitry Andric     replaceCall(FPOp, nval);
9430b57cec5SDimitry Andric     return true;
9440b57cec5SDimitry Andric   }
9450b57cec5SDimitry Andric 
9460b57cec5SDimitry Andric   if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
9470b57cec5SDimitry Andric     // pow[r](x, [-]0.5) = sqrt(x)
9480b57cec5SDimitry Andric     bool issqrt = CF->isExactlyValue(0.5);
9490b57cec5SDimitry Andric     if (FunctionCallee FPExpr =
9500b57cec5SDimitry Andric             getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
9510b57cec5SDimitry Andric                                                 : AMDGPULibFunc::EI_RSQRT,
9520b57cec5SDimitry Andric                                          FInfo))) {
9535f757f3fSDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
9545f757f3fSDimitry Andric                         << '(' << *opr0 << ")\n");
9550b57cec5SDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
9560b57cec5SDimitry Andric                                                         : "__pow2rsqrt");
9575f757f3fSDimitry Andric       replaceCall(FPOp, nval);
9580b57cec5SDimitry Andric       return true;
9590b57cec5SDimitry Andric     }
9600b57cec5SDimitry Andric   }
9610b57cec5SDimitry Andric 
9625f757f3fSDimitry Andric   if (!isUnsafeFiniteOnlyMath(FPOp))
9630b57cec5SDimitry Andric     return false;
9640b57cec5SDimitry Andric 
9650b57cec5SDimitry Andric   // Unsafe Math optimization
9660b57cec5SDimitry Andric 
9670b57cec5SDimitry Andric   // Remember that ci_opr1 is set if opr1 is integral
9680b57cec5SDimitry Andric   if (CF) {
9690b57cec5SDimitry Andric     double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
9705f757f3fSDimitry Andric                       ? (double)CF->convertToFloat()
9715f757f3fSDimitry Andric                       : CF->convertToDouble();
9720b57cec5SDimitry Andric     int ival = (int)dval;
9730b57cec5SDimitry Andric     if ((double)ival == dval) {
9740b57cec5SDimitry Andric       ci_opr1 = ival;
9750b57cec5SDimitry Andric     } else
9760b57cec5SDimitry Andric       ci_opr1 = 0x11111111;
9770b57cec5SDimitry Andric   }
9780b57cec5SDimitry Andric 
9790b57cec5SDimitry Andric   // pow/powr/pown(x, c) = [1/](x*x*..x); where
9800b57cec5SDimitry Andric   //   trunc(c) == c && the number of x == c && |c| <= 12
9810b57cec5SDimitry Andric   unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
9820b57cec5SDimitry Andric   if (abs_opr1 <= 12) {
9830b57cec5SDimitry Andric     Constant *cnval;
9840b57cec5SDimitry Andric     Value *nval;
9850b57cec5SDimitry Andric     if (abs_opr1 == 0) {
9860b57cec5SDimitry Andric       cnval = ConstantFP::get(eltType, 1.0);
9870b57cec5SDimitry Andric       if (getVecSize(FInfo) > 1) {
9880b57cec5SDimitry Andric         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
9890b57cec5SDimitry Andric       }
9900b57cec5SDimitry Andric       nval = cnval;
9910b57cec5SDimitry Andric     } else {
9920b57cec5SDimitry Andric       Value *valx2 = nullptr;
9930b57cec5SDimitry Andric       nval = nullptr;
9940b57cec5SDimitry Andric       while (abs_opr1 > 0) {
9950b57cec5SDimitry Andric         valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
9960b57cec5SDimitry Andric         if (abs_opr1 & 1) {
9970b57cec5SDimitry Andric           nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
9980b57cec5SDimitry Andric         }
9990b57cec5SDimitry Andric         abs_opr1 >>= 1;
10000b57cec5SDimitry Andric       }
10010b57cec5SDimitry Andric     }
10020b57cec5SDimitry Andric 
10030b57cec5SDimitry Andric     if (ci_opr1 < 0) {
10040b57cec5SDimitry Andric       cnval = ConstantFP::get(eltType, 1.0);
10050b57cec5SDimitry Andric       if (getVecSize(FInfo) > 1) {
10060b57cec5SDimitry Andric         cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
10070b57cec5SDimitry Andric       }
10080b57cec5SDimitry Andric       nval = B.CreateFDiv(cnval, nval, "__1powprod");
10090b57cec5SDimitry Andric     }
10105f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
10110b57cec5SDimitry Andric                       << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
10120b57cec5SDimitry Andric                       << ")\n");
10135f757f3fSDimitry Andric     replaceCall(FPOp, nval);
10140b57cec5SDimitry Andric     return true;
10150b57cec5SDimitry Andric   }
10160b57cec5SDimitry Andric 
10175f757f3fSDimitry Andric   // If we should use the generic intrinsic instead of emitting a libcall
10185f757f3fSDimitry Andric   const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();
10195f757f3fSDimitry Andric 
10200b57cec5SDimitry Andric   // powr ---> exp2(y * log2(x))
10210b57cec5SDimitry Andric   // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
10225f757f3fSDimitry Andric   FunctionCallee ExpExpr;
10235f757f3fSDimitry Andric   if (ShouldUseIntrinsic)
10245f757f3fSDimitry Andric     ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()});
10255f757f3fSDimitry Andric   else {
10265f757f3fSDimitry Andric     ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
10270b57cec5SDimitry Andric     if (!ExpExpr)
10280b57cec5SDimitry Andric       return false;
10295f757f3fSDimitry Andric   }
10300b57cec5SDimitry Andric 
10310b57cec5SDimitry Andric   bool needlog = false;
10320b57cec5SDimitry Andric   bool needabs = false;
10330b57cec5SDimitry Andric   bool needcopysign = false;
10340b57cec5SDimitry Andric   Constant *cnval = nullptr;
10350b57cec5SDimitry Andric   if (getVecSize(FInfo) == 1) {
10365f757f3fSDimitry Andric     CF = nullptr;
10375f757f3fSDimitry Andric     match(opr0, m_APFloatAllowUndef(CF));
10380b57cec5SDimitry Andric 
10390b57cec5SDimitry Andric     if (CF) {
10400b57cec5SDimitry Andric       double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
10415f757f3fSDimitry Andric                      ? (double)CF->convertToFloat()
10425f757f3fSDimitry Andric                      : CF->convertToDouble();
10430b57cec5SDimitry Andric 
10440b57cec5SDimitry Andric       V = log2(std::abs(V));
10450b57cec5SDimitry Andric       cnval = ConstantFP::get(eltType, V);
10460b57cec5SDimitry Andric       needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
10470b57cec5SDimitry Andric                      CF->isNegative();
10480b57cec5SDimitry Andric     } else {
10490b57cec5SDimitry Andric       needlog = true;
1050cb14a3feSDimitry Andric       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10510b57cec5SDimitry Andric     }
10520b57cec5SDimitry Andric   } else {
10530b57cec5SDimitry Andric     ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);
10540b57cec5SDimitry Andric 
10550b57cec5SDimitry Andric     if (!CDV) {
10560b57cec5SDimitry Andric       needlog = true;
10570b57cec5SDimitry Andric       needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
10580b57cec5SDimitry Andric     } else {
10590b57cec5SDimitry Andric       assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
10600b57cec5SDimitry Andric               "Wrong vector size detected");
10610b57cec5SDimitry Andric 
10620b57cec5SDimitry Andric       SmallVector<double, 0> DVal;
10630b57cec5SDimitry Andric       for (int i=0; i < getVecSize(FInfo); ++i) {
10645f757f3fSDimitry Andric         double V = CDV->getElementAsAPFloat(i).convertToDouble();
10650b57cec5SDimitry Andric         if (V < 0.0) needcopysign = true;
10660b57cec5SDimitry Andric         V = log2(std::abs(V));
10670b57cec5SDimitry Andric         DVal.push_back(V);
10680b57cec5SDimitry Andric       }
10690b57cec5SDimitry Andric       if (getArgType(FInfo) == AMDGPULibFunc::F32) {
10700b57cec5SDimitry Andric         SmallVector<float, 0> FVal;
10710b57cec5SDimitry Andric         for (unsigned i=0; i < DVal.size(); ++i) {
10720b57cec5SDimitry Andric           FVal.push_back((float)DVal[i]);
10730b57cec5SDimitry Andric         }
10740b57cec5SDimitry Andric         ArrayRef<float> tmp(FVal);
10750b57cec5SDimitry Andric         cnval = ConstantDataVector::get(M->getContext(), tmp);
10760b57cec5SDimitry Andric       } else {
10770b57cec5SDimitry Andric         ArrayRef<double> tmp(DVal);
10780b57cec5SDimitry Andric         cnval = ConstantDataVector::get(M->getContext(), tmp);
10790b57cec5SDimitry Andric       }
10800b57cec5SDimitry Andric     }
10810b57cec5SDimitry Andric   }
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric   if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
10840b57cec5SDimitry Andric     // We cannot handle corner cases for a general pow() function, give up
10850b57cec5SDimitry Andric     // unless y is a constant integral value. Then proceed as if it were pown.
10865f757f3fSDimitry Andric     if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags()))
10870b57cec5SDimitry Andric       return false;
10880b57cec5SDimitry Andric   }
10890b57cec5SDimitry Andric 
10900b57cec5SDimitry Andric   Value *nval;
10910b57cec5SDimitry Andric   if (needabs) {
10925f757f3fSDimitry Andric     nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
10930b57cec5SDimitry Andric   } else {
10940b57cec5SDimitry Andric     nval = cnval ? cnval : opr0;
10950b57cec5SDimitry Andric   }
10960b57cec5SDimitry Andric   if (needlog) {
10975f757f3fSDimitry Andric     FunctionCallee LogExpr;
10985f757f3fSDimitry Andric     if (ShouldUseIntrinsic) {
10995f757f3fSDimitry Andric       LogExpr =
11005f757f3fSDimitry Andric           Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()});
11015f757f3fSDimitry Andric     } else {
11025f757f3fSDimitry Andric       LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
11030b57cec5SDimitry Andric       if (!LogExpr)
11040b57cec5SDimitry Andric         return false;
11055f757f3fSDimitry Andric     }
11065f757f3fSDimitry Andric 
11070b57cec5SDimitry Andric     nval = CreateCallEx(B,LogExpr, nval, "__log2");
11080b57cec5SDimitry Andric   }
11090b57cec5SDimitry Andric 
11100b57cec5SDimitry Andric   if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
11110b57cec5SDimitry Andric     // convert int(32) to fp(f32 or f64)
11120b57cec5SDimitry Andric     opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
11130b57cec5SDimitry Andric   }
11140b57cec5SDimitry Andric   nval = B.CreateFMul(opr1, nval, "__ylogx");
11150b57cec5SDimitry Andric   nval = CreateCallEx(B,ExpExpr, nval, "__exp2");
11160b57cec5SDimitry Andric 
11170b57cec5SDimitry Andric   if (needcopysign) {
11180b57cec5SDimitry Andric     Value *opr_n;
11190b57cec5SDimitry Andric     Type* rTy = opr0->getType();
11205f757f3fSDimitry Andric     Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
11210b57cec5SDimitry Andric     Type *nTy = nTyS;
11225ffd83dbSDimitry Andric     if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
11235ffd83dbSDimitry Andric       nTy = FixedVectorType::get(nTyS, vTy);
11240b57cec5SDimitry Andric     unsigned size = nTy->getScalarSizeInBits();
11255f757f3fSDimitry Andric     opr_n = FPOp->getOperand(1);
11260b57cec5SDimitry Andric     if (opr_n->getType()->isIntegerTy())
11275f757f3fSDimitry Andric       opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
11280b57cec5SDimitry Andric     else
11290b57cec5SDimitry Andric       opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");
11300b57cec5SDimitry Andric 
11310b57cec5SDimitry Andric     Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
11320b57cec5SDimitry Andric     sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
11330b57cec5SDimitry Andric     nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
11340b57cec5SDimitry Andric     nval = B.CreateBitCast(nval, opr0->getType());
11350b57cec5SDimitry Andric   }
11360b57cec5SDimitry Andric 
11375f757f3fSDimitry Andric   LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
11380b57cec5SDimitry Andric                     << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
11395f757f3fSDimitry Andric   replaceCall(FPOp, nval);
11400b57cec5SDimitry Andric 
11410b57cec5SDimitry Andric   return true;
11420b57cec5SDimitry Andric }
11430b57cec5SDimitry Andric 
fold_rootn(FPMathOperator * FPOp,IRBuilder<> & B,const FuncInfo & FInfo)11445f757f3fSDimitry Andric bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
11450b57cec5SDimitry Andric                                 const FuncInfo &FInfo) {
11465f757f3fSDimitry Andric   // skip vector function
11475f757f3fSDimitry Andric   if (getVecSize(FInfo) != 1)
11485f757f3fSDimitry Andric     return false;
11495f757f3fSDimitry Andric 
11505f757f3fSDimitry Andric   Value *opr0 = FPOp->getOperand(0);
11515f757f3fSDimitry Andric   Value *opr1 = FPOp->getOperand(1);
11520b57cec5SDimitry Andric 
11530b57cec5SDimitry Andric   ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
11540b57cec5SDimitry Andric   if (!CINT) {
11550b57cec5SDimitry Andric     return false;
11560b57cec5SDimitry Andric   }
11570b57cec5SDimitry Andric   int ci_opr1 = (int)CINT->getSExtValue();
11580b57cec5SDimitry Andric   if (ci_opr1 == 1) {  // rootn(x, 1) = x
11595f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
11605f757f3fSDimitry Andric     replaceCall(FPOp, opr0);
11610b57cec5SDimitry Andric     return true;
11620b57cec5SDimitry Andric   }
11635f757f3fSDimitry Andric 
11645f757f3fSDimitry Andric   Module *M = B.GetInsertBlock()->getModule();
11650b57cec5SDimitry Andric   if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
11660b57cec5SDimitry Andric     if (FunctionCallee FPExpr =
11670b57cec5SDimitry Andric             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
11685f757f3fSDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0
11695f757f3fSDimitry Andric                         << ")\n");
11700b57cec5SDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
11715f757f3fSDimitry Andric       replaceCall(FPOp, nval);
11720b57cec5SDimitry Andric       return true;
11730b57cec5SDimitry Andric     }
11740b57cec5SDimitry Andric   } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
11750b57cec5SDimitry Andric     if (FunctionCallee FPExpr =
11760b57cec5SDimitry Andric             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
11775f757f3fSDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
11785f757f3fSDimitry Andric                         << ")\n");
11790b57cec5SDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
11805f757f3fSDimitry Andric       replaceCall(FPOp, nval);
11810b57cec5SDimitry Andric       return true;
11820b57cec5SDimitry Andric     }
11830b57cec5SDimitry Andric   } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
11845f757f3fSDimitry Andric     LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
11850b57cec5SDimitry Andric     Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
11860b57cec5SDimitry Andric                                opr0,
11870b57cec5SDimitry Andric                                "__rootn2div");
11885f757f3fSDimitry Andric     replaceCall(FPOp, nval);
11890b57cec5SDimitry Andric     return true;
11900b57cec5SDimitry Andric   } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
11910b57cec5SDimitry Andric     if (FunctionCallee FPExpr =
11920b57cec5SDimitry Andric             getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
11935f757f3fSDimitry Andric       LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
11940b57cec5SDimitry Andric                         << ")\n");
11950b57cec5SDimitry Andric       Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
11965f757f3fSDimitry Andric       replaceCall(FPOp, nval);
11970b57cec5SDimitry Andric       return true;
11980b57cec5SDimitry Andric     }
11990b57cec5SDimitry Andric   }
12000b57cec5SDimitry Andric   return false;
12010b57cec5SDimitry Andric }
12020b57cec5SDimitry Andric 
1203349cc55cSDimitry Andric // Get a scalar native builtin single argument FP function
getNativeFunction(Module * M,const FuncInfo & FInfo)12040b57cec5SDimitry Andric FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
12050b57cec5SDimitry Andric                                                  const FuncInfo &FInfo) {
12060b57cec5SDimitry Andric   if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
12070b57cec5SDimitry Andric     return nullptr;
12080b57cec5SDimitry Andric   FuncInfo nf = FInfo;
12090b57cec5SDimitry Andric   nf.setPrefix(AMDGPULibFunc::NATIVE);
12100b57cec5SDimitry Andric   return getFunction(M, nf);
12110b57cec5SDimitry Andric }
12120b57cec5SDimitry Andric 
12135f757f3fSDimitry Andric // Some library calls are just wrappers around llvm intrinsics, but compiled
12145f757f3fSDimitry Andric // conservatively. Preserve the flags from the original call site by
12155f757f3fSDimitry Andric // substituting them with direct calls with all the flags.
shouldReplaceLibcallWithIntrinsic(const CallInst * CI,bool AllowMinSizeF32,bool AllowF64,bool AllowStrictFP)12165f757f3fSDimitry Andric bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
12175f757f3fSDimitry Andric                                                        bool AllowMinSizeF32,
12185f757f3fSDimitry Andric                                                        bool AllowF64,
12195f757f3fSDimitry Andric                                                        bool AllowStrictFP) {
12205f757f3fSDimitry Andric   Type *FltTy = CI->getType()->getScalarType();
12215f757f3fSDimitry Andric   const bool IsF32 = FltTy->isFloatTy();
12225f757f3fSDimitry Andric 
12235f757f3fSDimitry Andric   // f64 intrinsics aren't implemented for most operations.
12245f757f3fSDimitry Andric   if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
12255f757f3fSDimitry Andric     return false;
12265f757f3fSDimitry Andric 
12275f757f3fSDimitry Andric   // We're implicitly inlining by replacing the libcall with the intrinsic, so
12285f757f3fSDimitry Andric   // don't do it for noinline call sites.
12295f757f3fSDimitry Andric   if (CI->isNoInline())
12305f757f3fSDimitry Andric     return false;
12315f757f3fSDimitry Andric 
12325f757f3fSDimitry Andric   const Function *ParentF = CI->getFunction();
12335f757f3fSDimitry Andric   // TODO: Handle strictfp
12345f757f3fSDimitry Andric   if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
12355f757f3fSDimitry Andric     return false;
12365f757f3fSDimitry Andric 
12375f757f3fSDimitry Andric   if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
12385f757f3fSDimitry Andric     return false;
12395f757f3fSDimitry Andric   return true;
12405f757f3fSDimitry Andric }
12415f757f3fSDimitry Andric 
replaceLibCallWithSimpleIntrinsic(IRBuilder<> & B,CallInst * CI,Intrinsic::ID IntrID)12425f757f3fSDimitry Andric void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
12435f757f3fSDimitry Andric                                                        CallInst *CI,
12445f757f3fSDimitry Andric                                                        Intrinsic::ID IntrID) {
12455f757f3fSDimitry Andric   if (CI->arg_size() == 2) {
12465f757f3fSDimitry Andric     Value *Arg0 = CI->getArgOperand(0);
12475f757f3fSDimitry Andric     Value *Arg1 = CI->getArgOperand(1);
12485f757f3fSDimitry Andric     VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
12495f757f3fSDimitry Andric     VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
12505f757f3fSDimitry Andric     if (Arg0VecTy && !Arg1VecTy) {
12515f757f3fSDimitry Andric       Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
12525f757f3fSDimitry Andric       CI->setArgOperand(1, SplatRHS);
12535f757f3fSDimitry Andric     } else if (!Arg0VecTy && Arg1VecTy) {
12545f757f3fSDimitry Andric       Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
12555f757f3fSDimitry Andric       CI->setArgOperand(0, SplatLHS);
12565f757f3fSDimitry Andric     }
12575f757f3fSDimitry Andric   }
12585f757f3fSDimitry Andric 
12595f757f3fSDimitry Andric   CI->setCalledFunction(
12605f757f3fSDimitry Andric       Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()}));
12615f757f3fSDimitry Andric }
12625f757f3fSDimitry Andric 
tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> & B,CallInst * CI,Intrinsic::ID IntrID,bool AllowMinSizeF32,bool AllowF64,bool AllowStrictFP)12635f757f3fSDimitry Andric bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
12645f757f3fSDimitry Andric     IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
12655f757f3fSDimitry Andric     bool AllowF64, bool AllowStrictFP) {
12665f757f3fSDimitry Andric   if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
12675f757f3fSDimitry Andric                                          AllowStrictFP))
12685f757f3fSDimitry Andric     return false;
12695f757f3fSDimitry Andric   replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
12705f757f3fSDimitry Andric   return true;
12715f757f3fSDimitry Andric }
12725f757f3fSDimitry Andric 
12735f757f3fSDimitry Andric std::tuple<Value *, Value *, Value *>
insertSinCos(Value * Arg,FastMathFlags FMF,IRBuilder<> & B,FunctionCallee Fsincos)12745f757f3fSDimitry Andric AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
12755f757f3fSDimitry Andric                              FunctionCallee Fsincos) {
12765f757f3fSDimitry Andric   DebugLoc DL = B.getCurrentDebugLocation();
12775f757f3fSDimitry Andric   Function *F = B.GetInsertBlock()->getParent();
12785f757f3fSDimitry Andric   B.SetInsertPointPastAllocas(F);
12795f757f3fSDimitry Andric 
12805f757f3fSDimitry Andric   AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");
12815f757f3fSDimitry Andric 
12825f757f3fSDimitry Andric   if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
12835f757f3fSDimitry Andric     // If the argument is an instruction, it must dominate all uses so put our
12845f757f3fSDimitry Andric     // sincos call there. Otherwise, right after the allocas works well enough
12855f757f3fSDimitry Andric     // if it's an argument or constant.
12865f757f3fSDimitry Andric 
12875f757f3fSDimitry Andric     B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());
12885f757f3fSDimitry Andric 
12895f757f3fSDimitry Andric     // SetInsertPoint unwelcomely always tries to set the debug loc.
12905f757f3fSDimitry Andric     B.SetCurrentDebugLocation(DL);
12915f757f3fSDimitry Andric   }
12925f757f3fSDimitry Andric 
12935f757f3fSDimitry Andric   Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);
12945f757f3fSDimitry Andric 
12955f757f3fSDimitry Andric   // The allocaInst allocates the memory in private address space. This need
12965f757f3fSDimitry Andric   // to be addrspacecasted to point to the address space of cos pointer type.
12975f757f3fSDimitry Andric   // In OpenCL 2.0 this is generic, while in 1.2 that is private.
12985f757f3fSDimitry Andric   Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);
12995f757f3fSDimitry Andric 
13005f757f3fSDimitry Andric   CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);
13015f757f3fSDimitry Andric 
13025f757f3fSDimitry Andric   // TODO: Is it worth trying to preserve the location for the cos calls for the
13035f757f3fSDimitry Andric   // load?
13045f757f3fSDimitry Andric 
13055f757f3fSDimitry Andric   LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
13065f757f3fSDimitry Andric   return {SinCos, LoadCos, SinCos};
13075f757f3fSDimitry Andric }
13085f757f3fSDimitry Andric 
13090b57cec5SDimitry Andric // fold sin, cos -> sincos.
fold_sincos(FPMathOperator * FPOp,IRBuilder<> & B,const FuncInfo & fInfo)13105f757f3fSDimitry Andric bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
13115f757f3fSDimitry Andric                                  const FuncInfo &fInfo) {
13120b57cec5SDimitry Andric   assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
13130b57cec5SDimitry Andric          fInfo.getId() == AMDGPULibFunc::EI_COS);
13145f757f3fSDimitry Andric 
13155f757f3fSDimitry Andric   if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
13165f757f3fSDimitry Andric        getArgType(fInfo) != AMDGPULibFunc::F64) ||
13175f757f3fSDimitry Andric       fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
13185f757f3fSDimitry Andric     return false;
13195f757f3fSDimitry Andric 
13200b57cec5SDimitry Andric   bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;
13210b57cec5SDimitry Andric 
13225f757f3fSDimitry Andric   Value *CArgVal = FPOp->getOperand(0);
13235f757f3fSDimitry Andric   CallInst *CI = cast<CallInst>(FPOp);
13240b57cec5SDimitry Andric 
13255f757f3fSDimitry Andric   Function *F = B.GetInsertBlock()->getParent();
13265f757f3fSDimitry Andric   Module *M = F->getParent();
13270b57cec5SDimitry Andric 
13285f757f3fSDimitry Andric   // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
13295f757f3fSDimitry Andric   // implementation. Prefer the private form if available.
13305f757f3fSDimitry Andric   AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
13315f757f3fSDimitry Andric   SinCosLibFuncPrivate.getLeads()[0].PtrKind =
13325f757f3fSDimitry Andric       AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS);
13330b57cec5SDimitry Andric 
13345f757f3fSDimitry Andric   AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
13355f757f3fSDimitry Andric   SinCosLibFuncGeneric.getLeads()[0].PtrKind =
13365f757f3fSDimitry Andric       AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);
13370b57cec5SDimitry Andric 
13385f757f3fSDimitry Andric   FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
13395f757f3fSDimitry Andric   FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
13405f757f3fSDimitry Andric   FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
13415f757f3fSDimitry Andric   if (!FSinCos)
13425f757f3fSDimitry Andric     return false;
13435f757f3fSDimitry Andric 
13445f757f3fSDimitry Andric   SmallVector<CallInst *> SinCalls;
13455f757f3fSDimitry Andric   SmallVector<CallInst *> CosCalls;
13465f757f3fSDimitry Andric   SmallVector<CallInst *> SinCosCalls;
13475f757f3fSDimitry Andric   FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
13485f757f3fSDimitry Andric                        fInfo);
13495f757f3fSDimitry Andric   const std::string PairName = PartnerInfo.mangle();
13505f757f3fSDimitry Andric 
13515f757f3fSDimitry Andric   StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
13525f757f3fSDimitry Andric   StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
13535f757f3fSDimitry Andric   const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
13545f757f3fSDimitry Andric   const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();
13555f757f3fSDimitry Andric 
13565f757f3fSDimitry Andric   // Intersect the two sets of flags.
13575f757f3fSDimitry Andric   FastMathFlags FMF = FPOp->getFastMathFlags();
13585f757f3fSDimitry Andric   MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);
13595f757f3fSDimitry Andric 
13605f757f3fSDimitry Andric   SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};
13615f757f3fSDimitry Andric 
13620b57cec5SDimitry Andric   for (User* U : CArgVal->users()) {
13635f757f3fSDimitry Andric     CallInst *XI = dyn_cast<CallInst>(U);
13645f757f3fSDimitry Andric     if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
13650b57cec5SDimitry Andric       continue;
13660b57cec5SDimitry Andric 
13670b57cec5SDimitry Andric     Function *UCallee = XI->getCalledFunction();
13685f757f3fSDimitry Andric     if (!UCallee)
13690b57cec5SDimitry Andric       continue;
13700b57cec5SDimitry Andric 
13715f757f3fSDimitry Andric     bool Handled = true;
13725f757f3fSDimitry Andric 
13735f757f3fSDimitry Andric     if (UCallee->getName() == SinName)
13745f757f3fSDimitry Andric       SinCalls.push_back(XI);
13755f757f3fSDimitry Andric     else if (UCallee->getName() == CosName)
13765f757f3fSDimitry Andric       CosCalls.push_back(XI);
13775f757f3fSDimitry Andric     else if (UCallee->getName() == SinCosPrivateName ||
13785f757f3fSDimitry Andric              UCallee->getName() == SinCosGenericName)
13795f757f3fSDimitry Andric       SinCosCalls.push_back(XI);
13805f757f3fSDimitry Andric     else
13815f757f3fSDimitry Andric       Handled = false;
13825f757f3fSDimitry Andric 
13835f757f3fSDimitry Andric     if (Handled) {
13845f757f3fSDimitry Andric       MergeDbgLocs.push_back(XI->getDebugLoc());
13855f757f3fSDimitry Andric       auto *OtherOp = cast<FPMathOperator>(XI);
13865f757f3fSDimitry Andric       FMF &= OtherOp->getFastMathFlags();
13875f757f3fSDimitry Andric       FPMath = MDNode::getMostGenericFPMath(
13885f757f3fSDimitry Andric           FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
13890b57cec5SDimitry Andric     }
13900b57cec5SDimitry Andric   }
13910b57cec5SDimitry Andric 
13925f757f3fSDimitry Andric   if (SinCalls.empty() || CosCalls.empty())
13930b57cec5SDimitry Andric     return false;
13940b57cec5SDimitry Andric 
13955f757f3fSDimitry Andric   B.setFastMathFlags(FMF);
13965f757f3fSDimitry Andric   B.setDefaultFPMathTag(FPMath);
13975f757f3fSDimitry Andric   DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
13985f757f3fSDimitry Andric   B.SetCurrentDebugLocation(DbgLoc);
13990b57cec5SDimitry Andric 
14005f757f3fSDimitry Andric   auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);
14010b57cec5SDimitry Andric 
14025f757f3fSDimitry Andric   auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
14035f757f3fSDimitry Andric     for (CallInst *C : Calls)
14045f757f3fSDimitry Andric       C->replaceAllUsesWith(Res);
14050b57cec5SDimitry Andric 
14065f757f3fSDimitry Andric     // Leave the other dead instructions to avoid clobbering iterators.
14075f757f3fSDimitry Andric   };
14085f757f3fSDimitry Andric 
14095f757f3fSDimitry Andric   replaceTrigInsts(SinCalls, Sin);
14105f757f3fSDimitry Andric   replaceTrigInsts(CosCalls, Cos);
14115f757f3fSDimitry Andric   replaceTrigInsts(SinCosCalls, SinCos);
14125f757f3fSDimitry Andric 
14135f757f3fSDimitry Andric   // It's safe to delete the original now.
14140b57cec5SDimitry Andric   CI->eraseFromParent();
14150b57cec5SDimitry Andric   return true;
14160b57cec5SDimitry Andric }
14170b57cec5SDimitry Andric 
evaluateScalarMathFunc(const FuncInfo & FInfo,double & Res0,double & Res1,Constant * copr0,Constant * copr1)14185f757f3fSDimitry Andric bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
14195f757f3fSDimitry Andric                                             double &Res1, Constant *copr0,
14205f757f3fSDimitry Andric                                             Constant *copr1) {
14210b57cec5SDimitry Andric   // By default, opr0/opr1/opr3 holds values of float/double type.
14220b57cec5SDimitry Andric   // If they are not float/double, each function has to its
14230b57cec5SDimitry Andric   // operand separately.
14245f757f3fSDimitry Andric   double opr0 = 0.0, opr1 = 0.0;
14250b57cec5SDimitry Andric   ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
14260b57cec5SDimitry Andric   ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
14270b57cec5SDimitry Andric   if (fpopr0) {
14280b57cec5SDimitry Andric     opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14290b57cec5SDimitry Andric              ? fpopr0->getValueAPF().convertToDouble()
14300b57cec5SDimitry Andric              : (double)fpopr0->getValueAPF().convertToFloat();
14310b57cec5SDimitry Andric   }
14320b57cec5SDimitry Andric 
14330b57cec5SDimitry Andric   if (fpopr1) {
14340b57cec5SDimitry Andric     opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
14350b57cec5SDimitry Andric              ? fpopr1->getValueAPF().convertToDouble()
14360b57cec5SDimitry Andric              : (double)fpopr1->getValueAPF().convertToFloat();
14370b57cec5SDimitry Andric   }
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric   switch (FInfo.getId()) {
14400b57cec5SDimitry Andric   default : return false;
14410b57cec5SDimitry Andric 
14420b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOS:
14430b57cec5SDimitry Andric     Res0 = acos(opr0);
14440b57cec5SDimitry Andric     return true;
14450b57cec5SDimitry Andric 
14460b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSH:
14470b57cec5SDimitry Andric     // acosh(x) == log(x + sqrt(x*x - 1))
14480b57cec5SDimitry Andric     Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
14490b57cec5SDimitry Andric     return true;
14500b57cec5SDimitry Andric 
14510b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ACOSPI:
14520b57cec5SDimitry Andric     Res0 = acos(opr0) / MATH_PI;
14530b57cec5SDimitry Andric     return true;
14540b57cec5SDimitry Andric 
14550b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASIN:
14560b57cec5SDimitry Andric     Res0 = asin(opr0);
14570b57cec5SDimitry Andric     return true;
14580b57cec5SDimitry Andric 
14590b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINH:
14600b57cec5SDimitry Andric     // asinh(x) == log(x + sqrt(x*x + 1))
14610b57cec5SDimitry Andric     Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
14620b57cec5SDimitry Andric     return true;
14630b57cec5SDimitry Andric 
14640b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ASINPI:
14650b57cec5SDimitry Andric     Res0 = asin(opr0) / MATH_PI;
14660b57cec5SDimitry Andric     return true;
14670b57cec5SDimitry Andric 
14680b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATAN:
14690b57cec5SDimitry Andric     Res0 = atan(opr0);
14700b57cec5SDimitry Andric     return true;
14710b57cec5SDimitry Andric 
14720b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANH:
14730b57cec5SDimitry Andric     // atanh(x) == (log(x+1) - log(x-1))/2;
14740b57cec5SDimitry Andric     Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
14750b57cec5SDimitry Andric     return true;
14760b57cec5SDimitry Andric 
14770b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ATANPI:
14780b57cec5SDimitry Andric     Res0 = atan(opr0) / MATH_PI;
14790b57cec5SDimitry Andric     return true;
14800b57cec5SDimitry Andric 
14810b57cec5SDimitry Andric   case AMDGPULibFunc::EI_CBRT:
14820b57cec5SDimitry Andric     Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
14830b57cec5SDimitry Andric     return true;
14840b57cec5SDimitry Andric 
14850b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COS:
14860b57cec5SDimitry Andric     Res0 = cos(opr0);
14870b57cec5SDimitry Andric     return true;
14880b57cec5SDimitry Andric 
14890b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSH:
14900b57cec5SDimitry Andric     Res0 = cosh(opr0);
14910b57cec5SDimitry Andric     return true;
14920b57cec5SDimitry Andric 
14930b57cec5SDimitry Andric   case AMDGPULibFunc::EI_COSPI:
14940b57cec5SDimitry Andric     Res0 = cos(MATH_PI * opr0);
14950b57cec5SDimitry Andric     return true;
14960b57cec5SDimitry Andric 
14970b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP:
14980b57cec5SDimitry Andric     Res0 = exp(opr0);
14990b57cec5SDimitry Andric     return true;
15000b57cec5SDimitry Andric 
15010b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP2:
15020b57cec5SDimitry Andric     Res0 = pow(2.0, opr0);
15030b57cec5SDimitry Andric     return true;
15040b57cec5SDimitry Andric 
15050b57cec5SDimitry Andric   case AMDGPULibFunc::EI_EXP10:
15060b57cec5SDimitry Andric     Res0 = pow(10.0, opr0);
15070b57cec5SDimitry Andric     return true;
15080b57cec5SDimitry Andric 
15090b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG:
15100b57cec5SDimitry Andric     Res0 = log(opr0);
15110b57cec5SDimitry Andric     return true;
15120b57cec5SDimitry Andric 
15130b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG2:
15140b57cec5SDimitry Andric     Res0 = log(opr0) / log(2.0);
15150b57cec5SDimitry Andric     return true;
15160b57cec5SDimitry Andric 
15170b57cec5SDimitry Andric   case AMDGPULibFunc::EI_LOG10:
15180b57cec5SDimitry Andric     Res0 = log(opr0) / log(10.0);
15190b57cec5SDimitry Andric     return true;
15200b57cec5SDimitry Andric 
15210b57cec5SDimitry Andric   case AMDGPULibFunc::EI_RSQRT:
15220b57cec5SDimitry Andric     Res0 = 1.0 / sqrt(opr0);
15230b57cec5SDimitry Andric     return true;
15240b57cec5SDimitry Andric 
15250b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SIN:
15260b57cec5SDimitry Andric     Res0 = sin(opr0);
15270b57cec5SDimitry Andric     return true;
15280b57cec5SDimitry Andric 
15290b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINH:
15300b57cec5SDimitry Andric     Res0 = sinh(opr0);
15310b57cec5SDimitry Andric     return true;
15320b57cec5SDimitry Andric 
15330b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINPI:
15340b57cec5SDimitry Andric     Res0 = sin(MATH_PI * opr0);
15350b57cec5SDimitry Andric     return true;
15360b57cec5SDimitry Andric 
15370b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TAN:
15380b57cec5SDimitry Andric     Res0 = tan(opr0);
15390b57cec5SDimitry Andric     return true;
15400b57cec5SDimitry Andric 
15410b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANH:
15420b57cec5SDimitry Andric     Res0 = tanh(opr0);
15430b57cec5SDimitry Andric     return true;
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric   case AMDGPULibFunc::EI_TANPI:
15460b57cec5SDimitry Andric     Res0 = tan(MATH_PI * opr0);
15470b57cec5SDimitry Andric     return true;
15480b57cec5SDimitry Andric 
15490b57cec5SDimitry Andric   // two-arg functions
15500b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POW:
15510b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POWR:
15520b57cec5SDimitry Andric     Res0 = pow(opr0, opr1);
15530b57cec5SDimitry Andric     return true;
15540b57cec5SDimitry Andric 
15550b57cec5SDimitry Andric   case AMDGPULibFunc::EI_POWN: {
15560b57cec5SDimitry Andric     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15570b57cec5SDimitry Andric       double val = (double)iopr1->getSExtValue();
15580b57cec5SDimitry Andric       Res0 = pow(opr0, val);
15590b57cec5SDimitry Andric       return true;
15600b57cec5SDimitry Andric     }
15610b57cec5SDimitry Andric     return false;
15620b57cec5SDimitry Andric   }
15630b57cec5SDimitry Andric 
15640b57cec5SDimitry Andric   case AMDGPULibFunc::EI_ROOTN: {
15650b57cec5SDimitry Andric     if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
15660b57cec5SDimitry Andric       double val = (double)iopr1->getSExtValue();
15670b57cec5SDimitry Andric       Res0 = pow(opr0, 1.0 / val);
15680b57cec5SDimitry Andric       return true;
15690b57cec5SDimitry Andric     }
15700b57cec5SDimitry Andric     return false;
15710b57cec5SDimitry Andric   }
15720b57cec5SDimitry Andric 
15730b57cec5SDimitry Andric   // with ptr arg
15740b57cec5SDimitry Andric   case AMDGPULibFunc::EI_SINCOS:
15750b57cec5SDimitry Andric     Res0 = sin(opr0);
15760b57cec5SDimitry Andric     Res1 = cos(opr0);
15770b57cec5SDimitry Andric     return true;
15780b57cec5SDimitry Andric   }
15790b57cec5SDimitry Andric 
15800b57cec5SDimitry Andric   return false;
15810b57cec5SDimitry Andric }
15820b57cec5SDimitry Andric 
evaluateCall(CallInst * aCI,const FuncInfo & FInfo)1583349cc55cSDimitry Andric bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) {
1584349cc55cSDimitry Andric   int numArgs = (int)aCI->arg_size();
15850b57cec5SDimitry Andric   if (numArgs > 3)
15860b57cec5SDimitry Andric     return false;
15870b57cec5SDimitry Andric 
15880b57cec5SDimitry Andric   Constant *copr0 = nullptr;
15890b57cec5SDimitry Andric   Constant *copr1 = nullptr;
15900b57cec5SDimitry Andric   if (numArgs > 0) {
15910b57cec5SDimitry Andric     if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr)
15920b57cec5SDimitry Andric       return false;
15930b57cec5SDimitry Andric   }
15940b57cec5SDimitry Andric 
15950b57cec5SDimitry Andric   if (numArgs > 1) {
15960b57cec5SDimitry Andric     if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) {
15970b57cec5SDimitry Andric       if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS)
15980b57cec5SDimitry Andric         return false;
15990b57cec5SDimitry Andric     }
16000b57cec5SDimitry Andric   }
16010b57cec5SDimitry Andric 
16020b57cec5SDimitry Andric   // At this point, all arguments to aCI are constants.
16030b57cec5SDimitry Andric 
16040b57cec5SDimitry Andric   // max vector size is 16, and sincos will generate two results.
16050b57cec5SDimitry Andric   double DVal0[16], DVal1[16];
160681ad6265SDimitry Andric   int FuncVecSize = getVecSize(FInfo);
16070b57cec5SDimitry Andric   bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS);
160881ad6265SDimitry Andric   if (FuncVecSize == 1) {
16095f757f3fSDimitry Andric     if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) {
16100b57cec5SDimitry Andric       return false;
16110b57cec5SDimitry Andric     }
16120b57cec5SDimitry Andric   } else {
16130b57cec5SDimitry Andric     ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0);
16140b57cec5SDimitry Andric     ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1);
161581ad6265SDimitry Andric     for (int i = 0; i < FuncVecSize; ++i) {
16160b57cec5SDimitry Andric       Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr;
16170b57cec5SDimitry Andric       Constant *celt1 = CDV1 ? CDV1->getElementAsConstant(i) : nullptr;
16185f757f3fSDimitry Andric       if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) {
16190b57cec5SDimitry Andric         return false;
16200b57cec5SDimitry Andric       }
16210b57cec5SDimitry Andric     }
16220b57cec5SDimitry Andric   }
16230b57cec5SDimitry Andric 
16245f757f3fSDimitry Andric   LLVMContext &context = aCI->getContext();
16250b57cec5SDimitry Andric   Constant *nval0, *nval1;
162681ad6265SDimitry Andric   if (FuncVecSize == 1) {
16275f757f3fSDimitry Andric     nval0 = ConstantFP::get(aCI->getType(), DVal0[0]);
16280b57cec5SDimitry Andric     if (hasTwoResults)
16295f757f3fSDimitry Andric       nval1 = ConstantFP::get(aCI->getType(), DVal1[0]);
16300b57cec5SDimitry Andric   } else {
16310b57cec5SDimitry Andric     if (getArgType(FInfo) == AMDGPULibFunc::F32) {
16320b57cec5SDimitry Andric       SmallVector <float, 0> FVal0, FVal1;
163381ad6265SDimitry Andric       for (int i = 0; i < FuncVecSize; ++i)
16340b57cec5SDimitry Andric         FVal0.push_back((float)DVal0[i]);
16350b57cec5SDimitry Andric       ArrayRef<float> tmp0(FVal0);
16360b57cec5SDimitry Andric       nval0 = ConstantDataVector::get(context, tmp0);
16370b57cec5SDimitry Andric       if (hasTwoResults) {
163881ad6265SDimitry Andric         for (int i = 0; i < FuncVecSize; ++i)
16390b57cec5SDimitry Andric           FVal1.push_back((float)DVal1[i]);
16400b57cec5SDimitry Andric         ArrayRef<float> tmp1(FVal1);
16410b57cec5SDimitry Andric         nval1 = ConstantDataVector::get(context, tmp1);
16420b57cec5SDimitry Andric       }
16430b57cec5SDimitry Andric     } else {
16440b57cec5SDimitry Andric       ArrayRef<double> tmp0(DVal0);
16450b57cec5SDimitry Andric       nval0 = ConstantDataVector::get(context, tmp0);
16460b57cec5SDimitry Andric       if (hasTwoResults) {
16470b57cec5SDimitry Andric         ArrayRef<double> tmp1(DVal1);
16480b57cec5SDimitry Andric         nval1 = ConstantDataVector::get(context, tmp1);
16490b57cec5SDimitry Andric       }
16500b57cec5SDimitry Andric     }
16510b57cec5SDimitry Andric   }
16520b57cec5SDimitry Andric 
16530b57cec5SDimitry Andric   if (hasTwoResults) {
16540b57cec5SDimitry Andric     // sincos
16550b57cec5SDimitry Andric     assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS &&
16560b57cec5SDimitry Andric            "math function with ptr arg not supported yet");
16570b57cec5SDimitry Andric     new StoreInst(nval1, aCI->getArgOperand(1), aCI);
16580b57cec5SDimitry Andric   }
16590b57cec5SDimitry Andric 
16605f757f3fSDimitry Andric   replaceCall(aCI, nval0);
16610b57cec5SDimitry Andric   return true;
16620b57cec5SDimitry Andric }
16630b57cec5SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)1664e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F,
1665e8d8bef9SDimitry Andric                                                   FunctionAnalysisManager &AM) {
16665f757f3fSDimitry Andric   AMDGPULibCalls Simplifier;
1667e8d8bef9SDimitry Andric   Simplifier.initNativeFuncs();
16685f757f3fSDimitry Andric   Simplifier.initFunction(F, AM);
1669e8d8bef9SDimitry Andric 
1670e8d8bef9SDimitry Andric   bool Changed = false;
1671e8d8bef9SDimitry Andric 
1672e8d8bef9SDimitry Andric   LLVM_DEBUG(dbgs() << "AMDIC: process function ";
1673e8d8bef9SDimitry Andric              F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';);
1674e8d8bef9SDimitry Andric 
1675e8d8bef9SDimitry Andric   for (auto &BB : F) {
1676e8d8bef9SDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1677e8d8bef9SDimitry Andric       // Ignore non-calls.
1678e8d8bef9SDimitry Andric       CallInst *CI = dyn_cast<CallInst>(I);
1679e8d8bef9SDimitry Andric       ++I;
1680e8d8bef9SDimitry Andric 
16815f757f3fSDimitry Andric       if (CI) {
16825f757f3fSDimitry Andric         if (Simplifier.fold(CI))
1683e8d8bef9SDimitry Andric           Changed = true;
1684e8d8bef9SDimitry Andric       }
1685e8d8bef9SDimitry Andric     }
16865f757f3fSDimitry Andric   }
1687e8d8bef9SDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1688e8d8bef9SDimitry Andric }
1689e8d8bef9SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)1690e8d8bef9SDimitry Andric PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F,
1691e8d8bef9SDimitry Andric                                                 FunctionAnalysisManager &AM) {
1692e8d8bef9SDimitry Andric   if (UseNative.empty())
1693e8d8bef9SDimitry Andric     return PreservedAnalyses::all();
1694e8d8bef9SDimitry Andric 
1695e8d8bef9SDimitry Andric   AMDGPULibCalls Simplifier;
1696e8d8bef9SDimitry Andric   Simplifier.initNativeFuncs();
16975f757f3fSDimitry Andric   Simplifier.initFunction(F, AM);
1698e8d8bef9SDimitry Andric 
1699e8d8bef9SDimitry Andric   bool Changed = false;
1700e8d8bef9SDimitry Andric   for (auto &BB : F) {
1701e8d8bef9SDimitry Andric     for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) {
1702e8d8bef9SDimitry Andric       // Ignore non-calls.
1703e8d8bef9SDimitry Andric       CallInst *CI = dyn_cast<CallInst>(I);
1704e8d8bef9SDimitry Andric       ++I;
17055f757f3fSDimitry Andric       if (CI && Simplifier.useNative(CI))
1706e8d8bef9SDimitry Andric         Changed = true;
1707e8d8bef9SDimitry Andric     }
1708e8d8bef9SDimitry Andric   }
1709e8d8bef9SDimitry Andric   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
1710e8d8bef9SDimitry Andric }
1711