1 //===- AMDGPULibCalls.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This file does AMD library function optimizations. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPULibFunc.h" 16 #include "GCNSubtarget.h" 17 #include "llvm/Analysis/AssumptionCache.h" 18 #include "llvm/Analysis/TargetLibraryInfo.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 #include "llvm/IR/AttributeMask.h" 21 #include "llvm/IR/Dominators.h" 22 #include "llvm/IR/IRBuilder.h" 23 #include "llvm/IR/IntrinsicInst.h" 24 #include "llvm/IR/IntrinsicsAMDGPU.h" 25 #include "llvm/IR/PatternMatch.h" 26 #include "llvm/InitializePasses.h" 27 #include <cmath> 28 29 #define DEBUG_TYPE "amdgpu-simplifylib" 30 31 using namespace llvm; 32 using namespace llvm::PatternMatch; 33 34 static cl::opt<bool> EnablePreLink("amdgpu-prelink", 35 cl::desc("Enable pre-link mode optimizations"), 36 cl::init(false), 37 cl::Hidden); 38 39 static cl::list<std::string> UseNative("amdgpu-use-native", 40 cl::desc("Comma separated list of functions to replace with native, or all"), 41 cl::CommaSeparated, cl::ValueOptional, 42 cl::Hidden); 43 44 #define MATH_PI numbers::pi 45 #define MATH_E numbers::e 46 #define MATH_SQRT2 numbers::sqrt2 47 #define MATH_SQRT1_2 numbers::inv_sqrt2 48 49 namespace llvm { 50 51 class AMDGPULibCalls { 52 private: 53 const TargetLibraryInfo *TLInfo = nullptr; 54 AssumptionCache *AC = nullptr; 55 DominatorTree *DT = nullptr; 56 57 typedef llvm::AMDGPULibFunc FuncInfo; 58 59 bool UnsafeFPMath = false; 60 61 // -fuse-native. 
62 bool AllNative = false; 63 64 bool useNativeFunc(const StringRef F) const; 65 66 // Return a pointer (pointer expr) to the function if function definition with 67 // "FuncName" exists. It may create a new function prototype in pre-link mode. 68 FunctionCallee getFunction(Module *M, const FuncInfo &fInfo); 69 70 bool parseFunctionName(const StringRef &FMangledName, FuncInfo &FInfo); 71 72 bool TDOFold(CallInst *CI, const FuncInfo &FInfo); 73 74 /* Specialized optimizations */ 75 76 // pow/powr/pown 77 bool fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 78 79 // rootn 80 bool fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 81 82 // -fuse-native for sincos 83 bool sincosUseNative(CallInst *aCI, const FuncInfo &FInfo); 84 85 // evaluate calls if calls' arguments are constants. 86 bool evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0, double &Res1, 87 Constant *copr0, Constant *copr1); 88 bool evaluateCall(CallInst *aCI, const FuncInfo &FInfo); 89 90 // sqrt 91 bool fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 92 93 /// Insert a value to sincos function \p Fsincos. Returns (value of sin, value 94 /// of cos, sincos call). 95 std::tuple<Value *, Value *, Value *> insertSinCos(Value *Arg, 96 FastMathFlags FMF, 97 IRBuilder<> &B, 98 FunctionCallee Fsincos); 99 100 // sin/cos 101 bool fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B, const FuncInfo &FInfo); 102 103 // __read_pipe/__write_pipe 104 bool fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 105 const FuncInfo &FInfo); 106 107 // Get a scalar native builtin single argument FP function 108 FunctionCallee getNativeFunction(Module *M, const FuncInfo &FInfo); 109 110 /// Substitute a call to a known libcall with an intrinsic call. If \p 111 /// AllowMinSize is true, allow the replacement in a minsize function. 
112 bool shouldReplaceLibcallWithIntrinsic(const CallInst *CI, 113 bool AllowMinSizeF32 = false, 114 bool AllowF64 = false, 115 bool AllowStrictFP = false); 116 void replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 117 Intrinsic::ID IntrID); 118 119 bool tryReplaceLibcallWithSimpleIntrinsic(IRBuilder<> &B, CallInst *CI, 120 Intrinsic::ID IntrID, 121 bool AllowMinSizeF32 = false, 122 bool AllowF64 = false, 123 bool AllowStrictFP = false); 124 125 protected: 126 bool isUnsafeMath(const FPMathOperator *FPOp) const; 127 bool isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const; 128 129 bool canIncreasePrecisionOfConstantFold(const FPMathOperator *FPOp) const; 130 131 static void replaceCall(Instruction *I, Value *With) { 132 I->replaceAllUsesWith(With); 133 I->eraseFromParent(); 134 } 135 136 static void replaceCall(FPMathOperator *I, Value *With) { 137 replaceCall(cast<Instruction>(I), With); 138 } 139 140 public: 141 AMDGPULibCalls() {} 142 143 bool fold(CallInst *CI); 144 145 void initFunction(Function &F, FunctionAnalysisManager &FAM); 146 void initNativeFuncs(); 147 148 // Replace a normal math function call with that native version 149 bool useNative(CallInst *CI); 150 }; 151 152 } // end llvm namespace 153 154 template <typename IRB> 155 static CallInst *CreateCallEx(IRB &B, FunctionCallee Callee, Value *Arg, 156 const Twine &Name = "") { 157 CallInst *R = B.CreateCall(Callee, Arg, Name); 158 if (Function *F = dyn_cast<Function>(Callee.getCallee())) 159 R->setCallingConv(F->getCallingConv()); 160 return R; 161 } 162 163 template <typename IRB> 164 static CallInst *CreateCallEx2(IRB &B, FunctionCallee Callee, Value *Arg1, 165 Value *Arg2, const Twine &Name = "") { 166 CallInst *R = B.CreateCall(Callee, {Arg1, Arg2}, Name); 167 if (Function *F = dyn_cast<Function>(Callee.getCallee())) 168 R->setCallingConv(F->getCallingConv()); 169 return R; 170 } 171 172 static FunctionType *getPownType(FunctionType *FT) { 173 Type *PowNExpTy = 
Type::getInt32Ty(FT->getContext()); 174 if (VectorType *VecTy = dyn_cast<VectorType>(FT->getReturnType())) 175 PowNExpTy = VectorType::get(PowNExpTy, VecTy->getElementCount()); 176 177 return FunctionType::get(FT->getReturnType(), 178 {FT->getParamType(0), PowNExpTy}, false); 179 } 180 181 // Data structures for table-driven optimizations. 182 // FuncTbl works for both f32 and f64 functions with 1 input argument 183 184 struct TableEntry { 185 double result; 186 double input; 187 }; 188 189 /* a list of {result, input} */ 190 static const TableEntry tbl_acos[] = { 191 {MATH_PI / 2.0, 0.0}, 192 {MATH_PI / 2.0, -0.0}, 193 {0.0, 1.0}, 194 {MATH_PI, -1.0} 195 }; 196 static const TableEntry tbl_acosh[] = { 197 {0.0, 1.0} 198 }; 199 static const TableEntry tbl_acospi[] = { 200 {0.5, 0.0}, 201 {0.5, -0.0}, 202 {0.0, 1.0}, 203 {1.0, -1.0} 204 }; 205 static const TableEntry tbl_asin[] = { 206 {0.0, 0.0}, 207 {-0.0, -0.0}, 208 {MATH_PI / 2.0, 1.0}, 209 {-MATH_PI / 2.0, -1.0} 210 }; 211 static const TableEntry tbl_asinh[] = { 212 {0.0, 0.0}, 213 {-0.0, -0.0} 214 }; 215 static const TableEntry tbl_asinpi[] = { 216 {0.0, 0.0}, 217 {-0.0, -0.0}, 218 {0.5, 1.0}, 219 {-0.5, -1.0} 220 }; 221 static const TableEntry tbl_atan[] = { 222 {0.0, 0.0}, 223 {-0.0, -0.0}, 224 {MATH_PI / 4.0, 1.0}, 225 {-MATH_PI / 4.0, -1.0} 226 }; 227 static const TableEntry tbl_atanh[] = { 228 {0.0, 0.0}, 229 {-0.0, -0.0} 230 }; 231 static const TableEntry tbl_atanpi[] = { 232 {0.0, 0.0}, 233 {-0.0, -0.0}, 234 {0.25, 1.0}, 235 {-0.25, -1.0} 236 }; 237 static const TableEntry tbl_cbrt[] = { 238 {0.0, 0.0}, 239 {-0.0, -0.0}, 240 {1.0, 1.0}, 241 {-1.0, -1.0}, 242 }; 243 static const TableEntry tbl_cos[] = { 244 {1.0, 0.0}, 245 {1.0, -0.0} 246 }; 247 static const TableEntry tbl_cosh[] = { 248 {1.0, 0.0}, 249 {1.0, -0.0} 250 }; 251 static const TableEntry tbl_cospi[] = { 252 {1.0, 0.0}, 253 {1.0, -0.0} 254 }; 255 static const TableEntry tbl_erfc[] = { 256 {1.0, 0.0}, 257 {1.0, -0.0} 258 }; 259 static const 
TableEntry tbl_erf[] = { 260 {0.0, 0.0}, 261 {-0.0, -0.0} 262 }; 263 static const TableEntry tbl_exp[] = { 264 {1.0, 0.0}, 265 {1.0, -0.0}, 266 {MATH_E, 1.0} 267 }; 268 static const TableEntry tbl_exp2[] = { 269 {1.0, 0.0}, 270 {1.0, -0.0}, 271 {2.0, 1.0} 272 }; 273 static const TableEntry tbl_exp10[] = { 274 {1.0, 0.0}, 275 {1.0, -0.0}, 276 {10.0, 1.0} 277 }; 278 static const TableEntry tbl_expm1[] = { 279 {0.0, 0.0}, 280 {-0.0, -0.0} 281 }; 282 static const TableEntry tbl_log[] = { 283 {0.0, 1.0}, 284 {1.0, MATH_E} 285 }; 286 static const TableEntry tbl_log2[] = { 287 {0.0, 1.0}, 288 {1.0, 2.0} 289 }; 290 static const TableEntry tbl_log10[] = { 291 {0.0, 1.0}, 292 {1.0, 10.0} 293 }; 294 static const TableEntry tbl_rsqrt[] = { 295 {1.0, 1.0}, 296 {MATH_SQRT1_2, 2.0} 297 }; 298 static const TableEntry tbl_sin[] = { 299 {0.0, 0.0}, 300 {-0.0, -0.0} 301 }; 302 static const TableEntry tbl_sinh[] = { 303 {0.0, 0.0}, 304 {-0.0, -0.0} 305 }; 306 static const TableEntry tbl_sinpi[] = { 307 {0.0, 0.0}, 308 {-0.0, -0.0} 309 }; 310 static const TableEntry tbl_sqrt[] = { 311 {0.0, 0.0}, 312 {1.0, 1.0}, 313 {MATH_SQRT2, 2.0} 314 }; 315 static const TableEntry tbl_tan[] = { 316 {0.0, 0.0}, 317 {-0.0, -0.0} 318 }; 319 static const TableEntry tbl_tanh[] = { 320 {0.0, 0.0}, 321 {-0.0, -0.0} 322 }; 323 static const TableEntry tbl_tanpi[] = { 324 {0.0, 0.0}, 325 {-0.0, -0.0} 326 }; 327 static const TableEntry tbl_tgamma[] = { 328 {1.0, 1.0}, 329 {1.0, 2.0}, 330 {2.0, 3.0}, 331 {6.0, 4.0} 332 }; 333 334 static bool HasNative(AMDGPULibFunc::EFuncId id) { 335 switch(id) { 336 case AMDGPULibFunc::EI_DIVIDE: 337 case AMDGPULibFunc::EI_COS: 338 case AMDGPULibFunc::EI_EXP: 339 case AMDGPULibFunc::EI_EXP2: 340 case AMDGPULibFunc::EI_EXP10: 341 case AMDGPULibFunc::EI_LOG: 342 case AMDGPULibFunc::EI_LOG2: 343 case AMDGPULibFunc::EI_LOG10: 344 case AMDGPULibFunc::EI_POWR: 345 case AMDGPULibFunc::EI_RECIP: 346 case AMDGPULibFunc::EI_RSQRT: 347 case AMDGPULibFunc::EI_SIN: 348 case 
AMDGPULibFunc::EI_SINCOS: 349 case AMDGPULibFunc::EI_SQRT: 350 case AMDGPULibFunc::EI_TAN: 351 return true; 352 default:; 353 } 354 return false; 355 } 356 357 using TableRef = ArrayRef<TableEntry>; 358 359 static TableRef getOptTable(AMDGPULibFunc::EFuncId id) { 360 switch(id) { 361 case AMDGPULibFunc::EI_ACOS: return TableRef(tbl_acos); 362 case AMDGPULibFunc::EI_ACOSH: return TableRef(tbl_acosh); 363 case AMDGPULibFunc::EI_ACOSPI: return TableRef(tbl_acospi); 364 case AMDGPULibFunc::EI_ASIN: return TableRef(tbl_asin); 365 case AMDGPULibFunc::EI_ASINH: return TableRef(tbl_asinh); 366 case AMDGPULibFunc::EI_ASINPI: return TableRef(tbl_asinpi); 367 case AMDGPULibFunc::EI_ATAN: return TableRef(tbl_atan); 368 case AMDGPULibFunc::EI_ATANH: return TableRef(tbl_atanh); 369 case AMDGPULibFunc::EI_ATANPI: return TableRef(tbl_atanpi); 370 case AMDGPULibFunc::EI_CBRT: return TableRef(tbl_cbrt); 371 case AMDGPULibFunc::EI_NCOS: 372 case AMDGPULibFunc::EI_COS: return TableRef(tbl_cos); 373 case AMDGPULibFunc::EI_COSH: return TableRef(tbl_cosh); 374 case AMDGPULibFunc::EI_COSPI: return TableRef(tbl_cospi); 375 case AMDGPULibFunc::EI_ERFC: return TableRef(tbl_erfc); 376 case AMDGPULibFunc::EI_ERF: return TableRef(tbl_erf); 377 case AMDGPULibFunc::EI_EXP: return TableRef(tbl_exp); 378 case AMDGPULibFunc::EI_NEXP2: 379 case AMDGPULibFunc::EI_EXP2: return TableRef(tbl_exp2); 380 case AMDGPULibFunc::EI_EXP10: return TableRef(tbl_exp10); 381 case AMDGPULibFunc::EI_EXPM1: return TableRef(tbl_expm1); 382 case AMDGPULibFunc::EI_LOG: return TableRef(tbl_log); 383 case AMDGPULibFunc::EI_NLOG2: 384 case AMDGPULibFunc::EI_LOG2: return TableRef(tbl_log2); 385 case AMDGPULibFunc::EI_LOG10: return TableRef(tbl_log10); 386 case AMDGPULibFunc::EI_NRSQRT: 387 case AMDGPULibFunc::EI_RSQRT: return TableRef(tbl_rsqrt); 388 case AMDGPULibFunc::EI_NSIN: 389 case AMDGPULibFunc::EI_SIN: return TableRef(tbl_sin); 390 case AMDGPULibFunc::EI_SINH: return TableRef(tbl_sinh); 391 case 
AMDGPULibFunc::EI_SINPI: return TableRef(tbl_sinpi); 392 case AMDGPULibFunc::EI_NSQRT: 393 case AMDGPULibFunc::EI_SQRT: return TableRef(tbl_sqrt); 394 case AMDGPULibFunc::EI_TAN: return TableRef(tbl_tan); 395 case AMDGPULibFunc::EI_TANH: return TableRef(tbl_tanh); 396 case AMDGPULibFunc::EI_TANPI: return TableRef(tbl_tanpi); 397 case AMDGPULibFunc::EI_TGAMMA: return TableRef(tbl_tgamma); 398 default:; 399 } 400 return TableRef(); 401 } 402 403 static inline int getVecSize(const AMDGPULibFunc& FInfo) { 404 return FInfo.getLeads()[0].VectorSize; 405 } 406 407 static inline AMDGPULibFunc::EType getArgType(const AMDGPULibFunc& FInfo) { 408 return (AMDGPULibFunc::EType)FInfo.getLeads()[0].ArgType; 409 } 410 411 FunctionCallee AMDGPULibCalls::getFunction(Module *M, const FuncInfo &fInfo) { 412 // If we are doing PreLinkOpt, the function is external. So it is safe to 413 // use getOrInsertFunction() at this stage. 414 415 return EnablePreLink ? AMDGPULibFunc::getOrInsertFunction(M, fInfo) 416 : AMDGPULibFunc::getFunction(M, fInfo); 417 } 418 419 bool AMDGPULibCalls::parseFunctionName(const StringRef &FMangledName, 420 FuncInfo &FInfo) { 421 return AMDGPULibFunc::parse(FMangledName, FInfo); 422 } 423 424 bool AMDGPULibCalls::isUnsafeMath(const FPMathOperator *FPOp) const { 425 return UnsafeFPMath || FPOp->isFast(); 426 } 427 428 bool AMDGPULibCalls::isUnsafeFiniteOnlyMath(const FPMathOperator *FPOp) const { 429 return UnsafeFPMath || 430 (FPOp->hasApproxFunc() && FPOp->hasNoNaNs() && FPOp->hasNoInfs()); 431 } 432 433 bool AMDGPULibCalls::canIncreasePrecisionOfConstantFold( 434 const FPMathOperator *FPOp) const { 435 // TODO: Refine to approxFunc or contract 436 return isUnsafeMath(FPOp); 437 } 438 439 void AMDGPULibCalls::initFunction(Function &F, FunctionAnalysisManager &FAM) { 440 UnsafeFPMath = F.getFnAttribute("unsafe-fp-math").getValueAsBool(); 441 AC = &FAM.getResult<AssumptionAnalysis>(F); 442 TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F); 443 DT = 
FAM.getCachedResult<DominatorTreeAnalysis>(F); 444 } 445 446 bool AMDGPULibCalls::useNativeFunc(const StringRef F) const { 447 return AllNative || llvm::is_contained(UseNative, F); 448 } 449 450 void AMDGPULibCalls::initNativeFuncs() { 451 AllNative = useNativeFunc("all") || 452 (UseNative.getNumOccurrences() && UseNative.size() == 1 && 453 UseNative.begin()->empty()); 454 } 455 456 bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) { 457 bool native_sin = useNativeFunc("sin"); 458 bool native_cos = useNativeFunc("cos"); 459 460 if (native_sin && native_cos) { 461 Module *M = aCI->getModule(); 462 Value *opr0 = aCI->getArgOperand(0); 463 464 AMDGPULibFunc nf; 465 nf.getLeads()[0].ArgType = FInfo.getLeads()[0].ArgType; 466 nf.getLeads()[0].VectorSize = FInfo.getLeads()[0].VectorSize; 467 468 nf.setPrefix(AMDGPULibFunc::NATIVE); 469 nf.setId(AMDGPULibFunc::EI_SIN); 470 FunctionCallee sinExpr = getFunction(M, nf); 471 472 nf.setPrefix(AMDGPULibFunc::NATIVE); 473 nf.setId(AMDGPULibFunc::EI_COS); 474 FunctionCallee cosExpr = getFunction(M, nf); 475 if (sinExpr && cosExpr) { 476 Value *sinval = CallInst::Create(sinExpr, opr0, "splitsin", aCI); 477 Value *cosval = CallInst::Create(cosExpr, opr0, "splitcos", aCI); 478 new StoreInst(cosval, aCI->getArgOperand(1), aCI); 479 480 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 481 << " with native version of sin/cos"); 482 483 replaceCall(aCI, sinval); 484 return true; 485 } 486 } 487 return false; 488 } 489 490 bool AMDGPULibCalls::useNative(CallInst *aCI) { 491 Function *Callee = aCI->getCalledFunction(); 492 if (!Callee || aCI->isNoBuiltin()) 493 return false; 494 495 FuncInfo FInfo; 496 if (!parseFunctionName(Callee->getName(), FInfo) || !FInfo.isMangled() || 497 FInfo.getPrefix() != AMDGPULibFunc::NOPFX || 498 getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()) || 499 !(AllNative || useNativeFunc(FInfo.getName()))) { 500 return false; 501 } 502 503 if 
(FInfo.getId() == AMDGPULibFunc::EI_SINCOS) 504 return sincosUseNative(aCI, FInfo); 505 506 FInfo.setPrefix(AMDGPULibFunc::NATIVE); 507 FunctionCallee F = getFunction(aCI->getModule(), FInfo); 508 if (!F) 509 return false; 510 511 aCI->setCalledFunction(F); 512 DEBUG_WITH_TYPE("usenative", dbgs() << "<useNative> replace " << *aCI 513 << " with native version"); 514 return true; 515 } 516 517 // Clang emits call of __read_pipe_2 or __read_pipe_4 for OpenCL read_pipe 518 // builtin, with appended type size and alignment arguments, where 2 or 4 519 // indicates the original number of arguments. The library has optimized version 520 // of __read_pipe_2/__read_pipe_4 when the type size and alignment has the same 521 // power of 2 value. This function transforms __read_pipe_2 to __read_pipe_2_N 522 // for such cases where N is the size in bytes of the type (N = 1, 2, 4, 8, ..., 523 // 128). The same for __read_pipe_4, write_pipe_2, and write_pipe_4. 524 bool AMDGPULibCalls::fold_read_write_pipe(CallInst *CI, IRBuilder<> &B, 525 const FuncInfo &FInfo) { 526 auto *Callee = CI->getCalledFunction(); 527 if (!Callee->isDeclaration()) 528 return false; 529 530 assert(Callee->hasName() && "Invalid read_pipe/write_pipe function"); 531 auto *M = Callee->getParent(); 532 std::string Name = std::string(Callee->getName()); 533 auto NumArg = CI->arg_size(); 534 if (NumArg != 4 && NumArg != 6) 535 return false; 536 ConstantInt *PacketSize = 537 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 2)); 538 ConstantInt *PacketAlign = 539 dyn_cast<ConstantInt>(CI->getArgOperand(NumArg - 1)); 540 if (!PacketSize || !PacketAlign) 541 return false; 542 543 unsigned Size = PacketSize->getZExtValue(); 544 Align Alignment = PacketAlign->getAlignValue(); 545 if (Alignment != Size) 546 return false; 547 548 unsigned PtrArgLoc = CI->arg_size() - 3; 549 Value *PtrArg = CI->getArgOperand(PtrArgLoc); 550 Type *PtrTy = PtrArg->getType(); 551 552 SmallVector<llvm::Type *, 6> ArgTys; 553 for (unsigned I = 
0; I != PtrArgLoc; ++I) 554 ArgTys.push_back(CI->getArgOperand(I)->getType()); 555 ArgTys.push_back(PtrTy); 556 557 Name = Name + "_" + std::to_string(Size); 558 auto *FTy = FunctionType::get(Callee->getReturnType(), 559 ArrayRef<Type *>(ArgTys), false); 560 AMDGPULibFunc NewLibFunc(Name, FTy); 561 FunctionCallee F = AMDGPULibFunc::getOrInsertFunction(M, NewLibFunc); 562 if (!F) 563 return false; 564 565 SmallVector<Value *, 6> Args; 566 for (unsigned I = 0; I != PtrArgLoc; ++I) 567 Args.push_back(CI->getArgOperand(I)); 568 Args.push_back(PtrArg); 569 570 auto *NCI = B.CreateCall(F, Args); 571 NCI->setAttributes(CI->getAttributes()); 572 CI->replaceAllUsesWith(NCI); 573 CI->dropAllReferences(); 574 CI->eraseFromParent(); 575 576 return true; 577 } 578 579 static bool isKnownIntegral(const Value *V, const DataLayout &DL, 580 FastMathFlags FMF) { 581 if (isa<UndefValue>(V)) 582 return true; 583 584 if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) 585 return CF->getValueAPF().isInteger(); 586 587 if (const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(V)) { 588 for (unsigned i = 0, e = CDV->getNumElements(); i != e; ++i) { 589 Constant *ConstElt = CDV->getElementAsConstant(i); 590 if (isa<UndefValue>(ConstElt)) 591 continue; 592 const ConstantFP *CFP = dyn_cast<ConstantFP>(ConstElt); 593 if (!CFP || !CFP->getValue().isInteger()) 594 return false; 595 } 596 597 return true; 598 } 599 600 const Instruction *I = dyn_cast<Instruction>(V); 601 if (!I) 602 return false; 603 604 switch (I->getOpcode()) { 605 case Instruction::SIToFP: 606 case Instruction::UIToFP: 607 // TODO: Could check nofpclass(inf) on incoming argument 608 if (FMF.noInfs()) 609 return true; 610 611 // Need to check int size cannot produce infinity, which computeKnownFPClass 612 // knows how to do already. 
613 return isKnownNeverInfinity(I, DL); 614 case Instruction::Call: { 615 const CallInst *CI = cast<CallInst>(I); 616 switch (CI->getIntrinsicID()) { 617 case Intrinsic::trunc: 618 case Intrinsic::floor: 619 case Intrinsic::ceil: 620 case Intrinsic::rint: 621 case Intrinsic::nearbyint: 622 case Intrinsic::round: 623 case Intrinsic::roundeven: 624 return (FMF.noInfs() && FMF.noNaNs()) || 625 isKnownNeverInfOrNaN(I, DL, nullptr); 626 default: 627 break; 628 } 629 630 break; 631 } 632 default: 633 break; 634 } 635 636 return false; 637 } 638 639 // This function returns false if no change; return true otherwise. 640 bool AMDGPULibCalls::fold(CallInst *CI) { 641 Function *Callee = CI->getCalledFunction(); 642 // Ignore indirect calls. 643 if (!Callee || Callee->isIntrinsic() || CI->isNoBuiltin()) 644 return false; 645 646 FuncInfo FInfo; 647 if (!parseFunctionName(Callee->getName(), FInfo)) 648 return false; 649 650 // Further check the number of arguments to see if they match. 651 // TODO: Check calling convention matches too 652 if (!FInfo.isCompatibleSignature(CI->getFunctionType())) 653 return false; 654 655 LLVM_DEBUG(dbgs() << "AMDIC: try folding " << *CI << '\n'); 656 657 if (TDOFold(CI, FInfo)) 658 return true; 659 660 IRBuilder<> B(CI); 661 662 if (FPMathOperator *FPOp = dyn_cast<FPMathOperator>(CI)) { 663 // Under unsafe-math, evaluate calls if possible. 664 // According to Brian Sumner, we can do this for all f32 function calls 665 // using host's double function calls. 666 if (canIncreasePrecisionOfConstantFold(FPOp) && evaluateCall(CI, FInfo)) 667 return true; 668 669 // Copy fast flags from the original call. 670 FastMathFlags FMF = FPOp->getFastMathFlags(); 671 B.setFastMathFlags(FMF); 672 673 // Specialized optimizations for each function call. 674 // 675 // TODO: Handle other simple intrinsic wrappers. Sqrt. 
676 // 677 // TODO: Handle native functions 678 switch (FInfo.getId()) { 679 case AMDGPULibFunc::EI_EXP: 680 if (FMF.none()) 681 return false; 682 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp, 683 FMF.approxFunc()); 684 case AMDGPULibFunc::EI_EXP2: 685 if (FMF.none()) 686 return false; 687 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::exp2, 688 FMF.approxFunc()); 689 case AMDGPULibFunc::EI_LOG: 690 if (FMF.none()) 691 return false; 692 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log, 693 FMF.approxFunc()); 694 case AMDGPULibFunc::EI_LOG2: 695 if (FMF.none()) 696 return false; 697 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log2, 698 FMF.approxFunc()); 699 case AMDGPULibFunc::EI_LOG10: 700 if (FMF.none()) 701 return false; 702 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::log10, 703 FMF.approxFunc()); 704 case AMDGPULibFunc::EI_FMIN: 705 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::minnum, 706 true, true); 707 case AMDGPULibFunc::EI_FMAX: 708 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::maxnum, 709 true, true); 710 case AMDGPULibFunc::EI_FMA: 711 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fma, true, 712 true); 713 case AMDGPULibFunc::EI_MAD: 714 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fmuladd, 715 true, true); 716 case AMDGPULibFunc::EI_FABS: 717 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::fabs, true, 718 true, true); 719 case AMDGPULibFunc::EI_COPYSIGN: 720 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::copysign, 721 true, true, true); 722 case AMDGPULibFunc::EI_FLOOR: 723 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::floor, true, 724 true); 725 case AMDGPULibFunc::EI_CEIL: 726 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::ceil, true, 727 true); 728 case AMDGPULibFunc::EI_TRUNC: 729 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, 
Intrinsic::trunc, true, 730 true); 731 case AMDGPULibFunc::EI_RINT: 732 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::rint, true, 733 true); 734 case AMDGPULibFunc::EI_ROUND: 735 return tryReplaceLibcallWithSimpleIntrinsic(B, CI, Intrinsic::round, true, 736 true); 737 case AMDGPULibFunc::EI_LDEXP: { 738 if (!shouldReplaceLibcallWithIntrinsic(CI, true, true)) 739 return false; 740 741 Value *Arg1 = CI->getArgOperand(1); 742 if (VectorType *VecTy = dyn_cast<VectorType>(CI->getType()); 743 VecTy && !isa<VectorType>(Arg1->getType())) { 744 Value *SplatArg1 = B.CreateVectorSplat(VecTy->getElementCount(), Arg1); 745 CI->setArgOperand(1, SplatArg1); 746 } 747 748 CI->setCalledFunction(Intrinsic::getDeclaration( 749 CI->getModule(), Intrinsic::ldexp, 750 {CI->getType(), CI->getArgOperand(1)->getType()})); 751 return true; 752 } 753 case AMDGPULibFunc::EI_POW: { 754 Module *M = Callee->getParent(); 755 AMDGPULibFunc PowrInfo(AMDGPULibFunc::EI_POWR, FInfo); 756 FunctionCallee PowrFunc = getFunction(M, PowrInfo); 757 CallInst *Call = cast<CallInst>(FPOp); 758 759 // pow(x, y) -> powr(x, y) for x >= -0.0 760 // TODO: Account for flags on current call 761 if (PowrFunc && 762 cannotBeOrderedLessThanZero(FPOp->getOperand(0), M->getDataLayout(), 763 TLInfo, 0, AC, Call, DT)) { 764 Call->setCalledFunction(PowrFunc); 765 return fold_pow(FPOp, B, PowrInfo) || true; 766 } 767 768 // pow(x, y) -> pown(x, y) for known integral y 769 if (isKnownIntegral(FPOp->getOperand(1), M->getDataLayout(), 770 FPOp->getFastMathFlags())) { 771 FunctionType *PownType = getPownType(CI->getFunctionType()); 772 AMDGPULibFunc PownInfo(AMDGPULibFunc::EI_POWN, PownType, true); 773 FunctionCallee PownFunc = getFunction(M, PownInfo); 774 if (PownFunc) { 775 // TODO: If the incoming integral value is an sitofp/uitofp, it won't 776 // fold out without a known range. We can probably take the source 777 // value directly. 
778 Value *CastedArg = 779 B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1)); 780 // Have to drop any nofpclass attributes on the original call site. 781 Call->removeParamAttrs( 782 1, AttributeFuncs::typeIncompatible(CastedArg->getType())); 783 Call->setCalledFunction(PownFunc); 784 Call->setArgOperand(1, CastedArg); 785 return fold_pow(FPOp, B, PownInfo) || true; 786 } 787 } 788 789 return fold_pow(FPOp, B, FInfo); 790 } 791 case AMDGPULibFunc::EI_POWR: 792 case AMDGPULibFunc::EI_POWN: 793 return fold_pow(FPOp, B, FInfo); 794 case AMDGPULibFunc::EI_ROOTN: 795 return fold_rootn(FPOp, B, FInfo); 796 case AMDGPULibFunc::EI_SQRT: 797 return fold_sqrt(FPOp, B, FInfo); 798 case AMDGPULibFunc::EI_COS: 799 case AMDGPULibFunc::EI_SIN: 800 return fold_sincos(FPOp, B, FInfo); 801 default: 802 break; 803 } 804 } else { 805 // Specialized optimizations for each function call 806 switch (FInfo.getId()) { 807 case AMDGPULibFunc::EI_READ_PIPE_2: 808 case AMDGPULibFunc::EI_READ_PIPE_4: 809 case AMDGPULibFunc::EI_WRITE_PIPE_2: 810 case AMDGPULibFunc::EI_WRITE_PIPE_4: 811 return fold_read_write_pipe(CI, B, FInfo); 812 default: 813 break; 814 } 815 } 816 817 return false; 818 } 819 820 bool AMDGPULibCalls::TDOFold(CallInst *CI, const FuncInfo &FInfo) { 821 // Table-Driven optimization 822 const TableRef tr = getOptTable(FInfo.getId()); 823 if (tr.empty()) 824 return false; 825 826 int const sz = (int)tr.size(); 827 Value *opr0 = CI->getArgOperand(0); 828 829 if (getVecSize(FInfo) > 1) { 830 if (ConstantDataVector *CV = dyn_cast<ConstantDataVector>(opr0)) { 831 SmallVector<double, 0> DVal; 832 for (int eltNo = 0; eltNo < getVecSize(FInfo); ++eltNo) { 833 ConstantFP *eltval = dyn_cast<ConstantFP>( 834 CV->getElementAsConstant((unsigned)eltNo)); 835 assert(eltval && "Non-FP arguments in math function!"); 836 bool found = false; 837 for (int i=0; i < sz; ++i) { 838 if (eltval->isExactlyValue(tr[i].input)) { 839 DVal.push_back(tr[i].result); 840 found = true; 841 break; 842 
} 843 } 844 if (!found) { 845 // This vector constants not handled yet. 846 return false; 847 } 848 } 849 LLVMContext &context = CI->getParent()->getParent()->getContext(); 850 Constant *nval; 851 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 852 SmallVector<float, 0> FVal; 853 for (unsigned i = 0; i < DVal.size(); ++i) { 854 FVal.push_back((float)DVal[i]); 855 } 856 ArrayRef<float> tmp(FVal); 857 nval = ConstantDataVector::get(context, tmp); 858 } else { // F64 859 ArrayRef<double> tmp(DVal); 860 nval = ConstantDataVector::get(context, tmp); 861 } 862 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 863 replaceCall(CI, nval); 864 return true; 865 } 866 } else { 867 // Scalar version 868 if (ConstantFP *CF = dyn_cast<ConstantFP>(opr0)) { 869 for (int i = 0; i < sz; ++i) { 870 if (CF->isExactlyValue(tr[i].input)) { 871 Value *nval = ConstantFP::get(CF->getType(), tr[i].result); 872 LLVM_DEBUG(errs() << "AMDIC: " << *CI << " ---> " << *nval << "\n"); 873 replaceCall(CI, nval); 874 return true; 875 } 876 } 877 } 878 } 879 880 return false; 881 } 882 883 namespace llvm { 884 static double log2(double V) { 885 #if _XOPEN_SOURCE >= 600 || defined(_ISOC99_SOURCE) || _POSIX_C_SOURCE >= 200112L 886 return ::log2(V); 887 #else 888 return log(V) / numbers::ln2; 889 #endif 890 } 891 } 892 893 bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, 894 const FuncInfo &FInfo) { 895 assert((FInfo.getId() == AMDGPULibFunc::EI_POW || 896 FInfo.getId() == AMDGPULibFunc::EI_POWR || 897 FInfo.getId() == AMDGPULibFunc::EI_POWN) && 898 "fold_pow: encounter a wrong function call"); 899 900 Module *M = B.GetInsertBlock()->getModule(); 901 Type *eltType = FPOp->getType()->getScalarType(); 902 Value *opr0 = FPOp->getOperand(0); 903 Value *opr1 = FPOp->getOperand(1); 904 905 const APFloat *CF = nullptr; 906 const APInt *CINT = nullptr; 907 if (!match(opr1, m_APFloatAllowUndef(CF))) 908 match(opr1, m_APIntAllowUndef(CINT)); 909 910 // 0x1111111 means that we 
  // don't do anything for this call.
  // ci_opr1 caches the exponent when it is a compile-time integer;
  // 0x1111111 is an "unset" sentinel that matches none of the cases below.
  int ci_opr1 = (CINT ? (int)CINT->getSExtValue() : 0x1111111);

  if ((CF && CF->isZero()) || (CINT && ci_opr1 == 0)) {
    // pow/powr/pown(x, 0) == 1
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    replaceCall(FPOp, cnval);
    return true;
  }
  if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) {
    // pow/powr/pown(x, 1.0) = x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
    replaceCall(FPOp, opr0);
    return true;
  }
  if ((CF && CF->isExactlyValue(2.0)) || (CINT && ci_opr1 == 2)) {
    // pow/powr/pown(x, 2.0) = x*x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << " * "
                      << *opr0 << "\n");
    Value *nval = B.CreateFMul(opr0, opr0, "__pow2");
    replaceCall(FPOp, nval);
    return true;
  }
  if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) {
    // pow/powr/pown(x, -1.0) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n");
    Constant *cnval = ConstantFP::get(eltType, 1.0);
    if (getVecSize(FInfo) > 1) {
      cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
    }
    Value *nval = B.CreateFDiv(cnval, opr0, "__powrecip");
    replaceCall(FPOp, nval);
    return true;
  }

  if (CF && (CF->isExactlyValue(0.5) || CF->isExactlyValue(-0.5))) {
    // pow[r](x, [-]0.5) = sqrt(x)
    bool issqrt = CF->isExactlyValue(0.5);
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(issqrt ? AMDGPULibFunc::EI_SQRT
                                                : AMDGPULibFunc::EI_RSQRT,
                                         FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << FInfo.getName()
                        << '(' << *opr0 << ")\n");
      Value *nval = CreateCallEx(B,FPExpr, opr0, issqrt ? "__pow2sqrt"
                                                        : "__pow2rsqrt");
      replaceCall(FPOp, nval);
      return true;
    }
  }

  // Everything below may change observable rounding/special-value behavior,
  // so it is gated on the relaxed-math predicate.
  if (!isUnsafeFiniteOnlyMath(FPOp))
    return false;

  // Unsafe Math optimization

  // Remember that ci_opr1 is set if opr1 is integral
  // A float constant exponent that happens to be an exact integer is
  // re-captured into ci_opr1 so the expansion below can handle it too.
  if (CF) {
    double dval = (getArgType(FInfo) == AMDGPULibFunc::F32)
                      ? (double)CF->convertToFloat()
                      : CF->convertToDouble();
    int ival = (int)dval;
    if ((double)ival == dval) {
      ci_opr1 = ival;
    } else
      ci_opr1 = 0x11111111;
  }

  // pow/powr/pown(x, c) = [1/](x*x*..x); where
  //   trunc(c) == c && the number of x == c && |c| <= 12
  // Expand small integral exponents by binary exponentiation (square the
  // running power, multiply it in when the corresponding exponent bit is set).
  unsigned abs_opr1 = (ci_opr1 < 0) ? -ci_opr1 : ci_opr1;
  if (abs_opr1 <= 12) {
    Constant *cnval;
    Value *nval;
    if (abs_opr1 == 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = cnval;
    } else {
      Value *valx2 = nullptr;
      nval = nullptr;
      while (abs_opr1 > 0) {
        valx2 = valx2 ? B.CreateFMul(valx2, valx2, "__powx2") : opr0;
        if (abs_opr1 & 1) {
          nval = nval ? B.CreateFMul(nval, valx2, "__powprod") : valx2;
        }
        abs_opr1 >>= 1;
      }
    }

    // Negative exponents take the reciprocal of the accumulated product.
    if (ci_opr1 < 0) {
      cnval = ConstantFP::get(eltType, 1.0);
      if (getVecSize(FInfo) > 1) {
        cnval = ConstantDataVector::getSplat(getVecSize(FInfo), cnval);
      }
      nval = B.CreateFDiv(cnval, nval, "__1powprod");
    }
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                      << ((ci_opr1 < 0) ? "1/prod(" : "prod(") << *opr0
                      << ")\n");
    replaceCall(FPOp, nval);
    return true;
  }

  // If we should use the generic intrinsic instead of emitting a libcall
  const bool ShouldUseIntrinsic = eltType->isFloatTy() || eltType->isHalfTy();

  // powr ---> exp2(y * log2(x))
  // pown/pow ---> powr(fabs(x), y) | (x & ((int)y << 31))
  FunctionCallee ExpExpr;
  if (ShouldUseIntrinsic)
    ExpExpr = Intrinsic::getDeclaration(M, Intrinsic::exp2, {FPOp->getType()});
  else {
    ExpExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_EXP2, FInfo));
    if (!ExpExpr)
      return false;
  }

  // Decide whether the base needs a runtime log2 (non-constant base), a
  // fabs (sign handled separately), and/or a copysign-style sign transfer
  // (pow/pown with a possibly-negative base; powr's base is non-negative
  // by definition so it never needs the sign fixup).
  bool needlog = false;
  bool needabs = false;
  bool needcopysign = false;
  Constant *cnval = nullptr;
  if (getVecSize(FInfo) == 1) {
    CF = nullptr;
    match(opr0, m_APFloatAllowUndef(CF));

    if (CF) {
      double V = (getArgType(FInfo) == AMDGPULibFunc::F32)
                     ? (double)CF->convertToFloat()
                     : CF->convertToDouble();

      // Constant base: fold log2(|x|) right here.
      V = log2(std::abs(V));
      cnval = ConstantFP::get(eltType, V);
      needcopysign = (FInfo.getId() != AMDGPULibFunc::EI_POWR) &&
                     CF->isNegative();
    } else {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR &&
                               (!CF || CF->isNegative());
    }
  } else {
    ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(opr0);

    if (!CDV) {
      needlog = true;
      needcopysign = needabs = FInfo.getId() != AMDGPULibFunc::EI_POWR;
    } else {
      assert ((int)CDV->getNumElements() == getVecSize(FInfo) &&
              "Wrong vector size detected");

      // Constant vector base: fold log2(|x|) lane by lane; any negative
      // lane forces the sign-transfer path.
      SmallVector<double, 0> DVal;
      for (int i=0; i < getVecSize(FInfo); ++i) {
        double V = CDV->getElementAsAPFloat(i).convertToDouble();
        if (V < 0.0) needcopysign = true;
        V = log2(std::abs(V));
        DVal.push_back(V);
      }
      if (getArgType(FInfo) == AMDGPULibFunc::F32) {
        SmallVector<float, 0> FVal;
        for (unsigned i=0; i < DVal.size(); ++i) {
          FVal.push_back((float)DVal[i]);
        }
        ArrayRef<float> tmp(FVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      } else {
        ArrayRef<double> tmp(DVal);
        cnval = ConstantDataVector::get(M->getContext(), tmp);
      }
    }
  }

  if (needcopysign && (FInfo.getId() == AMDGPULibFunc::EI_POW)) {
    // We cannot handle corner cases for a general pow() function, give up
    // unless y is a constant integral value. Then proceed as if it were pown.
    if (!isKnownIntegral(opr1, M->getDataLayout(), FPOp->getFastMathFlags()))
      return false;
  }

  Value *nval;
  if (needabs) {
    nval = B.CreateUnaryIntrinsic(Intrinsic::fabs, opr0, nullptr, "__fabs");
  } else {
    nval = cnval ? cnval : opr0;
  }
  if (needlog) {
    FunctionCallee LogExpr;
    if (ShouldUseIntrinsic) {
      LogExpr =
          Intrinsic::getDeclaration(M, Intrinsic::log2, {FPOp->getType()});
    } else {
      LogExpr = getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_LOG2, FInfo));
      if (!LogExpr)
        return false;
    }

    nval = CreateCallEx(B,LogExpr, nval, "__log2");
  }

  if (FInfo.getId() == AMDGPULibFunc::EI_POWN) {
    // convert int(32) to fp(f32 or f64)
    opr1 = B.CreateSIToFP(opr1, nval->getType(), "pownI2F");
  }
  // Core rewrite: x^y == exp2(y * log2(|x|)).
  nval = B.CreateFMul(opr1, nval, "__ylogx");
  nval = CreateCallEx(B,ExpExpr, nval, "__exp2");

  if (needcopysign) {
    // Transfer the result sign from the base: the result is negative exactly
    // when the base is negative and the (integral) exponent is odd. Shifting
    // the exponent's low bit into the sign position and ANDing with the
    // base's bit pattern computes that sign mask.
    Value *opr_n;
    Type* rTy = opr0->getType();
    Type* nTyS = B.getIntNTy(eltType->getPrimitiveSizeInBits());
    Type *nTy = nTyS;
    if (const auto *vTy = dyn_cast<FixedVectorType>(rTy))
      nTy = FixedVectorType::get(nTyS, vTy);
    unsigned size = nTy->getScalarSizeInBits();
    opr_n = FPOp->getOperand(1);
    if (opr_n->getType()->isIntegerTy())
      opr_n = B.CreateZExtOrTrunc(opr_n, nTy, "__ytou");
    else
      opr_n = B.CreateFPToSI(opr1, nTy, "__ytou");

    Value *sign = B.CreateShl(opr_n, size-1, "__yeven");
    sign = B.CreateAnd(B.CreateBitCast(opr0, nTy), sign, "__pow_sign");
    // OR the computed sign bit into the (positive) exp2 result, then cast
    // back to the original floating-point type.
    nval = B.CreateOr(B.CreateBitCast(nval, nTy), sign);
    nval = B.CreateBitCast(nval, opr0->getType());
  }

  LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                    << "exp2(" << *opr1 << " * log2(" << *opr0 << "))\n");
  replaceCall(FPOp, nval);

  return true;
}

// Fold rootn(x, c) for small constant integer c into cheaper library calls
// (x, sqrt, cbrt, 1/x, rsqrt). Returns true if the call was replaced.
bool AMDGPULibCalls::fold_rootn(FPMathOperator *FPOp, IRBuilder<> &B,
                                const FuncInfo &FInfo) {
  // skip vector function
  if (getVecSize(FInfo) != 1)
    return false;

  Value *opr0 = FPOp->getOperand(0);
  Value *opr1 = FPOp->getOperand(1);

  // Only a compile-time constant degree can be folded.
  ConstantInt *CINT = dyn_cast<ConstantInt>(opr1);
  if (!CINT) {
    return false;
  }
  int ci_opr1 = (int)CINT->getSExtValue();
  if (ci_opr1 == 1) { // rootn(x, 1) = x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n");
    replaceCall(FPOp, opr0);
    return true;
  }

  Module *M = B.GetInsertBlock()->getModule();
  if (ci_opr1 == 2) { // rootn(x, 2) = sqrt(x)
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> sqrt(" << *opr0
                        << ")\n");
      Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2sqrt");
      replaceCall(FPOp, nval);
      return true;
    }
  } else if (ci_opr1 == 3) { // rootn(x, 3) = cbrt(x)
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_CBRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> cbrt(" << *opr0
                        << ")\n");
      Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2cbrt");
      replaceCall(FPOp, nval);
      return true;
    }
  } else if (ci_opr1 == -1) { // rootn(x, -1) = 1.0/x
    LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1.0 / " << *opr0 << "\n");
    Value *nval = B.CreateFDiv(ConstantFP::get(opr0->getType(), 1.0),
                               opr0,
                               "__rootn2div");

    replaceCall(FPOp, nval);
    return true;
  } else if (ci_opr1 == -2) { // rootn(x, -2) = rsqrt(x)
    if (FunctionCallee FPExpr =
            getFunction(M, AMDGPULibFunc(AMDGPULibFunc::EI_RSQRT, FInfo))) {
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> rsqrt(" << *opr0
                        << ")\n");
      Value *nval = CreateCallEx(B,FPExpr, opr0, "__rootn2rsqrt");
      replaceCall(FPOp, nval);
      return true;
    }
  }
  return false;
}

// Get a scalar native builtin single argument FP function
// Returns null for f64 (no native variants) or ids with no native builtin.
FunctionCallee AMDGPULibCalls::getNativeFunction(Module *M,
                                                 const FuncInfo &FInfo) {
  if (getArgType(FInfo) == AMDGPULibFunc::F64 || !HasNative(FInfo.getId()))
    return nullptr;
  FuncInfo nf = FInfo;
  nf.setPrefix(AMDGPULibFunc::NATIVE);
  return getFunction(M, nf);
}

// Some library calls are just wrappers around llvm intrinsics, but compiled
// conservatively. Preserve the flags from the original call site by
// substituting them with direct calls with all the flags.
bool AMDGPULibCalls::shouldReplaceLibcallWithIntrinsic(const CallInst *CI,
                                                       bool AllowMinSizeF32,
                                                       bool AllowF64,
                                                       bool AllowStrictFP) {
  Type *FltTy = CI->getType()->getScalarType();
  const bool IsF32 = FltTy->isFloatTy();

  // f64 intrinsics aren't implemented for most operations.
  if (!IsF32 && !FltTy->isHalfTy() && (!AllowF64 || !FltTy->isDoubleTy()))
    return false;

  // We're implicitly inlining by replacing the libcall with the intrinsic, so
  // don't do it for noinline call sites.
  if (CI->isNoInline())
    return false;

  const Function *ParentF = CI->getFunction();
  // TODO: Handle strictfp
  if (!AllowStrictFP && ParentF->hasFnAttribute(Attribute::StrictFP))
    return false;

  // In minsize functions the f32 replacement may grow code; only allow it
  // when the caller explicitly opted in.
  if (IsF32 && !AllowMinSizeF32 && ParentF->hasMinSize())
    return false;
  return true;
}

// Rewrite CI in place to call the intrinsic IntrID instead of the libcall,
// splatting a scalar operand first if the other operand is a vector so the
// intrinsic sees uniform operand types.
void AMDGPULibCalls::replaceLibCallWithSimpleIntrinsic(IRBuilder<> &B,
                                                       CallInst *CI,
                                                       Intrinsic::ID IntrID) {
  if (CI->arg_size() == 2) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    VectorType *Arg0VecTy = dyn_cast<VectorType>(Arg0->getType());
    VectorType *Arg1VecTy = dyn_cast<VectorType>(Arg1->getType());
    if (Arg0VecTy && !Arg1VecTy) {
      Value *SplatRHS = B.CreateVectorSplat(Arg0VecTy->getElementCount(), Arg1);
      CI->setArgOperand(1, SplatRHS);
    } else if (!Arg0VecTy && Arg1VecTy) {
      Value *SplatLHS = B.CreateVectorSplat(Arg1VecTy->getElementCount(), Arg0);
      CI->setArgOperand(0, SplatLHS);
    }
  }

  CI->setCalledFunction(
      Intrinsic::getDeclaration(CI->getModule(), IntrID, {CI->getType()}));
}

// Profitability check + rewrite in one step; returns true if CI was changed.
bool AMDGPULibCalls::tryReplaceLibcallWithSimpleIntrinsic(
    IRBuilder<> &B, CallInst *CI, Intrinsic::ID IntrID, bool AllowMinSizeF32,
    bool AllowF64, bool AllowStrictFP) {
  if (!shouldReplaceLibcallWithIntrinsic(CI, AllowMinSizeF32, AllowF64,
                                         AllowStrictFP))
    return false;
  replaceLibCallWithSimpleIntrinsic(B, CI, IntrID);
  return true;
}

// fold sqrt -> native_sqrt (x)
// Only for scalar f32 non-native calls, and only under relaxed math.
bool AMDGPULibCalls::fold_sqrt(FPMathOperator *FPOp, IRBuilder<> &B,
                               const FuncInfo &FInfo) {
  if (!isUnsafeMath(FPOp))
    return false;

  if (getArgType(FInfo) == AMDGPULibFunc::F32 && (getVecSize(FInfo) == 1) &&
      (FInfo.getPrefix() != AMDGPULibFunc::NATIVE)) {
    Module *M = B.GetInsertBlock()->getModule();

    if (FunctionCallee FPExpr = getNativeFunction(
            M, AMDGPULibFunc(AMDGPULibFunc::EI_SQRT, FInfo))) {
      Value *opr0 = FPOp->getOperand(0);
      LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> "
                        << "sqrt(" << *opr0 << ")\n");
      Value *nval = CreateCallEx(B,FPExpr, opr0, "__sqrt");
      replaceCall(FPOp, nval);
      return true;
    }
  }
  return false;
}

// Emit a sincos call computing both results of Arg: the call's return value
// is one result and the second is written through an alloca whose loaded
// value is returned as the middle tuple element (see the declaration's
// (sin, cos, sincos-call) documentation).
std::tuple<Value *, Value *, Value *>
AMDGPULibCalls::insertSinCos(Value *Arg, FastMathFlags FMF, IRBuilder<> &B,
                             FunctionCallee Fsincos) {
  DebugLoc DL = B.getCurrentDebugLocation();
  Function *F = B.GetInsertBlock()->getParent();
  B.SetInsertPointPastAllocas(F);

  AllocaInst *Alloc = B.CreateAlloca(Arg->getType(), nullptr, "__sincos_");

  if (Instruction *ArgInst = dyn_cast<Instruction>(Arg)) {
    // If the argument is an instruction, it must dominate all uses so put our
    // sincos call there. Otherwise, right after the allocas works well enough
    // if it's an argument or constant.

    B.SetInsertPoint(ArgInst->getParent(), ++ArgInst->getIterator());

    // SetInsertPoint unwelcomely always tries to set the debug loc.
    B.SetCurrentDebugLocation(DL);
  }

  Type *CosPtrTy = Fsincos.getFunctionType()->getParamType(1);

  // The allocaInst allocates the memory in private address space. This need
  // to be addrspacecasted to point to the address space of cos pointer type.
  // In OpenCL 2.0 this is generic, while in 1.2 that is private.
  Value *CastAlloc = B.CreateAddrSpaceCast(Alloc, CosPtrTy);

  CallInst *SinCos = CreateCallEx2(B, Fsincos, Arg, CastAlloc);

  // TODO: Is it worth trying to preserve the location for the cos calls for the
  // load?

  LoadInst *LoadCos = B.CreateLoad(Alloc->getAllocatedType(), Alloc);
  return {SinCos, LoadCos, SinCos};
}

// fold sin, cos -> sincos.
// Given a sin (or cos) call, find all sin/cos/sincos calls in this function
// that take the same argument and fold them into a single sincos call.
// Returns true if the rewrite happened (requires at least one sin AND one
// cos user of the argument).
bool AMDGPULibCalls::fold_sincos(FPMathOperator *FPOp, IRBuilder<> &B,
                                 const FuncInfo &fInfo) {
  assert(fInfo.getId() == AMDGPULibFunc::EI_SIN ||
         fInfo.getId() == AMDGPULibFunc::EI_COS);

  // Only plain (unprefixed) f32/f64 sin/cos are eligible.
  if ((getArgType(fInfo) != AMDGPULibFunc::F32 &&
       getArgType(fInfo) != AMDGPULibFunc::F64) ||
      fInfo.getPrefix() != AMDGPULibFunc::NOPFX)
    return false;

  bool const isSin = fInfo.getId() == AMDGPULibFunc::EI_SIN;

  Value *CArgVal = FPOp->getOperand(0);
  CallInst *CI = cast<CallInst>(FPOp);

  Function *F = B.GetInsertBlock()->getParent();
  Module *M = F->getParent();

  // Merge the sin and cos. For OpenCL 2.0, there may only be a generic pointer
  // implementation. Prefer the private form if available.
  AMDGPULibFunc SinCosLibFuncPrivate(AMDGPULibFunc::EI_SINCOS, fInfo);
  SinCosLibFuncPrivate.getLeads()[0].PtrKind =
      AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::PRIVATE_ADDRESS);

  AMDGPULibFunc SinCosLibFuncGeneric(AMDGPULibFunc::EI_SINCOS, fInfo);
  SinCosLibFuncGeneric.getLeads()[0].PtrKind =
      AMDGPULibFunc::getEPtrKindFromAddrSpace(AMDGPUAS::FLAT_ADDRESS);

  FunctionCallee FSinCosPrivate = getFunction(M, SinCosLibFuncPrivate);
  FunctionCallee FSinCosGeneric = getFunction(M, SinCosLibFuncGeneric);
  FunctionCallee FSinCos = FSinCosPrivate ? FSinCosPrivate : FSinCosGeneric;
  if (!FSinCos)
    return false;

  SmallVector<CallInst *> SinCalls;
  SmallVector<CallInst *> CosCalls;
  SmallVector<CallInst *> SinCosCalls;
  FuncInfo PartnerInfo(isSin ? AMDGPULibFunc::EI_COS : AMDGPULibFunc::EI_SIN,
                       fInfo);
  const std::string PairName = PartnerInfo.mangle();

  // Mangled names we will recognize while scanning the argument's users.
  StringRef SinName = isSin ? CI->getCalledFunction()->getName() : PairName;
  StringRef CosName = isSin ? PairName : CI->getCalledFunction()->getName();
  const std::string SinCosPrivateName = SinCosLibFuncPrivate.mangle();
  const std::string SinCosGenericName = SinCosLibFuncGeneric.mangle();

  // Intersect the two sets of flags.
  FastMathFlags FMF = FPOp->getFastMathFlags();
  MDNode *FPMath = CI->getMetadata(LLVMContext::MD_fpmath);

  SmallVector<DILocation *> MergeDbgLocs = {CI->getDebugLoc()};

  // Collect every sin/cos/sincos call in this function that uses the same
  // argument, intersecting fast-math flags and merging !fpmath metadata so
  // the combined call is no stronger than any call it replaces.
  for (User* U : CArgVal->users()) {
    CallInst *XI = dyn_cast<CallInst>(U);
    if (!XI || XI->getFunction() != F || XI->isNoBuiltin())
      continue;

    Function *UCallee = XI->getCalledFunction();
    if (!UCallee)
      continue;

    bool Handled = true;

    if (UCallee->getName() == SinName)
      SinCalls.push_back(XI);
    else if (UCallee->getName() == CosName)
      CosCalls.push_back(XI);
    else if (UCallee->getName() == SinCosPrivateName ||
             UCallee->getName() == SinCosGenericName)
      SinCosCalls.push_back(XI);
    else
      Handled = false;

    if (Handled) {
      MergeDbgLocs.push_back(XI->getDebugLoc());
      auto *OtherOp = cast<FPMathOperator>(XI);
      FMF &= OtherOp->getFastMathFlags();
      FPMath = MDNode::getMostGenericFPMath(
          FPMath, XI->getMetadata(LLVMContext::MD_fpmath));
    }
  }

  // Folding only pays off when both results are actually needed.
  if (SinCalls.empty() || CosCalls.empty())
    return false;

  B.setFastMathFlags(FMF);
  B.setDefaultFPMathTag(FPMath);
  DILocation *DbgLoc = DILocation::getMergedLocations(MergeDbgLocs);
  B.SetCurrentDebugLocation(DbgLoc);

  auto [Sin, Cos, SinCos] = insertSinCos(CArgVal, FMF, B, FSinCos);

  auto replaceTrigInsts = [](ArrayRef<CallInst *> Calls, Value *Res) {
    for (CallInst *C : Calls)
      C->replaceAllUsesWith(Res);

    // Leave the other dead instructions to avoid clobbering iterators.
  };

  replaceTrigInsts(SinCalls, Sin);
  replaceTrigInsts(CosCalls, Cos);
  replaceTrigInsts(SinCosCalls, SinCos);

  // It's safe to delete the original now.
  CI->eraseFromParent();
  return true;
}

// Constant-fold one scalar libcall at compile time using host math. Res0
// receives the result; Res1 is only written for sincos. copr0/copr1 are the
// (possibly null) constant operands. Returns false for unhandled ids or
// non-constant required operands.
bool AMDGPULibCalls::evaluateScalarMathFunc(const FuncInfo &FInfo, double &Res0,
                                            double &Res1, Constant *copr0,
                                            Constant *copr1) {
  // By default, opr0/opr1 hold values of float/double type.
  // If they are not float/double, each function has to handle its
  // operand separately.
  double opr0 = 0.0, opr1 = 0.0;
  ConstantFP *fpopr0 = dyn_cast_or_null<ConstantFP>(copr0);
  ConstantFP *fpopr1 = dyn_cast_or_null<ConstantFP>(copr1);
  if (fpopr0) {
    opr0 = (getArgType(FInfo) == AMDGPULibFunc::F64)
               ? fpopr0->getValueAPF().convertToDouble()
               : (double)fpopr0->getValueAPF().convertToFloat();
  }

  if (fpopr1) {
    opr1 = (getArgType(FInfo) == AMDGPULibFunc::F64)
               ? fpopr1->getValueAPF().convertToDouble()
               : (double)fpopr1->getValueAPF().convertToFloat();
  }

  switch (FInfo.getId()) {
  default : return false;

  case AMDGPULibFunc::EI_ACOS:
    Res0 = acos(opr0);
    return true;

  case AMDGPULibFunc::EI_ACOSH:
    // acosh(x) == log(x + sqrt(x*x - 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 - 1.0));
    return true;

  case AMDGPULibFunc::EI_ACOSPI:
    Res0 = acos(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ASIN:
    Res0 = asin(opr0);
    return true;

  case AMDGPULibFunc::EI_ASINH:
    // asinh(x) == log(x + sqrt(x*x + 1))
    Res0 = log(opr0 + sqrt(opr0*opr0 + 1.0));
    return true;

  case AMDGPULibFunc::EI_ASINPI:
    Res0 = asin(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_ATAN:
    Res0 = atan(opr0);
    return true;

  case AMDGPULibFunc::EI_ATANH:
    // atanh(x) == (log(x+1) - log(x-1))/2;
    Res0 = (log(opr0 + 1.0) - log(opr0 - 1.0))/2.0;
    return true;

  case AMDGPULibFunc::EI_ATANPI:
    Res0 = atan(opr0) / MATH_PI;
    return true;

  case AMDGPULibFunc::EI_CBRT:
    // Sign-aware cube root: pow() would return NaN for negative inputs.
    Res0 = (opr0 < 0.0) ? -pow(-opr0, 1.0/3.0) : pow(opr0, 1.0/3.0);
    return true;

  case AMDGPULibFunc::EI_COS:
    Res0 = cos(opr0);
    return true;

  case AMDGPULibFunc::EI_COSH:
    Res0 = cosh(opr0);
    return true;

  case AMDGPULibFunc::EI_COSPI:
    Res0 = cos(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_EXP:
    Res0 = exp(opr0);
    return true;

  case AMDGPULibFunc::EI_EXP2:
    Res0 = pow(2.0, opr0);
    return true;

  case AMDGPULibFunc::EI_EXP10:
    Res0 = pow(10.0, opr0);
    return true;

  case AMDGPULibFunc::EI_LOG:
    Res0 = log(opr0);
    return true;

  case AMDGPULibFunc::EI_LOG2:
    Res0 = log(opr0) / log(2.0);
    return true;

  case AMDGPULibFunc::EI_LOG10:
    Res0 = log(opr0) / log(10.0);
    return true;

  case AMDGPULibFunc::EI_RSQRT:
    Res0 = 1.0 / sqrt(opr0);
    return true;

  case AMDGPULibFunc::EI_SIN:
    Res0 = sin(opr0);
    return true;

  case AMDGPULibFunc::EI_SINH:
    Res0 = sinh(opr0);
    return true;

  case AMDGPULibFunc::EI_SINPI:
    Res0 = sin(MATH_PI * opr0);
    return true;

  case AMDGPULibFunc::EI_TAN:
    Res0 = tan(opr0);
    return true;

  case AMDGPULibFunc::EI_TANH:
    Res0 = tanh(opr0);
    return true;

  case AMDGPULibFunc::EI_TANPI:
    Res0 = tan(MATH_PI * opr0);
    return true;

  // two-arg functions
  case AMDGPULibFunc::EI_POW:
  case AMDGPULibFunc::EI_POWR:
    Res0 = pow(opr0, opr1);
    return true;

  case AMDGPULibFunc::EI_POWN: {
    // pown's exponent is an integer constant, not a ConstantFP.
    if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) {
      double val = (double)iopr1->getSExtValue();
      Res0 = pow(opr0, val);
      return true;
    }
    return false;
  }

case AMDGPULibFunc::EI_ROOTN: { 1592 if (ConstantInt *iopr1 = dyn_cast_or_null<ConstantInt>(copr1)) { 1593 double val = (double)iopr1->getSExtValue(); 1594 Res0 = pow(opr0, 1.0 / val); 1595 return true; 1596 } 1597 return false; 1598 } 1599 1600 // with ptr arg 1601 case AMDGPULibFunc::EI_SINCOS: 1602 Res0 = sin(opr0); 1603 Res1 = cos(opr0); 1604 return true; 1605 } 1606 1607 return false; 1608 } 1609 1610 bool AMDGPULibCalls::evaluateCall(CallInst *aCI, const FuncInfo &FInfo) { 1611 int numArgs = (int)aCI->arg_size(); 1612 if (numArgs > 3) 1613 return false; 1614 1615 Constant *copr0 = nullptr; 1616 Constant *copr1 = nullptr; 1617 if (numArgs > 0) { 1618 if ((copr0 = dyn_cast<Constant>(aCI->getArgOperand(0))) == nullptr) 1619 return false; 1620 } 1621 1622 if (numArgs > 1) { 1623 if ((copr1 = dyn_cast<Constant>(aCI->getArgOperand(1))) == nullptr) { 1624 if (FInfo.getId() != AMDGPULibFunc::EI_SINCOS) 1625 return false; 1626 } 1627 } 1628 1629 // At this point, all arguments to aCI are constants. 1630 1631 // max vector size is 16, and sincos will generate two results. 1632 double DVal0[16], DVal1[16]; 1633 int FuncVecSize = getVecSize(FInfo); 1634 bool hasTwoResults = (FInfo.getId() == AMDGPULibFunc::EI_SINCOS); 1635 if (FuncVecSize == 1) { 1636 if (!evaluateScalarMathFunc(FInfo, DVal0[0], DVal1[0], copr0, copr1)) { 1637 return false; 1638 } 1639 } else { 1640 ConstantDataVector *CDV0 = dyn_cast_or_null<ConstantDataVector>(copr0); 1641 ConstantDataVector *CDV1 = dyn_cast_or_null<ConstantDataVector>(copr1); 1642 for (int i = 0; i < FuncVecSize; ++i) { 1643 Constant *celt0 = CDV0 ? CDV0->getElementAsConstant(i) : nullptr; 1644 Constant *celt1 = CDV1 ? 
CDV1->getElementAsConstant(i) : nullptr; 1645 if (!evaluateScalarMathFunc(FInfo, DVal0[i], DVal1[i], celt0, celt1)) { 1646 return false; 1647 } 1648 } 1649 } 1650 1651 LLVMContext &context = aCI->getContext(); 1652 Constant *nval0, *nval1; 1653 if (FuncVecSize == 1) { 1654 nval0 = ConstantFP::get(aCI->getType(), DVal0[0]); 1655 if (hasTwoResults) 1656 nval1 = ConstantFP::get(aCI->getType(), DVal1[0]); 1657 } else { 1658 if (getArgType(FInfo) == AMDGPULibFunc::F32) { 1659 SmallVector <float, 0> FVal0, FVal1; 1660 for (int i = 0; i < FuncVecSize; ++i) 1661 FVal0.push_back((float)DVal0[i]); 1662 ArrayRef<float> tmp0(FVal0); 1663 nval0 = ConstantDataVector::get(context, tmp0); 1664 if (hasTwoResults) { 1665 for (int i = 0; i < FuncVecSize; ++i) 1666 FVal1.push_back((float)DVal1[i]); 1667 ArrayRef<float> tmp1(FVal1); 1668 nval1 = ConstantDataVector::get(context, tmp1); 1669 } 1670 } else { 1671 ArrayRef<double> tmp0(DVal0); 1672 nval0 = ConstantDataVector::get(context, tmp0); 1673 if (hasTwoResults) { 1674 ArrayRef<double> tmp1(DVal1); 1675 nval1 = ConstantDataVector::get(context, tmp1); 1676 } 1677 } 1678 } 1679 1680 if (hasTwoResults) { 1681 // sincos 1682 assert(FInfo.getId() == AMDGPULibFunc::EI_SINCOS && 1683 "math function with ptr arg not supported yet"); 1684 new StoreInst(nval1, aCI->getArgOperand(1), aCI); 1685 } 1686 1687 replaceCall(aCI, nval0); 1688 return true; 1689 } 1690 1691 PreservedAnalyses AMDGPUSimplifyLibCallsPass::run(Function &F, 1692 FunctionAnalysisManager &AM) { 1693 AMDGPULibCalls Simplifier; 1694 Simplifier.initNativeFuncs(); 1695 Simplifier.initFunction(F, AM); 1696 1697 bool Changed = false; 1698 1699 LLVM_DEBUG(dbgs() << "AMDIC: process function "; 1700 F.printAsOperand(dbgs(), false, F.getParent()); dbgs() << '\n';); 1701 1702 for (auto &BB : F) { 1703 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1704 // Ignore non-calls. 
1705 CallInst *CI = dyn_cast<CallInst>(I); 1706 ++I; 1707 1708 if (CI) { 1709 if (Simplifier.fold(CI)) 1710 Changed = true; 1711 } 1712 } 1713 } 1714 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1715 } 1716 1717 PreservedAnalyses AMDGPUUseNativeCallsPass::run(Function &F, 1718 FunctionAnalysisManager &AM) { 1719 if (UseNative.empty()) 1720 return PreservedAnalyses::all(); 1721 1722 AMDGPULibCalls Simplifier; 1723 Simplifier.initNativeFuncs(); 1724 Simplifier.initFunction(F, AM); 1725 1726 bool Changed = false; 1727 for (auto &BB : F) { 1728 for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E;) { 1729 // Ignore non-calls. 1730 CallInst *CI = dyn_cast<CallInst>(I); 1731 ++I; 1732 if (CI && Simplifier.useNative(CI)) 1733 Changed = true; 1734 } 1735 } 1736 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 1737 } 1738