1 //=== AMDGPUPrintfRuntimeBinding.cpp - OpenCL printf implementation -------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // \file
9 //
// This pass binds printf calls to a kernel argument pointer that the runtime
// later binds to a buffer.
12 //
13 // This pass traverses the functions in the module and converts
14 // each call to printf to a sequence of operations that
15 // store the following into the printf buffer:
16 // - format string (passed as a module's metadata unique ID)
17 // - bitwise copies of printf arguments
18 // The backend passes will need to store metadata in the kernel
19 //===----------------------------------------------------------------------===//
20
21 #include "AMDGPU.h"
22 #include "llvm/Analysis/InstructionSimplify.h"
23 #include "llvm/Analysis/TargetLibraryInfo.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/IR/Instructions.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
29
30 using namespace llvm;
31
32 #define DEBUG_TYPE "printfToRuntime"
33 #define DWORD_ALIGN 4
34
35 namespace {
36 class AMDGPUPrintfRuntimeBinding final : public ModulePass {
37
38 public:
39 static char ID;
40
41 explicit AMDGPUPrintfRuntimeBinding();
42
43 private:
44 bool runOnModule(Module &M) override;
45
getAnalysisUsage(AnalysisUsage & AU) const46 void getAnalysisUsage(AnalysisUsage &AU) const override {
47 AU.addRequired<TargetLibraryInfoWrapperPass>();
48 AU.addRequired<DominatorTreeWrapperPass>();
49 }
50 };
51
52 class AMDGPUPrintfRuntimeBindingImpl {
53 public:
AMDGPUPrintfRuntimeBindingImpl(function_ref<const DominatorTree & (Function &)> GetDT,function_ref<const TargetLibraryInfo & (Function &)> GetTLI)54 AMDGPUPrintfRuntimeBindingImpl(
55 function_ref<const DominatorTree &(Function &)> GetDT,
56 function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
57 : GetDT(GetDT), GetTLI(GetTLI) {}
58 bool run(Module &M);
59
60 private:
61 void getConversionSpecifiers(SmallVectorImpl<char> &OpConvSpecifiers,
62 StringRef fmt, size_t num_ops) const;
63
64 bool shouldPrintAsStr(char Specifier, Type *OpType) const;
65 bool lowerPrintfForGpu(Module &M);
66
simplify(Instruction * I,const TargetLibraryInfo * TLI,const DominatorTree * DT)67 Value *simplify(Instruction *I, const TargetLibraryInfo *TLI,
68 const DominatorTree *DT) {
69 return SimplifyInstruction(I, {*TD, TLI, DT});
70 }
71
72 const DataLayout *TD;
73 function_ref<const DominatorTree &(Function &)> GetDT;
74 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
75 SmallVector<CallInst *, 32> Printfs;
76 };
77 } // namespace
78
79 char AMDGPUPrintfRuntimeBinding::ID = 0;
80
81 INITIALIZE_PASS_BEGIN(AMDGPUPrintfRuntimeBinding,
82 "amdgpu-printf-runtime-binding", "AMDGPU Printf lowering",
83 false, false)
84 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
85 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
86 INITIALIZE_PASS_END(AMDGPUPrintfRuntimeBinding, "amdgpu-printf-runtime-binding",
87 "AMDGPU Printf lowering", false, false)
88
89 char &llvm::AMDGPUPrintfRuntimeBindingID = AMDGPUPrintfRuntimeBinding::ID;
90
91 namespace llvm {
createAMDGPUPrintfRuntimeBinding()92 ModulePass *createAMDGPUPrintfRuntimeBinding() {
93 return new AMDGPUPrintfRuntimeBinding();
94 }
95 } // namespace llvm
96
AMDGPUPrintfRuntimeBinding()97 AMDGPUPrintfRuntimeBinding::AMDGPUPrintfRuntimeBinding() : ModulePass(ID) {
98 initializeAMDGPUPrintfRuntimeBindingPass(*PassRegistry::getPassRegistry());
99 }
100
getConversionSpecifiers(SmallVectorImpl<char> & OpConvSpecifiers,StringRef Fmt,size_t NumOps) const101 void AMDGPUPrintfRuntimeBindingImpl::getConversionSpecifiers(
102 SmallVectorImpl<char> &OpConvSpecifiers, StringRef Fmt,
103 size_t NumOps) const {
104 // not all format characters are collected.
105 // At this time the format characters of interest
106 // are %p and %s, which use to know if we
107 // are either storing a literal string or a
108 // pointer to the printf buffer.
109 static const char ConvSpecifiers[] = "cdieEfgGaosuxXp";
110 size_t CurFmtSpecifierIdx = 0;
111 size_t PrevFmtSpecifierIdx = 0;
112
113 while ((CurFmtSpecifierIdx = Fmt.find_first_of(
114 ConvSpecifiers, CurFmtSpecifierIdx)) != StringRef::npos) {
115 bool ArgDump = false;
116 StringRef CurFmt = Fmt.substr(PrevFmtSpecifierIdx,
117 CurFmtSpecifierIdx - PrevFmtSpecifierIdx);
118 size_t pTag = CurFmt.find_last_of("%");
119 if (pTag != StringRef::npos) {
120 ArgDump = true;
121 while (pTag && CurFmt[--pTag] == '%') {
122 ArgDump = !ArgDump;
123 }
124 }
125
126 if (ArgDump)
127 OpConvSpecifiers.push_back(Fmt[CurFmtSpecifierIdx]);
128
129 PrevFmtSpecifierIdx = ++CurFmtSpecifierIdx;
130 }
131 }
132
shouldPrintAsStr(char Specifier,Type * OpType) const133 bool AMDGPUPrintfRuntimeBindingImpl::shouldPrintAsStr(char Specifier,
134 Type *OpType) const {
135 if (Specifier != 's')
136 return false;
137 const PointerType *PT = dyn_cast<PointerType>(OpType);
138 if (!PT || PT->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
139 return false;
140 Type *ElemType = PT->getContainedType(0);
141 if (ElemType->getTypeID() != Type::IntegerTyID)
142 return false;
143 IntegerType *ElemIType = cast<IntegerType>(ElemType);
144 return ElemIType->getBitWidth() == 8;
145 }
146
lowerPrintfForGpu(Module & M)147 bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
148 LLVMContext &Ctx = M.getContext();
149 IRBuilder<> Builder(Ctx);
150 Type *I32Ty = Type::getInt32Ty(Ctx);
151 unsigned UniqID = 0;
152 // NB: This is important for this string size to be divizable by 4
153 const char NonLiteralStr[4] = "???";
154
155 for (auto CI : Printfs) {
156 unsigned NumOps = CI->getNumArgOperands();
157
158 SmallString<16> OpConvSpecifiers;
159 Value *Op = CI->getArgOperand(0);
160
161 if (auto LI = dyn_cast<LoadInst>(Op)) {
162 Op = LI->getPointerOperand();
163 for (auto Use : Op->users()) {
164 if (auto SI = dyn_cast<StoreInst>(Use)) {
165 Op = SI->getValueOperand();
166 break;
167 }
168 }
169 }
170
171 if (auto I = dyn_cast<Instruction>(Op)) {
172 Value *Op_simplified =
173 simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
174 if (Op_simplified)
175 Op = Op_simplified;
176 }
177
178 ConstantExpr *ConstExpr = dyn_cast<ConstantExpr>(Op);
179
180 if (ConstExpr) {
181 GlobalVariable *GVar = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
182
183 StringRef Str("unknown");
184 if (GVar && GVar->hasInitializer()) {
185 auto *Init = GVar->getInitializer();
186 if (auto *CA = dyn_cast<ConstantDataArray>(Init)) {
187 if (CA->isString())
188 Str = CA->getAsCString();
189 } else if (isa<ConstantAggregateZero>(Init)) {
190 Str = "";
191 }
192 //
193 // we need this call to ascertain
194 // that we are printing a string
195 // or a pointer. It takes out the
196 // specifiers and fills up the first
197 // arg
198 getConversionSpecifiers(OpConvSpecifiers, Str, NumOps - 1);
199 }
200 // Add metadata for the string
201 std::string AStreamHolder;
202 raw_string_ostream Sizes(AStreamHolder);
203 int Sum = DWORD_ALIGN;
204 Sizes << CI->getNumArgOperands() - 1;
205 Sizes << ':';
206 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
207 ArgCount <= OpConvSpecifiers.size();
208 ArgCount++) {
209 Value *Arg = CI->getArgOperand(ArgCount);
210 Type *ArgType = Arg->getType();
211 unsigned ArgSize = TD->getTypeAllocSizeInBits(ArgType);
212 ArgSize = ArgSize / 8;
213 //
214 // ArgSize by design should be a multiple of DWORD_ALIGN,
215 // expand the arguments that do not follow this rule.
216 //
217 if (ArgSize % DWORD_ALIGN != 0) {
218 llvm::Type *ResType = llvm::Type::getInt32Ty(Ctx);
219 auto *LLVMVecType = llvm::dyn_cast<llvm::FixedVectorType>(ArgType);
220 int NumElem = LLVMVecType ? LLVMVecType->getNumElements() : 1;
221 if (LLVMVecType && NumElem > 1)
222 ResType = llvm::FixedVectorType::get(ResType, NumElem);
223 Builder.SetInsertPoint(CI);
224 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
225 if (OpConvSpecifiers[ArgCount - 1] == 'x' ||
226 OpConvSpecifiers[ArgCount - 1] == 'X' ||
227 OpConvSpecifiers[ArgCount - 1] == 'u' ||
228 OpConvSpecifiers[ArgCount - 1] == 'o')
229 Arg = Builder.CreateZExt(Arg, ResType);
230 else
231 Arg = Builder.CreateSExt(Arg, ResType);
232 ArgType = Arg->getType();
233 ArgSize = TD->getTypeAllocSizeInBits(ArgType);
234 ArgSize = ArgSize / 8;
235 CI->setOperand(ArgCount, Arg);
236 }
237 if (OpConvSpecifiers[ArgCount - 1] == 'f') {
238 ConstantFP *FpCons = dyn_cast<ConstantFP>(Arg);
239 if (FpCons)
240 ArgSize = 4;
241 else {
242 FPExtInst *FpExt = dyn_cast<FPExtInst>(Arg);
243 if (FpExt && FpExt->getType()->isDoubleTy() &&
244 FpExt->getOperand(0)->getType()->isFloatTy())
245 ArgSize = 4;
246 }
247 }
248 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
249 if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
250 auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
251 if (GV && GV->hasInitializer()) {
252 Constant *Init = GV->getInitializer();
253 bool IsZeroValue = Init->isZeroValue();
254 auto *CA = dyn_cast<ConstantDataArray>(Init);
255 if (IsZeroValue || (CA && CA->isString())) {
256 size_t SizeStr =
257 IsZeroValue ? 1 : (strlen(CA->getAsCString().data()) + 1);
258 size_t Rem = SizeStr % DWORD_ALIGN;
259 size_t NSizeStr = 0;
260 LLVM_DEBUG(dbgs() << "Printf string original size = " << SizeStr
261 << '\n');
262 if (Rem) {
263 NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
264 } else {
265 NSizeStr = SizeStr;
266 }
267 ArgSize = NSizeStr;
268 }
269 } else {
270 ArgSize = sizeof(NonLiteralStr);
271 }
272 } else {
273 ArgSize = sizeof(NonLiteralStr);
274 }
275 }
276 LLVM_DEBUG(dbgs() << "Printf ArgSize (in buffer) = " << ArgSize
277 << " for type: " << *ArgType << '\n');
278 Sizes << ArgSize << ':';
279 Sum += ArgSize;
280 }
281 LLVM_DEBUG(dbgs() << "Printf format string in source = " << Str.str()
282 << '\n');
283 for (size_t I = 0; I < Str.size(); ++I) {
284 // Rest of the C escape sequences (e.g. \') are handled correctly
285 // by the MDParser
286 switch (Str[I]) {
287 case '\a':
288 Sizes << "\\a";
289 break;
290 case '\b':
291 Sizes << "\\b";
292 break;
293 case '\f':
294 Sizes << "\\f";
295 break;
296 case '\n':
297 Sizes << "\\n";
298 break;
299 case '\r':
300 Sizes << "\\r";
301 break;
302 case '\v':
303 Sizes << "\\v";
304 break;
305 case ':':
306 // ':' cannot be scanned by Flex, as it is defined as a delimiter
307 // Replace it with it's octal representation \72
308 Sizes << "\\72";
309 break;
310 default:
311 Sizes << Str[I];
312 break;
313 }
314 }
315
316 // Insert the printf_alloc call
317 Builder.SetInsertPoint(CI);
318 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
319
320 AttributeList Attr = AttributeList::get(Ctx, AttributeList::FunctionIndex,
321 Attribute::NoUnwind);
322
323 Type *SizetTy = Type::getInt32Ty(Ctx);
324
325 Type *Tys_alloc[1] = {SizetTy};
326 Type *I8Ptr = PointerType::get(Type::getInt8Ty(Ctx), 1);
327 FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
328 FunctionCallee PrintfAllocFn =
329 M.getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
330
331 LLVM_DEBUG(dbgs() << "Printf metadata = " << Sizes.str() << '\n');
332 std::string fmtstr = itostr(++UniqID) + ":" + Sizes.str().c_str();
333 MDString *fmtStrArray = MDString::get(Ctx, fmtstr);
334
335 // Instead of creating global variables, the
336 // printf format strings are extracted
337 // and passed as metadata. This avoids
338 // polluting llvm's symbol tables in this module.
339 // Metadata is going to be extracted
340 // by the backend passes and inserted
341 // into the OpenCL binary as appropriate.
342 StringRef amd("llvm.printf.fmts");
343 NamedMDNode *metaD = M.getOrInsertNamedMetadata(amd);
344 MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
345 metaD->addOperand(myMD);
346 Value *sumC = ConstantInt::get(SizetTy, Sum, false);
347 SmallVector<Value *, 1> alloc_args;
348 alloc_args.push_back(sumC);
349 CallInst *pcall =
350 CallInst::Create(PrintfAllocFn, alloc_args, "printf_alloc_fn", CI);
351
352 //
353 // Insert code to split basicblock with a
354 // piece of hammock code.
355 // basicblock splits after buffer overflow check
356 //
357 ConstantPointerNull *zeroIntPtr =
358 ConstantPointerNull::get(PointerType::get(Type::getInt8Ty(Ctx), 1));
359 auto *cmp = cast<ICmpInst>(Builder.CreateICmpNE(pcall, zeroIntPtr, ""));
360 if (!CI->use_empty()) {
361 Value *result =
362 Builder.CreateSExt(Builder.CreateNot(cmp), I32Ty, "printf_res");
363 CI->replaceAllUsesWith(result);
364 }
365 SplitBlock(CI->getParent(), cmp);
366 Instruction *Brnch =
367 SplitBlockAndInsertIfThen(cmp, cmp->getNextNode(), false);
368
369 Builder.SetInsertPoint(Brnch);
370
371 // store unique printf id in the buffer
372 //
373 SmallVector<Value *, 1> ZeroIdxList;
374 ConstantInt *zeroInt =
375 ConstantInt::get(Ctx, APInt(32, StringRef("0"), 10));
376 ZeroIdxList.push_back(zeroInt);
377
378 GetElementPtrInst *BufferIdx = GetElementPtrInst::Create(
379 nullptr, pcall, ZeroIdxList, "PrintBuffID", Brnch);
380
381 Type *idPointer = PointerType::get(I32Ty, AMDGPUAS::GLOBAL_ADDRESS);
382 Value *id_gep_cast =
383 new BitCastInst(BufferIdx, idPointer, "PrintBuffIdCast", Brnch);
384
385 new StoreInst(ConstantInt::get(I32Ty, UniqID), id_gep_cast, Brnch);
386
387 SmallVector<Value *, 2> FourthIdxList;
388 ConstantInt *fourInt =
389 ConstantInt::get(Ctx, APInt(32, StringRef("4"), 10));
390
391 FourthIdxList.push_back(fourInt); // 1st 4 bytes hold the printf_id
392 // the following GEP is the buffer pointer
393 BufferIdx = GetElementPtrInst::Create(nullptr, pcall, FourthIdxList,
394 "PrintBuffGep", Brnch);
395
396 Type *Int32Ty = Type::getInt32Ty(Ctx);
397 Type *Int64Ty = Type::getInt64Ty(Ctx);
398 for (unsigned ArgCount = 1; ArgCount < CI->getNumArgOperands() &&
399 ArgCount <= OpConvSpecifiers.size();
400 ArgCount++) {
401 Value *Arg = CI->getArgOperand(ArgCount);
402 Type *ArgType = Arg->getType();
403 SmallVector<Value *, 32> WhatToStore;
404 if (ArgType->isFPOrFPVectorTy() && !isa<VectorType>(ArgType)) {
405 Type *IType = (ArgType->isFloatTy()) ? Int32Ty : Int64Ty;
406 if (OpConvSpecifiers[ArgCount - 1] == 'f') {
407 if (auto *FpCons = dyn_cast<ConstantFP>(Arg)) {
408 APFloat Val(FpCons->getValueAPF());
409 bool Lost = false;
410 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
411 &Lost);
412 Arg = ConstantFP::get(Ctx, Val);
413 IType = Int32Ty;
414 } else if (auto *FpExt = dyn_cast<FPExtInst>(Arg)) {
415 if (FpExt->getType()->isDoubleTy() &&
416 FpExt->getOperand(0)->getType()->isFloatTy()) {
417 Arg = FpExt->getOperand(0);
418 IType = Int32Ty;
419 }
420 }
421 }
422 Arg = new BitCastInst(Arg, IType, "PrintArgFP", Brnch);
423 WhatToStore.push_back(Arg);
424 } else if (ArgType->getTypeID() == Type::PointerTyID) {
425 if (shouldPrintAsStr(OpConvSpecifiers[ArgCount - 1], ArgType)) {
426 const char *S = NonLiteralStr;
427 if (auto *ConstExpr = dyn_cast<ConstantExpr>(Arg)) {
428 auto *GV = dyn_cast<GlobalVariable>(ConstExpr->getOperand(0));
429 if (GV && GV->hasInitializer()) {
430 Constant *Init = GV->getInitializer();
431 bool IsZeroValue = Init->isZeroValue();
432 auto *CA = dyn_cast<ConstantDataArray>(Init);
433 if (IsZeroValue || (CA && CA->isString())) {
434 S = IsZeroValue ? "" : CA->getAsCString().data();
435 }
436 }
437 }
438 size_t SizeStr = strlen(S) + 1;
439 size_t Rem = SizeStr % DWORD_ALIGN;
440 size_t NSizeStr = 0;
441 if (Rem) {
442 NSizeStr = SizeStr + (DWORD_ALIGN - Rem);
443 } else {
444 NSizeStr = SizeStr;
445 }
446 if (S[0]) {
447 char *MyNewStr = new char[NSizeStr]();
448 strcpy(MyNewStr, S);
449 int NumInts = NSizeStr / 4;
450 int CharC = 0;
451 while (NumInts) {
452 int ANum = *(int *)(MyNewStr + CharC);
453 CharC += 4;
454 NumInts--;
455 Value *ANumV = ConstantInt::get(Int32Ty, ANum, false);
456 WhatToStore.push_back(ANumV);
457 }
458 delete[] MyNewStr;
459 } else {
460 // Empty string, give a hint to RT it is no NULL
461 Value *ANumV = ConstantInt::get(Int32Ty, 0xFFFFFF00, false);
462 WhatToStore.push_back(ANumV);
463 }
464 } else {
465 uint64_t Size = TD->getTypeAllocSizeInBits(ArgType);
466 assert((Size == 32 || Size == 64) && "unsupported size");
467 Type *DstType = (Size == 32) ? Int32Ty : Int64Ty;
468 Arg = new PtrToIntInst(Arg, DstType, "PrintArgPtr", Brnch);
469 WhatToStore.push_back(Arg);
470 }
471 } else if (isa<FixedVectorType>(ArgType)) {
472 Type *IType = NULL;
473 uint32_t EleCount = cast<FixedVectorType>(ArgType)->getNumElements();
474 uint32_t EleSize = ArgType->getScalarSizeInBits();
475 uint32_t TotalSize = EleCount * EleSize;
476 if (EleCount == 3) {
477 ShuffleVectorInst *Shuffle =
478 new ShuffleVectorInst(Arg, Arg, ArrayRef<int>{0, 1, 2, 2});
479 Shuffle->insertBefore(Brnch);
480 Arg = Shuffle;
481 ArgType = Arg->getType();
482 TotalSize += EleSize;
483 }
484 switch (EleSize) {
485 default:
486 EleCount = TotalSize / 64;
487 IType = Type::getInt64Ty(ArgType->getContext());
488 break;
489 case 8:
490 if (EleCount >= 8) {
491 EleCount = TotalSize / 64;
492 IType = Type::getInt64Ty(ArgType->getContext());
493 } else if (EleCount >= 3) {
494 EleCount = 1;
495 IType = Type::getInt32Ty(ArgType->getContext());
496 } else {
497 EleCount = 1;
498 IType = Type::getInt16Ty(ArgType->getContext());
499 }
500 break;
501 case 16:
502 if (EleCount >= 3) {
503 EleCount = TotalSize / 64;
504 IType = Type::getInt64Ty(ArgType->getContext());
505 } else {
506 EleCount = 1;
507 IType = Type::getInt32Ty(ArgType->getContext());
508 }
509 break;
510 }
511 if (EleCount > 1) {
512 IType = FixedVectorType::get(IType, EleCount);
513 }
514 Arg = new BitCastInst(Arg, IType, "PrintArgVect", Brnch);
515 WhatToStore.push_back(Arg);
516 } else {
517 WhatToStore.push_back(Arg);
518 }
519 for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
520 Value *TheBtCast = WhatToStore[I];
521 unsigned ArgSize =
522 TD->getTypeAllocSizeInBits(TheBtCast->getType()) / 8;
523 SmallVector<Value *, 1> BuffOffset;
524 BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
525
526 Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
527 Value *CastedGEP =
528 new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
529 StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
530 LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
531 << *StBuff << '\n');
532 (void)StBuff;
533 if (I + 1 == E && ArgCount + 1 == CI->getNumArgOperands())
534 break;
535 BufferIdx = GetElementPtrInst::Create(nullptr, BufferIdx, BuffOffset,
536 "PrintBuffNextPtr", Brnch);
537 LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
538 << *BufferIdx << '\n');
539 }
540 }
541 }
542 }
543
544 // erase the printf calls
545 for (auto CI : Printfs)
546 CI->eraseFromParent();
547
548 Printfs.clear();
549 return true;
550 }
551
run(Module & M)552 bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
553 Triple TT(M.getTargetTriple());
554 if (TT.getArch() == Triple::r600)
555 return false;
556
557 auto PrintfFunction = M.getFunction("printf");
558 if (!PrintfFunction)
559 return false;
560
561 for (auto &U : PrintfFunction->uses()) {
562 if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
563 if (CI->isCallee(&U))
564 Printfs.push_back(CI);
565 }
566 }
567
568 if (Printfs.empty())
569 return false;
570
571 if (auto HostcallFunction = M.getFunction("__ockl_hostcall_internal")) {
572 for (auto &U : HostcallFunction->uses()) {
573 if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
574 M.getContext().emitError(
575 CI, "Cannot use both printf and hostcall in the same module");
576 }
577 }
578 }
579
580 TD = &M.getDataLayout();
581
582 return lowerPrintfForGpu(M);
583 }
584
runOnModule(Module & M)585 bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
586 auto GetDT = [this](Function &F) -> DominatorTree & {
587 return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
588 };
589 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
590 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
591 };
592
593 return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
594 }
595
596 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)597 AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
598 FunctionAnalysisManager &FAM =
599 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
600 auto GetDT = [&FAM](Function &F) -> DominatorTree & {
601 return FAM.getResult<DominatorTreeAnalysis>(F);
602 };
603 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
604 return FAM.getResult<TargetLibraryAnalysis>(F);
605 };
606 bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
607 return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
608 }
609