1 //===- NVPTXLowerAggrCopies.cpp - ------------------------------*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // \file
// Lower aggregate copies, memset, memcpy, memmove intrinsics into loops when
// the size is large or is not a compile-time constant.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "NVPTXLowerAggrCopies.h"
17 #include "llvm/Analysis/TargetTransformInfo.h"
18 #include "llvm/CodeGen/StackProtector.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/DataLayout.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/IRBuilder.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/IntrinsicInst.h"
25 #include "llvm/IR/Intrinsics.h"
26 #include "llvm/IR/LLVMContext.h"
27 #include "llvm/IR/Module.h"
28 #include "llvm/Support/Debug.h"
29 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
30 #include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
31 
32 #define DEBUG_TYPE "nvptx"
33 
34 using namespace llvm;
35 
36 namespace {
37 
38 // actual analysis class, which is a functionpass
39 struct NVPTXLowerAggrCopies : public FunctionPass {
40   static char ID;
41 
NVPTXLowerAggrCopies__anon882d19640111::NVPTXLowerAggrCopies42   NVPTXLowerAggrCopies() : FunctionPass(ID) {}
43 
getAnalysisUsage__anon882d19640111::NVPTXLowerAggrCopies44   void getAnalysisUsage(AnalysisUsage &AU) const override {
45     AU.addPreserved<StackProtector>();
46     AU.addRequired<TargetTransformInfoWrapperPass>();
47   }
48 
49   bool runOnFunction(Function &F) override;
50 
51   static const unsigned MaxAggrCopySize = 128;
52 
getPassName__anon882d19640111::NVPTXLowerAggrCopies53   StringRef getPassName() const override {
54     return "Lower aggregate copies/intrinsics into loops";
55   }
56 };
57 
58 char NVPTXLowerAggrCopies::ID = 0;
59 
runOnFunction(Function & F)60 bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
61   SmallVector<LoadInst *, 4> AggrLoads;
62   SmallVector<MemIntrinsic *, 4> MemCalls;
63 
64   const DataLayout &DL = F.getParent()->getDataLayout();
65   LLVMContext &Context = F.getParent()->getContext();
66   const TargetTransformInfo &TTI =
67       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
68 
69   // Collect all aggregate loads and mem* calls.
70   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
71     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
72          ++II) {
73       if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
74         if (!LI->hasOneUse())
75           continue;
76 
77         if (DL.getTypeStoreSize(LI->getType()) < MaxAggrCopySize)
78           continue;
79 
80         if (StoreInst *SI = dyn_cast<StoreInst>(LI->user_back())) {
81           if (SI->getOperand(0) != LI)
82             continue;
83           AggrLoads.push_back(LI);
84         }
85       } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
86         // Convert intrinsic calls with variable size or with constant size
87         // larger than the MaxAggrCopySize threshold.
88         if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
89           if (LenCI->getZExtValue() >= MaxAggrCopySize) {
90             MemCalls.push_back(IntrCall);
91           }
92         } else {
93           MemCalls.push_back(IntrCall);
94         }
95       }
96     }
97   }
98 
99   if (AggrLoads.size() == 0 && MemCalls.size() == 0) {
100     return false;
101   }
102 
103   //
104   // Do the transformation of an aggr load/copy/set to a loop
105   //
106   for (LoadInst *LI : AggrLoads) {
107     StoreInst *SI = dyn_cast<StoreInst>(*LI->user_begin());
108     Value *SrcAddr = LI->getOperand(0);
109     Value *DstAddr = SI->getOperand(1);
110     unsigned NumLoads = DL.getTypeStoreSize(LI->getType());
111     ConstantInt *CopyLen =
112         ConstantInt::get(Type::getInt32Ty(Context), NumLoads);
113 
114     createMemCpyLoopKnownSize(/* ConvertedInst */ SI,
115                               /* SrcAddr */ SrcAddr, /* DstAddr */ DstAddr,
116                               /* CopyLen */ CopyLen,
117                               /* SrcAlign */ LI->getAlignment(),
118                               /* DestAlign */ SI->getAlignment(),
119                               /* SrcIsVolatile */ LI->isVolatile(),
120                               /* DstIsVolatile */ SI->isVolatile(), TTI);
121 
122     SI->eraseFromParent();
123     LI->eraseFromParent();
124   }
125 
126   // Transform mem* intrinsic calls.
127   for (MemIntrinsic *MemCall : MemCalls) {
128     if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
129       expandMemCpyAsLoop(Memcpy, TTI);
130     } else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
131       expandMemMoveAsLoop(Memmove);
132     } else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
133       expandMemSetAsLoop(Memset);
134     }
135     MemCall->eraseFromParent();
136   }
137 
138   return true;
139 }
140 
141 } // namespace
142 
143 namespace llvm {
144 void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
145 }
146 
147 INITIALIZE_PASS(NVPTXLowerAggrCopies, "nvptx-lower-aggr-copies",
148                 "Lower aggregate copies, and llvm.mem* intrinsics into loops",
149                 false, false)
150 
createLowerAggrCopies()151 FunctionPass *llvm::createLowerAggrCopies() {
152   return new NVPTXLowerAggrCopies();
153 }
154