//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass eliminates allocas by either converting them into vectors or
// by migrating them to local address space.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

#define DEBUG_TYPE "amdgpu-promote-alloca"

using namespace llvm;

26*0a6a1f1dSLionel Sambuc namespace {
27*0a6a1f1dSLionel Sambuc 
28*0a6a1f1dSLionel Sambuc class AMDGPUPromoteAlloca : public FunctionPass,
29*0a6a1f1dSLionel Sambuc                        public InstVisitor<AMDGPUPromoteAlloca> {
30*0a6a1f1dSLionel Sambuc 
31*0a6a1f1dSLionel Sambuc   static char ID;
32*0a6a1f1dSLionel Sambuc   Module *Mod;
33*0a6a1f1dSLionel Sambuc   const AMDGPUSubtarget &ST;
34*0a6a1f1dSLionel Sambuc   int LocalMemAvailable;
35*0a6a1f1dSLionel Sambuc 
36*0a6a1f1dSLionel Sambuc public:
AMDGPUPromoteAlloca(const AMDGPUSubtarget & st)37*0a6a1f1dSLionel Sambuc   AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st),
38*0a6a1f1dSLionel Sambuc                                                    LocalMemAvailable(0) { }
39*0a6a1f1dSLionel Sambuc   bool doInitialization(Module &M) override;
40*0a6a1f1dSLionel Sambuc   bool runOnFunction(Function &F) override;
getPassName() const41*0a6a1f1dSLionel Sambuc   const char *getPassName() const override { return "AMDGPU Promote Alloca"; }
42*0a6a1f1dSLionel Sambuc   void visitAlloca(AllocaInst &I);
43*0a6a1f1dSLionel Sambuc };
44*0a6a1f1dSLionel Sambuc 
45*0a6a1f1dSLionel Sambuc } // End anonymous namespace
46*0a6a1f1dSLionel Sambuc 
47*0a6a1f1dSLionel Sambuc char AMDGPUPromoteAlloca::ID = 0;
48*0a6a1f1dSLionel Sambuc 
doInitialization(Module & M)49*0a6a1f1dSLionel Sambuc bool AMDGPUPromoteAlloca::doInitialization(Module &M) {
50*0a6a1f1dSLionel Sambuc   Mod = &M;
51*0a6a1f1dSLionel Sambuc   return false;
52*0a6a1f1dSLionel Sambuc }
53*0a6a1f1dSLionel Sambuc 
runOnFunction(Function & F)54*0a6a1f1dSLionel Sambuc bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
55*0a6a1f1dSLionel Sambuc 
56*0a6a1f1dSLionel Sambuc   const FunctionType *FTy = F.getFunctionType();
57*0a6a1f1dSLionel Sambuc 
58*0a6a1f1dSLionel Sambuc   LocalMemAvailable = ST.getLocalMemorySize();
59*0a6a1f1dSLionel Sambuc 
60*0a6a1f1dSLionel Sambuc 
61*0a6a1f1dSLionel Sambuc   // If the function has any arguments in the local address space, then it's
62*0a6a1f1dSLionel Sambuc   // possible these arguments require the entire local memory space, so
63*0a6a1f1dSLionel Sambuc   // we cannot use local memory in the pass.
64*0a6a1f1dSLionel Sambuc   for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
65*0a6a1f1dSLionel Sambuc     const Type *ParamTy = FTy->getParamType(i);
66*0a6a1f1dSLionel Sambuc     if (ParamTy->isPointerTy() &&
67*0a6a1f1dSLionel Sambuc         ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
68*0a6a1f1dSLionel Sambuc       LocalMemAvailable = 0;
69*0a6a1f1dSLionel Sambuc       DEBUG(dbgs() << "Function has local memory argument.  Promoting to "
70*0a6a1f1dSLionel Sambuc                       "local memory disabled.\n");
71*0a6a1f1dSLionel Sambuc       break;
72*0a6a1f1dSLionel Sambuc     }
73*0a6a1f1dSLionel Sambuc   }
74*0a6a1f1dSLionel Sambuc 
75*0a6a1f1dSLionel Sambuc   if (LocalMemAvailable > 0) {
76*0a6a1f1dSLionel Sambuc     // Check how much local memory is being used by global objects
77*0a6a1f1dSLionel Sambuc     for (Module::global_iterator I = Mod->global_begin(),
78*0a6a1f1dSLionel Sambuc                                  E = Mod->global_end(); I != E; ++I) {
79*0a6a1f1dSLionel Sambuc       GlobalVariable *GV = I;
80*0a6a1f1dSLionel Sambuc       PointerType *GVTy = GV->getType();
81*0a6a1f1dSLionel Sambuc       if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
82*0a6a1f1dSLionel Sambuc         continue;
83*0a6a1f1dSLionel Sambuc       for (Value::use_iterator U = GV->use_begin(),
84*0a6a1f1dSLionel Sambuc                                UE = GV->use_end(); U != UE; ++U) {
85*0a6a1f1dSLionel Sambuc         Instruction *Use = dyn_cast<Instruction>(*U);
86*0a6a1f1dSLionel Sambuc         if (!Use)
87*0a6a1f1dSLionel Sambuc           continue;
88*0a6a1f1dSLionel Sambuc         if (Use->getParent()->getParent() == &F)
89*0a6a1f1dSLionel Sambuc           LocalMemAvailable -=
90*0a6a1f1dSLionel Sambuc               Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType());
91*0a6a1f1dSLionel Sambuc       }
92*0a6a1f1dSLionel Sambuc     }
93*0a6a1f1dSLionel Sambuc   }
94*0a6a1f1dSLionel Sambuc 
95*0a6a1f1dSLionel Sambuc   LocalMemAvailable = std::max(0, LocalMemAvailable);
96*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n");
97*0a6a1f1dSLionel Sambuc 
98*0a6a1f1dSLionel Sambuc   visit(F);
99*0a6a1f1dSLionel Sambuc 
100*0a6a1f1dSLionel Sambuc   return false;
101*0a6a1f1dSLionel Sambuc }
102*0a6a1f1dSLionel Sambuc 
arrayTypeToVecType(const Type * ArrayTy)103*0a6a1f1dSLionel Sambuc static VectorType *arrayTypeToVecType(const Type *ArrayTy) {
104*0a6a1f1dSLionel Sambuc   return VectorType::get(ArrayTy->getArrayElementType(),
105*0a6a1f1dSLionel Sambuc                          ArrayTy->getArrayNumElements());
106*0a6a1f1dSLionel Sambuc }
107*0a6a1f1dSLionel Sambuc 
108*0a6a1f1dSLionel Sambuc static Value *
calculateVectorIndex(Value * Ptr,const std::map<GetElementPtrInst *,Value * > & GEPIdx)109*0a6a1f1dSLionel Sambuc calculateVectorIndex(Value *Ptr,
110*0a6a1f1dSLionel Sambuc                      const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
111*0a6a1f1dSLionel Sambuc   if (isa<AllocaInst>(Ptr))
112*0a6a1f1dSLionel Sambuc     return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext()));
113*0a6a1f1dSLionel Sambuc 
114*0a6a1f1dSLionel Sambuc   GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr);
115*0a6a1f1dSLionel Sambuc 
116*0a6a1f1dSLionel Sambuc   auto I = GEPIdx.find(GEP);
117*0a6a1f1dSLionel Sambuc   return I == GEPIdx.end() ? nullptr : I->second;
118*0a6a1f1dSLionel Sambuc }
119*0a6a1f1dSLionel Sambuc 
GEPToVectorIndex(GetElementPtrInst * GEP)120*0a6a1f1dSLionel Sambuc static Value* GEPToVectorIndex(GetElementPtrInst *GEP) {
121*0a6a1f1dSLionel Sambuc   // FIXME we only support simple cases
122*0a6a1f1dSLionel Sambuc   if (GEP->getNumOperands() != 3)
123*0a6a1f1dSLionel Sambuc     return NULL;
124*0a6a1f1dSLionel Sambuc 
125*0a6a1f1dSLionel Sambuc   ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1));
126*0a6a1f1dSLionel Sambuc   if (!I0 || !I0->isZero())
127*0a6a1f1dSLionel Sambuc     return NULL;
128*0a6a1f1dSLionel Sambuc 
129*0a6a1f1dSLionel Sambuc   return GEP->getOperand(2);
130*0a6a1f1dSLionel Sambuc }
131*0a6a1f1dSLionel Sambuc 
132*0a6a1f1dSLionel Sambuc // Not an instruction handled below to turn into a vector.
133*0a6a1f1dSLionel Sambuc //
134*0a6a1f1dSLionel Sambuc // TODO: Check isTriviallyVectorizable for calls and handle other
135*0a6a1f1dSLionel Sambuc // instructions.
canVectorizeInst(Instruction * Inst)136*0a6a1f1dSLionel Sambuc static bool canVectorizeInst(Instruction *Inst) {
137*0a6a1f1dSLionel Sambuc   switch (Inst->getOpcode()) {
138*0a6a1f1dSLionel Sambuc   case Instruction::Load:
139*0a6a1f1dSLionel Sambuc   case Instruction::Store:
140*0a6a1f1dSLionel Sambuc   case Instruction::BitCast:
141*0a6a1f1dSLionel Sambuc   case Instruction::AddrSpaceCast:
142*0a6a1f1dSLionel Sambuc     return true;
143*0a6a1f1dSLionel Sambuc   default:
144*0a6a1f1dSLionel Sambuc     return false;
145*0a6a1f1dSLionel Sambuc   }
146*0a6a1f1dSLionel Sambuc }
147*0a6a1f1dSLionel Sambuc 
tryPromoteAllocaToVector(AllocaInst * Alloca)148*0a6a1f1dSLionel Sambuc static bool tryPromoteAllocaToVector(AllocaInst *Alloca) {
149*0a6a1f1dSLionel Sambuc   Type *AllocaTy = Alloca->getAllocatedType();
150*0a6a1f1dSLionel Sambuc 
151*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "Alloca Candidate for vectorization \n");
152*0a6a1f1dSLionel Sambuc 
153*0a6a1f1dSLionel Sambuc   // FIXME: There is no reason why we can't support larger arrays, we
154*0a6a1f1dSLionel Sambuc   // are just being conservative for now.
155*0a6a1f1dSLionel Sambuc   if (!AllocaTy->isArrayTy() ||
156*0a6a1f1dSLionel Sambuc       AllocaTy->getArrayElementType()->isVectorTy() ||
157*0a6a1f1dSLionel Sambuc       AllocaTy->getArrayNumElements() > 4) {
158*0a6a1f1dSLionel Sambuc 
159*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << "  Cannot convert type to vector");
160*0a6a1f1dSLionel Sambuc     return false;
161*0a6a1f1dSLionel Sambuc   }
162*0a6a1f1dSLionel Sambuc 
163*0a6a1f1dSLionel Sambuc   std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
164*0a6a1f1dSLionel Sambuc   std::vector<Value*> WorkList;
165*0a6a1f1dSLionel Sambuc   for (User *AllocaUser : Alloca->users()) {
166*0a6a1f1dSLionel Sambuc     GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser);
167*0a6a1f1dSLionel Sambuc     if (!GEP) {
168*0a6a1f1dSLionel Sambuc       if (!canVectorizeInst(cast<Instruction>(AllocaUser)))
169*0a6a1f1dSLionel Sambuc         return false;
170*0a6a1f1dSLionel Sambuc 
171*0a6a1f1dSLionel Sambuc       WorkList.push_back(AllocaUser);
172*0a6a1f1dSLionel Sambuc       continue;
173*0a6a1f1dSLionel Sambuc     }
174*0a6a1f1dSLionel Sambuc 
175*0a6a1f1dSLionel Sambuc     Value *Index = GEPToVectorIndex(GEP);
176*0a6a1f1dSLionel Sambuc 
177*0a6a1f1dSLionel Sambuc     // If we can't compute a vector index from this GEP, then we can't
178*0a6a1f1dSLionel Sambuc     // promote this alloca to vector.
179*0a6a1f1dSLionel Sambuc     if (!Index) {
180*0a6a1f1dSLionel Sambuc       DEBUG(dbgs() << "  Cannot compute vector index for GEP " << *GEP << '\n');
181*0a6a1f1dSLionel Sambuc       return false;
182*0a6a1f1dSLionel Sambuc     }
183*0a6a1f1dSLionel Sambuc 
184*0a6a1f1dSLionel Sambuc     GEPVectorIdx[GEP] = Index;
185*0a6a1f1dSLionel Sambuc     for (User *GEPUser : AllocaUser->users()) {
186*0a6a1f1dSLionel Sambuc       if (!canVectorizeInst(cast<Instruction>(GEPUser)))
187*0a6a1f1dSLionel Sambuc         return false;
188*0a6a1f1dSLionel Sambuc 
189*0a6a1f1dSLionel Sambuc       WorkList.push_back(GEPUser);
190*0a6a1f1dSLionel Sambuc     }
191*0a6a1f1dSLionel Sambuc   }
192*0a6a1f1dSLionel Sambuc 
193*0a6a1f1dSLionel Sambuc   VectorType *VectorTy = arrayTypeToVecType(AllocaTy);
194*0a6a1f1dSLionel Sambuc 
195*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "  Converting alloca to vector "
196*0a6a1f1dSLionel Sambuc         << *AllocaTy << " -> " << *VectorTy << '\n');
197*0a6a1f1dSLionel Sambuc 
198*0a6a1f1dSLionel Sambuc   for (std::vector<Value*>::iterator I = WorkList.begin(),
199*0a6a1f1dSLionel Sambuc                                      E = WorkList.end(); I != E; ++I) {
200*0a6a1f1dSLionel Sambuc     Instruction *Inst = cast<Instruction>(*I);
201*0a6a1f1dSLionel Sambuc     IRBuilder<> Builder(Inst);
202*0a6a1f1dSLionel Sambuc     switch (Inst->getOpcode()) {
203*0a6a1f1dSLionel Sambuc     case Instruction::Load: {
204*0a6a1f1dSLionel Sambuc       Value *Ptr = Inst->getOperand(0);
205*0a6a1f1dSLionel Sambuc       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
206*0a6a1f1dSLionel Sambuc       Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
207*0a6a1f1dSLionel Sambuc       Value *VecValue = Builder.CreateLoad(BitCast);
208*0a6a1f1dSLionel Sambuc       Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
209*0a6a1f1dSLionel Sambuc       Inst->replaceAllUsesWith(ExtractElement);
210*0a6a1f1dSLionel Sambuc       Inst->eraseFromParent();
211*0a6a1f1dSLionel Sambuc       break;
212*0a6a1f1dSLionel Sambuc     }
213*0a6a1f1dSLionel Sambuc     case Instruction::Store: {
214*0a6a1f1dSLionel Sambuc       Value *Ptr = Inst->getOperand(1);
215*0a6a1f1dSLionel Sambuc       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
216*0a6a1f1dSLionel Sambuc       Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0));
217*0a6a1f1dSLionel Sambuc       Value *VecValue = Builder.CreateLoad(BitCast);
218*0a6a1f1dSLionel Sambuc       Value *NewVecValue = Builder.CreateInsertElement(VecValue,
219*0a6a1f1dSLionel Sambuc                                                        Inst->getOperand(0),
220*0a6a1f1dSLionel Sambuc                                                        Index);
221*0a6a1f1dSLionel Sambuc       Builder.CreateStore(NewVecValue, BitCast);
222*0a6a1f1dSLionel Sambuc       Inst->eraseFromParent();
223*0a6a1f1dSLionel Sambuc       break;
224*0a6a1f1dSLionel Sambuc     }
225*0a6a1f1dSLionel Sambuc     case Instruction::BitCast:
226*0a6a1f1dSLionel Sambuc     case Instruction::AddrSpaceCast:
227*0a6a1f1dSLionel Sambuc       break;
228*0a6a1f1dSLionel Sambuc 
229*0a6a1f1dSLionel Sambuc     default:
230*0a6a1f1dSLionel Sambuc       Inst->dump();
231*0a6a1f1dSLionel Sambuc       llvm_unreachable("Inconsistency in instructions promotable to vector");
232*0a6a1f1dSLionel Sambuc     }
233*0a6a1f1dSLionel Sambuc   }
234*0a6a1f1dSLionel Sambuc   return true;
235*0a6a1f1dSLionel Sambuc }
236*0a6a1f1dSLionel Sambuc 
collectUsesWithPtrTypes(Value * Val,std::vector<Value * > & WorkList)237*0a6a1f1dSLionel Sambuc static bool collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) {
238*0a6a1f1dSLionel Sambuc   bool Success = true;
239*0a6a1f1dSLionel Sambuc   for (User *User : Val->users()) {
240*0a6a1f1dSLionel Sambuc     if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
241*0a6a1f1dSLionel Sambuc       continue;
242*0a6a1f1dSLionel Sambuc     if (isa<CallInst>(User)) {
243*0a6a1f1dSLionel Sambuc       WorkList.push_back(User);
244*0a6a1f1dSLionel Sambuc       continue;
245*0a6a1f1dSLionel Sambuc     }
246*0a6a1f1dSLionel Sambuc 
247*0a6a1f1dSLionel Sambuc     // FIXME: Correctly handle ptrtoint instructions.
248*0a6a1f1dSLionel Sambuc     Instruction *UseInst = dyn_cast<Instruction>(User);
249*0a6a1f1dSLionel Sambuc     if (UseInst && UseInst->getOpcode() == Instruction::PtrToInt)
250*0a6a1f1dSLionel Sambuc       return false;
251*0a6a1f1dSLionel Sambuc 
252*0a6a1f1dSLionel Sambuc     if (!User->getType()->isPointerTy())
253*0a6a1f1dSLionel Sambuc       continue;
254*0a6a1f1dSLionel Sambuc 
255*0a6a1f1dSLionel Sambuc     WorkList.push_back(User);
256*0a6a1f1dSLionel Sambuc 
257*0a6a1f1dSLionel Sambuc     Success &= collectUsesWithPtrTypes(User, WorkList);
258*0a6a1f1dSLionel Sambuc   }
259*0a6a1f1dSLionel Sambuc   return Success;
260*0a6a1f1dSLionel Sambuc }
261*0a6a1f1dSLionel Sambuc 
visitAlloca(AllocaInst & I)262*0a6a1f1dSLionel Sambuc void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
263*0a6a1f1dSLionel Sambuc   IRBuilder<> Builder(&I);
264*0a6a1f1dSLionel Sambuc 
265*0a6a1f1dSLionel Sambuc   // First try to replace the alloca with a vector
266*0a6a1f1dSLionel Sambuc   Type *AllocaTy = I.getAllocatedType();
267*0a6a1f1dSLionel Sambuc 
268*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "Trying to promote " << I << '\n');
269*0a6a1f1dSLionel Sambuc 
270*0a6a1f1dSLionel Sambuc   if (tryPromoteAllocaToVector(&I))
271*0a6a1f1dSLionel Sambuc     return;
272*0a6a1f1dSLionel Sambuc 
273*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n");
274*0a6a1f1dSLionel Sambuc 
275*0a6a1f1dSLionel Sambuc   // FIXME: This is the maximum work group size.  We should try to get
276*0a6a1f1dSLionel Sambuc   // value from the reqd_work_group_size function attribute if it is
277*0a6a1f1dSLionel Sambuc   // available.
278*0a6a1f1dSLionel Sambuc   unsigned WorkGroupSize = 256;
279*0a6a1f1dSLionel Sambuc   int AllocaSize = WorkGroupSize *
280*0a6a1f1dSLionel Sambuc       Mod->getDataLayout()->getTypeAllocSize(AllocaTy);
281*0a6a1f1dSLionel Sambuc 
282*0a6a1f1dSLionel Sambuc   if (AllocaSize > LocalMemAvailable) {
283*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << " Not enough local memory to promote alloca.\n");
284*0a6a1f1dSLionel Sambuc     return;
285*0a6a1f1dSLionel Sambuc   }
286*0a6a1f1dSLionel Sambuc 
287*0a6a1f1dSLionel Sambuc   std::vector<Value*> WorkList;
288*0a6a1f1dSLionel Sambuc 
289*0a6a1f1dSLionel Sambuc   if (!collectUsesWithPtrTypes(&I, WorkList)) {
290*0a6a1f1dSLionel Sambuc     DEBUG(dbgs() << " Do not know how to convert all uses\n");
291*0a6a1f1dSLionel Sambuc     return;
292*0a6a1f1dSLionel Sambuc   }
293*0a6a1f1dSLionel Sambuc 
294*0a6a1f1dSLionel Sambuc   DEBUG(dbgs() << "Promoting alloca to local memory\n");
295*0a6a1f1dSLionel Sambuc   LocalMemAvailable -= AllocaSize;
296*0a6a1f1dSLionel Sambuc 
297*0a6a1f1dSLionel Sambuc   GlobalVariable *GV = new GlobalVariable(
298*0a6a1f1dSLionel Sambuc       *Mod, ArrayType::get(I.getAllocatedType(), 256), false,
299*0a6a1f1dSLionel Sambuc       GlobalValue::ExternalLinkage, 0, I.getName(), 0,
300*0a6a1f1dSLionel Sambuc       GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
301*0a6a1f1dSLionel Sambuc 
302*0a6a1f1dSLionel Sambuc   FunctionType *FTy = FunctionType::get(
303*0a6a1f1dSLionel Sambuc       Type::getInt32Ty(Mod->getContext()), false);
304*0a6a1f1dSLionel Sambuc   AttributeSet AttrSet;
305*0a6a1f1dSLionel Sambuc   AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone);
306*0a6a1f1dSLionel Sambuc 
307*0a6a1f1dSLionel Sambuc   Value *ReadLocalSizeY = Mod->getOrInsertFunction(
308*0a6a1f1dSLionel Sambuc       "llvm.r600.read.local.size.y", FTy, AttrSet);
309*0a6a1f1dSLionel Sambuc   Value *ReadLocalSizeZ = Mod->getOrInsertFunction(
310*0a6a1f1dSLionel Sambuc       "llvm.r600.read.local.size.z", FTy, AttrSet);
311*0a6a1f1dSLionel Sambuc   Value *ReadTIDIGX = Mod->getOrInsertFunction(
312*0a6a1f1dSLionel Sambuc       "llvm.r600.read.tidig.x", FTy, AttrSet);
313*0a6a1f1dSLionel Sambuc   Value *ReadTIDIGY = Mod->getOrInsertFunction(
314*0a6a1f1dSLionel Sambuc       "llvm.r600.read.tidig.y", FTy, AttrSet);
315*0a6a1f1dSLionel Sambuc   Value *ReadTIDIGZ = Mod->getOrInsertFunction(
316*0a6a1f1dSLionel Sambuc       "llvm.r600.read.tidig.z", FTy, AttrSet);
317*0a6a1f1dSLionel Sambuc 
318*0a6a1f1dSLionel Sambuc 
319*0a6a1f1dSLionel Sambuc   Value *TCntY = Builder.CreateCall(ReadLocalSizeY);
320*0a6a1f1dSLionel Sambuc   Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ);
321*0a6a1f1dSLionel Sambuc   Value *TIdX  = Builder.CreateCall(ReadTIDIGX);
322*0a6a1f1dSLionel Sambuc   Value *TIdY  = Builder.CreateCall(ReadTIDIGY);
323*0a6a1f1dSLionel Sambuc   Value *TIdZ  = Builder.CreateCall(ReadTIDIGZ);
324*0a6a1f1dSLionel Sambuc 
325*0a6a1f1dSLionel Sambuc   Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ);
326*0a6a1f1dSLionel Sambuc   Tmp0 = Builder.CreateMul(Tmp0, TIdX);
327*0a6a1f1dSLionel Sambuc   Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ);
328*0a6a1f1dSLionel Sambuc   Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
329*0a6a1f1dSLionel Sambuc   TID = Builder.CreateAdd(TID, TIdZ);
330*0a6a1f1dSLionel Sambuc 
331*0a6a1f1dSLionel Sambuc   std::vector<Value*> Indices;
332*0a6a1f1dSLionel Sambuc   Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext())));
333*0a6a1f1dSLionel Sambuc   Indices.push_back(TID);
334*0a6a1f1dSLionel Sambuc 
335*0a6a1f1dSLionel Sambuc   Value *Offset = Builder.CreateGEP(GV, Indices);
336*0a6a1f1dSLionel Sambuc   I.mutateType(Offset->getType());
337*0a6a1f1dSLionel Sambuc   I.replaceAllUsesWith(Offset);
338*0a6a1f1dSLionel Sambuc   I.eraseFromParent();
339*0a6a1f1dSLionel Sambuc 
340*0a6a1f1dSLionel Sambuc   for (std::vector<Value*>::iterator i = WorkList.begin(),
341*0a6a1f1dSLionel Sambuc                                      e = WorkList.end(); i != e; ++i) {
342*0a6a1f1dSLionel Sambuc     Value *V = *i;
343*0a6a1f1dSLionel Sambuc     CallInst *Call = dyn_cast<CallInst>(V);
344*0a6a1f1dSLionel Sambuc     if (!Call) {
345*0a6a1f1dSLionel Sambuc       Type *EltTy = V->getType()->getPointerElementType();
346*0a6a1f1dSLionel Sambuc       PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS);
347*0a6a1f1dSLionel Sambuc 
348*0a6a1f1dSLionel Sambuc       // The operand's value should be corrected on its own.
349*0a6a1f1dSLionel Sambuc       if (isa<AddrSpaceCastInst>(V))
350*0a6a1f1dSLionel Sambuc         continue;
351*0a6a1f1dSLionel Sambuc 
352*0a6a1f1dSLionel Sambuc       // FIXME: It doesn't really make sense to try to do this for all
353*0a6a1f1dSLionel Sambuc       // instructions.
354*0a6a1f1dSLionel Sambuc       V->mutateType(NewTy);
355*0a6a1f1dSLionel Sambuc       continue;
356*0a6a1f1dSLionel Sambuc     }
357*0a6a1f1dSLionel Sambuc 
358*0a6a1f1dSLionel Sambuc     IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call);
359*0a6a1f1dSLionel Sambuc     if (!Intr) {
360*0a6a1f1dSLionel Sambuc       std::vector<Type*> ArgTypes;
361*0a6a1f1dSLionel Sambuc       for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands();
362*0a6a1f1dSLionel Sambuc                                 ArgIdx != ArgEnd; ++ArgIdx) {
363*0a6a1f1dSLionel Sambuc         ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType());
364*0a6a1f1dSLionel Sambuc       }
365*0a6a1f1dSLionel Sambuc       Function *F = Call->getCalledFunction();
366*0a6a1f1dSLionel Sambuc       FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes,
367*0a6a1f1dSLionel Sambuc                                                 F->isVarArg());
368*0a6a1f1dSLionel Sambuc       Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType,
369*0a6a1f1dSLionel Sambuc                                              F->getAttributes());
370*0a6a1f1dSLionel Sambuc       Function *NewF = cast<Function>(C);
371*0a6a1f1dSLionel Sambuc       Call->setCalledFunction(NewF);
372*0a6a1f1dSLionel Sambuc       continue;
373*0a6a1f1dSLionel Sambuc     }
374*0a6a1f1dSLionel Sambuc 
375*0a6a1f1dSLionel Sambuc     Builder.SetInsertPoint(Intr);
376*0a6a1f1dSLionel Sambuc     switch (Intr->getIntrinsicID()) {
377*0a6a1f1dSLionel Sambuc     case Intrinsic::lifetime_start:
378*0a6a1f1dSLionel Sambuc     case Intrinsic::lifetime_end:
379*0a6a1f1dSLionel Sambuc       // These intrinsics are for address space 0 only
380*0a6a1f1dSLionel Sambuc       Intr->eraseFromParent();
381*0a6a1f1dSLionel Sambuc       continue;
382*0a6a1f1dSLionel Sambuc     case Intrinsic::memcpy: {
383*0a6a1f1dSLionel Sambuc       MemCpyInst *MemCpy = cast<MemCpyInst>(Intr);
384*0a6a1f1dSLionel Sambuc       Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(),
385*0a6a1f1dSLionel Sambuc                            MemCpy->getLength(), MemCpy->getAlignment(),
386*0a6a1f1dSLionel Sambuc                            MemCpy->isVolatile());
387*0a6a1f1dSLionel Sambuc       Intr->eraseFromParent();
388*0a6a1f1dSLionel Sambuc       continue;
389*0a6a1f1dSLionel Sambuc     }
390*0a6a1f1dSLionel Sambuc     case Intrinsic::memset: {
391*0a6a1f1dSLionel Sambuc       MemSetInst *MemSet = cast<MemSetInst>(Intr);
392*0a6a1f1dSLionel Sambuc       Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(),
393*0a6a1f1dSLionel Sambuc                            MemSet->getLength(), MemSet->getAlignment(),
394*0a6a1f1dSLionel Sambuc                            MemSet->isVolatile());
395*0a6a1f1dSLionel Sambuc       Intr->eraseFromParent();
396*0a6a1f1dSLionel Sambuc       continue;
397*0a6a1f1dSLionel Sambuc     }
398*0a6a1f1dSLionel Sambuc     default:
399*0a6a1f1dSLionel Sambuc       Intr->dump();
400*0a6a1f1dSLionel Sambuc       llvm_unreachable("Don't know how to promote alloca intrinsic use.");
401*0a6a1f1dSLionel Sambuc     }
402*0a6a1f1dSLionel Sambuc   }
403*0a6a1f1dSLionel Sambuc }
404*0a6a1f1dSLionel Sambuc 
createAMDGPUPromoteAlloca(const AMDGPUSubtarget & ST)405*0a6a1f1dSLionel Sambuc FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) {
406*0a6a1f1dSLionel Sambuc   return new AMDGPUPromoteAlloca(ST);
407*0a6a1f1dSLionel Sambuc }
408