//===--------------------- R600MergeVectorRegisters.cpp -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass merges the inputs of swizzlable instructions into vectors that
/// share common data and/or have enough undef subregs, using the swizzle
/// abilities of those instructions.
///
/// For instance let's consider the following pseudo code:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = REG_SEQ vreg1, sub0, vreg3, sub1, undef, sub2, vreg4, sub3
///   (swizzlable Inst) vreg7, SwizzleMask : sub0, sub1, sub2, sub3
///
/// is turned into:
///   vreg5<def> = REG_SEQ vreg1, sub0, vreg2, sub1, vreg3, sub2, undef, sub3
///   ...
///   vreg7<def> = INSERT_SUBREG vreg4, sub3
///   (swizzlable Inst) vreg7, SwizzleMask : sub0, sub2, sub1, sub3
///
/// This allows regalloc to reduce register pressure for vector registers and
/// to reduce the MOV count.
28 //===----------------------------------------------------------------------===// 29 30 #define DEBUG_TYPE "vec-merger" 31 #include "llvm/Support/Debug.h" 32 #include "AMDGPU.h" 33 #include "R600InstrInfo.h" 34 #include "llvm/CodeGen/DFAPacketizer.h" 35 #include "llvm/CodeGen/MachineDominators.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineLoopInfo.h" 38 #include "llvm/CodeGen/Passes.h" 39 #include "llvm/CodeGen/MachineInstrBuilder.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include "llvm/CodeGen/MachineRegisterInfo.h" 42 43 using namespace llvm; 44 45 namespace { 46 47 static bool 48 isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) { 49 for (MachineRegisterInfo::def_iterator It = MRI.def_begin(Reg), 50 E = MRI.def_end(); It != E; ++It) { 51 return (*It).isImplicitDef(); 52 } 53 if (MRI.isReserved(Reg)) { 54 return false; 55 } 56 llvm_unreachable("Reg without a def"); 57 return false; 58 } 59 60 class RegSeqInfo { 61 public: 62 MachineInstr *Instr; 63 DenseMap<unsigned, unsigned> RegToChan; 64 std::vector<unsigned> UndefReg; 65 RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) { 66 assert (MI->getOpcode() == AMDGPU::REG_SEQUENCE); 67 for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) { 68 MachineOperand &MO = Instr->getOperand(i); 69 unsigned Chan = Instr->getOperand(i + 1).getImm(); 70 if (isImplicitlyDef(MRI, MO.getReg())) 71 UndefReg.push_back(Chan); 72 else 73 RegToChan[MO.getReg()] = Chan; 74 } 75 } 76 RegSeqInfo() {} 77 78 bool operator==(const RegSeqInfo &RSI) const { 79 return RSI.Instr == Instr; 80 } 81 }; 82 83 class R600VectorRegMerger : public MachineFunctionPass { 84 private: 85 MachineRegisterInfo *MRI; 86 const R600InstrInfo *TII; 87 bool canSwizzle(const MachineInstr &) const; 88 bool areAllUsesSwizzeable(unsigned Reg) const; 89 void SwizzleInput(MachineInstr &, 90 const std::vector<std::pair<unsigned, unsigned> > &) const; 91 bool tryMergeVector(const RegSeqInfo *, 
RegSeqInfo *, 92 std::vector<std::pair<unsigned, unsigned> > &Remap) const; 93 bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 94 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 95 bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI, 96 std::vector<std::pair<unsigned, unsigned> > &RemapChan); 97 MachineInstr *RebuildVector(RegSeqInfo *MI, 98 const RegSeqInfo *BaseVec, 99 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const; 100 void RemoveMI(MachineInstr *); 101 void trackRSI(const RegSeqInfo &RSI); 102 103 typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap; 104 DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq; 105 InstructionSetMap PreviousRegSeqByReg; 106 InstructionSetMap PreviousRegSeqByUndefCount; 107 public: 108 static char ID; 109 R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID), 110 TII(0) { } 111 112 void getAnalysisUsage(AnalysisUsage &AU) const { 113 AU.setPreservesCFG(); 114 AU.addRequired<MachineDominatorTree>(); 115 AU.addPreserved<MachineDominatorTree>(); 116 AU.addRequired<MachineLoopInfo>(); 117 AU.addPreserved<MachineLoopInfo>(); 118 MachineFunctionPass::getAnalysisUsage(AU); 119 } 120 121 const char *getPassName() const { 122 return "R600 Vector Registers Merge Pass"; 123 } 124 125 bool runOnMachineFunction(MachineFunction &Fn); 126 }; 127 128 char R600VectorRegMerger::ID = 0; 129 130 bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI) 131 const { 132 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 133 return true; 134 switch (MI.getOpcode()) { 135 case AMDGPU::R600_ExportSwz: 136 case AMDGPU::EG_ExportSwz: 137 return true; 138 default: 139 return false; 140 } 141 } 142 143 bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched, 144 RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap) 145 const { 146 unsigned CurrentUndexIdx = 0; 147 for (DenseMap<unsigned, unsigned>::iterator It 
= ToMerge->RegToChan.begin(), 148 E = ToMerge->RegToChan.end(); It != E; ++It) { 149 DenseMap<unsigned, unsigned>::const_iterator PosInUntouched = 150 Untouched->RegToChan.find((*It).first); 151 if (PosInUntouched != Untouched->RegToChan.end()) { 152 Remap.push_back(std::pair<unsigned, unsigned> 153 ((*It).second, (*PosInUntouched).second)); 154 continue; 155 } 156 if (CurrentUndexIdx >= Untouched->UndefReg.size()) 157 return false; 158 Remap.push_back(std::pair<unsigned, unsigned> 159 ((*It).second, Untouched->UndefReg[CurrentUndexIdx++])); 160 } 161 162 return true; 163 } 164 165 static 166 unsigned getReassignedChan( 167 const std::vector<std::pair<unsigned, unsigned> > &RemapChan, 168 unsigned Chan) { 169 for (unsigned j = 0, je = RemapChan.size(); j < je; j++) { 170 if (RemapChan[j].first == Chan) 171 return RemapChan[j].second; 172 } 173 llvm_unreachable("Chan wasn't reassigned"); 174 } 175 176 MachineInstr *R600VectorRegMerger::RebuildVector( 177 RegSeqInfo *RSI, const RegSeqInfo *BaseRSI, 178 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 179 unsigned Reg = RSI->Instr->getOperand(0).getReg(); 180 MachineBasicBlock::iterator Pos = RSI->Instr; 181 MachineBasicBlock &MBB = *Pos->getParent(); 182 DebugLoc DL = Pos->getDebugLoc(); 183 184 unsigned SrcVec = BaseRSI->Instr->getOperand(0).getReg(); 185 DenseMap<unsigned, unsigned> UpdatedRegToChan = BaseRSI->RegToChan; 186 std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg; 187 for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(), 188 E = RSI->RegToChan.end(); It != E; ++It) { 189 unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass); 190 unsigned SubReg = (*It).first; 191 unsigned Swizzle = (*It).second; 192 unsigned Chan = getReassignedChan(RemapChan, Swizzle); 193 194 MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG), 195 DstReg) 196 .addReg(SrcVec) 197 .addReg(SubReg) 198 .addImm(Chan); 199 UpdatedRegToChan[SubReg] = 
Chan; 200 std::vector<unsigned>::iterator ChanPos = 201 std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan); 202 if (ChanPos != UpdatedUndef.end()) 203 UpdatedUndef.erase(ChanPos); 204 assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) == 205 UpdatedUndef.end() && 206 "UpdatedUndef shouldn't contain Chan more than once!"); 207 DEBUG(dbgs() << " ->"; Tmp->dump();); 208 (void)Tmp; 209 SrcVec = DstReg; 210 } 211 Pos = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg) 212 .addReg(SrcVec); 213 DEBUG(dbgs() << " ->"; Pos->dump();); 214 215 DEBUG(dbgs() << " Updating Swizzle:\n"); 216 for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), 217 E = MRI->use_end(); It != E; ++It) { 218 DEBUG(dbgs() << " ";(*It).dump(); dbgs() << " ->"); 219 SwizzleInput(*It, RemapChan); 220 DEBUG((*It).dump()); 221 } 222 RSI->Instr->eraseFromParent(); 223 224 // Update RSI 225 RSI->Instr = Pos; 226 RSI->RegToChan = UpdatedRegToChan; 227 RSI->UndefReg = UpdatedUndef; 228 229 return Pos; 230 } 231 232 void R600VectorRegMerger::RemoveMI(MachineInstr *MI) { 233 for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(), 234 E = PreviousRegSeqByReg.end(); It != E; ++It) { 235 std::vector<MachineInstr *> &MIs = (*It).second; 236 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 237 } 238 for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(), 239 E = PreviousRegSeqByUndefCount.end(); It != E; ++It) { 240 std::vector<MachineInstr *> &MIs = (*It).second; 241 MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end()); 242 } 243 } 244 245 void R600VectorRegMerger::SwizzleInput(MachineInstr &MI, 246 const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const { 247 unsigned Offset; 248 if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) 249 Offset = 2; 250 else 251 Offset = 3; 252 for (unsigned i = 0; i < 4; i++) { 253 unsigned Swizzle = MI.getOperand(i + Offset).getImm() + 1; 254 for (unsigned j = 0, e = 
RemapChan.size(); j < e; j++) { 255 if (RemapChan[j].first == Swizzle) { 256 MI.getOperand(i + Offset).setImm(RemapChan[j].second - 1); 257 break; 258 } 259 } 260 } 261 } 262 263 bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const { 264 for (MachineRegisterInfo::use_iterator It = MRI->use_begin(Reg), 265 E = MRI->use_end(); It != E; ++It) { 266 if (!canSwizzle(*It)) 267 return false; 268 } 269 return true; 270 } 271 272 bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI, 273 RegSeqInfo &CompatibleRSI, 274 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 275 for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(), 276 MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) { 277 if (!MOp->isReg()) 278 continue; 279 if (PreviousRegSeqByReg[MOp->getReg()].empty()) 280 continue; 281 std::vector<MachineInstr *> MIs = PreviousRegSeqByReg[MOp->getReg()]; 282 for (unsigned i = 0, e = MIs.size(); i < e; i++) { 283 CompatibleRSI = PreviousRegSeq[MIs[i]]; 284 if (RSI == CompatibleRSI) 285 continue; 286 if (tryMergeVector(&CompatibleRSI, &RSI, RemapChan)) 287 return true; 288 } 289 } 290 return false; 291 } 292 293 bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI, 294 RegSeqInfo &CompatibleRSI, 295 std::vector<std::pair<unsigned, unsigned> > &RemapChan) { 296 unsigned NeededUndefs = 4 - RSI.UndefReg.size(); 297 if (PreviousRegSeqByUndefCount[NeededUndefs].empty()) 298 return false; 299 std::vector<MachineInstr *> &MIs = 300 PreviousRegSeqByUndefCount[NeededUndefs]; 301 CompatibleRSI = PreviousRegSeq[MIs.back()]; 302 tryMergeVector(&CompatibleRSI, &RSI, RemapChan); 303 return true; 304 } 305 306 void R600VectorRegMerger::trackRSI(const RegSeqInfo &RSI) { 307 for (DenseMap<unsigned, unsigned>::const_iterator 308 It = RSI.RegToChan.begin(), E = RSI.RegToChan.end(); It != E; ++It) { 309 PreviousRegSeqByReg[(*It).first].push_back(RSI.Instr); 310 } 311 PreviousRegSeqByUndefCount[RSI.UndefReg.size()].push_back(RSI.Instr); 
312 PreviousRegSeq[RSI.Instr] = RSI; 313 } 314 315 bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) { 316 TII = static_cast<const R600InstrInfo *>(Fn.getTarget().getInstrInfo()); 317 MRI = &(Fn.getRegInfo()); 318 for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); 319 MBB != MBBe; ++MBB) { 320 MachineBasicBlock *MB = MBB; 321 PreviousRegSeq.clear(); 322 PreviousRegSeqByReg.clear(); 323 PreviousRegSeqByUndefCount.clear(); 324 325 for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end(); 326 MII != MIIE; ++MII) { 327 MachineInstr *MI = MII; 328 if (MI->getOpcode() != AMDGPU::REG_SEQUENCE) { 329 if (TII->get(MI->getOpcode()).TSFlags & R600_InstFlag::TEX_INST) { 330 unsigned Reg = MI->getOperand(1).getReg(); 331 for (MachineRegisterInfo::def_iterator It = MRI->def_begin(Reg), 332 E = MRI->def_end(); It != E; ++It) { 333 RemoveMI(&(*It)); 334 } 335 } 336 continue; 337 } 338 339 340 RegSeqInfo RSI(*MRI, MI); 341 342 // All uses of MI are swizzeable ? 
343 unsigned Reg = MI->getOperand(0).getReg(); 344 if (!areAllUsesSwizzeable(Reg)) 345 continue; 346 347 DEBUG (dbgs() << "Trying to optimize "; 348 MI->dump(); 349 ); 350 351 RegSeqInfo CandidateRSI; 352 std::vector<std::pair<unsigned, unsigned> > RemapChan; 353 DEBUG(dbgs() << "Using common slots...\n";); 354 if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) { 355 // Remove CandidateRSI mapping 356 RemoveMI(CandidateRSI.Instr); 357 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 358 trackRSI(RSI); 359 continue; 360 } 361 DEBUG(dbgs() << "Using free slots...\n";); 362 RemapChan.clear(); 363 if (tryMergeUsingFreeSlot(RSI, CandidateRSI, RemapChan)) { 364 RemoveMI(CandidateRSI.Instr); 365 MII = RebuildVector(&RSI, &CandidateRSI, RemapChan); 366 trackRSI(RSI); 367 continue; 368 } 369 //Failed to merge 370 trackRSI(RSI); 371 } 372 } 373 return false; 374 } 375 376 } 377 378 llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) { 379 return new R600VectorRegMerger(tm); 380 } 381