//===-- R600InstrInfo.cpp - R600 Instruction Information -----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief R600 Implementation of TargetInstrInfo.
//
//===----------------------------------------------------------------------===//

#include "R600InstrInfo.h"
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenDFAPacketizer.inc"

using namespace llvm;

R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm)
  : AMDGPUInstrInfo(tm),
    RI(tm),
    ST(tm.getSubtarget<AMDGPUSubtarget>())
  { }

const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const {
  return RI;
}

bool R600InstrInfo::isTrig(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::TRIG;
}

bool R600InstrInfo::isVector(const MachineInstr &MI) const {
  return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
}

void
R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI, DebugLoc DL,
                           unsigned DestReg, unsigned SrcReg,
                           bool KillSrc) const {
  unsigned VectorComponents = 0;
  if (AMDGPU::R600_Reg128RegClass.contains(DestReg) &&
      AMDGPU::R600_Reg128RegClass.contains(SrcReg)) {
    VectorComponents = 4;
  } else if (AMDGPU::R600_Reg64RegClass.contains(DestReg) &&
             AMDGPU::R600_Reg64RegClass.contains(SrcReg)) {
    VectorComponents = 2;
  }

  if (VectorComponents > 0) {
    for (unsigned I = 0; I < VectorComponents; I++) {
      unsigned SubRegIndex = RI.getSubRegFromChannel(I);
      buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                              RI.getSubReg(DestReg, SubRegIndex),
                              RI.getSubReg(SrcReg, SubRegIndex))
        .addReg(DestReg,
                RegState::Define | RegState::Implicit);
    }
  } else {
    MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
                                                  DestReg, SrcReg);
    NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
      .setIsKill(KillSrc);
  }
}

/// \returns true if \p MBBI can be moved into a new basic block.
bool R600InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) const {
  for (MachineInstr::const_mop_iterator I = MBBI->operands_begin(),
                                        E = MBBI->operands_end(); I != E; ++I) {
    if (I->isReg() && !TargetRegisterInfo::isVirtualRegister(I->getReg()) &&
        I->isUse() && RI.isPhysRegLiveAcrossClauses(I->getReg()))
      return false;
  }
  return true;
}

unsigned R600InstrInfo::getIEQOpcode() const {
  return AMDGPU::SETE_INT;
}

bool R600InstrInfo::isMov(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::MOV:
  case AMDGPU::MOV_IMM_F32:
  case AMDGPU::MOV_IMM_I32:
    return true;
  }
}

// Some instructions act as placeholders to emulate operations that the GPU
// hardware does automatically. This function can be used to check if
// an opcode falls into this category.
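// RETURN, matched below, is one such placeholder: program termination is
// handled by the hardware itself, so the opcode never needs to be lowered
// to a real machine instruction.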
bool R600InstrInfo::isPlaceHolderOpcode(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::RETURN:
    return true;
  }
}

bool R600InstrInfo::isReductionOp(unsigned Opcode) const {
  return false;
}

bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
  switch (Opcode) {
  default: return false;
  case AMDGPU::CUBE_r600_pseudo:
  case AMDGPU::CUBE_r600_real:
  case AMDGPU::CUBE_eg_pseudo:
  case AMDGPU::CUBE_eg_real:
    return true;
  }
}

bool R600InstrInfo::isALUInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;
  return (TargetFlags & R600_InstFlag::ALU_INST);
}

bool R600InstrInfo::hasInstrModifiers(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;
  return ((TargetFlags & R600_InstFlag::OP1) |
          (TargetFlags & R600_InstFlag::OP2) |
          (TargetFlags & R600_InstFlag::OP3));
}

bool R600InstrInfo::isLDSInstr(unsigned Opcode) const {
  unsigned TargetFlags = get(Opcode).TSFlags;
  return ((TargetFlags & R600_InstFlag::LDS_1A) |
          (TargetFlags & R600_InstFlag::LDS_1A1D) |
          (TargetFlags & R600_InstFlag::LDS_1A2D));
}

bool R600InstrInfo::isLDSNoRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) == -1;
}

bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
  return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
}

bool R600InstrInfo::canBeConsideredALU(const MachineInstr *MI) const {
  if (isALUInstr(MI->getOpcode()))
    return true;
  if (isVector(*MI) || isCubeOp(MI->getOpcode()))
    return true;
  switch (MI->getOpcode()) {
  case AMDGPU::PRED_X:
  case AMDGPU::INTERP_PAIR_XY:
  case AMDGPU::INTERP_PAIR_ZW:
  case AMDGPU::INTERP_VEC_LOAD:
  case AMDGPU::COPY:
  case AMDGPU::DOT_4:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
  if (ST.hasCaymanISA())
    return false;
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
}

bool R600InstrInfo::isTransOnly(const MachineInstr *MI) const {
  return isTransOnly(MI->getOpcode());
}

bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
  return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
}

bool R600InstrInfo::isVectorOnly(const MachineInstr *MI) const {
  return isVectorOnly(MI->getOpcode());
}

bool R600InstrInfo::isExport(unsigned Opcode) const {
  return (get(Opcode).TSFlags & R600_InstFlag::IS_EXPORT);
}

bool R600InstrInfo::usesVertexCache(unsigned Opcode) const {
  return ST.hasVertexCache() && IS_VTX(get(Opcode));
}

bool R600InstrInfo::usesVertexCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return MFI->ShaderType != ShaderType::COMPUTE &&
         usesVertexCache(MI->getOpcode());
}

bool R600InstrInfo::usesTextureCache(unsigned Opcode) const {
  return (!ST.hasVertexCache() && IS_VTX(get(Opcode))) || IS_TEX(get(Opcode));
}

bool R600InstrInfo::usesTextureCache(const MachineInstr *MI) const {
  const R600MachineFunctionInfo *MFI =
      MI->getParent()->getParent()->getInfo<R600MachineFunctionInfo>();
  return (MFI->ShaderType == ShaderType::COMPUTE &&
          usesVertexCache(MI->getOpcode())) ||
         usesTextureCache(MI->getOpcode());
}

bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
  switch (Opcode) {
  case AMDGPU::KILLGT:
  case AMDGPU::GROUP_BARRIER:
    return true;
  default:
    return false;
  }
}

bool R600InstrInfo::usesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::definesAddressRegister(MachineInstr *MI) const {
  return MI->findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
}

bool R600InstrInfo::readsLDSSrcReg(const MachineInstr *MI) const {
  if (!isALUInstr(MI->getOpcode())) {
    return false;
  }
  for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
                                        E = MI->operands_end(); I != E; ++I) {
    if (!I->isReg() || !I->isUse() ||
        TargetRegisterInfo::isVirtualRegister(I->getReg()))
      continue;

    if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
      return true;
  }
  return false;
}

int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
  static const unsigned OpTable[] = {
    AMDGPU::OpName::src0,
    AMDGPU::OpName::src1,
    AMDGPU::OpName::src2
  };

  assert(SrcNum < 3);
  return getOperandIdx(Opcode, OpTable[SrcNum]);
}

#define SRC_SEL_ROWS 11
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
  };

  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
      return getOperandIdx(Opcode, SrcSelTable[i][1]);
    }
  }
  return -1;
}
#undef SRC_SEL_ROWS

SmallVector<std::pair<MachineOperand *, int64_t>, 3>
R600InstrInfo::getSrcs(MachineInstr *MI) const {
  SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    static const unsigned OpTable[8][2] = {
      {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
      {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
      {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
      {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
      {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
      {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
      {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
      {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
    };

    for (unsigned j = 0; j < 8; j++) {
      MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                        OpTable[j][0]));
      unsigned Reg = MO.getReg();
      if (Reg == AMDGPU::ALU_CONST) {
        unsigned Sel = MI->getOperand(getOperandIdx(MI->getOpcode(),
                                                    OpTable[j][1])).getImm();
        Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
        continue;
      }
    }
    return Result;
  }

  static const unsigned OpTable[3][2] = {
    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
  };

  for (unsigned j = 0; j < 3; j++) {
    int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
    if (SrcIdx < 0)
      break;
    MachineOperand &MO = MI->getOperand(SrcIdx);
    unsigned Reg = MI->getOperand(SrcIdx).getReg();
    if (Reg == AMDGPU::ALU_CONST) {
      unsigned Sel = MI->getOperand(
          getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Sel));
      continue;
    }
    if (Reg == AMDGPU::ALU_LITERAL_X) {
      unsigned Imm = MI->getOperand(
          getOperandIdx(MI->getOpcode(), AMDGPU::OpName::literal)).getImm();
      Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, Imm));
      continue;
    }
    Result.push_back(std::pair<MachineOperand *, int64_t>(&MO, 0));
  }
  return Result;
}

std::vector<std::pair<int, unsigned> >
R600InstrInfo::ExtractSrcs(MachineInstr *MI,
                           const DenseMap<unsigned, unsigned> &PV,
                           unsigned &ConstCount) const {
  ConstCount = 0;
  const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = getSrcs(MI);
  const std::pair<int, unsigned> DummyPair(-1, 0);
  std::vector<std::pair<int, unsigned> > Result;
  unsigned i = 0;
  for (unsigned n = Srcs.size(); i < n; ++i) {
    unsigned Reg = Srcs[i].first->getReg();
    unsigned Index = RI.getEncodingValue(Reg) & 0xff;
    if (Reg == AMDGPU::OQAP) {
      Result.push_back(std::pair<int, unsigned>(Index, 0));
    }
    if (PV.find(Reg) != PV.end()) {
      // 255 is used to mark a PS/PV reg.
      Result.push_back(std::pair<int, unsigned>(255, 0));
      continue;
    }
    if (Index > 127) {
      ConstCount++;
      Result.push_back(DummyPair);
      continue;
    }
    unsigned Chan = RI.getHWRegChan(Reg);
    Result.push_back(std::pair<int, unsigned>(Index, Chan));
  }
  for (; i < 3; ++i)
    Result.push_back(DummyPair);
  return Result;
}

static std::vector<std::pair<int, unsigned> >
Swizzle(std::vector<std::pair<int, unsigned> > Src,
        R600InstrInfo::BankSwizzle Swz) {
  if (Src[0] == Src[1])
    Src[1].first = -1;
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210:
    break;
  case R600InstrInfo::ALU_VEC_021_SCL_122:
    std::swap(Src[1], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_102_SCL_221:
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_120_SCL_212:
    std::swap(Src[0], Src[1]);
    std::swap(Src[0], Src[2]);
    break;
  case R600InstrInfo::ALU_VEC_201:
    std::swap(Src[0], Src[2]);
    std::swap(Src[0], Src[1]);
    break;
  case R600InstrInfo::ALU_VEC_210:
    std::swap(Src[0], Src[2]);
    break;
  }
  return Src;
}

static unsigned
getTransSwizzle(R600InstrInfo::BankSwizzle Swz, unsigned Op) {
  switch (Swz) {
  case R600InstrInfo::ALU_VEC_012_SCL_210: {
    unsigned Cycles[3] = { 2, 1, 0};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_021_SCL_122: {
    unsigned Cycles[3] = { 1, 2, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_120_SCL_212: {
    unsigned Cycles[3] = { 2, 1, 2};
    return Cycles[Op];
  }
  case R600InstrInfo::ALU_VEC_102_SCL_221: {
    unsigned Cycles[3] = { 2, 2, 1};
    return Cycles[Op];
  }
  default:
    llvm_unreachable("Wrong Swizzle for Trans Slot");
    return 0;
  }
}

/// returns how many MIs (whose inputs are represented by IGSrcs) can be packed
/// in the same Instruction Group while meeting read port limitations given a
/// Swz swizzle sequence.
unsigned R600InstrInfo::isLegalUpTo(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    const std::vector<R600InstrInfo::BankSwizzle> &Swz,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  int Vector[4][3];
  memset(Vector, -1, sizeof(Vector));
  for (unsigned i = 0, e = IGSrcs.size(); i < e; i++) {
    const std::vector<std::pair<int, unsigned> > &Srcs =
        Swizzle(IGSrcs[i], Swz[i]);
    for (unsigned j = 0; j < 3; j++) {
      const std::pair<int, unsigned> &Src = Srcs[j];
      if (Src.first < 0 || Src.first == 255)
        continue;
      if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
        if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
            Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
          // The value from output queue A (denoted by register OQAP) can
          // only be fetched during the first cycle.
          return 0;
        }
        // OQAP does not count towards the normal read port restrictions.
        continue;
      }
      if (Vector[Src.second][j] < 0)
        Vector[Src.second][j] = Src.first;
      if (Vector[Src.second][j] != Src.first)
        return i;
    }
  }
  // Now check the Trans ALU.
  for (unsigned i = 0, e = TransSrcs.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransSrcs[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (Src.first == 255)
      continue;
    if (Vector[Src.second][Cycle] < 0)
      Vector[Src.second][Cycle] = Src.first;
    if (Vector[Src.second][Cycle] != Src.first)
      return IGSrcs.size() - 1;
  }
  return IGSrcs.size();
}

/// Given a swizzle sequence SwzCandidate and an index Idx, returns the next
/// (in lexicographic terms) swizzle sequence, assuming that all swizzles
/// after Idx can be skipped.
static bool
NextPossibleSolution(
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    unsigned Idx) {
  assert(Idx < SwzCandidate.size());
  int ResetIdx = Idx;
  while (ResetIdx > -1 && SwzCandidate[ResetIdx] == R600InstrInfo::ALU_VEC_210)
    ResetIdx--;
  for (unsigned i = ResetIdx + 1, e = SwzCandidate.size(); i < e; i++) {
    SwzCandidate[i] = R600InstrInfo::ALU_VEC_012_SCL_210;
  }
  if (ResetIdx == -1)
    return false;
  int NextSwizzle = SwzCandidate[ResetIdx] + 1;
  SwzCandidate[ResetIdx] = (R600InstrInfo::BankSwizzle)NextSwizzle;
  return true;
}

/// Enumerate all possible Swizzle sequences to find one that can meet all
/// read port requirements.
bool R600InstrInfo::FindSwizzleForVectorSlot(
    const std::vector<std::vector<std::pair<int, unsigned> > > &IGSrcs,
    std::vector<R600InstrInfo::BankSwizzle> &SwzCandidate,
    const std::vector<std::pair<int, unsigned> > &TransSrcs,
    R600InstrInfo::BankSwizzle TransSwz) const {
  unsigned ValidUpTo = 0;
  do {
    ValidUpTo = isLegalUpTo(IGSrcs, SwzCandidate, TransSrcs, TransSwz);
    if (ValidUpTo == IGSrcs.size())
      return true;
  } while (NextPossibleSolution(SwzCandidate, ValidUpTo));
  return false;
}

/// Instructions in the Trans slot can't read a gpr at cycle 0 if they also
/// read a const, and can't read a gpr at cycle 1 if they read 2 consts.
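/// For example, with one const operand the ALU_VEC_012_SCL_210 swizzle (trans
/// reads at cycles {2,1,0}, see getTransSwizzle above) is rejected whenever a
/// gpr operand lands on cycle 0, whereas ALU_VEC_021_SCL_122 (cycles {1,2,2})
/// keeps cycle 0 free for the const fetch.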
static bool
isConstCompatible(R600InstrInfo::BankSwizzle TransSwz,
                  const std::vector<std::pair<int, unsigned> > &TransOps,
                  unsigned ConstCount) {
  // The TransALU can't read 3 constants.
  if (ConstCount > 2)
    return false;
  for (unsigned i = 0, e = TransOps.size(); i < e; ++i) {
    const std::pair<int, unsigned> &Src = TransOps[i];
    unsigned Cycle = getTransSwizzle(TransSwz, i);
    if (Src.first < 0)
      continue;
    if (ConstCount > 0 && Cycle == 0)
      return false;
    if (ConstCount > 1 && Cycle == 1)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsReadPortLimitations(const std::vector<MachineInstr *> &IG,
                                       const DenseMap<unsigned, unsigned> &PV,
                                       std::vector<BankSwizzle> &ValidSwizzle,
                                       bool isLastAluTrans)
    const {
  // TODO: support shared src0 - src1 operand.

  std::vector<std::vector<std::pair<int, unsigned> > > IGSrcs;
  ValidSwizzle.clear();
  unsigned ConstCount;
  BankSwizzle TransBS = ALU_VEC_012_SCL_210;
  for (unsigned i = 0, e = IG.size(); i < e; ++i) {
    IGSrcs.push_back(ExtractSrcs(IG[i], PV, ConstCount));
    unsigned Op = getOperandIdx(IG[i]->getOpcode(),
                                AMDGPU::OpName::bank_swizzle);
    ValidSwizzle.push_back((R600InstrInfo::BankSwizzle)
                           IG[i]->getOperand(Op).getImm());
  }
  std::vector<std::pair<int, unsigned> > TransOps;
  if (!isLastAluTrans)
    return FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps, TransBS);

  TransOps = IGSrcs.back();
  IGSrcs.pop_back();
  ValidSwizzle.pop_back();

  static const R600InstrInfo::BankSwizzle TransSwz[] = {
    ALU_VEC_012_SCL_210,
    ALU_VEC_021_SCL_122,
    ALU_VEC_120_SCL_212,
    ALU_VEC_102_SCL_221
  };
  for (unsigned i = 0; i < 4; i++) {
    TransBS = TransSwz[i];
    if (!isConstCompatible(TransBS, TransOps, ConstCount))
      continue;
    bool Result = FindSwizzleForVectorSlot(IGSrcs, ValidSwizzle, TransOps,
                                           TransBS);
    if (Result) {
      ValidSwizzle.push_back(TransBS);
      return true;
    }
  }

  return false;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
    const {
  assert(Consts.size() <= 12 && "Too many operands in instructions group");
  unsigned Pair1 = 0, Pair2 = 0;
  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
    unsigned ReadConstHalf = Consts[i] & 2;
    unsigned ReadConstIndex = Consts[i] & (~3);
    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
    if (!Pair1) {
      Pair1 = ReadHalfConst;
      continue;
    }
    if (Pair1 == ReadHalfConst)
      continue;
    if (!Pair2) {
      Pair2 = ReadHalfConst;
      continue;
    }
    if (Pair2 != ReadHalfConst)
      return false;
  }
  return true;
}

bool
R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
    const {
  std::vector<unsigned> Consts;
  SmallSet<int64_t, 4> Literals;
  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
    MachineInstr *MI = MIs[i];
    if (!isALUInstr(MI->getOpcode()))
      continue;

    const SmallVectorImpl<std::pair<MachineOperand *, int64_t> > &Srcs =
        getSrcs(MI);

    for (unsigned j = 0, e = Srcs.size(); j < e; j++) {
      std::pair<MachineOperand *, unsigned> Src = Srcs[j];
      if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
        Literals.insert(Src.second);
      if (Literals.size() > 4)
        return false;
      if (Src.first->getReg() == AMDGPU::ALU_CONST)
        Consts.push_back(Src.second);
      if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
          AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
        unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
        unsigned Chan = RI.getHWRegChan(Src.first->getReg());
        Consts.push_back((Index << 2) | Chan);
      }
    }
  }
  return fitsConstReadLimitations(Consts);
}

DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
    const ScheduleDAG *DAG) const {
  const InstrItineraryData *II = TM->getInstrItineraryData();
  return TM->getSubtarget<AMDGPUSubtarget>().createDFAPacketizer(II);
}

static bool
isPredicateSetter(unsigned Opcode) {
  switch (Opcode) {
  case AMDGPU::PRED_X:
    return true;
  default:
    return false;
  }
}

static MachineInstr *
findFirstPredicateSetterFrom(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I) {
  while (I != MBB.begin()) {
    --I;
    MachineInstr *MI = I;
    if (isPredicateSetter(MI->getOpcode()))
      return MI;
  }

  return NULL;
}

static
bool isJump(unsigned Opcode) {
  return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
}

static bool isBranch(unsigned Opcode) {
  return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
         Opcode == AMDGPU::BRANCH_COND_f32;
}

bool
R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                             MachineBasicBlock *&TBB,
                             MachineBasicBlock *&FBB,
                             SmallVectorImpl<MachineOperand> &Cond,
                             bool AllowModify) const {
  // Most of the following comes from the ARM implementation of AnalyzeBranch.

  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return false;
    --I;
  }
  // AMDGPU::BRANCH* instructions are only available after isel and are not
  // handled.
  if (isBranch(I->getOpcode()))
    return true;
  if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
    return false;
  }

  // Get the last instruction in the block.
  MachineInstr *LastInst = I;

  // If there is only one terminator instruction, process it.
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() ||
      !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
    if (LastOpc == AMDGPU::JUMP) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    } else if (LastOpc == AMDGPU::JUMP_COND) {
      MachineInstr *predSet = I;
      while (!isPredicateSetter(predSet->getOpcode())) {
        predSet = --I;
      }
      TBB = LastInst->getOperand(0).getMBB();
      Cond.push_back(predSet->getOperand(1));
      Cond.push_back(predSet->getOperand(2));
      Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If the block ends with a B and a Bcc, handle it.
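  // (Following the ARM naming above: 'Bcc' is the conditional
  // AMDGPU::JUMP_COND and 'B' the unconditional AMDGPU::JUMP.)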
  if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
    MachineInstr *predSet = --I;
    while (!isPredicateSetter(predSet->getOpcode())) {
      predSet = --I;
    }
    TBB = SecondLastInst->getOperand(0).getMBB();
    FBB = LastInst->getOperand(0).getMBB();
    Cond.push_back(predSet->getOperand(1));
    Cond.push_back(predSet->getOperand(2));
    Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
    return false;
  }

  // Otherwise, can't handle this.
  return true;
}

int R600InstrInfo::getBranchInstr(const MachineOperand &op) const {
  const MachineInstr *MI = op.getParent();

  switch (MI->getDesc().OpInfo->RegClass) {
  default: // FIXME: fallthrough??
  case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32;
  case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32;
  };
}

static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
       It != E; ++It) {
    if (It->getOpcode() == AMDGPU::CF_ALU ||
        It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
      return llvm::prior(It.base());
  }
  return MBB.end();
}

unsigned
R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
                            MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
                            const SmallVectorImpl<MachineOperand> &Cond,
                            DebugLoc DL) const {
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");

  if (FBB == 0) {
    if (Cond.empty()) {
      BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
      return 1;
    } else {
      MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
      assert(PredSet && "No previous predicate !");
      addFlag(PredSet, 0, MO_FLAG_PUSH);
      PredSet->getOperand(2).setImm(Cond[1].getImm());

      BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
          .addMBB(TBB)
          .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
      MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
      if (CfAlu == MBB.end())
        return 1;
      assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
      CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
      return 1;
    }
  } else {
    MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
    assert(PredSet && "No previous predicate !");
    addFlag(PredSet, 0, MO_FLAG_PUSH);
    PredSet->getOperand(2).setImm(Cond[1].getImm());
    BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
        .addMBB(TBB)
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      return 2;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU);
    CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
    return 2;
  }
}

unsigned
R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  // Note: we leave PRED* instructions there.
  // They may be needed when predicating instructions.
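  // A block ends with at most two terminators here (a JUMP_COND followed by
  // a JUMP), so each of the two passes below strips at most one of them and,
  // for a conditional jump, undoes the CF_ALU -> CF_ALU_PUSH_BEFORE rewrite
  // made by InsertBranch.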

  MachineBasicBlock::iterator I = MBB.end();

  if (I == MBB.begin()) {
    return 0;
  }
  --I;
  switch (I->getOpcode()) {
  default:
    return 0;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  I = MBB.end();

  if (I == MBB.begin()) {
    return 1;
  }
  --I;
  switch (I->getOpcode()) {
    // FIXME: only one case??
  default:
    return 1;
  case AMDGPU::JUMP_COND: {
    MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
    clearFlag(predSet, 0, MO_FLAG_PUSH);
    I->eraseFromParent();
    MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
    if (CfAlu == MBB.end())
      break;
    assert(CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
    CfAlu->setDesc(get(AMDGPU::CF_ALU));
    break;
  }
  case AMDGPU::JUMP:
    I->eraseFromParent();
    break;
  }
  return 2;
}

bool
R600InstrInfo::isPredicated(const MachineInstr *MI) const {
  int idx = MI->findFirstPredOperandIdx();
  if (idx < 0)
    return false;

  unsigned Reg = MI->getOperand(idx).getReg();
  switch (Reg) {
  default: return false;
  case AMDGPU::PRED_SEL_ONE:
  case AMDGPU::PRED_SEL_ZERO:
  case AMDGPU::PREDICATE_BIT:
    return true;
  }
}

bool
R600InstrInfo::isPredicable(MachineInstr *MI) const {
  // XXX: KILL* instructions can be predicated, but they must be the last
  // instruction in a clause, so this means any instructions after them cannot
  // be predicated. Until we have proper support for instruction clauses in
  // the backend, we will mark KILL* instructions as unpredicable.

  if (MI->getOpcode() == AMDGPU::KILLGT) {
    return false;
  } else if (MI->getOpcode() == AMDGPU::CF_ALU) {
    // If the clause starts in the middle of the MBB then the MBB contains
    // more than one clause; we are unable to predicate several clauses.
    if (MI->getParent()->begin() != MachineBasicBlock::iterator(MI))
      return false;
    // TODO: We don't support KC merging atm.
    if (MI->getOperand(3).getImm() != 0 || MI->getOperand(4).getImm() != 0)
      return false;
    return true;
  } else if (isVector(*MI)) {
    return false;
  } else {
    return AMDGPUInstrInfo::isPredicable(MI);
  }
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                   unsigned NumCyles,
                                   unsigned ExtraPredCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,
                                   unsigned NumTCycles,
                                   unsigned ExtraTCycles,
                                   MachineBasicBlock &FMBB,
                                   unsigned NumFCycles,
                                   unsigned ExtraFCycles,
                                   const BranchProbability &Probability) const {
  return true;
}

bool
R600InstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                         unsigned NumCyles,
                                         const BranchProbability &Probability)
    const {
  return true;
}

bool
R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                         MachineBasicBlock &FMBB) const {
  return false;
}

bool
R600InstrInfo::ReverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  MachineOperand &MO = Cond[1];
  switch (MO.getImm()) {
  case OPCODE_IS_ZERO_INT:
    MO.setImm(OPCODE_IS_NOT_ZERO_INT);
    break;
  case OPCODE_IS_NOT_ZERO_INT:
    MO.setImm(OPCODE_IS_ZERO_INT);
    break;
  case OPCODE_IS_ZERO:
    MO.setImm(OPCODE_IS_NOT_ZERO);
    break;
  case OPCODE_IS_NOT_ZERO:
    MO.setImm(OPCODE_IS_ZERO);
    break;
  default:
    return true;
  }

  MachineOperand &MO2 = Cond[2];
  switch (MO2.getReg()) {
  case AMDGPU::PRED_SEL_ZERO:
    MO2.setReg(AMDGPU::PRED_SEL_ONE);
    break;
  case AMDGPU::PRED_SEL_ONE:
    MO2.setReg(AMDGPU::PRED_SEL_ZERO);
    break;
  default:
    return true;
  }
  return false;
}

bool
R600InstrInfo::DefinesPredicate(MachineInstr *MI,
                                std::vector<MachineOperand> &Pred) const {
  return isPredicateSetter(MI->getOpcode());
}

bool
R600InstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                 const SmallVectorImpl<MachineOperand> &Pred2)
    const {
  return false;
}

bool
R600InstrInfo::PredicateInstruction(MachineInstr *MI,
                                    const SmallVectorImpl<MachineOperand> &Pred)
    const {
  int PIdx = MI->findFirstPredOperandIdx();

  if (MI->getOpcode() == AMDGPU::CF_ALU) {
    MI->getOperand(8).setImm(0);
    return true;
  }

  if (MI->getOpcode() == AMDGPU::DOT_4) {
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_X))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Y))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_Z))
        .setReg(Pred[2].getReg());
    MI->getOperand(getOperandIdx(*MI, AMDGPU::OpName::pred_sel_W))
        .setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setReg(Pred[2].getReg());
    MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
    MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
    return true;
  }

  return false;
}

unsigned int R600InstrInfo::getPredicationCost(const MachineInstr *) const {
  return 2;
}

unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                            const MachineInstr *MI,
                                            unsigned *PredCost) const {
  if (PredCost)
    *PredCost = 2;
  return 2;
}

void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved,
                                             const MachineFunction &MF) const {
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering());

  unsigned StackWidth = TFL->getStackWidth(MF);
  int End = getIndirectIndexEnd(MF);

  if (End == -1)
    return;

  for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
    unsigned SuperReg = AMDGPU::R600_Reg128RegClass.getRegister(Index);
    Reserved.set(SuperReg);
    for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
      unsigned Reg =
          AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
      Reserved.set(Reg);
    }
  }
}

unsigned R600InstrInfo::calculateIndirectAddress(unsigned RegIndex,
                                                 unsigned Channel) const {
  // XXX: Remove when we support a stack width > 2.
  assert(Channel == 0);
  return RegIndex;
}

const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
  return &AMDGPU::R600_TReg32_XRegClass;
}

MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X, OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);

  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    AddrReg, ValueReg)
      .addReg(AMDGPU::AR_X,
              RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::dst_rel, 1);
  return Mov;
}

MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
                                       MachineBasicBlock::iterator I,
                                       unsigned ValueReg, unsigned Address,
                                       unsigned OffsetReg) const {
  unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address);
  MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
                                               AMDGPU::AR_X,
                                               OffsetReg);
  setImmOperand(MOVA, AMDGPU::OpName::write, 0);
  MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
                                                    ValueReg,
                                                    AddrReg)
      .addReg(AMDGPU::AR_X,
              RegState::Implicit | RegState::Kill);
  setImmOperand(Mov, AMDGPU::OpName::src0_rel, 1);

  return Mov;
}

unsigned R600InstrInfo::getMaxAlusPerClause() const {
  return 115;
}

MachineInstrBuilder R600InstrInfo::buildDefaultInstruction(MachineBasicBlock &MBB,
                                                  MachineBasicBlock::iterator I,
                                                  unsigned Opcode,
                                                  unsigned DstReg,
                                                  unsigned Src0Reg,
                                                  unsigned Src1Reg) const {
  MachineInstrBuilder MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opcode),
                                    DstReg);   // $dst

  if (Src1Reg) {
    MIB.addImm(0)     // $update_exec_mask
       .addImm(0);    // $update_predicate
  }
  MIB.addImm(1)       // $write
     .addImm(0)       // $omod
     .addImm(0)       // $dst_rel
     .addImm(0)       // $dst_clamp
     .addReg(Src0Reg) // $src0
     .addImm(0)       // $src0_neg
     .addImm(0)       // $src0_rel
     .addImm(0)       // $src0_abs
     .addImm(-1);     // $src0_sel
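  // A Src1Reg of 0 (NoRegister) means the instruction has a single source,
  // so the whole $src1 operand bundle is omitted.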
  if (Src1Reg) {
    MIB.addReg(Src1Reg) // $src1
       .addImm(0)       // $src1_neg
       .addImm(0)       // $src1_rel
       .addImm(0)       // $src1_abs
       .addImm(-1);     // $src1_sel
  }

  // XXX: The r600g finalizer expects this to be 1; once we've moved the
  // scheduling to the backend, we can change the default to 0.
  MIB.addImm(1)                    // $last
     .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
     .addImm(0)                    // $literal
     .addImm(0);                   // $bank_swizzle

  return MIB;
}

#define OPERAND_CASE(Label) \
  case Label: { \
    static const unsigned Ops[] = \
    { \
      Label##_X, \
      Label##_Y, \
      Label##_Z, \
      Label##_W \
    }; \
    return Ops[Slot]; \
  }

static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
  switch (Op) {
  OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
  OPERAND_CASE(AMDGPU::OpName::update_pred)
  OPERAND_CASE(AMDGPU::OpName::write)
  OPERAND_CASE(AMDGPU::OpName::omod)
  OPERAND_CASE(AMDGPU::OpName::dst_rel)
  OPERAND_CASE(AMDGPU::OpName::clamp)
  OPERAND_CASE(AMDGPU::OpName::src0)
  OPERAND_CASE(AMDGPU::OpName::src0_neg)
  OPERAND_CASE(AMDGPU::OpName::src0_rel)
  OPERAND_CASE(AMDGPU::OpName::src0_abs)
  OPERAND_CASE(AMDGPU::OpName::src0_sel)
  OPERAND_CASE(AMDGPU::OpName::src1)
  OPERAND_CASE(AMDGPU::OpName::src1_neg)
  OPERAND_CASE(AMDGPU::OpName::src1_rel)
  OPERAND_CASE(AMDGPU::OpName::src1_abs)
  OPERAND_CASE(AMDGPU::OpName::src1_sel)
  OPERAND_CASE(AMDGPU::OpName::pred_sel)
  default:
    llvm_unreachable("Wrong Operand");
  }
}

#undef OPERAND_CASE

MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
    MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
    const {
  assert(MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
  unsigned Opcode;
  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
  if (ST.getGeneration() <= AMDGPUSubtarget::R700)
    Opcode = AMDGPU::DOT4_r600;
  else
    Opcode = AMDGPU::DOT4_eg;
  MachineBasicBlock::iterator I = MI;
  MachineOperand &Src0 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
  MachineOperand &Src1 = MI->getOperand(
      getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
  MachineInstr *MIB = buildDefaultInstruction(
      MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
  static const unsigned Operands[14] = {
    AMDGPU::OpName::update_exec_mask,
    AMDGPU::OpName::update_pred,
    AMDGPU::OpName::write,
    AMDGPU::OpName::omod,
    AMDGPU::OpName::dst_rel,
    AMDGPU::OpName::clamp,
    AMDGPU::OpName::src0_neg,
    AMDGPU::OpName::src0_rel,
    AMDGPU::OpName::src0_abs,
    AMDGPU::OpName::src0_sel,
    AMDGPU::OpName::src1_neg,
    AMDGPU::OpName::src1_rel,
    AMDGPU::OpName::src1_abs,
    AMDGPU::OpName::src1_sel,
  };

  MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
      getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
  MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
      .setReg(MO.getReg());

  for (unsigned i = 0; i < 14; i++) {
    MachineOperand &MO = MI->getOperand(
        getOperandIdx(MI->getOpcode(), getSlotedOps(Operands[i], Slot)));
    assert(MO.isImm());
    setImmOperand(MIB, Operands[i], MO.getImm());
  }
  MIB->getOperand(20).setImm(0);
  return MIB;
}

MachineInstr *R600InstrInfo::buildMovImm(MachineBasicBlock &BB,
                                         MachineBasicBlock::iterator I,
                                         unsigned DstReg,
                                         uint64_t Imm) const {
  MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
                                                 AMDGPU::ALU_LITERAL_X);
  setImmOperand(MovImm, AMDGPU::OpName::literal, Imm);
  return MovImm;
}

MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
                                           MachineBasicBlock::iterator I,
                                           unsigned DstReg,
                                           unsigned SrcReg) const {
  return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
}

int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
  return getOperandIdx(MI.getOpcode(), Op);
}

int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
  return AMDGPU::getNamedOperandIdx(Opcode, Op);
}

void R600InstrInfo::setImmOperand(MachineInstr *MI, unsigned Op,
                                  int64_t Imm) const {
  int Idx = getOperandIdx(*MI, Op);
  assert(Idx != -1 && "Operand not supported for this instruction.");
  assert(MI->getOperand(Idx).isImm());
  MI->getOperand(Idx).setImm(Imm);
}

//===----------------------------------------------------------------------===//
// Instruction flag getters/setters
//===----------------------------------------------------------------------===//

bool R600InstrInfo::hasFlagOperand(const MachineInstr &MI) const {
  return GET_FLAG_OPERAND_IDX(get(MI.getOpcode()).TSFlags) != 0;
}

MachineOperand &R600InstrInfo::getFlagOp(MachineInstr *MI, unsigned SrcIdx,
                                         unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  int FlagIndex = 0;
  if (Flag != 0) {
    // If we pass something other than the default value of Flag to this
    // function, it means we want to set a flag on an instruction
    // that uses native encoding.
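    // Native encodings keep each modifier (clamp, write, last, neg, abs) in
    // its own immediate operand, so we return the index of that operand
    // rather than the packed flag word used in the else branch below.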
    assert(HAS_NATIVE_OPERANDS(TargetFlags));
    bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
    switch (Flag) {
    case MO_FLAG_CLAMP:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::clamp);
      break;
    case MO_FLAG_MASK:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::write);
      break;
    case MO_FLAG_NOT_LAST:
    case MO_FLAG_LAST:
      FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::last);
      break;
    case MO_FLAG_NEG:
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_neg); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_neg); break;
      case 2: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src2_neg); break;
      }
      break;

    case MO_FLAG_ABS:
      assert(!IsOP3 && "Cannot set absolute value modifier for OP3 "
                       "instructions.");
      (void)IsOP3;
      switch (SrcIdx) {
      case 0: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src0_abs); break;
      case 1: FlagIndex = getOperandIdx(*MI, AMDGPU::OpName::src1_abs); break;
      }
      break;

    default:
      FlagIndex = -1;
      break;
    }
    assert(FlagIndex != -1 && "Flag not supported for this instruction");
  } else {
    FlagIndex = GET_FLAG_OPERAND_IDX(TargetFlags);
    assert(FlagIndex != 0 &&
           "Instruction flags not supported for this instruction");
  }

  MachineOperand &FlagOp = MI->getOperand(FlagIndex);
  assert(FlagOp.isImm());
  return FlagOp;
}

void R600InstrInfo::addFlag(MachineInstr *MI, unsigned Operand,
                            unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (Flag == 0) {
    return;
  }
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    if (Flag == MO_FLAG_NOT_LAST) {
      clearFlag(MI, Operand, MO_FLAG_LAST);
    } else if (Flag == MO_FLAG_MASK) {
      clearFlag(MI, Operand, Flag);
    } else {
      FlagOp.setImm(1);
    }
  } else {
    MachineOperand &FlagOp = getFlagOp(MI, Operand);
    FlagOp.setImm(FlagOp.getImm() | (Flag << (NUM_MO_FLAGS * Operand)));
  }
}

void R600InstrInfo::clearFlag(MachineInstr *MI, unsigned Operand,
                              unsigned Flag) const {
  unsigned TargetFlags = get(MI->getOpcode()).TSFlags;
  if (HAS_NATIVE_OPERANDS(TargetFlags)) {
    MachineOperand &FlagOp = getFlagOp(MI, Operand, Flag);
    FlagOp.setImm(0);
  } else {
    MachineOperand &FlagOp = getFlagOp(MI);
    unsigned InstFlags = FlagOp.getImm();
    InstFlags &= ~(Flag << (NUM_MO_FLAGS * Operand));
    FlagOp.setImm(InstFlags);
  }
}