//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains some helper functions which try to clean up artifacts
// such as G_TRUNCs and G_[ZSA]EXTs that were created during legalization to
// make the types match. This file also contains some combines of merges that
// happen at the end of the legalization.
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "legalizer"
using namespace llvm::MIPatternMatch;

namespace llvm {
class LegalizationArtifactCombiner {
  MachineIRBuilder &Builder;
  MachineRegisterInfo &MRI;
  const LegalizerInfo &LI;

  static bool isArtifactCast(unsigned Opc) {
    switch (Opc) {
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
      return true;
    default:
      return false;
    }
  }

public:
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}

  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) -> aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
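    // E.g. (the constant value is sign-extended, which is a valid choice for
    // the undefined high bits of the G_ANYEXT):
    // %0:_(s32) = G_CONSTANT i32 -1
    // %1:_(s64) = G_ANYEXT %0
    // =>
    // %1:_(s64) = G_CONSTANT i64 -1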
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) -> and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
          DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
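    // E.g. (the constant value is zero-extended):
    // %0:_(s16) = G_CONSTANT i16 -1
    // %1:_(s32) = G_ZEXT %0
    // =>
    // %1:_(s32) = G_CONSTANT i32 65535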
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) -> (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *CstMI = MRI.getVRegDef(SrcReg);
    if (CstMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = CstMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *CstMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
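    // E.g. (the constant value is truncated):
    // %0:_(s32) = G_CONSTANT i32 257
    // %1:_(s8) = G_TRUNC %0
    // =>
    // %1:_(s8) = G_CONSTANT i8 1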
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult-to-legalize merges.
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
      const LLT DstTy = MRI.getType(DstReg);

      // We can only fold if the types are scalar.
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly.
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match, we can simply try to replace the register.
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size, we can
        // use a smaller merge instead.
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require fewer inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

        Builder.buildMerge(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine.
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must
      // be legal anyway as it must be legal for all outputs of the consumer
      // type set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    return false;
  }

  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
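  /// G_ANYEXT (G_IMPLICIT_DEF) folds to a wider G_IMPLICIT_DEF, while
  /// G_[SZ]EXT (G_IMPLICIT_DEF) folds to G_CONSTANT 0: with an undefined
  /// source we are free to pick 0, for which both extensions also yield 0.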
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }

  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() &&
          SrcTy.getScalarType() == DestTy.getScalarType()) {
        // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        // %2:_(s8) = G_TRUNC %6
        // %3:_(s8) = G_TRUNC %7
        // %4:_(s8) = G_TRUNC %8
        // %5:_(s8) = G_TRUNC %9

        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        // %1:_(s16) = G_TRUNC %0(s32)
        // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a
        // multiple of the unmerge destination size.
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported.
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for
        // the unused bits.
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build the new unmerge.
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well.
    return false;
  }

  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                 LLT OpTy, LLT DestTy) {
    // Check if we found a definition that is like G_MERGE_VALUES.
    switch (MergeOp) {
    default:
      return false;
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_MERGE_VALUES:
      // The convert operation that we will need to insert is
      // going to convert the input of that type of instruction (scalar)
      // to the destination type (DestTy).
      // The conversion needs to stay in the same domain (scalar to scalar
      // and vector to vector), so if we were to allow folding the merge
      // we would need to insert some bitcasts.
      // E.g.,
      // <2 x s16> = build_vector s16, s16
      // <2 x s32> = zext <2 x s16>
      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
      //
      // As is, the folding would produce:
      // <2 x s16> = zext s16  <-- scalar to vector
      // <2 x s16> = zext s16  <-- scalar to vector
      // which is invalid.
      // Instead we would want to generate:
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      //
      // That is not done yet.
      if (ConvertOp == 0)
        return true;
      return !DestTy.isVector() && OpTy.isVector() &&
             DestTy == OpTy.getElementType();
    case TargetOpcode::G_CONCAT_VECTORS: {
      if (ConvertOp == 0)
        return true;
      if (!DestTy.isVector())
        return false;

      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();

      // Don't handle scalarization with a cast that isn't in the same
      // direction as the vector cast. This could be handled, but it would
      // require more intermediate unmerges.
      if (ConvertOp == TargetOpcode::G_TRUNC)
        return DestTy.getSizeInBits() <= OpEltSize;
      return DestTy.getSizeInBits() >= OpEltSize;
    }
    }
  }

  /// Try to replace DstReg with SrcReg or build a COPY instruction
  /// depending on the register constraints.
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    SmallVectorImpl<Register> &UpdatedDefs,
                                    GISelChangeObserver &Observer) {
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
      Builder.buildCopy(DstReg, SrcReg);
      UpdatedDefs.push_back(DstReg);
      return;
    }
    SmallVector<MachineInstr *, 4> UseMIs;
    // Get the users and notify the observer before replacing.
    for (auto &UseMI : MRI.use_instructions(DstReg)) {
      UseMIs.push_back(&UseMI);
      Observer.changingInstr(UseMI);
    }
    // Replace the registers.
    MRI.replaceRegWith(DstReg, SrcReg);
    UpdatedDefs.push_back(SrcReg);
    // Notify the observer that we changed the instructions.
    for (auto *UseMI : UseMIs)
      Observer.changedInstr(*UseMI);
  }

  /// Return the operand index in \p MI that defines \p SearchDef.
  static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
    unsigned DefIdx = 0;
    for (const MachineOperand &Def : MI.defs()) {
      if (Def.getReg() == SearchDef)
        break;
      ++DefIdx;
    }

    return DefIdx;
  }

  /// This class provides utilities for finding source registers of specific
  /// bit ranges in an artifact. The routines can look through the source
  /// registers if they're other artifacts to try to find a non-artifact source
  /// of a value.
  class ArtifactValueFinder {
    MachineRegisterInfo &MRI;
    MachineIRBuilder &MIB;
    const LegalizerInfo &LI;

    // Stores the best register found in the current query so far.
    Register CurrentBest = Register();

    /// Given a concat_vectors op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able to return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }

    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
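    /// E.g. for %v:_(<4 x s16>) = G_BUILD_VECTOR %a, %b, %c, %d, a query for
    /// StartBit 16 with Size 16 resolves to %b.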
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been
      // requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }

    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range
      // layouts that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      //
      // Scenario A:
      //   --------------------------
      //   |   INS    |  CONTAINER  |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //   |   INS    |  CONTAINER  |
      //   --------------------------
      //                  |    |
      //                  SB   EB
      //
      // Scenario C:
      //   --------------------------
      //   |  CONTAINER  |   INS    |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //   |  CONTAINER  |   INS    |
      //   --------------------------
      //                    |   |
      //                    SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }

    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        return CurrentBest;
      }
    }

  public:
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}

    /// Try to find a source of the value defined in the def \p DefReg,
    /// starting at position \p StartBit with size \p Size.
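    /// The search looks through G_CONCAT_VECTORS, G_UNMERGE_VALUES,
    /// G_BUILD_VECTOR and G_INSERT defs (and any COPYs in between) to locate
    /// a non-artifact source of the requested bits.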
    /// \returns a register with the requested size, or an empty Register if
    /// no better value could be found.
    Register findValueFromDef(Register DefReg, unsigned StartBit,
                              unsigned Size) {
      CurrentBest = Register();
      Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
      return FoundReg != DefReg ? FoundReg : Register();
    }

    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provide the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }
  };

  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      // =>
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result
      // type of an unmerge, this would involve the creation of an equivalent
      // unmerge to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a
      // separate unmerge for each one.
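      // Remap each def of this unmerge to the corresponding def of the new,
      // flattened unmerge: this unmerge consumed def SrcDefIdx of the source
      // unmerge, so its defs cover new defs [SrcDefIdx * NumDefs,
      // SrcDefIdx * NumDefs + NumDefs).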
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions.
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first.
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      // %1 = G_MERGE_VALUES %4, %5
      // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      // %9, %10 = G_UNMERGE_VALUES %4
      // %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>) = G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      // %6 = G_MERGE_VALUES %17, %18, %19, %20
      // %7, %8 = G_UNMERGE_VALUES %6
      // to
      // %7 = G_MERGE_VALUES %17, %18
      // %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMerge(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcMergeReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcMergeReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }

  bool tryCombineExtract(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
                         SmallVectorImpl<Register> &UpdatedDefs) {
    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);

    // Try to use the source registers from a G_MERGE_VALUES
    //
    // %2 = G_MERGE_VALUES %0, %1
    // %3 = G_EXTRACT %2, N
    // =>
    //
    // for N < %2.getSizeInBits() / 2
    // %3 = G_EXTRACT %0, N
    //
    // for N >= %2.getSizeInBits() / 2
    // %3 = G_EXTRACT %1, (N - %0.getSizeInBits())

    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
    MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
    if (!MergeI || !isa<GMergeLikeOp>(MergeI))
      return false;

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    // TODO: Do we need to check if the resulting extract is supported?
    unsigned ExtractDstSize = DstTy.getSizeInBits();
    unsigned Offset = MI.getOperand(2).getImm();
    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
    unsigned MergeSrcIdx = Offset / MergeSrcSize;

    // Compute the index of the merge source that covers the last bit the
    // extract needs.
    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;

    // Can't handle the case where the extract spans multiple inputs.
    if (MergeSrcIdx != EndMergeSrcIdx)
      return false;

    // TODO: We could modify MI in place in most cases.
    Builder.setInstr(MI);
    Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
                         Offset - MergeSrcIdx * MergeSrcSize);
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }

  /// Try to combine away MI.
  /// Returns true if it combined away MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all
        // artifact combines at the moment look only "up" the def-use chains,
        // we achieve that by throwing truncates' users (looking through
        // copies) into the ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }

private:
  static Register getArtifactSrcReg(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case TargetOpcode::COPY:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_EXTRACT:
      return MI.getOperand(1).getReg();
    case TargetOpcode::G_UNMERGE_VALUES:
      return MI.getOperand(MI.getNumOperands() - 1).getReg();
    default:
      llvm_unreachable("Not a legalization artifact");
    }
  }

  /// Mark a def of one of MI's original operands, DefMI, as dead if changing
  /// MI (either by killing it or changing operands) results in DefMI being
  /// dead too. In-between COPYs or artifact casts are also collected if they
  /// are dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // E.g.,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
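    // Walk up from MI towards DefMI through single-use COPYs and artifact
    // casts, collecting each intermediate instruction that becomes dead.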
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }

  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)) try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to
  /// collect the in-between copies if they're dead.
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }

  /// Erase the dead instructions in the list and call the observer hooks.
  /// Normally the Legalizer will deal with erasing instructions that have been
  /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
  /// process instructions which have been marked dead, but otherwise break the
  /// MIR by introducing multiple vreg defs. For those cases, allow the combines
  /// to explicitly delete the instructions before we run into trouble.
  void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    for (auto *DeadMI : DeadInsts) {
      LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
      WrapperObserver.erasingInstr(*DeadMI);
      DeadMI->eraseFromParent();
    }
    DeadInsts.clear();
  }

  /// Checks if the target legalizer info has specified anything about the
  /// instruction, or if unsupported.
  bool isInstUnsupported(const LegalityQuery &Query) const {
    using namespace LegalizeActions;
    auto Step = LI.getAction(Query);
    return Step.Action == Unsupported || Step.Action == NotFound;
  }

  bool isInstLegal(const LegalityQuery &Query) const {
    return LI.getAction(Query).Action == LegalizeActions::Legal;
  }

  bool isConstantUnsupported(LLT Ty) const {
    if (!Ty.isVector())
      return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});

    LLT EltTy = Ty.getElementType();
    return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
           isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
  }

  /// Looks through copy instructions and returns the actual
  /// source register.
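  /// Stops at a copy whose source register has no LLT type attached (e.g. a
  /// copy from a physical register).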
  Register lookThroughCopyInstrs(Register Reg) {
    Register TmpReg;
    while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
      if (MRI.getType(TmpReg).isValid())
        Reg = TmpReg;
      else
        break;
    }
    return Reg;
  }
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H