//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains some helper functions which try to clean up artifacts
// such as G_TRUNCs/G_[ZSA]EXTs that were created during legalization to make
// the types match. This file also contains some combines of merges that happen
// at the end of the legalization.
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "legalizer"

namespace llvm {
class LegalizationArtifactCombiner {
  MachineIRBuilder &Builder;
  MachineRegisterInfo &MRI;
  const LegalizerInfo &LI;

  static bool isArtifactCast(unsigned Opc) {
    switch (Opc) {
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
      return true;
    default:
      return false;
    }
  }

public:
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}

  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) -> aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
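    // For instance (an illustrative example; vregs and types are arbitrary):
    //   %1:_(s32) = G_CONSTANT i32 -1
    //   %2:_(s64) = G_ANYEXT %1
    // becomes (sign-extension is as good a choice as any for the undefined
    // high bits of an anyext):
    //   %2:_(s64) = G_CONSTANT i64 -1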
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) -> and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
          DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
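    // For instance (an illustrative example; vregs and types are arbitrary):
    //   %1:_(s32) = G_CONSTANT i32 -1
    //   %2:_(s64) = G_ZEXT %1
    // becomes
    //   %2:_(s64) = G_CONSTANT i64 4294967295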
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) -> (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
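    // For instance (an illustrative example; vregs and types are arbitrary):
    //   %1:_(s64) = G_CONSTANT i64 257
    //   %2:_(s8) = G_TRUNC %1
    // becomes
    //   %2:_(s8) = G_CONSTANT i8 1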
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult-to-legalize merges.
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
      const LLT DstTy = MRI.getType(DstReg);

      // We can only fold if the types are scalar.
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly.
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register.
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size we can use
        // a smaller merge instead.
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require fewer inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

        Builder.buildMerge(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine.
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    return false;
  }

  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
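  /// e.g. (an illustrative example):
  ///   %0:_(s8) = G_IMPLICIT_DEF
  ///   %1:_(s32) = G_ZEXT %0
  /// becomes
  ///   %1:_(s32) = G_CONSTANT i32 0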
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }

  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        // %2:_(s8) = G_TRUNC %6
        // %3:_(s8) = G_TRUNC %7
        // %4:_(s8) = G_TRUNC %8
        // %5:_(s8) = G_TRUNC %9

        unsigned UnmergeNumElts = DestTy.isVector() ?
            CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        // %1:_(s16) = G_TRUNC %0(s32)
        // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a
        // multiple of the unmerge destination size.
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported.
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for
        // the unused bits.
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build the new unmerge.
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }

  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                 LLT OpTy, LLT DestTy) {
    // Check if we found a definition that is like G_MERGE_VALUES.
    switch (MergeOp) {
    default:
      return false;
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_MERGE_VALUES:
      // The convert operation that we will need to insert is
      // going to convert the input of that type of instruction (scalar)
      // to the destination type (DestTy).
      // The conversion needs to stay in the same domain (scalar to scalar
      // and vector to vector), so if we were to allow folding the merge
      // we would need to insert some bitcasts.
      // E.g.,
      // <2 x s16> = build_vector s16, s16
      // <2 x s32> = zext <2 x s16>
      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
      //
      // As is, the folding would produce:
      // <2 x s16> = zext s16  <-- scalar to vector
      // <2 x s16> = zext s16  <-- scalar to vector
      // Which is invalid.
      // Instead we would want to generate:
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      //
      // That is not done yet.
      if (ConvertOp == 0)
        return true;
      return !DestTy.isVector() && OpTy.isVector() &&
             DestTy == OpTy.getElementType();
    case TargetOpcode::G_CONCAT_VECTORS: {
      if (ConvertOp == 0)
        return true;
      if (!DestTy.isVector())
        return false;

      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();

      // Don't handle scalarization with a cast that isn't in the same
      // direction as the vector cast.
      // This could be handled, but it would require more intermediate
      // unmerges.
      if (ConvertOp == TargetOpcode::G_TRUNC)
        return DestTy.getSizeInBits() <= OpEltSize;
      return DestTy.getSizeInBits() >= OpEltSize;
    }
    }
  }

  /// Try to replace DstReg with SrcReg or build a COPY instruction
  /// depending on the register constraints.
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    SmallVectorImpl<Register> &UpdatedDefs,
                                    GISelChangeObserver &Observer) {
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
      Builder.buildCopy(DstReg, SrcReg);
      UpdatedDefs.push_back(DstReg);
      return;
    }
    SmallVector<MachineInstr *, 4> UseMIs;
    // Get the users and notify the observer before replacing.
    for (auto &UseMI : MRI.use_instructions(DstReg)) {
      UseMIs.push_back(&UseMI);
      Observer.changingInstr(UseMI);
    }
    // Replace the registers.
    MRI.replaceRegWith(DstReg, SrcReg);
    UpdatedDefs.push_back(SrcReg);
    // Notify the observer that we changed the instructions.
    for (auto *UseMI : UseMIs)
      Observer.changedInstr(*UseMI);
  }

  /// Return the operand index in \p MI that defines \p Def.
  static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
    unsigned DefIdx = 0;
    for (const MachineOperand &Def : MI.defs()) {
      if (Def.getReg() == SearchDef)
        break;
      ++DefIdx;
    }

    return DefIdx;
  }

  /// This class provides utilities for finding source registers of specific
  /// bit ranges in an artifact. The routines can look through the source
  /// registers if they're other artifacts to try to find a non-artifact source
  /// of a value.
  class ArtifactValueFinder {
    MachineRegisterInfo &MRI;
    MachineIRBuilder &MIB;
    const LegalizerInfo &LI;

    // Stores the best register found in the current query so far.
    Register CurrentBest = Register();

    /// Given a concat_vector op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able to return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }

    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been
      // requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }

    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //    --------------------------
      //   |  INS      |  CONTAINER   |
      //    --------------------------
      //      |     |
      //      SB    EB
      //
      // Scenario B:
      //    --------------------------
      //   |  INS      |  CONTAINER   |
      //    --------------------------
      //                  |       |
      //                  SB      EB
      //
      // Scenario C:
      //    --------------------------
      //   |  CONTAINER   |  INS      |
      //    --------------------------
      //      |       |
      //      SB      EB
      //
      // Scenario D:
      //    --------------------------
      //   |  CONTAINER   |  INS      |
      //    --------------------------
      //                  |     |
      //                  SB    EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }

    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        return CurrentBest;
      }
    }

  public:
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}

    /// Try to find a source of the value defined in the def \p DefReg,
    /// starting at position \p StartBit with size \p Size.
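    /// For example (illustrative), if the defining instruction is a G_INSERT
    /// and the queried bit range lies entirely within the inserted operand,
    /// the search continues from that operand.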
    /// \returns a register with the requested size, or an empty Register if
    /// no better value could be found.
    Register findValueFromDef(Register DefReg, unsigned StartBit,
                              unsigned Size) {
      CurrentBest = Register();
      Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
      return FoundReg != DefReg ? FoundReg : Register();
    }

    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provide the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }
  };

  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result
      // type of an unmerge, this would involve the creation of an equivalent
      // unmerge to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a
      // separate unmerge for each one.
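      // Rewire each def of this unmerge to the corresponding def of the
      // single wider unmerge built above.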
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions.
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first.
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      // %1 = G_MERGE_VALUES %4, %5
      // %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      // %9, %10 = G_UNMERGE_VALUES %4
      // %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>) = G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      // %6 = G_MERGE_VALUES %17, %18, %19, %20
      // %7, %8 = G_UNMERGE_VALUES %6
      // to
      // %7 = G_MERGE_VALUES %17, %18
      // %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMerge(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }

  bool tryCombineExtract(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
                         SmallVectorImpl<Register> &UpdatedDefs) {
    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);

    // Try to use the source registers from a G_MERGE_VALUES
    //
    // %2 = G_MERGE_VALUES %0, %1
    // %3 = G_EXTRACT %2, N
    // =>
    //
    // for N < %2.getSizeInBits() / 2
    //   %3 = G_EXTRACT %0, N
    //
    // for N >= %2.getSizeInBits() / 2
    //   %3 = G_EXTRACT %1, (N - %0.getSizeInBits())

    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
    MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
    if (!MergeI || !isa<GMergeLikeOp>(MergeI))
      return false;

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    // TODO: Do we need to check if the resulting extract is supported?
    unsigned ExtractDstSize = DstTy.getSizeInBits();
    unsigned Offset = MI.getOperand(2).getImm();
    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
    unsigned MergeSrcIdx = Offset / MergeSrcSize;

    // Compute the offset of the last bit the extract needs.
    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;

    // Can't handle the case where the extract spans multiple inputs.
    if (MergeSrcIdx != EndMergeSrcIdx)
      return false;

    // TODO: We could modify MI in place in most cases.
    Builder.setInstr(MI);
    Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
                         Offset - MergeSrcIdx * MergeSrcSize);
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }

  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal.
        // As all artifact combines at the moment look only "up" the def-use
        // chains, we achieve that by throwing truncates' users (with look
        // through copies) into the ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }

private:
  static Register getArtifactSrcReg(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case TargetOpcode::COPY:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_EXTRACT:
      return MI.getOperand(1).getReg();
    case TargetOpcode::G_UNMERGE_VALUES:
      return MI.getOperand(MI.getNumOperands() - 1).getReg();
    default:
      llvm_unreachable("Not a legalization artifact");
    }
  }

  /// Mark a def of one of MI's original operands, DefMI, as dead if changing
  /// MI (either by killing it or changing operands) results in DefMI being
  /// dead too. In-between COPYs or artifact-casts are also collected if they
  /// are dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
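    // Walk up the chain from MI towards DefMI, collecting any single-use
    // COPYs and artifact casts that become dead once MI is erased.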
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }

  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)) try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to
  /// collect the in-between copies if they're dead.
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }

  /// Erase the dead instructions in the list and call the observer hooks.
  /// Normally the Legalizer will deal with erasing instructions that have been
  /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
  /// process instructions which have been marked dead, but otherwise break the
  /// MIR by introducing multiple vreg defs. For those cases, allow the combines
  /// to explicitly delete the instructions before we run into trouble.
  void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    for (auto *DeadMI : DeadInsts) {
      LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
      WrapperObserver.erasingInstr(*DeadMI);
      DeadMI->eraseFromParent();
    }
    DeadInsts.clear();
  }

  /// Checks if the target legalizer info has specified anything about the
  /// instruction, or if unsupported.
  bool isInstUnsupported(const LegalityQuery &Query) const {
    using namespace LegalizeActions;
    auto Step = LI.getAction(Query);
    return Step.Action == Unsupported || Step.Action == NotFound;
  }

  bool isInstLegal(const LegalityQuery &Query) const {
    return LI.getAction(Query).Action == LegalizeActions::Legal;
  }

  bool isConstantUnsupported(LLT Ty) const {
    if (!Ty.isVector())
      return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});

    LLT EltTy = Ty.getElementType();
    return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
           isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
  }

  /// Looks through copy instructions and returns the actual
  /// source register.
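  /// e.g. (an illustrative example) given
  ///   %1:_(s32) = COPY %0:_(s32)
  ///   %2:_(s32) = COPY %1:_(s32)
  /// a query for %2 returns %0, provided each copy source has a valid type.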
  Register lookThroughCopyInstrs(Register Reg) {
    using namespace llvm::MIPatternMatch;

    Register TmpReg;
    while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
      if (MRI.getType(TmpReg).isValid())
        Reg = TmpReg;
      else
        break;
    }
    return Reg;
  }
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H