//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains some helper functions which try to cleanup artifacts
// such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
// the types match. This file also contains some combines of merges that happens
// at the end of the legalization.
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/Constants.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "legalizer"

namespace llvm {
/// Folds away legalization artifacts (extend/truncate casts and
/// merge/unmerge-like instructions) that were introduced to make types match
/// during legalization. All combines preserve the observable values of the
/// replaced defs; instructions made dead are collected into caller-provided
/// DeadInsts vectors rather than erased directly.
class LegalizationArtifactCombiner {
  MachineIRBuilder &Builder;   // Used to emit replacement instructions.
  MachineRegisterInfo &MRI;    // Register defs/uses and type queries.
  const LegalizerInfo &LI;     // Legality queries for newly built instructions.

  /// Returns true if \p Opc is one of the cast opcodes that legalization
  /// introduces as artifacts (and which this combiner knows how to fold).
  static bool isArtifactCast(unsigned Opc) {
    switch (Opc) {
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
      return true;
    default:
      return false;
    }
  }

public:
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}

  /// Try to fold a G_ANYEXT \p MI:
  ///   aext(trunc x)    -> aext/copy/trunc x
  ///   aext([asz]ext x) -> [asz]ext x
  ///   aext(constant)   -> wider constant (if legal)
  ///   aext(implicit_def) via tryFoldImplicitDef.
  /// \returns true if a combine was performed. Newly dead instructions are
  /// appended to \p DeadInsts and replaced defs to \p UpdatedDefs.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) - > aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      // If the types round-trip exactly, no new instruction is needed at all.
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    // The inner extend determines the value of the extra bits, so reusing its
    // opcode directly to the wider type is always valid.
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        // The top bits of an anyext are unspecified, so sign-extending the
        // constant is as good a choice as any.
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  /// Try to fold a G_ZEXT \p MI:
  ///   zext(trunc x) -> and (aext/copy/trunc x), mask
  ///   zext(sext x)  -> and (sext x), mask
  ///   zext(zext x)  -> zext x
  ///   zext(constant)-> wider constant (if legal)
  ///   zext(implicit_def) via tryFoldImplicitDef.
  /// \returns true if a combine was performed.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      // The combine needs both G_AND and G_CONSTANT at the destination type;
      // bail out (leaving MI alone) if either is unsupported.
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Mask keeps exactly the low SrcTy bits, clearing the rest (zext
      // semantics).
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
          DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      // Exactly one of SextSrc/TruncSrc is valid, depending on which pattern
      // matched above.
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Rewrite MI in place to read the inner zext's source; the inner zext
      // becomes dead if this was its only user.
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  /// Try to fold a G_SEXT \p MI:
  ///   sext(trunc x)   -> sext_inreg (aext/copy/trunc x), c
  ///   sext([sz]ext x) -> [sz]ext x
  ///   sext(constant)  -> wider constant (if legal)
  ///   sext(implicit_def) via tryFoldImplicitDef.
  /// \returns true if a combine was performed.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // G_SEXT_INREG sign-extends from bit SizeInBits - 1, i.e. from the
      // width the truncated value actually carried.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    // The inner extend already fixed the top bits, so the outer sext is
    // redundant and the inner opcode can produce the wider type directly.
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  /// Try to fold a G_TRUNC \p MI:
  ///   trunc(constant) -> narrower constant (if legal)
  ///   trunc(merge)    -> use/trunc/re-merge the merge's sources
  ///   trunc(trunc)    -> trunc
  /// \returns true if a combine was performed.
  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult to legalize, merges
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
      const LLT DstTy = MRI.getType(DstReg);

      // We can only fold if the types are scalar
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size we can use
        // a smaller merge instead
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        // Re-merge only the leading sources that fit into DstSize bits.
        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require less inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

        Builder.buildMergeValues(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    return false;
  }

  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }

  /// Try to fold an unmerge \p MI whose source is produced by the artifact
  /// cast \p CastMI, by unmerging the cast's source instead. Currently only
  /// G_TRUNC casts are handled (see the in-body examples).
  /// \returns true if a combine was performed.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        //  %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        //  %2:_(s8) = G_TRUNC %6
        //  %3:_(s8) = G_TRUNC %7
        //  %4:_(s8) = G_TRUNC %8
        //  %5:_(s8) = G_TRUNC %9

        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        // Re-define each original def as a trunc of the matching new
        // unmerge result.
        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        //  %1:_(s16) = G_TRUNC %0(s32)
        //  %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }

  /// Returns true if an unmerge of a merge-like instruction with opcode
  /// \p MergeOp (possibly through an intermediate cast \p ConvertOp, 0 if
  /// none) can be folded given the unmerge source element type \p OpTy and
  /// destination type \p DestTy.
  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                 LLT OpTy, LLT DestTy) {
    // Check if we found a definition that is like G_MERGE_VALUES.
    switch (MergeOp) {
    default:
      return false;
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_MERGE_VALUES:
      // The convert operation that we will need to insert is
      // going to convert the input of that type of instruction (scalar)
      // to the destination type (DestTy).
      // The conversion needs to stay in the same domain (scalar to scalar
      // and vector to vector), so if we were to allow to fold the merge
      // we would need to insert some bitcasts.
      // E.g.,
      // <2 x s16> = build_vector s16, s16
      // <2 x s32> = zext <2 x s16>
      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
      //
      // As is the folding would produce:
      // <2 x s16> = zext s16  <-- scalar to vector
      // <2 x s16> = zext s16  <-- scalar to vector
      // Which is invalid.
      // Instead we would want to generate:
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      //
      // That is not done yet.
      if (ConvertOp == 0)
        return true;
      return !DestTy.isVector() && OpTy.isVector() &&
             DestTy == OpTy.getElementType();
    case TargetOpcode::G_CONCAT_VECTORS: {
      if (ConvertOp == 0)
        return true;
      if (!DestTy.isVector())
        return false;

      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();

      // Don't handle scalarization with a cast that isn't in the same
      // direction as the vector cast. This could be handled, but it would
      // require more intermediate unmerges.
      if (ConvertOp == TargetOpcode::G_TRUNC)
        return DestTy.getSizeInBits() <= OpEltSize;
      return DestTy.getSizeInBits() >= OpEltSize;
    }
    }
  }

  /// Try to replace DstReg with SrcReg or build a COPY instruction
  /// depending on the register constraints.
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    SmallVectorImpl<Register> &UpdatedDefs,
                                    GISelChangeObserver &Observer) {
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
      // Constraints (e.g. register classes/banks) prevent a plain
      // replacement, so insert a COPY instead.
      Builder.buildCopy(DstReg, SrcReg);
      UpdatedDefs.push_back(DstReg);
      return;
    }
    SmallVector<MachineInstr *, 4> UseMIs;
    // Get the users and notify the observer before replacing.
    for (auto &UseMI : MRI.use_instructions(DstReg)) {
      UseMIs.push_back(&UseMI);
      Observer.changingInstr(UseMI);
    }
    // Replace the registers.
    MRI.replaceRegWith(DstReg, SrcReg);
    UpdatedDefs.push_back(SrcReg);
    // Notify the observer that we changed the instructions.
    for (auto *UseMI : UseMIs)
      Observer.changedInstr(*UseMI);
  }

  /// Return the operand index in \p MI that defines \p Def
  static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
    unsigned DefIdx = 0;
    for (const MachineOperand &Def : MI.defs()) {
      if (Def.getReg() == SearchDef)
        break;
      ++DefIdx;
    }

    return DefIdx;
  }

  /// This class provides utilities for finding source registers of specific
  /// bit ranges in an artifact. The routines can look through the source
  /// registers if they're other artifacts to try to find a non-artifact source
  /// of a value.
  class ArtifactValueFinder {
    MachineRegisterInfo &MRI;
    MachineIRBuilder &MIB;
    const LegalizerInfo &LI;

    // Stores the best register found in the current query so far.
    // Returned by the find* helpers when no better (more direct) source for
    // the requested bit range can be located.
    Register CurrentBest = Register();

    /// Given an concat_vector op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 to skip the def operand.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // The request covers a whole source exactly; remember it as the best
        // so far and keep searching through it for something even better.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }

    /// Given an build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }

    /// Given an G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //   |  INS    |  CONTAINER   |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario B:
      //   --------------------------
      //   |  INS    |  CONTAINER   |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //   |  CONTAINER    |  INS   |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //   |  CONTAINER    |  INS   |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        // Scenarios B/C: the range lies entirely in the container.
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        // Scenarios A/D: the range lies entirely in the inserted value.
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }

    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      std::optional<DefinitionAndSourceRegister> DefSrcReg =
          getDefSrcRegIgnoringCopies(DefReg, MRI);
      MachineInstr *Def = DefSrcReg->MI;
      DefReg = DefSrcReg->Reg;
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        // Compute which slice of the unmerge source DefReg corresponds to.
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        return CurrentBest;
      }
    }

  public:
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}

    /// Try to find a source of the value defined in the def \p DefReg, starting
    /// at position \p StartBit with size \p Size.
    /// \returns a register with the requested size, or an empty Register if no
    /// better value could be found.
    Register findValueFromDef(Register DefReg, unsigned StartBit,
                              unsigned Size) {
      CurrentBest = Register();
      Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
      // Returning DefReg itself would be a no-op "improvement"; treat it as
      // not found.
      return FoundReg != DefReg ? FoundReg : Register();
    }

    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provides the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        // replaceRegWith above rewrote every occurrence including MI's own
        // def operand, so restore it here.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }

    /// If the value starting at bit 0 of \p Reg (with width \p Size) is
    /// produced by an unmerge, return that unmerge and set \p DefOperandIdx
    /// to the index of the def operand that provides it; otherwise return
    /// nullptr.
    GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size,
                                        unsigned &DefOperandIdx) {
      if (Register Def = findValueFromDefImpl(Reg, 0, Size)) {
        if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) {
          DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def);
          return Unmerge;
        }
      }
      return nullptr;
    }

    // Check if sequence of elements from merge-like instruction is defined by
    // another sequence of elements defined by unmerge. Most often this is the
    // same sequence. Search for elements using findValueFromDefImpl.
    bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
                               GUnmerge *Unmerge, unsigned UnmergeIdxStart,
                               unsigned NumElts, unsigned EltSize) {
      assert(MergeStartIdx + NumElts <= MI.getNumSources());
      for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
        unsigned EltUnmergeIdx;
        GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
            MI.getSourceReg(i), EltSize, EltUnmergeIdx);
        // Check if source i comes from the same Unmerge.
        if (!EltUnmerge || EltUnmerge != Unmerge)
          return false;
        // Check that source i's def has same index in sequence in Unmerge.
        if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
          return false;
      }
      return true;
    }

    /// Try to fold a merge-like instruction \p MI whose sources come from an
    /// unmerge, into a copy, a smaller unmerge, or a merge of the unmerge
    /// sources (the three patterns documented in the body).
    /// \returns true if a combine was performed; \p MI is then appended to
    /// \p DeadInsts.
    bool tryCombineMergeLike(GMergeLikeInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             SmallVectorImpl<Register> &UpdatedDefs,
                             GISelChangeObserver &Observer) {
      Register Elt0 = MI.getSourceReg(0);
      LLT EltTy = MRI.getType(Elt0);
      unsigned EltSize = EltTy.getSizeInBits();

      unsigned Elt0UnmergeIdx;
      // Search for unmerge that will be candidate for combine.
      auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx);
      if (!Unmerge)
        return false;

      unsigned NumMIElts = MI.getNumSources();
      Register Dst = MI.getReg(0);
      LLT DstTy = MRI.getType(Dst);
      Register UnmergeSrc = Unmerge->getSourceReg();
      LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);

      // Recognize copy of UnmergeSrc to Dst.
      // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst.
      //
      // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
      // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
      //
      // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
      if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize))
          return false;
        replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize UnmergeSrc that can be unmerged to DstTy directly.
      // Types have to be either both vector or both non-vector types.
      // Merge-like opcodes are combined one at the time. First one creates new
      // unmerge, following should use the same unmerge (builder performs CSE).
      //
      // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
      // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
      //
      // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          (Elt0UnmergeIdx % NumMIElts == 0) &&
          getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
                                   EltSize))
          return false;
        MIB.setInstrAndDebugLoc(MI);
        auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
        unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
        replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
                              UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize when multiple unmerged sources with UnmergeSrcTy type
      // can be merged into Dst with DstTy type directly.
      // Types have to be either both vector or both non-vector types.

      // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
      //
      // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc

      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
        SmallVector<Register, 4> ConcatSources;
        unsigned NumElts = Unmerge->getNumDefs();
        for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
          unsigned EltUnmergeIdx;
          auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
                                                     EltSize, EltUnmergeIdx);
          // All unmerges have to be the same size.
          if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
              (EltUnmergeIdx != 0))
            return false;
          if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
            return false;
          ConcatSources.push_back(UnmergeI->getSourceReg());
        }

        MIB.setInstrAndDebugLoc(MI);
        MIB.buildMergeLikeInstr(Dst, ConcatSources);
        DeadInsts.push_back(&MI);
        return true;
      }

      return false;
    }
  };

  /// Try to combine away a G_UNMERGE_VALUES \p MI, either via the
  /// ArtifactValueFinder or by folding unmerge-of-unmerge and related
  /// patterns. \returns true if a combine was performed.
  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result type
      // of an unmerge, this would involve the creation of an equivalent unmerge
      // to copy back to the original result registers.
978 LegalizeActionStep ActionStep = LI.getAction( 979 {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}}); 980 switch (ActionStep.Action) { 981 case LegalizeActions::Lower: 982 case LegalizeActions::Unsupported: 983 break; 984 case LegalizeActions::FewerElements: 985 case LegalizeActions::NarrowScalar: 986 if (ActionStep.TypeIdx == 1) 987 return false; 988 break; 989 default: 990 return false; 991 } 992 993 auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc); 994 995 // TODO: Should we try to process out the other defs now? If the other 996 // defs of the source unmerge are also unmerged, we end up with a separate 997 // unmerge for each one. 998 for (unsigned I = 0; I != NumDefs; ++I) { 999 Register Def = MI.getReg(I); 1000 replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I), 1001 MRI, Builder, UpdatedDefs, Observer); 1002 } 1003 1004 markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx); 1005 return true; 1006 } 1007 1008 MachineInstr *MergeI = SrcDef; 1009 unsigned ConvertOp = 0; 1010 1011 // Handle intermediate conversions 1012 unsigned SrcOp = SrcDef->getOpcode(); 1013 if (isArtifactCast(SrcOp)) { 1014 ConvertOp = SrcOp; 1015 MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI); 1016 } 1017 1018 if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(), 1019 ConvertOp, OpTy, DestTy)) { 1020 // We might have a chance to combine later by trying to combine 1021 // unmerge(cast) first 1022 return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs); 1023 } 1024 1025 const unsigned NumMergeRegs = MergeI->getNumOperands() - 1; 1026 1027 if (NumMergeRegs < NumDefs) { 1028 if (NumDefs % NumMergeRegs != 0) 1029 return false; 1030 1031 Builder.setInstr(MI); 1032 // Transform to UNMERGEs, for example 1033 // %1 = G_MERGE_VALUES %4, %5 1034 // %9, %10, %11, %12 = G_UNMERGE_VALUES %1 1035 // to 1036 // %9, %10 = G_UNMERGE_VALUES %4 1037 // %11, %12 = G_UNMERGE_VALUES %5 1038 1039 const unsigned NewNumDefs = NumDefs / 
NumMergeRegs; 1040 for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) { 1041 SmallVector<Register, 8> DstRegs; 1042 for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs; 1043 ++j, ++DefIdx) 1044 DstRegs.push_back(MI.getReg(DefIdx)); 1045 1046 if (ConvertOp) { 1047 LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg()); 1048 1049 // This is a vector that is being split and casted. Extract to the 1050 // element type, and do the conversion on the scalars (or smaller 1051 // vectors). 1052 LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs); 1053 1054 // Handle split to smaller vectors, with conversions. 1055 // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>) 1056 // %3(<8 x s16>) = G_SEXT %2 1057 // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3 1058 // 1059 // => 1060 // 1061 // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0 1062 // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1 1063 // %4(<2 x s16>) = G_SEXT %8 1064 // %5(<2 x s16>) = G_SEXT %9 1065 // %6(<2 x s16>) = G_SEXT %10 1066 // %7(<2 x s16>)= G_SEXT %11 1067 1068 SmallVector<Register, 4> TmpRegs(NewNumDefs); 1069 for (unsigned k = 0; k < NewNumDefs; ++k) 1070 TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy); 1071 1072 Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg()); 1073 1074 for (unsigned k = 0; k < NewNumDefs; ++k) 1075 Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]}); 1076 } else { 1077 Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg()); 1078 } 1079 UpdatedDefs.append(DstRegs.begin(), DstRegs.end()); 1080 } 1081 1082 } else if (NumMergeRegs > NumDefs) { 1083 if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0) 1084 return false; 1085 1086 Builder.setInstr(MI); 1087 // Transform to MERGEs 1088 // %6 = G_MERGE_VALUES %17, %18, %19, %20 1089 // %7, %8 = G_UNMERGE_VALUES %6 1090 // to 1091 // %7 = G_MERGE_VALUES %17, %18 1092 // %8 = G_MERGE_VALUES %19, %20 1093 1094 const unsigned NumRegs = 
NumMergeRegs / NumDefs; 1095 for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) { 1096 SmallVector<Register, 8> Regs; 1097 for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs; 1098 ++j, ++Idx) 1099 Regs.push_back(MergeI->getOperand(Idx).getReg()); 1100 1101 Register DefReg = MI.getReg(DefIdx); 1102 Builder.buildMergeLikeInstr(DefReg, Regs); 1103 UpdatedDefs.push_back(DefReg); 1104 } 1105 1106 } else { 1107 LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg()); 1108 1109 if (!ConvertOp && DestTy != MergeSrcTy) 1110 ConvertOp = TargetOpcode::G_BITCAST; 1111 1112 if (ConvertOp) { 1113 Builder.setInstr(MI); 1114 1115 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { 1116 Register DefReg = MI.getOperand(Idx).getReg(); 1117 Register MergeSrc = MergeI->getOperand(Idx + 1).getReg(); 1118 1119 if (!MRI.use_empty(DefReg)) { 1120 Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc}); 1121 UpdatedDefs.push_back(DefReg); 1122 } 1123 } 1124 1125 markInstAndDefDead(MI, *MergeI, DeadInsts); 1126 return true; 1127 } 1128 1129 assert(DestTy == MergeSrcTy && 1130 "Bitcast and the other kinds of conversions should " 1131 "have happened earlier"); 1132 1133 Builder.setInstr(MI); 1134 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { 1135 Register DstReg = MI.getOperand(Idx).getReg(); 1136 Register SrcReg = MergeI->getOperand(Idx + 1).getReg(); 1137 replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs, 1138 Observer); 1139 } 1140 } 1141 1142 markInstAndDefDead(MI, *MergeI, DeadInsts); 1143 return true; 1144 } 1145 1146 bool tryCombineExtract(MachineInstr &MI, 1147 SmallVectorImpl<MachineInstr *> &DeadInsts, 1148 SmallVectorImpl<Register> &UpdatedDefs) { 1149 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT); 1150 1151 // Try to use the source registers from a G_MERGE_VALUES 1152 // 1153 // %2 = G_MERGE_VALUES %0, %1 1154 // %3 = G_EXTRACT %2, N 1155 // => 1156 // 1157 // for N < %2.getSizeInBits() / 2 1158 // %3 = G_EXTRACT %0, N 1159 // 1160 // for N >= 
%2.getSizeInBits() / 2 1161 // %3 = G_EXTRACT %1, (N - %0.getSizeInBits() 1162 1163 Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg()); 1164 MachineInstr *MergeI = MRI.getVRegDef(SrcReg); 1165 if (!MergeI || !isa<GMergeLikeInstr>(MergeI)) 1166 return false; 1167 1168 Register DstReg = MI.getOperand(0).getReg(); 1169 LLT DstTy = MRI.getType(DstReg); 1170 LLT SrcTy = MRI.getType(SrcReg); 1171 1172 // TODO: Do we need to check if the resulting extract is supported? 1173 unsigned ExtractDstSize = DstTy.getSizeInBits(); 1174 unsigned Offset = MI.getOperand(2).getImm(); 1175 unsigned NumMergeSrcs = MergeI->getNumOperands() - 1; 1176 unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs; 1177 unsigned MergeSrcIdx = Offset / MergeSrcSize; 1178 1179 // Compute the offset of the last bit the extract needs. 1180 unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize; 1181 1182 // Can't handle the case where the extract spans multiple inputs. 1183 if (MergeSrcIdx != EndMergeSrcIdx) 1184 return false; 1185 1186 // TODO: We could modify MI in place in most cases. 1187 Builder.setInstr(MI); 1188 Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(), 1189 Offset - MergeSrcIdx * MergeSrcSize); 1190 UpdatedDefs.push_back(DstReg); 1191 markInstAndDefDead(MI, *MergeI, DeadInsts); 1192 return true; 1193 } 1194 1195 /// Try to combine away MI. 1196 /// Returns true if it combined away the MI. 1197 /// Adds instructions that are dead as a result of the combine 1198 /// into DeadInsts, which can include MI. 1199 bool tryCombineInstruction(MachineInstr &MI, 1200 SmallVectorImpl<MachineInstr *> &DeadInsts, 1201 GISelObserverWrapper &WrapperObserver) { 1202 ArtifactValueFinder Finder(MRI, Builder, LI); 1203 1204 // This might be a recursive call, and we might have DeadInsts already 1205 // populated. To avoid bad things happening later with multiple vreg defs 1206 // etc, process the dead instructions now if any. 
1207 if (!DeadInsts.empty()) 1208 deleteMarkedDeadInsts(DeadInsts, WrapperObserver); 1209 1210 // Put here every vreg that was redefined in such a way that it's at least 1211 // possible that one (or more) of its users (immediate or COPY-separated) 1212 // could become artifact combinable with the new definition (or the 1213 // instruction reachable from it through a chain of copies if any). 1214 SmallVector<Register, 4> UpdatedDefs; 1215 bool Changed = false; 1216 switch (MI.getOpcode()) { 1217 default: 1218 return false; 1219 case TargetOpcode::G_ANYEXT: 1220 Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); 1221 break; 1222 case TargetOpcode::G_ZEXT: 1223 Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver); 1224 break; 1225 case TargetOpcode::G_SEXT: 1226 Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs); 1227 break; 1228 case TargetOpcode::G_UNMERGE_VALUES: 1229 Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts, 1230 UpdatedDefs, WrapperObserver); 1231 break; 1232 case TargetOpcode::G_MERGE_VALUES: 1233 case TargetOpcode::G_BUILD_VECTOR: 1234 case TargetOpcode::G_CONCAT_VECTORS: 1235 // If any of the users of this merge are an unmerge, then add them to the 1236 // artifact worklist in case there's folding that can be done looking up. 
1237 for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) { 1238 if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES || 1239 U.getOpcode() == TargetOpcode::G_TRUNC) { 1240 UpdatedDefs.push_back(MI.getOperand(0).getReg()); 1241 break; 1242 } 1243 } 1244 Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts, 1245 UpdatedDefs, WrapperObserver); 1246 break; 1247 case TargetOpcode::G_EXTRACT: 1248 Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs); 1249 break; 1250 case TargetOpcode::G_TRUNC: 1251 Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver); 1252 if (!Changed) { 1253 // Try to combine truncates away even if they are legal. As all artifact 1254 // combines at the moment look only "up" the def-use chains, we achieve 1255 // that by throwing truncates' users (with look through copies) into the 1256 // ArtifactList again. 1257 UpdatedDefs.push_back(MI.getOperand(0).getReg()); 1258 } 1259 break; 1260 } 1261 // If the main loop through the ArtifactList found at least one combinable 1262 // pair of artifacts, not only combine it away (as done above), but also 1263 // follow the def-use chain from there to combine everything that can be 1264 // combined within this def-use chain of artifacts. 1265 while (!UpdatedDefs.empty()) { 1266 Register NewDef = UpdatedDefs.pop_back_val(); 1267 assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg"); 1268 for (MachineInstr &Use : MRI.use_instructions(NewDef)) { 1269 switch (Use.getOpcode()) { 1270 // Keep this list in sync with the list of all artifact combines. 1271 case TargetOpcode::G_ANYEXT: 1272 case TargetOpcode::G_ZEXT: 1273 case TargetOpcode::G_SEXT: 1274 case TargetOpcode::G_UNMERGE_VALUES: 1275 case TargetOpcode::G_EXTRACT: 1276 case TargetOpcode::G_TRUNC: 1277 case TargetOpcode::G_BUILD_VECTOR: 1278 // Adding Use to ArtifactList. 
1279 WrapperObserver.changedInstr(Use); 1280 break; 1281 case TargetOpcode::COPY: { 1282 Register Copy = Use.getOperand(0).getReg(); 1283 if (Copy.isVirtual()) 1284 UpdatedDefs.push_back(Copy); 1285 break; 1286 } 1287 default: 1288 // If we do not have an artifact combine for the opcode, there is no 1289 // point in adding it to the ArtifactList as nothing interesting will 1290 // be done to it anyway. 1291 break; 1292 } 1293 } 1294 } 1295 return Changed; 1296 } 1297 1298 private: 1299 static Register getArtifactSrcReg(const MachineInstr &MI) { 1300 switch (MI.getOpcode()) { 1301 case TargetOpcode::COPY: 1302 case TargetOpcode::G_TRUNC: 1303 case TargetOpcode::G_ZEXT: 1304 case TargetOpcode::G_ANYEXT: 1305 case TargetOpcode::G_SEXT: 1306 case TargetOpcode::G_EXTRACT: 1307 return MI.getOperand(1).getReg(); 1308 case TargetOpcode::G_UNMERGE_VALUES: 1309 return MI.getOperand(MI.getNumOperands() - 1).getReg(); 1310 default: 1311 llvm_unreachable("Not a legalization artifact happen"); 1312 } 1313 } 1314 1315 /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI 1316 /// (either by killing it or changing operands) results in DefMI being dead 1317 /// too. In-between COPYs or artifact-casts are also collected if they are 1318 /// dead. 1319 /// MI is not marked dead. 1320 void markDefDead(MachineInstr &MI, MachineInstr &DefMI, 1321 SmallVectorImpl<MachineInstr *> &DeadInsts, 1322 unsigned DefIdx = 0) { 1323 // Collect all the copy instructions that are made dead, due to deleting 1324 // this instruction. Collect all of them until the Trunc(DefMI). 1325 // Eg, 1326 // %1(s1) = G_TRUNC %0(s32) 1327 // %2(s1) = COPY %1(s1) 1328 // %3(s1) = COPY %2(s1) 1329 // %4(s32) = G_ANYEXT %3(s1) 1330 // In this case, we would have replaced %4 with a copy of %0, 1331 // and as a result, %3, %2, %1 are dead. 
1332 MachineInstr *PrevMI = &MI; 1333 while (PrevMI != &DefMI) { 1334 Register PrevRegSrc = getArtifactSrcReg(*PrevMI); 1335 1336 MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc); 1337 if (MRI.hasOneUse(PrevRegSrc)) { 1338 if (TmpDef != &DefMI) { 1339 assert((TmpDef->getOpcode() == TargetOpcode::COPY || 1340 isArtifactCast(TmpDef->getOpcode())) && 1341 "Expecting copy or artifact cast here"); 1342 1343 DeadInsts.push_back(TmpDef); 1344 } 1345 } else 1346 break; 1347 PrevMI = TmpDef; 1348 } 1349 1350 if (PrevMI == &DefMI) { 1351 unsigned I = 0; 1352 bool IsDead = true; 1353 for (MachineOperand &Def : DefMI.defs()) { 1354 if (I != DefIdx) { 1355 if (!MRI.use_empty(Def.getReg())) { 1356 IsDead = false; 1357 break; 1358 } 1359 } else { 1360 if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg())) 1361 break; 1362 } 1363 1364 ++I; 1365 } 1366 1367 if (IsDead) 1368 DeadInsts.push_back(&DefMI); 1369 } 1370 } 1371 1372 /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be 1373 /// dead due to MI being killed, then mark DefMI as dead too. 1374 /// Some of the combines (extends(trunc)), try to walk through redundant 1375 /// copies in between the extends and the truncs, and this attempts to collect 1376 /// the in between copies if they're dead. 1377 void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI, 1378 SmallVectorImpl<MachineInstr *> &DeadInsts, 1379 unsigned DefIdx = 0) { 1380 DeadInsts.push_back(&MI); 1381 markDefDead(MI, DefMI, DeadInsts, DefIdx); 1382 } 1383 1384 /// Erase the dead instructions in the list and call the observer hooks. 1385 /// Normally the Legalizer will deal with erasing instructions that have been 1386 /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to 1387 /// process instructions which have been marked dead, but otherwise break the 1388 /// MIR by introducing multiple vreg defs. For those cases, allow the combines 1389 /// to explicitly delete the instructions before we run into trouble. 
1390 void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts, 1391 GISelObserverWrapper &WrapperObserver) { 1392 for (auto *DeadMI : DeadInsts) { 1393 LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n"); 1394 WrapperObserver.erasingInstr(*DeadMI); 1395 DeadMI->eraseFromParent(); 1396 } 1397 DeadInsts.clear(); 1398 } 1399 1400 /// Checks if the target legalizer info has specified anything about the 1401 /// instruction, or if unsupported. 1402 bool isInstUnsupported(const LegalityQuery &Query) const { 1403 using namespace LegalizeActions; 1404 auto Step = LI.getAction(Query); 1405 return Step.Action == Unsupported || Step.Action == NotFound; 1406 } 1407 1408 bool isInstLegal(const LegalityQuery &Query) const { 1409 return LI.getAction(Query).Action == LegalizeActions::Legal; 1410 } 1411 1412 bool isConstantUnsupported(LLT Ty) const { 1413 if (!Ty.isVector()) 1414 return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}}); 1415 1416 LLT EltTy = Ty.getElementType(); 1417 return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) || 1418 isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}}); 1419 } 1420 1421 /// Looks through copy instructions and returns the actual 1422 /// source register. 1423 Register lookThroughCopyInstrs(Register Reg) { 1424 using namespace llvm::MIPatternMatch; 1425 1426 Register TmpReg; 1427 while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) { 1428 if (MRI.getType(TmpReg).isValid()) 1429 Reg = TmpReg; 1430 else 1431 break; 1432 } 1433 return Reg; 1434 } 1435 }; 1436 1437 } // namespace llvm 1438 1439 #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H 1440