1 //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains some helper functions which try to cleanup artifacts
9 // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
// the types match. This file also contains some combines of merges that happen
11 // at the end of the legalization.
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
15 #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
16 
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
21 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/Register.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/Support/Debug.h"
29 
30 #define DEBUG_TYPE "legalizer"
31 
32 namespace llvm {
33 class LegalizationArtifactCombiner {
34   MachineIRBuilder &Builder;
35   MachineRegisterInfo &MRI;
36   const LegalizerInfo &LI;
37 
38   static bool isArtifactCast(unsigned Opc) {
39     switch (Opc) {
40     case TargetOpcode::G_TRUNC:
41     case TargetOpcode::G_SEXT:
42     case TargetOpcode::G_ZEXT:
43     case TargetOpcode::G_ANYEXT:
44       return true;
45     default:
46       return false;
47     }
48   }
49 
50 public:
  /// \p B builds replacement instructions, \p MRI queries/updates virtual
  /// registers, and \p LI is consulted for legality before creating combined
  /// instructions. Only references are stored; no ownership is taken.
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                    const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}
54 
  /// Combine away a G_ANYEXT artifact:
  ///   aext(trunc x)    -> aext/copy/trunc x
  ///   aext([asz]ext x) -> [asz]ext x
  ///   aext(G_CONSTANT) -> wider G_CONSTANT, if legal
  /// and finally falls back to folding aext(G_IMPLICIT_DEF).
  /// Instructions made dead are appended to \p DeadInsts and registers whose
  /// definition changed to \p UpdatedDefs so their users can be re-visited.
  /// \returns true if a combine was performed.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) - > aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      // If the type before the trunc matches the destination, a plain
      // replacement (or COPY, if register constraints require it) suffices.
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      // The inner extension already defines the high bits, so re-emit the
      // same extension directly to the wider destination.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        // An anyext leaves the high bits unspecified, so sign-extending the
        // constant is a valid choice for them.
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
108 
  /// Combine away a G_ZEXT artifact:
  ///   zext(trunc x)    -> and (aext/copy/trunc x), mask
  ///   zext(sext x)     -> and (sext x), mask
  ///   zext(zext x)     -> zext x
  ///   zext(G_CONSTANT) -> wider G_CONSTANT, if legal
  /// and finally falls back to folding zext(G_IMPLICIT_DEF).
  /// Dead instructions are appended to \p DeadInsts and changed defs to
  /// \p UpdatedDefs. \returns true if a combine was performed.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      // Both the G_AND and its constant mask must be available on the target.
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // The mask keeps exactly the bits that were valid in the narrow type.
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
        DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      // Bring whichever source matched to the destination type before
      // masking. Exactly one of SextSrc/TruncSrc is valid here.
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Mutate this zext in place to read the inner source; the observer
      // must bracket the operand change.
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
171 
  /// Combine away a G_SEXT artifact:
  ///   sext(trunc x)    -> sext_inreg (aext/copy/trunc x), c
  ///   sext([sz]ext x)  -> [sz]ext x
  ///   sext(G_CONSTANT) -> wider G_CONSTANT, if legal
  /// and finally falls back to folding sext(G_IMPLICIT_DEF).
  /// Dead instructions are appended to \p DeadInsts and changed defs to
  /// \p UpdatedDefs. \returns true if a combine was performed.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Sign bit position comes from the narrow (truncated-to) type.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // The inner extension already fixes the top bits, so re-emit the same
      // extension directly to the wider destination.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
228 
229   bool tryCombineTrunc(MachineInstr &MI,
230                        SmallVectorImpl<MachineInstr *> &DeadInsts,
231                        SmallVectorImpl<Register> &UpdatedDefs,
232                        GISelObserverWrapper &Observer) {
233     using namespace llvm::MIPatternMatch;
234     assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
235 
236     Builder.setInstr(MI);
237     Register DstReg = MI.getOperand(0).getReg();
238     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
239 
240     // Try to fold trunc(g_constant) when the smaller constant type is legal.
241     auto *SrcMI = MRI.getVRegDef(SrcReg);
242     if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
243       const LLT DstTy = MRI.getType(DstReg);
244       if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
245         auto &CstVal = SrcMI->getOperand(1);
246         Builder.buildConstant(
247             DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
248         UpdatedDefs.push_back(DstReg);
249         markInstAndDefDead(MI, *SrcMI, DeadInsts);
250         return true;
251       }
252     }
253 
254     // Try to fold trunc(merge) to directly use the source of the merge.
255     // This gets rid of large, difficult to legalize, merges
256     if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
257       const Register MergeSrcReg = SrcMerge->getSourceReg(0);
258       const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
259       const LLT DstTy = MRI.getType(DstReg);
260 
261       // We can only fold if the types are scalar
262       const unsigned DstSize = DstTy.getSizeInBits();
263       const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
264       if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
265         return false;
266 
267       if (DstSize < MergeSrcSize) {
268         // When the merge source is larger than the destination, we can just
269         // truncate the merge source directly
270         if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
271           return false;
272 
273         LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
274                           << MI);
275 
276         Builder.buildTrunc(DstReg, MergeSrcReg);
277         UpdatedDefs.push_back(DstReg);
278       } else if (DstSize == MergeSrcSize) {
279         // If the sizes match we can simply try to replace the register
280         LLVM_DEBUG(
281             dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
282                    << MI);
283         replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
284                               Observer);
285       } else if (DstSize % MergeSrcSize == 0) {
286         // If the trunc size is a multiple of the merge source size we can use
287         // a smaller merge instead
288         if (isInstUnsupported(
289                 {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
290           return false;
291 
292         LLVM_DEBUG(
293             dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
294                    << MI);
295 
296         const unsigned NumSrcs = DstSize / MergeSrcSize;
297         assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
298                "trunc(merge) should require less inputs than merge");
299         SmallVector<Register, 8> SrcRegs(NumSrcs);
300         for (unsigned i = 0; i < NumSrcs; ++i)
301           SrcRegs[i] = SrcMerge->getSourceReg(i);
302 
303         Builder.buildMergeValues(DstReg, SrcRegs);
304         UpdatedDefs.push_back(DstReg);
305       } else {
306         // Unable to combine
307         return false;
308       }
309 
310       markInstAndDefDead(MI, *SrcMerge, DeadInsts);
311       return true;
312     }
313 
314     // trunc(trunc) -> trunc
315     Register TruncSrc;
316     if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
317       // Always combine trunc(trunc) since the eventual resulting trunc must be
318       // legal anyway as it must be legal for all outputs of the consumer type
319       // set.
320       LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);
321 
322       Builder.buildTrunc(DstReg, TruncSrc);
323       UpdatedDefs.push_back(DstReg);
324       markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
325       return true;
326     }
327 
328     return false;
329   }
330 
  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
  /// G_ANYEXT of an implicit def becomes a wider implicit def. G_SEXT and
  /// G_ZEXT become a constant 0: choosing 0 as the undefined value makes the
  /// extended bits 0 in both cases. The folded result must be legal for the
  /// destination type. \returns true on success; the dead def goes to
  /// \p DeadInsts and the changed register to \p UpdatedDefs.
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }
367 
  /// Try to fold a G_UNMERGE_VALUES \p MI whose source is defined by the cast
  /// artifact \p CastMI. Only G_TRUNC is currently handled:
  ///  - unmerge(trunc(vector)), same result scalar type: unmerge the wide
  ///    vector first, then truncate each piece individually.
  ///  - unmerge(trunc(scalar)): unmerge the wide scalar into more pieces and
  ///    keep only the defs the original unmerge requested.
  /// \returns true on success; dead instructions go to \p DeadInsts and
  /// changed defs to \p UpdatedDefs.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    // The unmerge has NumDefs results followed by one source operand.
    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        //  %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        //  %2:_(s8) = G_TRUNC %6
        //  %3:_(s8) = G_TRUNC %7
        //  %4:_(s8) = G_TRUNC %8
        //  %5:_(s8) = G_TRUNC %9

        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        // Re-use the original def registers as the trunc destinations so
        // users of the old unmerge are unaffected.
        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        //  %1:_(s16) = G_TRUNC %0(s32)
        //  %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }
461 
462   static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
463                                  LLT OpTy, LLT DestTy) {
464     // Check if we found a definition that is like G_MERGE_VALUES.
465     switch (MergeOp) {
466     default:
467       return false;
468     case TargetOpcode::G_BUILD_VECTOR:
469     case TargetOpcode::G_MERGE_VALUES:
470       // The convert operation that we will need to insert is
471       // going to convert the input of that type of instruction (scalar)
472       // to the destination type (DestTy).
473       // The conversion needs to stay in the same domain (scalar to scalar
474       // and vector to vector), so if we were to allow to fold the merge
475       // we would need to insert some bitcasts.
476       // E.g.,
477       // <2 x s16> = build_vector s16, s16
478       // <2 x s32> = zext <2 x s16>
479       // <2 x s16>, <2 x s16> = unmerge <2 x s32>
480       //
481       // As is the folding would produce:
482       // <2 x s16> = zext s16  <-- scalar to vector
483       // <2 x s16> = zext s16  <-- scalar to vector
484       // Which is invalid.
485       // Instead we would want to generate:
486       // s32 = zext s16
487       // <2 x s16> = bitcast s32
488       // s32 = zext s16
489       // <2 x s16> = bitcast s32
490       //
491       // That is not done yet.
492       if (ConvertOp == 0)
493         return true;
494       return !DestTy.isVector() && OpTy.isVector() &&
495              DestTy == OpTy.getElementType();
496     case TargetOpcode::G_CONCAT_VECTORS: {
497       if (ConvertOp == 0)
498         return true;
499       if (!DestTy.isVector())
500         return false;
501 
502       const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
503 
504       // Don't handle scalarization with a cast that isn't in the same
505       // direction as the vector cast. This could be handled, but it would
506       // require more intermediate unmerges.
507       if (ConvertOp == TargetOpcode::G_TRUNC)
508         return DestTy.getSizeInBits() <= OpEltSize;
509       return DestTy.getSizeInBits() >= OpEltSize;
510     }
511     }
512   }
513 
  /// Try to replace DstReg with SrcReg or build a COPY instruction
  /// depending on the register constraints.
  /// If direct replacement is possible, all users of \p DstReg are reported
  /// to \p Observer (changing before, changed after the rewrite) and
  /// \p SrcReg is recorded in \p UpdatedDefs; otherwise a COPY is emitted
  /// and \p DstReg is recorded instead.
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    SmallVectorImpl<Register> &UpdatedDefs,
                                    GISelChangeObserver &Observer) {
    // Register class / bank constraints may forbid a plain replacement.
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
      Builder.buildCopy(DstReg, SrcReg);
      UpdatedDefs.push_back(DstReg);
      return;
    }
    SmallVector<MachineInstr *, 4> UseMIs;
    // Get the users and notify the observer before replacing.
    for (auto &UseMI : MRI.use_instructions(DstReg)) {
      UseMIs.push_back(&UseMI);
      Observer.changingInstr(UseMI);
    }
    // Replace the registers.
    MRI.replaceRegWith(DstReg, SrcReg);
    UpdatedDefs.push_back(SrcReg);
    // Notify the observer that we changed the instructions.
    for (auto *UseMI : UseMIs)
      Observer.changedInstr(*UseMI);
  }
539 
540   /// Return the operand index in \p MI that defines \p Def
541   static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
542     unsigned DefIdx = 0;
543     for (const MachineOperand &Def : MI.defs()) {
544       if (Def.getReg() == SearchDef)
545         break;
546       ++DefIdx;
547     }
548 
549     return DefIdx;
550   }
551 
552   /// This class provides utilities for finding source registers of specific
553   /// bit ranges in an artifact. The routines can look through the source
554   /// registers if they're other artifacts to try to find a non-artifact source
555   /// of a value.
556   class ArtifactValueFinder {
557     MachineRegisterInfo &MRI;
558     MachineIRBuilder &MIB;
559     const LegalizerInfo &LI;
560 
561     // Stores the best register found in the current query so far.
562     Register CurrentBest = Register();
563 
    /// Given a concat_vector op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 is the concat's destination.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able to return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // This source covers the request exactly; remember it as the best
        // answer so far, then keep searching through its own def.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }
595 
    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 because operand 0 is the build_vector's destination.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }
649 
    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query. Returns an empty Register if
    /// the requested range straddles the container/inserted-value boundary.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        // Scenarios B and C: range lies entirely within the container.
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        // Scenarios A and D: range lies entirely within the inserted value.
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          // The inserted value covers the request exactly; remember it.
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
721 
    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      std::optional<DefinitionAndSourceRegister> DefSrcReg =
          getDefSrcRegIgnoringCopies(DefReg, MRI);
      // NOTE(review): DefSrcReg is dereferenced without a has_value() check;
      // presumably a vreg def is always findable on this path -- confirm
      // getDefSrcRegIgnoringCopies cannot return std::nullopt here.
      MachineInstr *Def = DefSrcReg->MI;
      DefReg = DefSrcReg->Reg;
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        // Compute DefReg's bit offset within the unmerge's source by counting
        // the equally-sized defs that precede it.
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        // The unmerge's single source is its last operand.
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        return CurrentBest;
      }
    }
766 
  public:
    /// \p Mri, \p Builder and \p Info are the utilities the finder operates
    /// on; they are stored by reference and must outlive this object.
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}
771 
772     /// Try to find a source of the value defined in the def \p DefReg, starting
773     /// at position \p StartBit with size \p Size.
774     /// \returns a register with the requested size, or an empty Register if no
775     /// better value could be found.
776     Register findValueFromDef(Register DefReg, unsigned StartBit,
777                               unsigned Size) {
778       CurrentBest = Register();
779       Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
780       return FoundReg != DefReg ? FoundReg : Register();
781     }
782 
    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provides the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        // A def with no non-debug uses is trivially dead.
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        // Only a type-exact replacement is usable here.
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        // Restore the def operand that the replacement above rewrote.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }
814 
815     GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size,
816                                         unsigned &DefOperandIdx) {
817       if (Register Def = findValueFromDefImpl(Reg, 0, Size)) {
818         if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) {
819           DefOperandIdx = Unmerge->findRegisterDefOperandIdx(Def);
820           return Unmerge;
821         }
822       }
823       return nullptr;
824     }
825 
826     // Check if sequence of elements from merge-like instruction is defined by
827     // another sequence of elements defined by unmerge. Most often this is the
828     // same sequence. Search for elements using findValueFromDefImpl.
829     bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
830                                GUnmerge *Unmerge, unsigned UnmergeIdxStart,
831                                unsigned NumElts, unsigned EltSize) {
832       assert(MergeStartIdx + NumElts <= MI.getNumSources());
833       for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
834         unsigned EltUnmergeIdx;
835         GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
836             MI.getSourceReg(i), EltSize, EltUnmergeIdx);
837         // Check if source i comes from the same Unmerge.
838         if (!EltUnmerge || EltUnmerge != Unmerge)
839           return false;
840         // Check that source i's def has same index in sequence in Unmerge.
841         if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
842           return false;
843       }
844       return true;
845     }
846 
    /// Try to combine away a merge-like instruction \p MI (G_MERGE_VALUES,
    /// G_BUILD_VECTOR or G_CONCAT_VECTORS) whose sources are produced by one
    /// or more G_UNMERGE_VALUES: fold the unmerge/merge pair into a copy, a
    /// direct (smaller) unmerge, or a merge of the unmerge sources.
    /// \returns true if \p MI was combined away (it is added to \p DeadInsts).
    bool tryCombineMergeLike(GMergeLikeInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             SmallVectorImpl<Register> &UpdatedDefs,
                             GISelObserverWrapper &Observer) {
      Register Elt0 = MI.getSourceReg(0);
      LLT EltTy = MRI.getType(Elt0);
      unsigned EltSize = EltTy.getSizeInBits();

      unsigned Elt0UnmergeIdx;
      // Search for unmerge that will be candidate for combine.
      auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx);
      if (!Unmerge)
        return false;

      unsigned NumMIElts = MI.getNumSources();
      Register Dst = MI.getReg(0);
      LLT DstTy = MRI.getType(Dst);
      Register UnmergeSrc = Unmerge->getSourceReg();
      LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);

      // Recognize copy of UnmergeSrc to Dst.
      // Unmerge UnmergeSrc and reassemble it using merge-like opcode into Dst.
      //
      // %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
      // %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
      //
      // %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
      if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize))
          return false;
        replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize UnmergeSrc that can be unmerged to DstTy directly.
      // Types have to be either both vector or both non-vector types.
      // Merge-like opcodes are combined one at the time. First one creates new
      // unmerge, following should use the same unmerge (builder performs CSE).
      //
      // %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
      // %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
      //
      // %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          (Elt0UnmergeIdx % NumMIElts == 0) &&
          getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
                                   EltSize))
          return false;
        MIB.setInstrAndDebugLoc(MI);
        auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
        // Which def of the new, wider unmerge corresponds to Dst.
        unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
        replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
                              UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize when multiple unmerged sources with UnmergeSrcTy type
      // can be merged into Dst with DstTy type directly.
      // Types have to be either both vector or both non-vector types.

      // %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      // %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
      // %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
      //
      // %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc

      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
        SmallVector<Register, 4> ConcatSources;
        unsigned NumElts = Unmerge->getNumDefs();
        for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
          unsigned EltUnmergeIdx;
          auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
                                                     EltSize, EltUnmergeIdx);
          // All unmerges have to be the same size.
          if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
              (EltUnmergeIdx != 0))
            return false;
          if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize))
            return false;
          ConcatSources.push_back(UnmergeI->getSourceReg());
        }

        MIB.setInstrAndDebugLoc(MI);
        MIB.buildMergeLikeInstr(Dst, ConcatSources);
        DeadInsts.push_back(&MI);
        return true;
      }

      return false;
    }
942   };
943 
  /// Try to combine away the unmerge \p MI by inspecting the instruction that
  /// defines its source: handles unmerge-of-unmerge and unmerge of merge-like
  /// instructions (optionally separated by an artifact cast).
  /// \returns true if \p MI was combined away; dead instructions are appended
  /// to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    // First, try to lift each def individually by tracing where its bits
    // originate from.
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result type
      // of an unmerge, this would involve the creation of an equivalent unmerge
      // to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        // Only bail if it is the (wider) source type that would be split.
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a separate
      // unmerge for each one.
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        // MI's defs map to the chunk of NewUnmerge's defs that corresponds to
        // the def of the outer unmerge MI was reading from.
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      //   %1 = G_MERGE_VALUES %4, %5
      //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      //   %9, %10 = G_UNMERGE_VALUES %4
      //   %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>)= G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      //   %6 = G_MERGE_VALUES %17, %18, %19, %20
      //   %7, %8 = G_UNMERGE_VALUES %6
      // to
      //   %7 = G_MERGE_VALUES %17, %18
      //   %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMergeLikeInstr(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      // Same number of merge sources and unmerge defs: 1:1 correspondence.
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          // Only emit the conversion when its result is actually used.
          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }
1145 
  /// Fold a G_EXTRACT from a merge-like instruction into an extract from the
  /// single merge source that provides the requested bits.
  /// \returns true if \p MI was rewritten (it is added to \p DeadInsts).
  bool tryCombineExtract(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
                         SmallVectorImpl<Register> &UpdatedDefs) {
    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);

    // Try to use the source registers from a G_MERGE_VALUES
    //
    // %2 = G_MERGE_VALUES %0, %1
    // %3 = G_EXTRACT %2, N
    // =>
    //
    // for N < %2.getSizeInBits() / 2
    //     %3 = G_EXTRACT %0, N
    //
    // for N >= %2.getSizeInBits() / 2
    //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits())

    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
    MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
    if (!MergeI || !isa<GMergeLikeInstr>(MergeI))
      return false;

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    // TODO: Do we need to check if the resulting extract is supported?
    unsigned ExtractDstSize = DstTy.getSizeInBits();
    unsigned Offset = MI.getOperand(2).getImm();
    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
    // Index of the merge source containing the first extracted bit.
    unsigned MergeSrcIdx = Offset / MergeSrcSize;

    // Compute the offset of the last bit the extract needs.
    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;

    // Can't handle the case where the extract spans multiple inputs.
    if (MergeSrcIdx != EndMergeSrcIdx)
      return false;

    // TODO: We could modify MI in place in most cases.
    Builder.setInstr(MI);
    Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
                         Offset - MergeSrcIdx * MergeSrcSize);
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }
1194 
  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    ArtifactValueFinder Finder(MRI, Builder, LI);

    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI), DeadInsts,
                                           UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all artifact
        // combines at the moment look only "up" the def-use chains, we achieve
        // that by throwing truncates' users (with look through copies) into the
        // ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
        case TargetOpcode::G_BUILD_VECTOR:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          Register Copy = Use.getOperand(0).getReg();
          // Only virtual registers can participate in further combines.
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }
1297 
1298 private:
1299   static Register getArtifactSrcReg(const MachineInstr &MI) {
1300     switch (MI.getOpcode()) {
1301     case TargetOpcode::COPY:
1302     case TargetOpcode::G_TRUNC:
1303     case TargetOpcode::G_ZEXT:
1304     case TargetOpcode::G_ANYEXT:
1305     case TargetOpcode::G_SEXT:
1306     case TargetOpcode::G_EXTRACT:
1307       return MI.getOperand(1).getReg();
1308     case TargetOpcode::G_UNMERGE_VALUES:
1309       return MI.getOperand(MI.getNumOperands() - 1).getReg();
1310     default:
1311       llvm_unreachable("Not a legalization artifact happen");
1312     }
1313   }
1314 
  /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
  /// (either by killing it or changing operands) results in DefMI being dead
  /// too. In-between COPYs or artifact-casts are also collected if they are
  /// dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        // A register with multiple uses stops the walk: the chain above it
        // stays live.
        break;
      PrevMI = TmpDef;
    }

    // Only when the walk actually reached DefMI can we reason about whether
    // DefMI itself became dead.
    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          // Any other def that still has uses keeps DefMI alive.
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          // NOTE(review): this break leaves IsDead == true even though the
          // DefIdx def has more than one use. It looks like callers only get
          // here after the walk above already verified that register has a
          // single use -- confirm, otherwise DefMI could be marked dead
          // prematurely.
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }
1371 
  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)), try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to collect
  /// the in between copies if they're dead.
  /// \p DefIdx selects which def of \p DefMI fed \p MI (defaults to 0).
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }
1383 
1384   /// Erase the dead instructions in the list and call the observer hooks.
1385   /// Normally the Legalizer will deal with erasing instructions that have been
1386   /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
1387   /// process instructions which have been marked dead, but otherwise break the
1388   /// MIR by introducing multiple vreg defs. For those cases, allow the combines
1389   /// to explicitly delete the instructions before we run into trouble.
1390   void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
1391                              GISelObserverWrapper &WrapperObserver) {
1392     for (auto *DeadMI : DeadInsts) {
1393       LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
1394       WrapperObserver.erasingInstr(*DeadMI);
1395       DeadMI->eraseFromParent();
1396     }
1397     DeadInsts.clear();
1398   }
1399 
1400   /// Checks if the target legalizer info has specified anything about the
1401   /// instruction, or if unsupported.
1402   bool isInstUnsupported(const LegalityQuery &Query) const {
1403     using namespace LegalizeActions;
1404     auto Step = LI.getAction(Query);
1405     return Step.Action == Unsupported || Step.Action == NotFound;
1406   }
1407 
1408   bool isInstLegal(const LegalityQuery &Query) const {
1409     return LI.getAction(Query).Action == LegalizeActions::Legal;
1410   }
1411 
1412   bool isConstantUnsupported(LLT Ty) const {
1413     if (!Ty.isVector())
1414       return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
1415 
1416     LLT EltTy = Ty.getElementType();
1417     return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
1418            isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
1419   }
1420 
1421   /// Looks through copy instructions and returns the actual
1422   /// source register.
1423   Register lookThroughCopyInstrs(Register Reg) {
1424     using namespace llvm::MIPatternMatch;
1425 
1426     Register TmpReg;
1427     while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
1428       if (MRI.getType(TmpReg).isValid())
1429         Reg = TmpReg;
1430       else
1431         break;
1432     }
1433     return Reg;
1434   }
1435 };
1436 
1437 } // namespace llvm
1438 
1439 #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
1440