1 //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains some helper functions which try to cleanup artifacts
9 // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
// the types match. This file also contains some combines of merges that happen
// at the end of the legalization.
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
15 #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
16 
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
21 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/Register.h"
27 #include "llvm/Support/Debug.h"
28 
29 #define DEBUG_TYPE "legalizer"
30 using namespace llvm::MIPatternMatch;
31 
32 namespace llvm {
33 class LegalizationArtifactCombiner {
34   MachineIRBuilder &Builder;
35   MachineRegisterInfo &MRI;
36   const LegalizerInfo &LI;
37 
38   static bool isArtifactCast(unsigned Opc) {
39     switch (Opc) {
40     case TargetOpcode::G_TRUNC:
41     case TargetOpcode::G_SEXT:
42     case TargetOpcode::G_ZEXT:
43     case TargetOpcode::G_ANYEXT:
44       return true;
45     default:
46       return false;
47     }
48   }
49 
50 public:
  /// \p B is used to build replacement instructions, \p MRI to query register
  /// types and defining instructions, and \p LI to check which replacements
  /// are legal for the target.
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                    const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}
54 
  /// Try to remove a G_ANYEXT artifact \p MI by combining it with the
  /// instruction that defines its source operand.
  ///
  /// Handled patterns:
  ///   - aext(trunc x)        -> aext/copy/trunc of x
  ///   - aext([asz]ext x)     -> [asz]ext x
  ///   - aext(G_CONSTANT)     -> wider G_CONSTANT (when legal)
  ///   - aext(G_IMPLICIT_DEF) -> via tryFoldImplicitDef
  ///
  /// Instructions made dead are appended to \p DeadInsts; registers whose
  /// definitions changed are appended to \p UpdatedDefs. \p Observer is
  /// notified when users of a replaced register are rewritten.
  /// \returns true if a combine was applied.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    // Look through COPYs so the combine also fires on copied artifacts.
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) - > aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      // If the types round-trip exactly, reuse the original value directly.
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      // Re-emit the inner extension directly at the wider destination type.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        // The high bits of an anyext are unspecified, so sign-extending the
        // constant value here is a valid choice.
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
107 
  /// Try to remove a G_ZEXT artifact \p MI by combining it with the
  /// instruction that defines its source operand.
  ///
  /// Handled patterns:
  ///   - zext(trunc x), zext(sext x) -> and(ext/copy/trunc x, mask)
  ///   - zext(zext x)                -> zext x
  ///   - zext(G_CONSTANT)            -> wider G_CONSTANT (when legal)
  ///   - zext(G_IMPLICIT_DEF)        -> via tryFoldImplicitDef
  ///
  /// Instructions made dead are appended to \p DeadInsts; registers whose
  /// definitions changed are appended to \p UpdatedDefs. \p Observer is
  /// notified when \p MI is modified in place.
  /// \returns true if a combine was applied.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      // Both the G_AND and its constant mask operand must be representable.
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // The mask keeps only the bits of the narrow (pre-zext) source width.
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
        DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      // Adjust whichever source matched to the destination type if needed.
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Rewrite this zext in place to read the inner zext's source directly.
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
169 
  /// Try to remove a G_SEXT artifact \p MI by combining it with the
  /// instruction that defines its source operand.
  ///
  /// Handled patterns:
  ///   - sext(trunc x)        -> sext_inreg(ext/copy/trunc x, width)
  ///   - sext(zext x)         -> zext x
  ///   - sext(sext x)         -> sext x
  ///   - sext(G_CONSTANT)     -> wider G_CONSTANT (when legal)
  ///   - sext(G_IMPLICIT_DEF) -> via tryFoldImplicitDef
  ///
  /// Instructions made dead are appended to \p DeadInsts; registers whose
  /// definitions changed are appended to \p UpdatedDefs.
  /// \returns true if a combine was applied.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Sign-extend from the width of the truncated (narrow) value.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Re-emit the inner extension at the wider destination type; its
      // semantics (zero or sign) are preserved.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
225 
  /// Try to remove a G_TRUNC artifact \p MI by combining it with the
  /// instruction that defines its source operand.
  ///
  /// Handled patterns:
  ///   - trunc(G_CONSTANT)      -> narrower G_CONSTANT (when legal)
  ///   - trunc(merge-like)      -> reuse, truncate, or re-merge the merge's
  ///                               sources, depending on relative sizes
  ///   - trunc(trunc x)         -> trunc x
  ///
  /// Instructions made dead are appended to \p DeadInsts; registers whose
  /// definitions changed are appended to \p UpdatedDefs. \p Observer is
  /// notified when users of a replaced register are rewritten.
  /// \returns true if a combine was applied.
  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult to legalize, merges
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
      const LLT DstTy = MRI.getType(DstReg);

      // We can only fold if the types are scalar
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size we can use
        // a smaller merge instead
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        // The new merge reads only the low-order subset of the original
        // merge's sources (a trunc keeps the low bits).
        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require less inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

        Builder.buildMerge(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    return false;
  }
326 
327   /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
328   bool tryFoldImplicitDef(MachineInstr &MI,
329                           SmallVectorImpl<MachineInstr *> &DeadInsts,
330                           SmallVectorImpl<Register> &UpdatedDefs) {
331     unsigned Opcode = MI.getOpcode();
332     assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
333            Opcode == TargetOpcode::G_SEXT);
334 
335     if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
336                                            MI.getOperand(1).getReg(), MRI)) {
337       Builder.setInstr(MI);
338       Register DstReg = MI.getOperand(0).getReg();
339       LLT DstTy = MRI.getType(DstReg);
340 
341       if (Opcode == TargetOpcode::G_ANYEXT) {
342         // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
343         if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
344           return false;
345         LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
346         Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
347         UpdatedDefs.push_back(DstReg);
348       } else {
349         // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
350         // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
351         if (isConstantUnsupported(DstTy))
352           return false;
353         LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
354         Builder.buildConstant(DstReg, 0);
355         UpdatedDefs.push_back(DstReg);
356       }
357 
358       markInstAndDefDead(MI, *DefMI, DeadInsts);
359       return true;
360     }
361     return false;
362   }
363 
  /// Try to combine away \p MI, a G_UNMERGE_VALUES, when its source is
  /// produced by the cast artifact \p CastMI.
  ///
  /// Only G_TRUNC casts are currently handled:
  ///   - unmerge(trunc <vector>)  -> unmerge the untruncated vector, then
  ///     truncate each resulting piece;
  ///   - unmerge(trunc <scalar>)  -> a wider unmerge of the untruncated
  ///     scalar whose extra high pieces feed fresh (unused) registers.
  ///
  /// Instructions made dead are appended to \p DeadInsts; redefined registers
  /// are appended to \p UpdatedDefs.
  /// \returns true if a combine was applied.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    // All operands except the last (the unmerge source) are defs.
    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        //  %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        //  %2:_(s8) = G_TRUNC %6
        //  %3:_(s8) = G_TRUNC %7
        //  %4:_(s8) = G_TRUNC %8
        //  %5:_(s8) = G_TRUNC %9

        // Each piece of the new unmerge covers the same element span as a
        // piece of the original unmerge, but at the untruncated element type.
        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        //  %1:_(s16) = G_TRUNC %0(s32)
        //  %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }
457 
458   static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
459                                  LLT OpTy, LLT DestTy) {
460     // Check if we found a definition that is like G_MERGE_VALUES.
461     switch (MergeOp) {
462     default:
463       return false;
464     case TargetOpcode::G_BUILD_VECTOR:
465     case TargetOpcode::G_MERGE_VALUES:
466       // The convert operation that we will need to insert is
467       // going to convert the input of that type of instruction (scalar)
468       // to the destination type (DestTy).
469       // The conversion needs to stay in the same domain (scalar to scalar
470       // and vector to vector), so if we were to allow to fold the merge
471       // we would need to insert some bitcasts.
472       // E.g.,
473       // <2 x s16> = build_vector s16, s16
474       // <2 x s32> = zext <2 x s16>
475       // <2 x s16>, <2 x s16> = unmerge <2 x s32>
476       //
477       // As is the folding would produce:
478       // <2 x s16> = zext s16  <-- scalar to vector
479       // <2 x s16> = zext s16  <-- scalar to vector
480       // Which is invalid.
481       // Instead we would want to generate:
482       // s32 = zext s16
483       // <2 x s16> = bitcast s32
484       // s32 = zext s16
485       // <2 x s16> = bitcast s32
486       //
487       // That is not done yet.
488       if (ConvertOp == 0)
489         return true;
490       return !DestTy.isVector() && OpTy.isVector() &&
491              DestTy == OpTy.getElementType();
492     case TargetOpcode::G_CONCAT_VECTORS: {
493       if (ConvertOp == 0)
494         return true;
495       if (!DestTy.isVector())
496         return false;
497 
498       const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
499 
500       // Don't handle scalarization with a cast that isn't in the same
501       // direction as the vector cast. This could be handled, but it would
502       // require more intermediate unmerges.
503       if (ConvertOp == TargetOpcode::G_TRUNC)
504         return DestTy.getSizeInBits() <= OpEltSize;
505       return DestTy.getSizeInBits() >= OpEltSize;
506     }
507     }
508   }
509 
510   /// Try to replace DstReg with SrcReg or build a COPY instruction
511   /// depending on the register constraints.
512   static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
513                                     MachineRegisterInfo &MRI,
514                                     MachineIRBuilder &Builder,
515                                     SmallVectorImpl<Register> &UpdatedDefs,
516                                     GISelChangeObserver &Observer) {
517     if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
518       Builder.buildCopy(DstReg, SrcReg);
519       UpdatedDefs.push_back(DstReg);
520       return;
521     }
522     SmallVector<MachineInstr *, 4> UseMIs;
523     // Get the users and notify the observer before replacing.
524     for (auto &UseMI : MRI.use_instructions(DstReg)) {
525       UseMIs.push_back(&UseMI);
526       Observer.changingInstr(UseMI);
527     }
528     // Replace the registers.
529     MRI.replaceRegWith(DstReg, SrcReg);
530     UpdatedDefs.push_back(SrcReg);
531     // Notify the observer that we changed the instructions.
532     for (auto *UseMI : UseMIs)
533       Observer.changedInstr(*UseMI);
534   }
535 
536   /// Return the operand index in \p MI that defines \p Def
537   static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
538     unsigned DefIdx = 0;
539     for (const MachineOperand &Def : MI.defs()) {
540       if (Def.getReg() == SearchDef)
541         break;
542       ++DefIdx;
543     }
544 
545     return DefIdx;
546   }
547 
548   /// This class provides utilities for finding source registers of specific
549   /// bit ranges in an artifact. The routines can look through the source
550   /// registers if they're other artifacts to try to find a non-artifact source
551   /// of a value.
552   class ArtifactValueFinder {
553     MachineRegisterInfo &MRI;
554     MachineIRBuilder &MIB;
555     const LegalizerInfo &LI;
556 
557     // Stores the best register found in the current query so far.
558     Register CurrentBest = Register();
559 
    /// Given a concat_vector op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All concat sources have the same type, so the first one's size is
      // representative.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able to return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // The range covers one source exactly; remember it as a fallback
        // while searching deeper through its definition.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }
591 
    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      // All build_vector sources have the same (scalar) type.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        // Assemble the subset of sources covering the requested range into a
        // new, smaller build_vector.
        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }
645 
    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      // Scenarios B and C: the range lies entirely outside the inserted bits,
      // so it comes from the container.
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // Scenarios A and D: the range lies entirely inside the inserted bits.
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
717 
    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        // Compute DefReg's bit offset into the unmerge source by counting the
        // defs preceding it; every unmerge def has the same size.
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        // The unmerge source is the last operand.
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        // Not an instruction we can look through; the best found so far wins.
        return CurrentBest;
      }
    }
759 
760   public:
761     ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
762                         const LegalizerInfo &Info)
763         : MRI(Mri), MIB(Builder), LI(Info) {}
764 
765     /// Try to find a source of the value defined in the def \p DefReg, starting
766     /// at position \p StartBit with size \p Size.
767     /// \returns a register with the requested size, or an empty Register if no
768     /// better value could be found.
769     Register findValueFromDef(Register DefReg, unsigned StartBit,
770                               unsigned Size) {
771       CurrentBest = Register();
772       Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
773       return FoundReg != DefReg ? FoundReg : Register();
774     }
775 
    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provides the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      // All defs of an unmerge share the same type.
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        // A def with no non-debug uses is trivially dead.
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        // Only substitute a value of exactly the same type; the finder may
        // return a register whose bits match but whose type does not.
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }
807   };
808 
  /// Try to combine away the unmerge \p MI: first via the generic bit-range
  /// search, then by folding unmerge(unmerge), and finally by folding the
  /// unmerge with a merge-like source instruction, possibly reached through an
  /// intermediate artifact cast.
  /// \returns true if \p MI was combined away. Dead instructions are appended
  /// to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    // All defs of an unmerge share the same type.
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    // First attempt: rewrite every def of the unmerge to a value that already
    // provides its bits. If that succeeds, the unmerge is dead.
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    // Second attempt: fold unmerge(unmerge) into a single unmerge of the
    // grandparent source.
    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result type
      // of an unmerge, this would involve the creation of an equivalent unmerge
      // to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        // Splitting the new unmerge's result type is acceptable, but not
        // splitting its (type-index 1) source.
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a separate
      // unmerge for each one.
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        // SrcDefIdx selects the block of NewUnmerge defs corresponding to the
        // def of the source unmerge that MI was consuming.
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      // Each merge source must map to a whole number of unmerge defs.
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      //   %1 = G_MERGE_VALUES %4, %5
      //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      //   %9, %10 = G_UNMERGE_VALUES %4
      //   %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        // Collect the slice of MI's defs produced from merge source Idx.
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>)= G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          // Convert each small piece into the corresponding original def.
          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          // No conversion involved: unmerge the merge source directly into
          // MI's defs.
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      //   %6 = G_MERGE_VALUES %17, %18, %19, %20
      //   %7, %8 = G_UNMERGE_VALUES %6
      // to
      //   %7 = G_MERGE_VALUES %17, %18
      //   %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        // Gather the group of merge sources feeding def DefIdx. Operand 0 of
        // MergeI is its def, hence the +1.
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMerge(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      // Same number of merge sources and unmerge defs: 1:1 mapping, possibly
      // through a conversion or (when types differ) a bitcast.
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          // Skip materializing the conversion for defs with no uses.
          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      // Types match exactly: forward each merge source to the matching def.
      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }
1010 
1011   bool tryCombineExtract(MachineInstr &MI,
1012                          SmallVectorImpl<MachineInstr *> &DeadInsts,
1013                          SmallVectorImpl<Register> &UpdatedDefs) {
1014     assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
1015 
1016     // Try to use the source registers from a G_MERGE_VALUES
1017     //
1018     // %2 = G_MERGE_VALUES %0, %1
1019     // %3 = G_EXTRACT %2, N
1020     // =>
1021     //
1022     // for N < %2.getSizeInBits() / 2
1023     //     %3 = G_EXTRACT %0, N
1024     //
1025     // for N >= %2.getSizeInBits() / 2
1026     //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
1027 
1028     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
1029     MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
1030     if (!MergeI || !isa<GMergeLikeOp>(MergeI))
1031       return false;
1032 
1033     Register DstReg = MI.getOperand(0).getReg();
1034     LLT DstTy = MRI.getType(DstReg);
1035     LLT SrcTy = MRI.getType(SrcReg);
1036 
1037     // TODO: Do we need to check if the resulting extract is supported?
1038     unsigned ExtractDstSize = DstTy.getSizeInBits();
1039     unsigned Offset = MI.getOperand(2).getImm();
1040     unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
1041     unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
1042     unsigned MergeSrcIdx = Offset / MergeSrcSize;
1043 
1044     // Compute the offset of the last bit the extract needs.
1045     unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
1046 
1047     // Can't handle the case where the extract spans multiple inputs.
1048     if (MergeSrcIdx != EndMergeSrcIdx)
1049       return false;
1050 
1051     // TODO: We could modify MI in place in most cases.
1052     Builder.setInstr(MI);
1053     Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
1054                          Offset - MergeSrcIdx * MergeSrcSize);
1055     UpdatedDefs.push_back(DstReg);
1056     markInstAndDefDead(MI, *MergeI, DeadInsts);
1057     return true;
1058   }
1059 
  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    // Dispatch to the combine matching MI's opcode.
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all artifact
        // combines at the moment look only "up" the def-use chains, we achieve
        // that by throwing truncates' users (with look through copies) into the
        // ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          // Look through copies: queue the copy's virtual destination so its
          // users are also revisited.
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }
1157 
1158 private:
1159   static Register getArtifactSrcReg(const MachineInstr &MI) {
1160     switch (MI.getOpcode()) {
1161     case TargetOpcode::COPY:
1162     case TargetOpcode::G_TRUNC:
1163     case TargetOpcode::G_ZEXT:
1164     case TargetOpcode::G_ANYEXT:
1165     case TargetOpcode::G_SEXT:
1166     case TargetOpcode::G_EXTRACT:
1167       return MI.getOperand(1).getReg();
1168     case TargetOpcode::G_UNMERGE_VALUES:
1169       return MI.getOperand(MI.getNumOperands() - 1).getReg();
1170     default:
1171       llvm_unreachable("Not a legalization artifact happen");
1172     }
1173   }
1174 
  /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
  /// (either by killing it or changing operands) results in DefMI being dead
  /// too. In-between COPYs or artifact-casts are also collected if they are
  /// dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      // Only continue up the chain if PrevMI is the sole user of the register;
      // otherwise the feeding instruction is still needed.
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    // Only if the walk actually reached DefMI can DefMI be dead: every def
    // other than DefIdx must have no remaining uses.
    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          // NOTE(review): this break leaves IsDead == true. On this path the
          // DefIdx def should already have exactly one use (established by the
          // chain walk above), so the check presumably never fails — confirm.
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }
1231 
  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)), try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to collect
  /// the in between copies if they're dead.
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    // Push MI first so deleteMarkedDeadInsts() erases it (the user) before the
    // instructions that feed it.
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }
1243 
1244   /// Erase the dead instructions in the list and call the observer hooks.
1245   /// Normally the Legalizer will deal with erasing instructions that have been
1246   /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
1247   /// process instructions which have been marked dead, but otherwise break the
1248   /// MIR by introducing multiple vreg defs. For those cases, allow the combines
1249   /// to explicitly delete the instructions before we run into trouble.
1250   void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
1251                              GISelObserverWrapper &WrapperObserver) {
1252     for (auto *DeadMI : DeadInsts) {
1253       LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
1254       WrapperObserver.erasingInstr(*DeadMI);
1255       DeadMI->eraseFromParent();
1256     }
1257     DeadInsts.clear();
1258   }
1259 
1260   /// Checks if the target legalizer info has specified anything about the
1261   /// instruction, or if unsupported.
1262   bool isInstUnsupported(const LegalityQuery &Query) const {
1263     using namespace LegalizeActions;
1264     auto Step = LI.getAction(Query);
1265     return Step.Action == Unsupported || Step.Action == NotFound;
1266   }
1267 
1268   bool isInstLegal(const LegalityQuery &Query) const {
1269     return LI.getAction(Query).Action == LegalizeActions::Legal;
1270   }
1271 
1272   bool isConstantUnsupported(LLT Ty) const {
1273     if (!Ty.isVector())
1274       return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
1275 
1276     LLT EltTy = Ty.getElementType();
1277     return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
1278            isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
1279   }
1280 
1281   /// Looks through copy instructions and returns the actual
1282   /// source register.
1283   Register lookThroughCopyInstrs(Register Reg) {
1284     Register TmpReg;
1285     while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
1286       if (MRI.getType(TmpReg).isValid())
1287         Reg = TmpReg;
1288       else
1289         break;
1290     }
1291     return Reg;
1292   }
1293 };
1294 
1295 } // namespace llvm
1296 
1297 #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
1298