1 //===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file contains some helper functions which try to cleanup artifacts
9 // such as G_TRUNCs/G_[ZSA]EXTENDS that were created during legalization to make
// the types match. This file also contains some combines of merges that happen
11 // at the end of the legalization.
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
15 #define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
16 
17 #include "llvm/ADT/SmallBitVector.h"
18 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
19 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
20 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
21 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
24 #include "llvm/CodeGen/GlobalISel/Utils.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/Register.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/Support/Debug.h"
29 
30 #define DEBUG_TYPE "legalizer"
31 
32 namespace llvm {
33 class LegalizationArtifactCombiner {
34   MachineIRBuilder &Builder;
35   MachineRegisterInfo &MRI;
36   const LegalizerInfo &LI;
37 
38   static bool isArtifactCast(unsigned Opc) {
39     switch (Opc) {
40     case TargetOpcode::G_TRUNC:
41     case TargetOpcode::G_SEXT:
42     case TargetOpcode::G_ZEXT:
43     case TargetOpcode::G_ANYEXT:
44       return true;
45     default:
46       return false;
47     }
48   }
49 
50 public:
  /// Construct a combiner that builds replacement instructions with \p B over
  /// \p MRI, consulting \p LI for legality. All three are captured by
  /// reference and must outlive this object.
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                    const LegalizerInfo &LI)
      : Builder(B), MRI(MRI), LI(LI) {}
54 
  /// Combine away a G_ANYEXT artifact.
  ///
  /// Handles:
  ///   aext(trunc x)    -> aext/copy/trunc x
  ///   aext([asz]ext x) -> [asz]ext x
  ///   aext(G_CONSTANT) -> wider G_CONSTANT, when legal
  ///   aext(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  ///
  /// \returns true if \p MI was combined away. Instructions made dead are
  /// appended to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) - > aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      // If the type round-trips exactly, reuse (or copy) the pre-trunc
      // register; otherwise re-extend or re-truncate straight from it.
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      // Rebuild the inner extension's opcode directly at the wider
      // destination type; the outer anyext adds no constraints on top of it.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        // Sign-extension is an arbitrary but valid way to fill the
        // unspecified high bits of an anyext'd constant.
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
108 
  /// Combine away a G_ZEXT artifact.
  ///
  /// Handles:
  ///   zext(trunc x)    -> and (aext/copy/trunc x), mask
  ///   zext(sext x)     -> and (sext x), mask
  ///   zext(zext x)     -> zext x
  ///   zext(G_CONSTANT) -> wider G_CONSTANT, when legal
  ///   zext(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  ///
  /// \returns true if \p MI was combined away. Instructions made dead are
  /// appended to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) - > and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      // Only rewrite if the target supports both the G_AND and its constant
      // mask at the destination type; otherwise keep the zext as-is.
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Mask keeps only the low SrcTy-width bits, implementing the
      // zero-extension explicitly over whatever the aext/sext produced in
      // the high bits.
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      auto Mask = Builder.buildConstant(
        DstTy, MaskVal.zext(DstTy.getScalarSizeInBits()));
      // Bring the chosen source up (or down) to the destination type before
      // masking. Exactly one of SextSrc/TruncSrc is valid here.
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildAnd(DstReg, SextSrc ? SextSrc : TruncSrc, Mask);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Mutate MI in place to extend directly from the inner zext's source;
      // notify the observer around the operand change.
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
171 
  /// Combine away a G_SEXT artifact.
  ///
  /// Handles:
  ///   sext(trunc x)    -> sext_inreg (aext/copy/trunc x), c
  ///   sext(zext x)     -> zext x
  ///   sext(sext x)     -> sext x
  ///   sext(G_CONSTANT) -> wider G_CONSTANT, when legal
  ///   sext(G_IMPLICIT_DEF) via tryFoldImplicitDef.
  ///
  /// \returns true if \p MI was combined away. Instructions made dead are
  /// appended to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) - > (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      // Sign-extend from the width the trunc narrowed the value to.
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      // Rebuild the inner extension's opcode directly at the wider
      // destination type.
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }
228 
229   bool tryCombineTrunc(MachineInstr &MI,
230                        SmallVectorImpl<MachineInstr *> &DeadInsts,
231                        SmallVectorImpl<Register> &UpdatedDefs,
232                        GISelObserverWrapper &Observer) {
233     using namespace llvm::MIPatternMatch;
234     assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
235 
236     Builder.setInstr(MI);
237     Register DstReg = MI.getOperand(0).getReg();
238     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
239 
240     // Try to fold trunc(g_constant) when the smaller constant type is legal.
241     auto *SrcMI = MRI.getVRegDef(SrcReg);
242     if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
243       const LLT DstTy = MRI.getType(DstReg);
244       if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
245         auto &CstVal = SrcMI->getOperand(1);
246         Builder.buildConstant(
247             DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
248         UpdatedDefs.push_back(DstReg);
249         markInstAndDefDead(MI, *SrcMI, DeadInsts);
250         return true;
251       }
252     }
253 
254     // Try to fold trunc(merge) to directly use the source of the merge.
255     // This gets rid of large, difficult to legalize, merges
256     if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
257       const Register MergeSrcReg = SrcMerge->getSourceReg(0);
258       const LLT MergeSrcTy = MRI.getType(MergeSrcReg);
259       const LLT DstTy = MRI.getType(DstReg);
260 
261       // We can only fold if the types are scalar
262       const unsigned DstSize = DstTy.getSizeInBits();
263       const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
264       if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
265         return false;
266 
267       if (DstSize < MergeSrcSize) {
268         // When the merge source is larger than the destination, we can just
269         // truncate the merge source directly
270         if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
271           return false;
272 
273         LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
274                           << MI);
275 
276         Builder.buildTrunc(DstReg, MergeSrcReg);
277         UpdatedDefs.push_back(DstReg);
278       } else if (DstSize == MergeSrcSize) {
279         // If the sizes match we can simply try to replace the register
280         LLVM_DEBUG(
281             dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
282                    << MI);
283         replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
284                               Observer);
285       } else if (DstSize % MergeSrcSize == 0) {
286         // If the trunc size is a multiple of the merge source size we can use
287         // a smaller merge instead
288         if (isInstUnsupported(
289                 {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
290           return false;
291 
292         LLVM_DEBUG(
293             dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
294                    << MI);
295 
296         const unsigned NumSrcs = DstSize / MergeSrcSize;
297         assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
298                "trunc(merge) should require less inputs than merge");
299         SmallVector<Register, 8> SrcRegs(NumSrcs);
300         for (unsigned i = 0; i < NumSrcs; ++i)
301           SrcRegs[i] = SrcMerge->getSourceReg(i);
302 
303         Builder.buildMerge(DstReg, SrcRegs);
304         UpdatedDefs.push_back(DstReg);
305       } else {
306         // Unable to combine
307         return false;
308       }
309 
310       markInstAndDefDead(MI, *SrcMerge, DeadInsts);
311       return true;
312     }
313 
314     // trunc(trunc) -> trunc
315     Register TruncSrc;
316     if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
317       // Always combine trunc(trunc) since the eventual resulting trunc must be
318       // legal anyway as it must be legal for all outputs of the consumer type
319       // set.
320       LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);
321 
322       Builder.buildTrunc(DstReg, TruncSrc);
323       UpdatedDefs.push_back(DstReg);
324       markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
325       return true;
326     }
327 
328     return false;
329   }
330 
331   /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
332   bool tryFoldImplicitDef(MachineInstr &MI,
333                           SmallVectorImpl<MachineInstr *> &DeadInsts,
334                           SmallVectorImpl<Register> &UpdatedDefs) {
335     unsigned Opcode = MI.getOpcode();
336     assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
337            Opcode == TargetOpcode::G_SEXT);
338 
339     if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
340                                            MI.getOperand(1).getReg(), MRI)) {
341       Builder.setInstr(MI);
342       Register DstReg = MI.getOperand(0).getReg();
343       LLT DstTy = MRI.getType(DstReg);
344 
345       if (Opcode == TargetOpcode::G_ANYEXT) {
346         // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
347         if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
348           return false;
349         LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
350         Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
351         UpdatedDefs.push_back(DstReg);
352       } else {
353         // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
354         // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
355         if (isConstantUnsupported(DstTy))
356           return false;
357         LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
358         Builder.buildConstant(DstReg, 0);
359         UpdatedDefs.push_back(DstReg);
360       }
361 
362       markInstAndDefDead(MI, *DefMI, DeadInsts);
363       return true;
364     }
365     return false;
366   }
367 
  /// Try to fold the unmerge \p MI whose source is produced by the cast
  /// artifact \p CastMI. Only G_TRUNC casts are currently handled.
  ///
  /// \returns true if \p MI was rewritten to unmerge the cast's source
  /// directly; dead instructions are appended to \p DeadInsts and redefined
  /// registers to \p UpdatedDefs.
  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    // An unmerge has NumDefs results followed by the single source operand.
    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        //  %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        //  %2:_(s8) = G_TRUNC %6
        //  %3:_(s8) = G_TRUNC %7
        //  %4:_(s8) = G_TRUNC %8
        //  %5:_(s8) = G_TRUNC %9

        // Unmerge the cast's wide source into NumDefs pieces, then truncate
        // each piece to the requested destination type.
        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}))
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        // Re-define the original unmerge results as truncs of the new wide
        // unmerge pieces.
        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        //  %1:_(s16) = G_TRUNC %0(s32)
        //  %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        //  %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a multiple
        // of the unmerge destination size
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for the
        // unused bits
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build new unmerge
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }
461 
462   static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
463                                  LLT OpTy, LLT DestTy) {
464     // Check if we found a definition that is like G_MERGE_VALUES.
465     switch (MergeOp) {
466     default:
467       return false;
468     case TargetOpcode::G_BUILD_VECTOR:
469     case TargetOpcode::G_MERGE_VALUES:
470       // The convert operation that we will need to insert is
471       // going to convert the input of that type of instruction (scalar)
472       // to the destination type (DestTy).
473       // The conversion needs to stay in the same domain (scalar to scalar
474       // and vector to vector), so if we were to allow to fold the merge
475       // we would need to insert some bitcasts.
476       // E.g.,
477       // <2 x s16> = build_vector s16, s16
478       // <2 x s32> = zext <2 x s16>
479       // <2 x s16>, <2 x s16> = unmerge <2 x s32>
480       //
481       // As is the folding would produce:
482       // <2 x s16> = zext s16  <-- scalar to vector
483       // <2 x s16> = zext s16  <-- scalar to vector
484       // Which is invalid.
485       // Instead we would want to generate:
486       // s32 = zext s16
487       // <2 x s16> = bitcast s32
488       // s32 = zext s16
489       // <2 x s16> = bitcast s32
490       //
491       // That is not done yet.
492       if (ConvertOp == 0)
493         return true;
494       return !DestTy.isVector() && OpTy.isVector() &&
495              DestTy == OpTy.getElementType();
496     case TargetOpcode::G_CONCAT_VECTORS: {
497       if (ConvertOp == 0)
498         return true;
499       if (!DestTy.isVector())
500         return false;
501 
502       const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();
503 
504       // Don't handle scalarization with a cast that isn't in the same
505       // direction as the vector cast. This could be handled, but it would
506       // require more intermediate unmerges.
507       if (ConvertOp == TargetOpcode::G_TRUNC)
508         return DestTy.getSizeInBits() <= OpEltSize;
509       return DestTy.getSizeInBits() >= OpEltSize;
510     }
511     }
512   }
513 
514   /// Try to replace DstReg with SrcReg or build a COPY instruction
515   /// depending on the register constraints.
516   static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
517                                     MachineRegisterInfo &MRI,
518                                     MachineIRBuilder &Builder,
519                                     SmallVectorImpl<Register> &UpdatedDefs,
520                                     GISelChangeObserver &Observer) {
521     if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
522       Builder.buildCopy(DstReg, SrcReg);
523       UpdatedDefs.push_back(DstReg);
524       return;
525     }
526     SmallVector<MachineInstr *, 4> UseMIs;
527     // Get the users and notify the observer before replacing.
528     for (auto &UseMI : MRI.use_instructions(DstReg)) {
529       UseMIs.push_back(&UseMI);
530       Observer.changingInstr(UseMI);
531     }
532     // Replace the registers.
533     MRI.replaceRegWith(DstReg, SrcReg);
534     UpdatedDefs.push_back(SrcReg);
535     // Notify the observer that we changed the instructions.
536     for (auto *UseMI : UseMIs)
537       Observer.changedInstr(*UseMI);
538   }
539 
540   /// Return the operand index in \p MI that defines \p Def
541   static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
542     unsigned DefIdx = 0;
543     for (const MachineOperand &Def : MI.defs()) {
544       if (Def.getReg() == SearchDef)
545         break;
546       ++DefIdx;
547     }
548 
549     return DefIdx;
550   }
551 
552   /// This class provides utilities for finding source registers of specific
553   /// bit ranges in an artifact. The routines can look through the source
554   /// registers if they're other artifacts to try to find a non-artifact source
555   /// of a value.
556   class ArtifactValueFinder {
557     MachineRegisterInfo &MRI;
558     MachineIRBuilder &MIB;
559     const LegalizerInfo &LI;
560 
561     // Stores the best register found in the current query so far.
562     Register CurrentBest = Register();
563 
    /// Given a concat_vector op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 skips the def operand.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able to return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        // The source covers the requested range exactly, so it is at least as
        // good as anything found so far; keep searching through it for an
        // even earlier origin.
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }
595 
    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      // (+1 skips the def operand.)
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        // Gather the contiguous run of sources covering the requested bits
        // and build a smaller vector from them.
        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }
649 
    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //  |  INS    |  CONTAINER     |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //  |  CONTAINER    |  INS     |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        // Scenarios B and C: the range is disjoint from the inserted bits, so
        // it comes entirely from the container.
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        // Scenarios A and D: the range lies entirely within the inserted
        // value; rebase the start bit onto it.
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        // An exact cover makes the inserted register itself a valid answer.
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
721 
    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
      // into the source of the unmerge.
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        // DefReg's position among the unmerge's defs fixes its bit offset
        // into the unmerge source (all unmerge results share DefReg's size).
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        // The unmerge source is the trailing operand.
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      default:
        return CurrentBest;
      }
    }
763 
764   public:
    /// Construct a finder that inspects \p Mri for register defs, uses
    /// \p Builder to create any instructions it needs, and consults \p Info
    /// for legality queries.
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}
768 
769     /// Try to find a source of the value defined in the def \p DefReg, starting
770     /// at position \p StartBit with size \p Size.
771     /// \returns a register with the requested size, or an empty Register if no
772     /// better value could be found.
773     Register findValueFromDef(Register DefReg, unsigned StartBit,
774                               unsigned Size) {
775       CurrentBest = Register();
776       Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
777       return FoundReg != DefReg ? FoundReg : Register();
778     }
779 
    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provides the bits for each def reg.
    /// Defs that are replaced (and defs that already had no non-debug users)
    /// are recorded in \p DeadDefs; replacements are reported to \p Observer
    /// and the redefined registers appended to \p UpdatedDefs.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      // All defs of an unmerge share one type; query it from the first def.
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        // A def with no non-debug users is trivially dead already.
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        // Only a register of the exact same type can stand in for the def.
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
        // NOTE(review): presumably replaceRegOrBuildCopy may rewrite every
        // occurrence of DefReg including MI's own def operand, so restore the
        // def here — confirm against its implementation.
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      // Success only if every def was either unused or replaced.
      return DeadDefs.all();
    }
811   };
812 
  /// Try to combine away the G_UNMERGE_VALUES \p MI by sourcing its defs
  /// directly from the instruction(s) that produced its input: an
  /// ArtifactValueFinder walk, a flattening of unmerge-of-unmerge, or a fold
  /// of unmerge-of-merge-like (possibly through an intermediate artifact
  /// cast). Falls back to tryFoldUnmergeCast otherwise.
  /// \returns true if \p MI was combined away; instructions made dead are
  /// appended to \p DeadInsts and redefined registers to \p UpdatedDefs.
  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    // Which def of SrcDef (it may have several, e.g. an unmerge) feeds MI.
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    // First attempt: resolve each def of the unmerge individually.
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // Flatten unmerge-of-unmerge into a single unmerge of the outer source:
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result type
      // of an unmerge, this would involve the creation of an equivalent unmerge
      // to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        // Only bail if it's the source type (TypeIdx 1) that would be split.
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a separate
      // unmerge for each one.
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        // SrcDefIdx selects the chunk of the flattened unmerge that
        // corresponds to the inner-unmerge def MI was reading from.
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions: look through a single artifact cast
    // between the unmerge and a merge-like def.
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      // More unmerge defs than merge sources: each merge source supplies a
      // contiguous group of unmerge defs, so the counts must divide evenly.
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      //   %1 = G_MERGE_VALUES %4, %5
      //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      //   %9, %10 = G_UNMERGE_VALUES %4
      //   %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

          // This is a vector that is being split and casted. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeSrcTy.divide(NewNumDefs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<2 x s8>), %9(<2 x s8>) = G_UNMERGE_VALUES %0
          // %10(<2 x s8>), %11(<2 x s8>) = G_UNMERGE_VALUES %1
          // %4(<2 x s16>) = G_SEXT %8
          // %5(<2 x s16>) = G_SEXT %9
          // %6(<2 x s16>) = G_SEXT %10
          // %7(<2 x s16>)= G_SEXT %11

          SmallVector<Register, 4> TmpRegs(NewNumDefs);
          for (unsigned k = 0; k < NewNumDefs; ++k)
            TmpRegs[k] = MRI.createGenericVirtualRegister(MergeEltTy);

          Builder.buildUnmerge(TmpRegs, MergeI->getOperand(Idx + 1).getReg());

          // Convert each narrow piece into the corresponding final def.
          for (unsigned k = 0; k < NewNumDefs; ++k)
            Builder.buildInstr(ConvertOp, {DstRegs[k]}, {TmpRegs[k]});
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      // More merge sources than unmerge defs: each unmerge def is itself a
      // merge of a contiguous group of the original sources. Conversions are
      // not handled on this path.
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      //   %6 = G_MERGE_VALUES %17, %18, %19, %20
      //   %7, %8 = G_UNMERGE_VALUES %6
      // to
      //   %7 = G_MERGE_VALUES %17, %18
      //   %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        // Merge sources start at operand 1 (operand 0 is the merge's def).
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMerge(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      // Equal counts: each unmerge def maps 1:1 onto a merge source.
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      // Bitcast between same-sized but differently-typed registers.
      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          // Skip dead defs rather than emit an unused conversion.
          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      // Types match exactly: forward each merge source to the unmerge def.
      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }
1014 
1015   bool tryCombineExtract(MachineInstr &MI,
1016                          SmallVectorImpl<MachineInstr *> &DeadInsts,
1017                          SmallVectorImpl<Register> &UpdatedDefs) {
1018     assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
1019 
1020     // Try to use the source registers from a G_MERGE_VALUES
1021     //
1022     // %2 = G_MERGE_VALUES %0, %1
1023     // %3 = G_EXTRACT %2, N
1024     // =>
1025     //
1026     // for N < %2.getSizeInBits() / 2
1027     //     %3 = G_EXTRACT %0, N
1028     //
1029     // for N >= %2.getSizeInBits() / 2
1030     //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
1031 
1032     Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
1033     MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
1034     if (!MergeI || !isa<GMergeLikeOp>(MergeI))
1035       return false;
1036 
1037     Register DstReg = MI.getOperand(0).getReg();
1038     LLT DstTy = MRI.getType(DstReg);
1039     LLT SrcTy = MRI.getType(SrcReg);
1040 
1041     // TODO: Do we need to check if the resulting extract is supported?
1042     unsigned ExtractDstSize = DstTy.getSizeInBits();
1043     unsigned Offset = MI.getOperand(2).getImm();
1044     unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
1045     unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
1046     unsigned MergeSrcIdx = Offset / MergeSrcSize;
1047 
1048     // Compute the offset of the last bit the extract needs.
1049     unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;
1050 
1051     // Can't handle the case where the extract spans multiple inputs.
1052     if (MergeSrcIdx != EndMergeSrcIdx)
1053       return false;
1054 
1055     // TODO: We could modify MI in place in most cases.
1056     Builder.setInstr(MI);
1057     Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
1058                          Offset - MergeSrcIdx * MergeSrcSize);
1059     UpdatedDefs.push_back(DstReg);
1060     markInstAndDefDead(MI, *MergeI, DeadInsts);
1061     return true;
1062   }
1063 
  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    // Dispatch on the artifact kind to the matching combine helper.
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all artifact
        // combines at the moment look only "up" the def-use chains, we achieve
        // that by throwing truncates' users (with look through copies) into the
        // ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::COPY: {
          // Look through copies: re-queue the copy's destination instead.
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }
1161 
1162 private:
1163   static Register getArtifactSrcReg(const MachineInstr &MI) {
1164     switch (MI.getOpcode()) {
1165     case TargetOpcode::COPY:
1166     case TargetOpcode::G_TRUNC:
1167     case TargetOpcode::G_ZEXT:
1168     case TargetOpcode::G_ANYEXT:
1169     case TargetOpcode::G_SEXT:
1170     case TargetOpcode::G_EXTRACT:
1171       return MI.getOperand(1).getReg();
1172     case TargetOpcode::G_UNMERGE_VALUES:
1173       return MI.getOperand(MI.getNumOperands() - 1).getReg();
1174     default:
1175       llvm_unreachable("Not a legalization artifact happen");
1176     }
1177   }
1178 
  /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
  /// (either by killing it or changing operands) results in DefMI being dead
  /// too. In-between COPYs or artifact-casts are also collected if they are
  /// dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    // %1(s1) = G_TRUNC %0(s32)
    // %2(s1) = COPY %1(s1)
    // %3(s1) = COPY %2(s1)
    // %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      // Only walk further up the chain while each link's value has a single
      // use; otherwise the intermediate instruction stays live.
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    // DefMI itself can only be dead if the walk actually reached it.
    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      // DefMI may have multiple defs (e.g. an unmerge): it is dead only if
      // every def other than DefIdx is unused, and the DefIdx def has no use
      // beyond the chain we just walked.
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          // NOTE(review): this break leaves IsDead == true; presumably the
          // hasOneUse walk above guarantees this branch cannot be taken with
          // remaining uses — confirm.
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }
1235 
  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)), try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to collect
  /// the in between copies if they're dead.
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    // MI is appended first so deleteMarkedDeadInsts() erases the user before
    // the instructions defining its operands.
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }
1247 
1248   /// Erase the dead instructions in the list and call the observer hooks.
1249   /// Normally the Legalizer will deal with erasing instructions that have been
1250   /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
1251   /// process instructions which have been marked dead, but otherwise break the
1252   /// MIR by introducing multiple vreg defs. For those cases, allow the combines
1253   /// to explicitly delete the instructions before we run into trouble.
1254   void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
1255                              GISelObserverWrapper &WrapperObserver) {
1256     for (auto *DeadMI : DeadInsts) {
1257       LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
1258       WrapperObserver.erasingInstr(*DeadMI);
1259       DeadMI->eraseFromParent();
1260     }
1261     DeadInsts.clear();
1262   }
1263 
  /// Checks if the target legalizer info has specified anything about the
  /// instruction, or if unsupported.
  bool isInstUnsupported(const LegalityQuery &Query) const {
    using namespace LegalizeActions;
    auto Step = LI.getAction(Query);
    // NotFound means the target gave no rule at all for this query.
    return Step.Action == Unsupported || Step.Action == NotFound;
  }
1271 
1272   bool isInstLegal(const LegalityQuery &Query) const {
1273     return LI.getAction(Query).Action == LegalizeActions::Legal;
1274   }
1275 
1276   bool isConstantUnsupported(LLT Ty) const {
1277     if (!Ty.isVector())
1278       return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});
1279 
1280     LLT EltTy = Ty.getElementType();
1281     return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
1282            isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
1283   }
1284 
1285   /// Looks through copy instructions and returns the actual
1286   /// source register.
1287   Register lookThroughCopyInstrs(Register Reg) {
1288     using namespace llvm::MIPatternMatch;
1289 
1290     Register TmpReg;
1291     while (mi_match(Reg, MRI, m_Copy(m_Reg(TmpReg)))) {
1292       if (MRI.getType(TmpReg).isValid())
1293         Reg = TmpReg;
1294       else
1295         break;
1296     }
1297     return Reg;
1298   }
1299 };
1300 
1301 } // namespace llvm
1302 
1303 #endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
1304