1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetFrameLowering.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/Support/Debug.h"
25 #include "llvm/Support/MathExtras.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 #define DEBUG_TYPE "legalizer"
29 
30 using namespace llvm;
31 using namespace LegalizeActions;
32 
33 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
34 ///
35 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
36 /// with any leftover piece as type \p LeftoverTy
37 ///
38 /// Returns -1 in the first element of the pair if the breakdown is not
39 /// satisfiable.
40 static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy,LLT NarrowTy,LLT & LeftoverTy)41 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
42   assert(!LeftoverTy.isValid() && "this is an out argument");
43 
44   unsigned Size = OrigTy.getSizeInBits();
45   unsigned NarrowSize = NarrowTy.getSizeInBits();
46   unsigned NumParts = Size / NarrowSize;
47   unsigned LeftoverSize = Size - NumParts * NarrowSize;
48   assert(Size > NarrowSize);
49 
50   if (LeftoverSize == 0)
51     return {NumParts, 0};
52 
53   if (NarrowTy.isVector()) {
54     unsigned EltSize = OrigTy.getScalarSizeInBits();
55     if (LeftoverSize % EltSize != 0)
56       return {-1, -1};
57     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
58   } else {
59     LeftoverTy = LLT::scalar(LeftoverSize);
60   }
61 
62   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
63   return std::make_pair(NumParts, NumLeftover);
64 }
65 
LegalizerHelper(MachineFunction & MF,GISelChangeObserver & Observer,MachineIRBuilder & Builder)66 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
67                                  GISelChangeObserver &Observer,
68                                  MachineIRBuilder &Builder)
69     : MIRBuilder(Builder), MRI(MF.getRegInfo()),
70       LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
71   MIRBuilder.setMF(MF);
72   MIRBuilder.setChangeObserver(Observer);
73 }
74 
LegalizerHelper(MachineFunction & MF,const LegalizerInfo & LI,GISelChangeObserver & Observer,MachineIRBuilder & B)75 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
76                                  GISelChangeObserver &Observer,
77                                  MachineIRBuilder &B)
78     : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
79   MIRBuilder.setMF(MF);
80   MIRBuilder.setChangeObserver(Observer);
81 }
82 LegalizerHelper::LegalizeResult
legalizeInstrStep(MachineInstr & MI)83 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
84   LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
85 
86   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
87       MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
88     return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
89                                                      : UnableToLegalize;
90   auto Step = LI.getAction(MI, MRI);
91   switch (Step.Action) {
92   case Legal:
93     LLVM_DEBUG(dbgs() << ".. Already legal\n");
94     return AlreadyLegal;
95   case Libcall:
96     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
97     return libcall(MI);
98   case NarrowScalar:
99     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
100     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
101   case WidenScalar:
102     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
103     return widenScalar(MI, Step.TypeIdx, Step.NewType);
104   case Lower:
105     LLVM_DEBUG(dbgs() << ".. Lower\n");
106     return lower(MI, Step.TypeIdx, Step.NewType);
107   case FewerElements:
108     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
109     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
110   case MoreElements:
111     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
112     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
113   case Custom:
114     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
115     return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
116                                                             : UnableToLegalize;
117   default:
118     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
119     return UnableToLegalize;
120   }
121 }
122 
extractParts(Register Reg,LLT Ty,int NumParts,SmallVectorImpl<Register> & VRegs)123 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
124                                    SmallVectorImpl<Register> &VRegs) {
125   for (int i = 0; i < NumParts; ++i)
126     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
127   MIRBuilder.buildUnmerge(VRegs, Reg);
128 }
129 
extractParts(Register Reg,LLT RegTy,LLT MainTy,LLT & LeftoverTy,SmallVectorImpl<Register> & VRegs,SmallVectorImpl<Register> & LeftoverRegs)130 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
131                                    LLT MainTy, LLT &LeftoverTy,
132                                    SmallVectorImpl<Register> &VRegs,
133                                    SmallVectorImpl<Register> &LeftoverRegs) {
134   assert(!LeftoverTy.isValid() && "this is an out argument");
135 
136   unsigned RegSize = RegTy.getSizeInBits();
137   unsigned MainSize = MainTy.getSizeInBits();
138   unsigned NumParts = RegSize / MainSize;
139   unsigned LeftoverSize = RegSize - NumParts * MainSize;
140 
141   // Use an unmerge when possible.
142   if (LeftoverSize == 0) {
143     for (unsigned I = 0; I < NumParts; ++I)
144       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
145     MIRBuilder.buildUnmerge(VRegs, Reg);
146     return true;
147   }
148 
149   if (MainTy.isVector()) {
150     unsigned EltSize = MainTy.getScalarSizeInBits();
151     if (LeftoverSize % EltSize != 0)
152       return false;
153     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
154   } else {
155     LeftoverTy = LLT::scalar(LeftoverSize);
156   }
157 
158   // For irregular sizes, extract the individual parts.
159   for (unsigned I = 0; I != NumParts; ++I) {
160     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
161     VRegs.push_back(NewReg);
162     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
163   }
164 
165   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
166        Offset += LeftoverSize) {
167     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
168     LeftoverRegs.push_back(NewReg);
169     MIRBuilder.buildExtract(NewReg, Reg, Offset);
170   }
171 
172   return true;
173 }
174 
getGCDType(LLT OrigTy,LLT TargetTy)175 static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
176   if (OrigTy.isVector() && TargetTy.isVector()) {
177     assert(OrigTy.getElementType() == TargetTy.getElementType());
178     int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
179                                     TargetTy.getNumElements());
180     return LLT::scalarOrVector(GCD, OrigTy.getElementType());
181   }
182 
183   if (OrigTy.isVector() && !TargetTy.isVector()) {
184     assert(OrigTy.getElementType() == TargetTy);
185     return TargetTy;
186   }
187 
188   assert(!OrigTy.isVector() && !TargetTy.isVector());
189 
190   int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
191                                   TargetTy.getSizeInBits());
192   return LLT::scalar(GCD);
193 }
194 
insertParts(Register DstReg,LLT ResultTy,LLT PartTy,ArrayRef<Register> PartRegs,LLT LeftoverTy,ArrayRef<Register> LeftoverRegs)195 void LegalizerHelper::insertParts(Register DstReg,
196                                   LLT ResultTy, LLT PartTy,
197                                   ArrayRef<Register> PartRegs,
198                                   LLT LeftoverTy,
199                                   ArrayRef<Register> LeftoverRegs) {
200   if (!LeftoverTy.isValid()) {
201     assert(LeftoverRegs.empty());
202 
203     if (!ResultTy.isVector()) {
204       MIRBuilder.buildMerge(DstReg, PartRegs);
205       return;
206     }
207 
208     if (PartTy.isVector())
209       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
210     else
211       MIRBuilder.buildBuildVector(DstReg, PartRegs);
212     return;
213   }
214 
215   unsigned PartSize = PartTy.getSizeInBits();
216   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
217 
218   Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
219   MIRBuilder.buildUndef(CurResultReg);
220 
221   unsigned Offset = 0;
222   for (Register PartReg : PartRegs) {
223     Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
224     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
225     CurResultReg = NewResultReg;
226     Offset += PartSize;
227   }
228 
229   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
230     // Use the original output register for the final insert to avoid a copy.
231     Register NewResultReg = (I + 1 == E) ?
232       DstReg : MRI.createGenericVirtualRegister(ResultTy);
233 
234     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
235     CurResultReg = NewResultReg;
236     Offset += LeftoverPartSize;
237   }
238 }
239 
getRTLibDesc(unsigned Opcode,unsigned Size)240 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
241   switch (Opcode) {
242   case TargetOpcode::G_SDIV:
243     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
244     switch (Size) {
245     case 32:
246       return RTLIB::SDIV_I32;
247     case 64:
248       return RTLIB::SDIV_I64;
249     case 128:
250       return RTLIB::SDIV_I128;
251     default:
252       llvm_unreachable("unexpected size");
253     }
254   case TargetOpcode::G_UDIV:
255     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
256     switch (Size) {
257     case 32:
258       return RTLIB::UDIV_I32;
259     case 64:
260       return RTLIB::UDIV_I64;
261     case 128:
262       return RTLIB::UDIV_I128;
263     default:
264       llvm_unreachable("unexpected size");
265     }
266   case TargetOpcode::G_SREM:
267     assert((Size == 32 || Size == 64) && "Unsupported size");
268     return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
269   case TargetOpcode::G_UREM:
270     assert((Size == 32 || Size == 64) && "Unsupported size");
271     return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
272   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
273     assert(Size == 32 && "Unsupported size");
274     return RTLIB::CTLZ_I32;
275   case TargetOpcode::G_FADD:
276     assert((Size == 32 || Size == 64) && "Unsupported size");
277     return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
278   case TargetOpcode::G_FSUB:
279     assert((Size == 32 || Size == 64) && "Unsupported size");
280     return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
281   case TargetOpcode::G_FMUL:
282     assert((Size == 32 || Size == 64) && "Unsupported size");
283     return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
284   case TargetOpcode::G_FDIV:
285     assert((Size == 32 || Size == 64) && "Unsupported size");
286     return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
287   case TargetOpcode::G_FEXP:
288     assert((Size == 32 || Size == 64) && "Unsupported size");
289     return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
290   case TargetOpcode::G_FEXP2:
291     assert((Size == 32 || Size == 64) && "Unsupported size");
292     return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
293   case TargetOpcode::G_FREM:
294     return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
295   case TargetOpcode::G_FPOW:
296     return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
297   case TargetOpcode::G_FMA:
298     assert((Size == 32 || Size == 64) && "Unsupported size");
299     return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
300   case TargetOpcode::G_FSIN:
301     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
302     return Size == 128 ? RTLIB::SIN_F128
303                        : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
304   case TargetOpcode::G_FCOS:
305     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
306     return Size == 128 ? RTLIB::COS_F128
307                        : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
308   case TargetOpcode::G_FLOG10:
309     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
310     return Size == 128 ? RTLIB::LOG10_F128
311                        : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
312   case TargetOpcode::G_FLOG:
313     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
314     return Size == 128 ? RTLIB::LOG_F128
315                        : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
316   case TargetOpcode::G_FLOG2:
317     assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
318     return Size == 128 ? RTLIB::LOG2_F128
319                        : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
320   case TargetOpcode::G_FCEIL:
321     assert((Size == 32 || Size == 64) && "Unsupported size");
322     return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
323   case TargetOpcode::G_FFLOOR:
324     assert((Size == 32 || Size == 64) && "Unsupported size");
325     return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
326   }
327   llvm_unreachable("Unknown libcall function");
328 }
329 
330 /// True if an instruction is in tail position in its caller. Intended for
331 /// legalizing libcalls as tail calls when possible.
isLibCallInTailPosition(MachineInstr & MI)332 static bool isLibCallInTailPosition(MachineInstr &MI) {
333   const Function &F = MI.getParent()->getParent()->getFunction();
334 
335   // Conservatively require the attributes of the call to match those of
336   // the return. Ignore NoAlias and NonNull because they don't affect the
337   // call sequence.
338   AttributeList CallerAttrs = F.getAttributes();
339   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
340           .removeAttribute(Attribute::NoAlias)
341           .removeAttribute(Attribute::NonNull)
342           .hasAttributes())
343     return false;
344 
345   // It's not safe to eliminate the sign / zero extension of the return value.
346   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
347       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
348     return false;
349 
350   // Only tail call if the following instruction is a standard return.
351   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
352   MachineInstr *Next = MI.getNextNode();
353   if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
354     return false;
355 
356   return true;
357 }
358 
359 LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder & MIRBuilder,RTLIB::Libcall Libcall,const CallLowering::ArgInfo & Result,ArrayRef<CallLowering::ArgInfo> Args)360 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
361                     const CallLowering::ArgInfo &Result,
362                     ArrayRef<CallLowering::ArgInfo> Args) {
363   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
364   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
365   const char *Name = TLI.getLibcallName(Libcall);
366 
367   CallLowering::CallLoweringInfo Info;
368   Info.CallConv = TLI.getLibcallCallingConv(Libcall);
369   Info.Callee = MachineOperand::CreateES(Name);
370   Info.OrigRet = Result;
371   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
372   if (!CLI.lowerCall(MIRBuilder, Info))
373     return LegalizerHelper::UnableToLegalize;
374 
375   return LegalizerHelper::Legalized;
376 }
377 
378 // Useful for libcalls where all operands have the same type.
379 static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,unsigned Size,Type * OpType)380 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
381               Type *OpType) {
382   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
383 
384   SmallVector<CallLowering::ArgInfo, 3> Args;
385   for (unsigned i = 1; i < MI.getNumOperands(); i++)
386     Args.push_back({MI.getOperand(i).getReg(), OpType});
387   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
388                        Args);
389 }
390 
391 LegalizerHelper::LegalizeResult
createMemLibcall(MachineIRBuilder & MIRBuilder,MachineRegisterInfo & MRI,MachineInstr & MI)392 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
393                        MachineInstr &MI) {
394   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
395   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
396 
397   SmallVector<CallLowering::ArgInfo, 3> Args;
398   // Add all the args, except for the last which is an imm denoting 'tail'.
399   for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
400     Register Reg = MI.getOperand(i).getReg();
401 
402     // Need derive an IR type for call lowering.
403     LLT OpLLT = MRI.getType(Reg);
404     Type *OpTy = nullptr;
405     if (OpLLT.isPointer())
406       OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
407     else
408       OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
409     Args.push_back({Reg, OpTy});
410   }
411 
412   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
413   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
414   Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
415   RTLIB::Libcall RTLibcall;
416   switch (ID) {
417   case Intrinsic::memcpy:
418     RTLibcall = RTLIB::MEMCPY;
419     break;
420   case Intrinsic::memset:
421     RTLibcall = RTLIB::MEMSET;
422     break;
423   case Intrinsic::memmove:
424     RTLibcall = RTLIB::MEMMOVE;
425     break;
426   default:
427     return LegalizerHelper::UnableToLegalize;
428   }
429   const char *Name = TLI.getLibcallName(RTLibcall);
430 
431   MIRBuilder.setInstr(MI);
432 
433   CallLowering::CallLoweringInfo Info;
434   Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
435   Info.Callee = MachineOperand::CreateES(Name);
436   Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
437   Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
438                     isLibCallInTailPosition(MI);
439 
440   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
441   if (!CLI.lowerCall(MIRBuilder, Info))
442     return LegalizerHelper::UnableToLegalize;
443 
444   if (Info.LoweredTailCall) {
445     assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
446     // We must have a return following the call to get past
447     // isLibCallInTailPosition.
448     assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
449            "Expected instr following MI to be a return?");
450 
451     // We lowered a tail call, so the call is now the return from the block.
452     // Delete the old return.
453     MI.getNextNode()->eraseFromParent();
454   }
455 
456   return LegalizerHelper::Legalized;
457 }
458 
getConvRTLibDesc(unsigned Opcode,Type * ToType,Type * FromType)459 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
460                                        Type *FromType) {
461   auto ToMVT = MVT::getVT(ToType);
462   auto FromMVT = MVT::getVT(FromType);
463 
464   switch (Opcode) {
465   case TargetOpcode::G_FPEXT:
466     return RTLIB::getFPEXT(FromMVT, ToMVT);
467   case TargetOpcode::G_FPTRUNC:
468     return RTLIB::getFPROUND(FromMVT, ToMVT);
469   case TargetOpcode::G_FPTOSI:
470     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
471   case TargetOpcode::G_FPTOUI:
472     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
473   case TargetOpcode::G_SITOFP:
474     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
475   case TargetOpcode::G_UITOFP:
476     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
477   }
478   llvm_unreachable("Unsupported libcall function");
479 }
480 
481 static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,Type * ToType,Type * FromType)482 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
483                   Type *FromType) {
484   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
485   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
486                        {{MI.getOperand(1).getReg(), FromType}});
487 }
488 
489 LegalizerHelper::LegalizeResult
libcall(MachineInstr & MI)490 LegalizerHelper::libcall(MachineInstr &MI) {
491   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
492   unsigned Size = LLTy.getSizeInBits();
493   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
494 
495   MIRBuilder.setInstr(MI);
496 
497   switch (MI.getOpcode()) {
498   default:
499     return UnableToLegalize;
500   case TargetOpcode::G_SDIV:
501   case TargetOpcode::G_UDIV:
502   case TargetOpcode::G_SREM:
503   case TargetOpcode::G_UREM:
504   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
505     Type *HLTy = IntegerType::get(Ctx, Size);
506     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
507     if (Status != Legalized)
508       return Status;
509     break;
510   }
511   case TargetOpcode::G_FADD:
512   case TargetOpcode::G_FSUB:
513   case TargetOpcode::G_FMUL:
514   case TargetOpcode::G_FDIV:
515   case TargetOpcode::G_FMA:
516   case TargetOpcode::G_FPOW:
517   case TargetOpcode::G_FREM:
518   case TargetOpcode::G_FCOS:
519   case TargetOpcode::G_FSIN:
520   case TargetOpcode::G_FLOG10:
521   case TargetOpcode::G_FLOG:
522   case TargetOpcode::G_FLOG2:
523   case TargetOpcode::G_FEXP:
524   case TargetOpcode::G_FEXP2:
525   case TargetOpcode::G_FCEIL:
526   case TargetOpcode::G_FFLOOR: {
527     if (Size > 64) {
528       LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
529       return UnableToLegalize;
530     }
531     Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
532     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
533     if (Status != Legalized)
534       return Status;
535     break;
536   }
537   case TargetOpcode::G_FPEXT: {
538     // FIXME: Support other floating point types (half, fp128 etc)
539     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
540     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
541     if (ToSize != 64 || FromSize != 32)
542       return UnableToLegalize;
543     LegalizeResult Status = conversionLibcall(
544         MI, MIRBuilder, Type::getDoubleTy(Ctx), Type::getFloatTy(Ctx));
545     if (Status != Legalized)
546       return Status;
547     break;
548   }
549   case TargetOpcode::G_FPTRUNC: {
550     // FIXME: Support other floating point types (half, fp128 etc)
551     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
552     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
553     if (ToSize != 32 || FromSize != 64)
554       return UnableToLegalize;
555     LegalizeResult Status = conversionLibcall(
556         MI, MIRBuilder, Type::getFloatTy(Ctx), Type::getDoubleTy(Ctx));
557     if (Status != Legalized)
558       return Status;
559     break;
560   }
561   case TargetOpcode::G_FPTOSI:
562   case TargetOpcode::G_FPTOUI: {
563     // FIXME: Support other types
564     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
565     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
566     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
567       return UnableToLegalize;
568     LegalizeResult Status = conversionLibcall(
569         MI, MIRBuilder,
570         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
571         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
572     if (Status != Legalized)
573       return Status;
574     break;
575   }
576   case TargetOpcode::G_SITOFP:
577   case TargetOpcode::G_UITOFP: {
578     // FIXME: Support other types
579     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
580     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
581     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
582       return UnableToLegalize;
583     LegalizeResult Status = conversionLibcall(
584         MI, MIRBuilder,
585         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
586         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
587     if (Status != Legalized)
588       return Status;
589     break;
590   }
591   }
592 
593   MI.eraseFromParent();
594   return Legalized;
595 }
596 
narrowScalar(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)597 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
598                                                               unsigned TypeIdx,
599                                                               LLT NarrowTy) {
600   MIRBuilder.setInstr(MI);
601 
602   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
603   uint64_t NarrowSize = NarrowTy.getSizeInBits();
604 
605   switch (MI.getOpcode()) {
606   default:
607     return UnableToLegalize;
608   case TargetOpcode::G_IMPLICIT_DEF: {
609     // FIXME: add support for when SizeOp0 isn't an exact multiple of
610     // NarrowSize.
611     if (SizeOp0 % NarrowSize != 0)
612       return UnableToLegalize;
613     int NumParts = SizeOp0 / NarrowSize;
614 
615     SmallVector<Register, 2> DstRegs;
616     for (int i = 0; i < NumParts; ++i)
617       DstRegs.push_back(
618           MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
619 
620     Register DstReg = MI.getOperand(0).getReg();
621     if(MRI.getType(DstReg).isVector())
622       MIRBuilder.buildBuildVector(DstReg, DstRegs);
623     else
624       MIRBuilder.buildMerge(DstReg, DstRegs);
625     MI.eraseFromParent();
626     return Legalized;
627   }
628   case TargetOpcode::G_CONSTANT: {
629     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
630     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
631     unsigned TotalSize = Ty.getSizeInBits();
632     unsigned NarrowSize = NarrowTy.getSizeInBits();
633     int NumParts = TotalSize / NarrowSize;
634 
635     SmallVector<Register, 4> PartRegs;
636     for (int I = 0; I != NumParts; ++I) {
637       unsigned Offset = I * NarrowSize;
638       auto K = MIRBuilder.buildConstant(NarrowTy,
639                                         Val.lshr(Offset).trunc(NarrowSize));
640       PartRegs.push_back(K.getReg(0));
641     }
642 
643     LLT LeftoverTy;
644     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
645     SmallVector<Register, 1> LeftoverRegs;
646     if (LeftoverBits != 0) {
647       LeftoverTy = LLT::scalar(LeftoverBits);
648       auto K = MIRBuilder.buildConstant(
649         LeftoverTy,
650         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
651       LeftoverRegs.push_back(K.getReg(0));
652     }
653 
654     insertParts(MI.getOperand(0).getReg(),
655                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
656 
657     MI.eraseFromParent();
658     return Legalized;
659   }
660   case TargetOpcode::G_SEXT: {
661     if (TypeIdx != 0)
662       return UnableToLegalize;
663 
664     Register SrcReg = MI.getOperand(1).getReg();
665     LLT SrcTy = MRI.getType(SrcReg);
666 
667     // FIXME: support the general case where the requested NarrowTy may not be
668     // the same as the source type. E.g. s128 = sext(s32)
669     if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
670         SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
671       LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
672       return UnableToLegalize;
673     }
674 
675     // Shift the sign bit of the low register through the high register.
676     auto ShiftAmt =
677         MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
678     auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
679     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
680     MI.eraseFromParent();
681     return Legalized;
682   }
683   case TargetOpcode::G_ZEXT: {
684     if (TypeIdx != 0)
685       return UnableToLegalize;
686 
687     LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
688     uint64_t SizeOp1 = SrcTy.getSizeInBits();
689     if (SizeOp0 % SizeOp1 != 0)
690       return UnableToLegalize;
691 
692     // Generate a merge where the bottom bits are taken from the source, and
693     // zero everything else.
694     Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
695     unsigned NumParts = SizeOp0 / SizeOp1;
696     SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
697     for (unsigned Part = 1; Part < NumParts; ++Part)
698       Srcs.push_back(ZeroReg);
699     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
700     MI.eraseFromParent();
701     return Legalized;
702   }
703   case TargetOpcode::G_TRUNC: {
704     if (TypeIdx != 1)
705       return UnableToLegalize;
706 
707     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
708     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
709       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
710       return UnableToLegalize;
711     }
712 
713     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
714     MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
715     MI.eraseFromParent();
716     return Legalized;
717   }
718 
719   case TargetOpcode::G_ADD: {
720     // FIXME: add support for when SizeOp0 isn't an exact multiple of
721     // NarrowSize.
722     if (SizeOp0 % NarrowSize != 0)
723       return UnableToLegalize;
724     // Expand in terms of carry-setting/consuming G_ADDE instructions.
725     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
726 
727     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
728     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
729     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
730 
731     Register CarryIn;
732     for (int i = 0; i < NumParts; ++i) {
733       Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
734       Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
735 
736       if (i == 0)
737         MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
738       else {
739         MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
740                               Src2Regs[i], CarryIn);
741       }
742 
743       DstRegs.push_back(DstReg);
744       CarryIn = CarryOut;
745     }
746     Register DstReg = MI.getOperand(0).getReg();
747     if(MRI.getType(DstReg).isVector())
748       MIRBuilder.buildBuildVector(DstReg, DstRegs);
749     else
750       MIRBuilder.buildMerge(DstReg, DstRegs);
751     MI.eraseFromParent();
752     return Legalized;
753   }
754   case TargetOpcode::G_SUB: {
755     // FIXME: add support for when SizeOp0 isn't an exact multiple of
756     // NarrowSize.
757     if (SizeOp0 % NarrowSize != 0)
758       return UnableToLegalize;
759 
760     int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
761 
762     SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
763     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
764     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
765 
766     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
767     Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
768     MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
769                           {Src1Regs[0], Src2Regs[0]});
770     DstRegs.push_back(DstReg);
771     Register BorrowIn = BorrowOut;
772     for (int i = 1; i < NumParts; ++i) {
773       DstReg = MRI.createGenericVirtualRegister(NarrowTy);
774       BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
775 
776       MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
777                             {Src1Regs[i], Src2Regs[i], BorrowIn});
778 
779       DstRegs.push_back(DstReg);
780       BorrowIn = BorrowOut;
781     }
782     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
783     MI.eraseFromParent();
784     return Legalized;
785   }
786   case TargetOpcode::G_MUL:
787   case TargetOpcode::G_UMULH:
788     return narrowScalarMul(MI, NarrowTy);
789   case TargetOpcode::G_EXTRACT:
790     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
791   case TargetOpcode::G_INSERT:
792     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
793   case TargetOpcode::G_LOAD: {
794     const auto &MMO = **MI.memoperands_begin();
795     Register DstReg = MI.getOperand(0).getReg();
796     LLT DstTy = MRI.getType(DstReg);
797     if (DstTy.isVector())
798       return UnableToLegalize;
799 
800     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
801       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
802       auto &MMO = **MI.memoperands_begin();
803       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
804       MIRBuilder.buildAnyExt(DstReg, TmpReg);
805       MI.eraseFromParent();
806       return Legalized;
807     }
808 
809     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
810   }
811   case TargetOpcode::G_ZEXTLOAD:
812   case TargetOpcode::G_SEXTLOAD: {
813     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
814     Register DstReg = MI.getOperand(0).getReg();
815     Register PtrReg = MI.getOperand(1).getReg();
816 
817     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
818     auto &MMO = **MI.memoperands_begin();
819     if (MMO.getSizeInBits() == NarrowSize) {
820       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
821     } else {
822       unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
823         : TargetOpcode::G_SEXTLOAD;
824       MIRBuilder.buildInstr(ExtLoad)
825         .addDef(TmpReg)
826         .addUse(PtrReg)
827         .addMemOperand(&MMO);
828     }
829 
830     if (ZExt)
831       MIRBuilder.buildZExt(DstReg, TmpReg);
832     else
833       MIRBuilder.buildSExt(DstReg, TmpReg);
834 
835     MI.eraseFromParent();
836     return Legalized;
837   }
838   case TargetOpcode::G_STORE: {
839     const auto &MMO = **MI.memoperands_begin();
840 
841     Register SrcReg = MI.getOperand(0).getReg();
842     LLT SrcTy = MRI.getType(SrcReg);
843     if (SrcTy.isVector())
844       return UnableToLegalize;
845 
846     int NumParts = SizeOp0 / NarrowSize;
847     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
848     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
849     if (SrcTy.isVector() && LeftoverBits != 0)
850       return UnableToLegalize;
851 
852     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
853       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
854       auto &MMO = **MI.memoperands_begin();
855       MIRBuilder.buildTrunc(TmpReg, SrcReg);
856       MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
857       MI.eraseFromParent();
858       return Legalized;
859     }
860 
861     return reduceLoadStoreWidth(MI, 0, NarrowTy);
862   }
863   case TargetOpcode::G_SELECT:
864     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
865   case TargetOpcode::G_AND:
866   case TargetOpcode::G_OR:
867   case TargetOpcode::G_XOR: {
868     // Legalize bitwise operation:
869     // A = BinOp<Ty> B, C
870     // into:
871     // B1, ..., BN = G_UNMERGE_VALUES B
872     // C1, ..., CN = G_UNMERGE_VALUES C
873     // A1 = BinOp<Ty/N> B1, C2
874     // ...
875     // AN = BinOp<Ty/N> BN, CN
876     // A = G_MERGE_VALUES A1, ..., AN
877     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
878   }
879   case TargetOpcode::G_SHL:
880   case TargetOpcode::G_LSHR:
881   case TargetOpcode::G_ASHR:
882     return narrowScalarShift(MI, TypeIdx, NarrowTy);
883   case TargetOpcode::G_CTLZ:
884   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
885   case TargetOpcode::G_CTTZ:
886   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
887   case TargetOpcode::G_CTPOP:
888     if (TypeIdx != 0)
889       return UnableToLegalize; // TODO
890 
891     Observer.changingInstr(MI);
892     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
893     Observer.changedInstr(MI);
894     return Legalized;
895   case TargetOpcode::G_INTTOPTR:
896     if (TypeIdx != 1)
897       return UnableToLegalize;
898 
899     Observer.changingInstr(MI);
900     narrowScalarSrc(MI, NarrowTy, 1);
901     Observer.changedInstr(MI);
902     return Legalized;
903   case TargetOpcode::G_PTRTOINT:
904     if (TypeIdx != 0)
905       return UnableToLegalize;
906 
907     Observer.changingInstr(MI);
908     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
909     Observer.changedInstr(MI);
910     return Legalized;
911   case TargetOpcode::G_PHI: {
912     unsigned NumParts = SizeOp0 / NarrowSize;
913     SmallVector<Register, 2> DstRegs;
914     SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
915     DstRegs.resize(NumParts);
916     SrcRegs.resize(MI.getNumOperands() / 2);
917     Observer.changingInstr(MI);
918     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
919       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
920       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
921       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
922                    SrcRegs[i / 2]);
923     }
924     MachineBasicBlock &MBB = *MI.getParent();
925     MIRBuilder.setInsertPt(MBB, MI);
926     for (unsigned i = 0; i < NumParts; ++i) {
927       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
928       MachineInstrBuilder MIB =
929           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
930       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
931         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
932     }
933     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
934     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
935     Observer.changedInstr(MI);
936     MI.eraseFromParent();
937     return Legalized;
938   }
939   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
940   case TargetOpcode::G_INSERT_VECTOR_ELT: {
941     if (TypeIdx != 2)
942       return UnableToLegalize;
943 
944     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
945     Observer.changingInstr(MI);
946     narrowScalarSrc(MI, NarrowTy, OpIdx);
947     Observer.changedInstr(MI);
948     return Legalized;
949   }
950   case TargetOpcode::G_ICMP: {
951     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
952     if (NarrowSize * 2 != SrcSize)
953       return UnableToLegalize;
954 
955     Observer.changingInstr(MI);
956     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
957     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
958     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
959 
960     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
961     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
962     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
963 
964     CmpInst::Predicate Pred =
965         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
966     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
967 
968     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
969       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
970       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
971       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
972       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
973       MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
974     } else {
975       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
976       MachineInstrBuilder CmpHEQ =
977           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
978       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
979           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
980       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
981     }
982     Observer.changedInstr(MI);
983     MI.eraseFromParent();
984     return Legalized;
985   }
986   case TargetOpcode::G_SEXT_INREG: {
987     if (TypeIdx != 0)
988       return UnableToLegalize;
989 
990     if (!MI.getOperand(2).isImm())
991       return UnableToLegalize;
992     int64_t SizeInBits = MI.getOperand(2).getImm();
993 
994     // So long as the new type has more bits than the bits we're extending we
995     // don't need to break it apart.
996     if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
997       Observer.changingInstr(MI);
998       // We don't lose any non-extension bits by truncating the src and
999       // sign-extending the dst.
1000       MachineOperand &MO1 = MI.getOperand(1);
1001       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
1002       MO1.setReg(TruncMIB->getOperand(0).getReg());
1003 
1004       MachineOperand &MO2 = MI.getOperand(0);
1005       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1006       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1007       MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
1008       MO2.setReg(DstExt);
1009       Observer.changedInstr(MI);
1010       return Legalized;
1011     }
1012 
1013     // Break it apart. Components below the extension point are unmodified. The
1014     // component containing the extension point becomes a narrower SEXT_INREG.
1015     // Components above it are ashr'd from the component containing the
1016     // extension point.
1017     if (SizeOp0 % NarrowSize != 0)
1018       return UnableToLegalize;
1019     int NumParts = SizeOp0 / NarrowSize;
1020 
1021     // List the registers where the destination will be scattered.
1022     SmallVector<Register, 2> DstRegs;
1023     // List the registers where the source will be split.
1024     SmallVector<Register, 2> SrcRegs;
1025 
1026     // Create all the temporary registers.
1027     for (int i = 0; i < NumParts; ++i) {
1028       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1029 
1030       SrcRegs.push_back(SrcReg);
1031     }
1032 
1033     // Explode the big arguments into smaller chunks.
1034     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
1035 
1036     Register AshrCstReg =
1037         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1038             ->getOperand(0)
1039             .getReg();
1040     Register FullExtensionReg = 0;
1041     Register PartialExtensionReg = 0;
1042 
1043     // Do the operation on each small part.
1044     for (int i = 0; i < NumParts; ++i) {
1045       if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1046         DstRegs.push_back(SrcRegs[i]);
1047       else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1048         assert(PartialExtensionReg &&
1049                "Expected to visit partial extension before full");
1050         if (FullExtensionReg) {
1051           DstRegs.push_back(FullExtensionReg);
1052           continue;
1053         }
1054         DstRegs.push_back(MIRBuilder
1055                               .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
1056                                           {PartialExtensionReg, AshrCstReg})
1057                               ->getOperand(0)
1058                               .getReg());
1059         FullExtensionReg = DstRegs.back();
1060       } else {
1061         DstRegs.push_back(
1062             MIRBuilder
1063                 .buildInstr(
1064                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
1065                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1066                 ->getOperand(0)
1067                 .getReg());
1068         PartialExtensionReg = DstRegs.back();
1069       }
1070     }
1071 
1072     // Gather the destination registers into the final destination.
1073     Register DstReg = MI.getOperand(0).getReg();
1074     MIRBuilder.buildMerge(DstReg, DstRegs);
1075     MI.eraseFromParent();
1076     return Legalized;
1077   }
1078   case TargetOpcode::G_BSWAP:
1079   case TargetOpcode::G_BITREVERSE: {
1080     if (SizeOp0 % NarrowSize != 0)
1081       return UnableToLegalize;
1082 
1083     Observer.changingInstr(MI);
1084     SmallVector<Register, 2> SrcRegs, DstRegs;
1085     unsigned NumParts = SizeOp0 / NarrowSize;
1086     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1087 
1088     for (unsigned i = 0; i < NumParts; ++i) {
1089       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1090                                            {SrcRegs[NumParts - 1 - i]});
1091       DstRegs.push_back(DstPart.getReg(0));
1092     }
1093 
1094     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
1095 
1096     Observer.changedInstr(MI);
1097     MI.eraseFromParent();
1098     return Legalized;
1099   }
1100   }
1101 }
1102 
widenScalarSrc(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned ExtOpcode)1103 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1104                                      unsigned OpIdx, unsigned ExtOpcode) {
1105   MachineOperand &MO = MI.getOperand(OpIdx);
1106   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
1107   MO.setReg(ExtB->getOperand(0).getReg());
1108 }
1109 
narrowScalarSrc(MachineInstr & MI,LLT NarrowTy,unsigned OpIdx)1110 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1111                                       unsigned OpIdx) {
1112   MachineOperand &MO = MI.getOperand(OpIdx);
1113   auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
1114                                     {MO.getReg()});
1115   MO.setReg(ExtB->getOperand(0).getReg());
1116 }
1117 
widenScalarDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned TruncOpcode)1118 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1119                                      unsigned OpIdx, unsigned TruncOpcode) {
1120   MachineOperand &MO = MI.getOperand(OpIdx);
1121   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1122   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1123   MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
1124   MO.setReg(DstExt);
1125 }
1126 
narrowScalarDst(MachineInstr & MI,LLT NarrowTy,unsigned OpIdx,unsigned ExtOpcode)1127 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1128                                       unsigned OpIdx, unsigned ExtOpcode) {
1129   MachineOperand &MO = MI.getOperand(OpIdx);
1130   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1131   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1132   MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
1133   MO.setReg(DstTrunc);
1134 }
1135 
moreElementsVectorDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx)1136 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1137                                             unsigned OpIdx) {
1138   MachineOperand &MO = MI.getOperand(OpIdx);
1139   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1140   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1141   MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
1142   MO.setReg(DstExt);
1143 }
1144 
moreElementsVectorSrc(MachineInstr & MI,LLT MoreTy,unsigned OpIdx)1145 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1146                                             unsigned OpIdx) {
1147   MachineOperand &MO = MI.getOperand(OpIdx);
1148 
1149   LLT OldTy = MRI.getType(MO.getReg());
1150   unsigned OldElts = OldTy.getNumElements();
1151   unsigned NewElts = MoreTy.getNumElements();
1152 
1153   unsigned NumParts = NewElts / OldElts;
1154 
1155   // Use concat_vectors if the result is a multiple of the number of elements.
1156   if (NumParts * OldElts == NewElts) {
1157     SmallVector<Register, 8> Parts;
1158     Parts.push_back(MO.getReg());
1159 
1160     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1161     for (unsigned I = 1; I != NumParts; ++I)
1162       Parts.push_back(ImpDef);
1163 
1164     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1165     MO.setReg(Concat.getReg(0));
1166     return;
1167   }
1168 
1169   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1170   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1171   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1172   MO.setReg(MoreReg);
1173 }
1174 
1175 LegalizerHelper::LegalizeResult
widenScalarMergeValues(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1176 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1177                                         LLT WideTy) {
1178   if (TypeIdx != 1)
1179     return UnableToLegalize;
1180 
1181   Register DstReg = MI.getOperand(0).getReg();
1182   LLT DstTy = MRI.getType(DstReg);
1183   if (DstTy.isVector())
1184     return UnableToLegalize;
1185 
1186   Register Src1 = MI.getOperand(1).getReg();
1187   LLT SrcTy = MRI.getType(Src1);
1188   const int DstSize = DstTy.getSizeInBits();
1189   const int SrcSize = SrcTy.getSizeInBits();
1190   const int WideSize = WideTy.getSizeInBits();
1191   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1192 
1193   unsigned NumOps = MI.getNumOperands();
1194   unsigned NumSrc = MI.getNumOperands() - 1;
1195   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1196 
1197   if (WideSize >= DstSize) {
1198     // Directly pack the bits in the target type.
1199     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1200 
1201     for (unsigned I = 2; I != NumOps; ++I) {
1202       const unsigned Offset = (I - 1) * PartSize;
1203 
1204       Register SrcReg = MI.getOperand(I).getReg();
1205       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1206 
1207       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1208 
1209       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1210         MRI.createGenericVirtualRegister(WideTy);
1211 
1212       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1213       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1214       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1215       ResultReg = NextResult;
1216     }
1217 
1218     if (WideSize > DstSize)
1219       MIRBuilder.buildTrunc(DstReg, ResultReg);
1220     else if (DstTy.isPointer())
1221       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1222 
1223     MI.eraseFromParent();
1224     return Legalized;
1225   }
1226 
1227   // Unmerge the original values to the GCD type, and recombine to the next
1228   // multiple greater than the original type.
1229   //
1230   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1231   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1232   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1233   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1234   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1235   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1236   // %12:_(s12) = G_MERGE_VALUES %10, %11
1237   //
1238   // Padding with undef if necessary:
1239   //
1240   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1241   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1242   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1243   // %7:_(s2) = G_IMPLICIT_DEF
1244   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1245   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1246   // %10:_(s12) = G_MERGE_VALUES %8, %9
1247 
1248   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1249   LLT GCDTy = LLT::scalar(GCD);
1250 
1251   SmallVector<Register, 8> Parts;
1252   SmallVector<Register, 8> NewMergeRegs;
1253   SmallVector<Register, 8> Unmerges;
1254   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1255 
1256   // Decompose the original operands if they don't evenly divide.
1257   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1258     Register SrcReg = MI.getOperand(I).getReg();
1259     if (GCD == SrcSize) {
1260       Unmerges.push_back(SrcReg);
1261     } else {
1262       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1263       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1264         Unmerges.push_back(Unmerge.getReg(J));
1265     }
1266   }
1267 
1268   // Pad with undef to the next size that is a multiple of the requested size.
1269   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1270     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1271     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1272       Unmerges.push_back(UndefReg);
1273   }
1274 
1275   const int PartsPerGCD = WideSize / GCD;
1276 
1277   // Build merges of each piece.
1278   ArrayRef<Register> Slicer(Unmerges);
1279   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1280     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1281     NewMergeRegs.push_back(Merge.getReg(0));
1282   }
1283 
1284   // A truncate may be necessary if the requested type doesn't evenly divide the
1285   // original result type.
1286   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1287     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1288   } else {
1289     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1290     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1291   }
1292 
1293   MI.eraseFromParent();
1294   return Legalized;
1295 }
1296 
1297 LegalizerHelper::LegalizeResult
widenScalarUnmergeValues(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1298 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1299                                           LLT WideTy) {
1300   if (TypeIdx != 0)
1301     return UnableToLegalize;
1302 
1303   unsigned NumDst = MI.getNumOperands() - 1;
1304   Register SrcReg = MI.getOperand(NumDst).getReg();
1305   LLT SrcTy = MRI.getType(SrcReg);
1306   if (!SrcTy.isScalar())
1307     return UnableToLegalize;
1308 
1309   Register Dst0Reg = MI.getOperand(0).getReg();
1310   LLT DstTy = MRI.getType(Dst0Reg);
1311   if (!DstTy.isScalar())
1312     return UnableToLegalize;
1313 
1314   unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
1315   LLT NewSrcTy = LLT::scalar(NewSrcSize);
1316   unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
1317 
1318   auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
1319 
1320   for (unsigned I = 1; I != NumDst; ++I) {
1321     auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
1322     auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
1323     WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
1324   }
1325 
1326   Observer.changingInstr(MI);
1327 
1328   MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
1329   for (unsigned I = 0; I != NumDst; ++I)
1330     widenScalarDst(MI, WideTy, I);
1331 
1332   Observer.changedInstr(MI);
1333 
1334   return Legalized;
1335 }
1336 
1337 LegalizerHelper::LegalizeResult
widenScalarExtract(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1338 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1339                                     LLT WideTy) {
1340   Register DstReg = MI.getOperand(0).getReg();
1341   Register SrcReg = MI.getOperand(1).getReg();
1342   LLT SrcTy = MRI.getType(SrcReg);
1343 
1344   LLT DstTy = MRI.getType(DstReg);
1345   unsigned Offset = MI.getOperand(2).getImm();
1346 
1347   if (TypeIdx == 0) {
1348     if (SrcTy.isVector() || DstTy.isVector())
1349       return UnableToLegalize;
1350 
1351     SrcOp Src(SrcReg);
1352     if (SrcTy.isPointer()) {
1353       // Extracts from pointers can be handled only if they are really just
1354       // simple integers.
1355       const DataLayout &DL = MIRBuilder.getDataLayout();
1356       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1357         return UnableToLegalize;
1358 
1359       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1360       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1361       SrcTy = SrcAsIntTy;
1362     }
1363 
1364     if (DstTy.isPointer())
1365       return UnableToLegalize;
1366 
1367     if (Offset == 0) {
1368       // Avoid a shift in the degenerate case.
1369       MIRBuilder.buildTrunc(DstReg,
1370                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1371       MI.eraseFromParent();
1372       return Legalized;
1373     }
1374 
1375     // Do a shift in the source type.
1376     LLT ShiftTy = SrcTy;
1377     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1378       Src = MIRBuilder.buildAnyExt(WideTy, Src);
1379       ShiftTy = WideTy;
1380     } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1381       return UnableToLegalize;
1382 
1383     auto LShr = MIRBuilder.buildLShr(
1384       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1385     MIRBuilder.buildTrunc(DstReg, LShr);
1386     MI.eraseFromParent();
1387     return Legalized;
1388   }
1389 
1390   if (SrcTy.isScalar()) {
1391     Observer.changingInstr(MI);
1392     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1393     Observer.changedInstr(MI);
1394     return Legalized;
1395   }
1396 
1397   if (!SrcTy.isVector())
1398     return UnableToLegalize;
1399 
1400   if (DstTy != SrcTy.getElementType())
1401     return UnableToLegalize;
1402 
1403   if (Offset % SrcTy.getScalarSizeInBits() != 0)
1404     return UnableToLegalize;
1405 
1406   Observer.changingInstr(MI);
1407   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1408 
1409   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1410                           Offset);
1411   widenScalarDst(MI, WideTy.getScalarType(), 0);
1412   Observer.changedInstr(MI);
1413   return Legalized;
1414 }
1415 
1416 LegalizerHelper::LegalizeResult
widenScalarInsert(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1417 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1418                                    LLT WideTy) {
1419   if (TypeIdx != 0)
1420     return UnableToLegalize;
1421   Observer.changingInstr(MI);
1422   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1423   widenScalarDst(MI, WideTy);
1424   Observer.changedInstr(MI);
1425   return Legalized;
1426 }
1427 
1428 LegalizerHelper::LegalizeResult
widenScalar(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1429 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1430   MIRBuilder.setInstr(MI);
1431 
1432   switch (MI.getOpcode()) {
1433   default:
1434     return UnableToLegalize;
1435   case TargetOpcode::G_EXTRACT:
1436     return widenScalarExtract(MI, TypeIdx, WideTy);
1437   case TargetOpcode::G_INSERT:
1438     return widenScalarInsert(MI, TypeIdx, WideTy);
1439   case TargetOpcode::G_MERGE_VALUES:
1440     return widenScalarMergeValues(MI, TypeIdx, WideTy);
1441   case TargetOpcode::G_UNMERGE_VALUES:
1442     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1443   case TargetOpcode::G_UADDO:
1444   case TargetOpcode::G_USUBO: {
1445     if (TypeIdx == 1)
1446       return UnableToLegalize; // TODO
1447     auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1448                                          {MI.getOperand(2).getReg()});
1449     auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1450                                          {MI.getOperand(3).getReg()});
1451     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1452                           ? TargetOpcode::G_ADD
1453                           : TargetOpcode::G_SUB;
1454     // Do the arithmetic in the larger type.
1455     auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1456     LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1457     APInt Mask = APInt::getAllOnesValue(OrigTy.getSizeInBits());
1458     auto AndOp = MIRBuilder.buildInstr(
1459         TargetOpcode::G_AND, {WideTy},
1460         {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
1461     // There is no overflow if the AndOp is the same as NewOp.
1462     MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1).getReg(), NewOp,
1463                          AndOp);
1464     // Now trunc the NewOp to the original result.
1465     MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
1466     MI.eraseFromParent();
1467     return Legalized;
1468   }
1469   case TargetOpcode::G_CTTZ:
1470   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1471   case TargetOpcode::G_CTLZ:
1472   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1473   case TargetOpcode::G_CTPOP: {
1474     if (TypeIdx == 0) {
1475       Observer.changingInstr(MI);
1476       widenScalarDst(MI, WideTy, 0);
1477       Observer.changedInstr(MI);
1478       return Legalized;
1479     }
1480 
1481     Register SrcReg = MI.getOperand(1).getReg();
1482 
1483     // First ZEXT the input.
1484     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1485     LLT CurTy = MRI.getType(SrcReg);
1486     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1487       // The count is the same in the larger type except if the original
1488       // value was zero.  This can be handled by setting the bit just off
1489       // the top of the original type.
1490       auto TopBit =
1491           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1492       MIBSrc = MIRBuilder.buildOr(
1493         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1494     }
1495 
1496     // Perform the operation at the larger size.
1497     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1498     // This is already the correct result for CTPOP and CTTZs
1499     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1500         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1501       // The correct result is NewOp - (Difference in widety and current ty).
1502       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1503       MIBNewOp = MIRBuilder.buildInstr(
1504           TargetOpcode::G_SUB, {WideTy},
1505           {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1506     }
1507 
1508     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1509     MI.eraseFromParent();
1510     return Legalized;
1511   }
1512   case TargetOpcode::G_BSWAP: {
1513     Observer.changingInstr(MI);
1514     Register DstReg = MI.getOperand(0).getReg();
1515 
1516     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1517     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1518     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1519     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1520 
1521     MI.getOperand(0).setReg(DstExt);
1522 
1523     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1524 
1525     LLT Ty = MRI.getType(DstReg);
1526     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1527     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1528     MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1529       .addDef(ShrReg)
1530       .addUse(DstExt)
1531       .addUse(ShiftAmtReg);
1532 
1533     MIRBuilder.buildTrunc(DstReg, ShrReg);
1534     Observer.changedInstr(MI);
1535     return Legalized;
1536   }
1537   case TargetOpcode::G_BITREVERSE: {
1538     Observer.changingInstr(MI);
1539 
1540     Register DstReg = MI.getOperand(0).getReg();
1541     LLT Ty = MRI.getType(DstReg);
1542     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1543 
1544     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1545     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1546     MI.getOperand(0).setReg(DstExt);
1547     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1548 
1549     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
1550     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
1551     MIRBuilder.buildTrunc(DstReg, Shift);
1552     Observer.changedInstr(MI);
1553     return Legalized;
1554   }
1555   case TargetOpcode::G_ADD:
1556   case TargetOpcode::G_AND:
1557   case TargetOpcode::G_MUL:
1558   case TargetOpcode::G_OR:
1559   case TargetOpcode::G_XOR:
1560   case TargetOpcode::G_SUB:
1561     // Perform operation at larger width (any extension is fines here, high bits
1562     // don't affect the result) and then truncate the result back to the
1563     // original type.
1564     Observer.changingInstr(MI);
1565     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1566     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1567     widenScalarDst(MI, WideTy);
1568     Observer.changedInstr(MI);
1569     return Legalized;
1570 
1571   case TargetOpcode::G_SHL:
1572     Observer.changingInstr(MI);
1573 
1574     if (TypeIdx == 0) {
1575       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1576       widenScalarDst(MI, WideTy);
1577     } else {
1578       assert(TypeIdx == 1);
1579       // The "number of bits to shift" operand must preserve its value as an
1580       // unsigned integer:
1581       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1582     }
1583 
1584     Observer.changedInstr(MI);
1585     return Legalized;
1586 
1587   case TargetOpcode::G_SDIV:
1588   case TargetOpcode::G_SREM:
1589   case TargetOpcode::G_SMIN:
1590   case TargetOpcode::G_SMAX:
1591     Observer.changingInstr(MI);
1592     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1593     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1594     widenScalarDst(MI, WideTy);
1595     Observer.changedInstr(MI);
1596     return Legalized;
1597 
1598   case TargetOpcode::G_ASHR:
1599   case TargetOpcode::G_LSHR:
1600     Observer.changingInstr(MI);
1601 
1602     if (TypeIdx == 0) {
1603       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1604         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1605 
1606       widenScalarSrc(MI, WideTy, 1, CvtOp);
1607       widenScalarDst(MI, WideTy);
1608     } else {
1609       assert(TypeIdx == 1);
1610       // The "number of bits to shift" operand must preserve its value as an
1611       // unsigned integer:
1612       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1613     }
1614 
1615     Observer.changedInstr(MI);
1616     return Legalized;
1617   case TargetOpcode::G_UDIV:
1618   case TargetOpcode::G_UREM:
1619   case TargetOpcode::G_UMIN:
1620   case TargetOpcode::G_UMAX:
1621     Observer.changingInstr(MI);
1622     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1623     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1624     widenScalarDst(MI, WideTy);
1625     Observer.changedInstr(MI);
1626     return Legalized;
1627 
1628   case TargetOpcode::G_SELECT:
1629     Observer.changingInstr(MI);
1630     if (TypeIdx == 0) {
1631       // Perform operation at larger width (any extension is fine here, high
1632       // bits don't affect the result) and then truncate the result back to the
1633       // original type.
1634       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1635       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1636       widenScalarDst(MI, WideTy);
1637     } else {
1638       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1639       // Explicit extension is required here since high bits affect the result.
1640       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1641     }
1642     Observer.changedInstr(MI);
1643     return Legalized;
1644 
1645   case TargetOpcode::G_FPTOSI:
1646   case TargetOpcode::G_FPTOUI:
1647     Observer.changingInstr(MI);
1648 
1649     if (TypeIdx == 0)
1650       widenScalarDst(MI, WideTy);
1651     else
1652       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
1653 
1654     Observer.changedInstr(MI);
1655     return Legalized;
1656   case TargetOpcode::G_SITOFP:
1657     if (TypeIdx != 1)
1658       return UnableToLegalize;
1659     Observer.changingInstr(MI);
1660     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1661     Observer.changedInstr(MI);
1662     return Legalized;
1663 
1664   case TargetOpcode::G_UITOFP:
1665     if (TypeIdx != 1)
1666       return UnableToLegalize;
1667     Observer.changingInstr(MI);
1668     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1669     Observer.changedInstr(MI);
1670     return Legalized;
1671 
1672   case TargetOpcode::G_LOAD:
1673   case TargetOpcode::G_SEXTLOAD:
1674   case TargetOpcode::G_ZEXTLOAD:
1675     Observer.changingInstr(MI);
1676     widenScalarDst(MI, WideTy);
1677     Observer.changedInstr(MI);
1678     return Legalized;
1679 
1680   case TargetOpcode::G_STORE: {
1681     if (TypeIdx != 0)
1682       return UnableToLegalize;
1683 
1684     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1685     if (!isPowerOf2_32(Ty.getSizeInBits()))
1686       return UnableToLegalize;
1687 
1688     Observer.changingInstr(MI);
1689 
1690     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1691       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1692     widenScalarSrc(MI, WideTy, 0, ExtType);
1693 
1694     Observer.changedInstr(MI);
1695     return Legalized;
1696   }
1697   case TargetOpcode::G_CONSTANT: {
1698     MachineOperand &SrcMO = MI.getOperand(1);
1699     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1700     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
1701         MRI.getType(MI.getOperand(0).getReg()));
1702     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
1703             ExtOpc == TargetOpcode::G_ANYEXT) &&
1704            "Illegal Extend");
1705     const APInt &SrcVal = SrcMO.getCImm()->getValue();
1706     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
1707                            ? SrcVal.sext(WideTy.getSizeInBits())
1708                            : SrcVal.zext(WideTy.getSizeInBits());
1709     Observer.changingInstr(MI);
1710     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1711 
1712     widenScalarDst(MI, WideTy);
1713     Observer.changedInstr(MI);
1714     return Legalized;
1715   }
1716   case TargetOpcode::G_FCONSTANT: {
1717     MachineOperand &SrcMO = MI.getOperand(1);
1718     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
1719     APFloat Val = SrcMO.getFPImm()->getValueAPF();
1720     bool LosesInfo;
1721     switch (WideTy.getSizeInBits()) {
1722     case 32:
1723       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
1724                   &LosesInfo);
1725       break;
1726     case 64:
1727       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
1728                   &LosesInfo);
1729       break;
1730     default:
1731       return UnableToLegalize;
1732     }
1733 
1734     assert(!LosesInfo && "extend should always be lossless");
1735 
1736     Observer.changingInstr(MI);
1737     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1738 
1739     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1740     Observer.changedInstr(MI);
1741     return Legalized;
1742   }
1743   case TargetOpcode::G_IMPLICIT_DEF: {
1744     Observer.changingInstr(MI);
1745     widenScalarDst(MI, WideTy);
1746     Observer.changedInstr(MI);
1747     return Legalized;
1748   }
1749   case TargetOpcode::G_BRCOND:
1750     Observer.changingInstr(MI);
1751     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1752     Observer.changedInstr(MI);
1753     return Legalized;
1754 
1755   case TargetOpcode::G_FCMP:
1756     Observer.changingInstr(MI);
1757     if (TypeIdx == 0)
1758       widenScalarDst(MI, WideTy);
1759     else {
1760       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1761       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1762     }
1763     Observer.changedInstr(MI);
1764     return Legalized;
1765 
1766   case TargetOpcode::G_ICMP:
1767     Observer.changingInstr(MI);
1768     if (TypeIdx == 0)
1769       widenScalarDst(MI, WideTy);
1770     else {
1771       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1772                                MI.getOperand(1).getPredicate()))
1773                                ? TargetOpcode::G_SEXT
1774                                : TargetOpcode::G_ZEXT;
1775       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1776       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1777     }
1778     Observer.changedInstr(MI);
1779     return Legalized;
1780 
1781   case TargetOpcode::G_PTR_ADD:
1782     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
1783     Observer.changingInstr(MI);
1784     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1785     Observer.changedInstr(MI);
1786     return Legalized;
1787 
1788   case TargetOpcode::G_PHI: {
1789     assert(TypeIdx == 0 && "Expecting only Idx 0");
1790 
1791     Observer.changingInstr(MI);
1792     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1793       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1794       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1795       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1796     }
1797 
1798     MachineBasicBlock &MBB = *MI.getParent();
1799     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1800     widenScalarDst(MI, WideTy);
1801     Observer.changedInstr(MI);
1802     return Legalized;
1803   }
1804   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1805     if (TypeIdx == 0) {
1806       Register VecReg = MI.getOperand(1).getReg();
1807       LLT VecTy = MRI.getType(VecReg);
1808       Observer.changingInstr(MI);
1809 
1810       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1811                                      WideTy.getSizeInBits()),
1812                      1, TargetOpcode::G_SEXT);
1813 
1814       widenScalarDst(MI, WideTy, 0);
1815       Observer.changedInstr(MI);
1816       return Legalized;
1817     }
1818 
1819     if (TypeIdx != 2)
1820       return UnableToLegalize;
1821     Observer.changingInstr(MI);
1822     // TODO: Probably should be zext
1823     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1824     Observer.changedInstr(MI);
1825     return Legalized;
1826   }
1827   case TargetOpcode::G_INSERT_VECTOR_ELT: {
1828     if (TypeIdx == 1) {
1829       Observer.changingInstr(MI);
1830 
1831       Register VecReg = MI.getOperand(1).getReg();
1832       LLT VecTy = MRI.getType(VecReg);
1833       LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
1834 
1835       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
1836       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1837       widenScalarDst(MI, WideVecTy, 0);
1838       Observer.changedInstr(MI);
1839       return Legalized;
1840     }
1841 
1842     if (TypeIdx == 2) {
1843       Observer.changingInstr(MI);
1844       // TODO: Probably should be zext
1845       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
1846       Observer.changedInstr(MI);
1847     }
1848 
1849     return Legalized;
1850   }
1851   case TargetOpcode::G_FADD:
1852   case TargetOpcode::G_FMUL:
1853   case TargetOpcode::G_FSUB:
1854   case TargetOpcode::G_FMA:
1855   case TargetOpcode::G_FMAD:
1856   case TargetOpcode::G_FNEG:
1857   case TargetOpcode::G_FABS:
1858   case TargetOpcode::G_FCANONICALIZE:
1859   case TargetOpcode::G_FMINNUM:
1860   case TargetOpcode::G_FMAXNUM:
1861   case TargetOpcode::G_FMINNUM_IEEE:
1862   case TargetOpcode::G_FMAXNUM_IEEE:
1863   case TargetOpcode::G_FMINIMUM:
1864   case TargetOpcode::G_FMAXIMUM:
1865   case TargetOpcode::G_FDIV:
1866   case TargetOpcode::G_FREM:
1867   case TargetOpcode::G_FCEIL:
1868   case TargetOpcode::G_FFLOOR:
1869   case TargetOpcode::G_FCOS:
1870   case TargetOpcode::G_FSIN:
1871   case TargetOpcode::G_FLOG10:
1872   case TargetOpcode::G_FLOG:
1873   case TargetOpcode::G_FLOG2:
1874   case TargetOpcode::G_FRINT:
1875   case TargetOpcode::G_FNEARBYINT:
1876   case TargetOpcode::G_FSQRT:
1877   case TargetOpcode::G_FEXP:
1878   case TargetOpcode::G_FEXP2:
1879   case TargetOpcode::G_FPOW:
1880   case TargetOpcode::G_INTRINSIC_TRUNC:
1881   case TargetOpcode::G_INTRINSIC_ROUND:
1882     assert(TypeIdx == 0);
1883     Observer.changingInstr(MI);
1884 
1885     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1886       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1887 
1888     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1889     Observer.changedInstr(MI);
1890     return Legalized;
1891   case TargetOpcode::G_INTTOPTR:
1892     if (TypeIdx != 1)
1893       return UnableToLegalize;
1894 
1895     Observer.changingInstr(MI);
1896     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1897     Observer.changedInstr(MI);
1898     return Legalized;
1899   case TargetOpcode::G_PTRTOINT:
1900     if (TypeIdx != 0)
1901       return UnableToLegalize;
1902 
1903     Observer.changingInstr(MI);
1904     widenScalarDst(MI, WideTy, 0);
1905     Observer.changedInstr(MI);
1906     return Legalized;
1907   case TargetOpcode::G_BUILD_VECTOR: {
1908     Observer.changingInstr(MI);
1909 
1910     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
1911     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
1912       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1913 
1914     // Avoid changing the result vector type if the source element type was
1915     // requested.
1916     if (TypeIdx == 1) {
1917       auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1918       MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1919     } else {
1920       widenScalarDst(MI, WideTy, 0);
1921     }
1922 
1923     Observer.changedInstr(MI);
1924     return Legalized;
1925   }
1926   case TargetOpcode::G_SEXT_INREG:
1927     if (TypeIdx != 0)
1928       return UnableToLegalize;
1929 
1930     Observer.changingInstr(MI);
1931     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1932     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
1933     Observer.changedInstr(MI);
1934     return Legalized;
1935   }
1936 }
1937 
1938 LegalizerHelper::LegalizeResult
lower(MachineInstr & MI,unsigned TypeIdx,LLT Ty)1939 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1940   using namespace TargetOpcode;
1941   MIRBuilder.setInstr(MI);
1942 
1943   switch(MI.getOpcode()) {
1944   default:
1945     return UnableToLegalize;
1946   case TargetOpcode::G_SREM:
1947   case TargetOpcode::G_UREM: {
1948     Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1949     MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1950         .addDef(QuotReg)
1951         .addUse(MI.getOperand(1).getReg())
1952         .addUse(MI.getOperand(2).getReg());
1953 
1954     Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1955     MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1956     MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1957                         ProdReg);
1958     MI.eraseFromParent();
1959     return Legalized;
1960   }
1961   case TargetOpcode::G_SADDO:
1962   case TargetOpcode::G_SSUBO:
1963     return lowerSADDO_SSUBO(MI);
1964   case TargetOpcode::G_SMULO:
1965   case TargetOpcode::G_UMULO: {
1966     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1967     // result.
1968     Register Res = MI.getOperand(0).getReg();
1969     Register Overflow = MI.getOperand(1).getReg();
1970     Register LHS = MI.getOperand(2).getReg();
1971     Register RHS = MI.getOperand(3).getReg();
1972 
1973     MIRBuilder.buildMul(Res, LHS, RHS);
1974 
1975     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1976                           ? TargetOpcode::G_SMULH
1977                           : TargetOpcode::G_UMULH;
1978 
1979     Register HiPart = MRI.createGenericVirtualRegister(Ty);
1980     MIRBuilder.buildInstr(Opcode)
1981       .addDef(HiPart)
1982       .addUse(LHS)
1983       .addUse(RHS);
1984 
1985     Register Zero = MRI.createGenericVirtualRegister(Ty);
1986     MIRBuilder.buildConstant(Zero, 0);
1987 
1988     // For *signed* multiply, overflow is detected by checking:
1989     // (hi != (lo >> bitwidth-1))
1990     if (Opcode == TargetOpcode::G_SMULH) {
1991       Register Shifted = MRI.createGenericVirtualRegister(Ty);
1992       Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1993       MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1994       MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1995         .addDef(Shifted)
1996         .addUse(Res)
1997         .addUse(ShiftAmt);
1998       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1999     } else {
2000       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2001     }
2002     MI.eraseFromParent();
2003     return Legalized;
2004   }
2005   case TargetOpcode::G_FNEG: {
2006     // TODO: Handle vector types once we are able to
2007     // represent them.
2008     if (Ty.isVector())
2009       return UnableToLegalize;
2010     Register Res = MI.getOperand(0).getReg();
2011     Type *ZeroTy;
2012     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2013     switch (Ty.getSizeInBits()) {
2014     case 16:
2015       ZeroTy = Type::getHalfTy(Ctx);
2016       break;
2017     case 32:
2018       ZeroTy = Type::getFloatTy(Ctx);
2019       break;
2020     case 64:
2021       ZeroTy = Type::getDoubleTy(Ctx);
2022       break;
2023     case 128:
2024       ZeroTy = Type::getFP128Ty(Ctx);
2025       break;
2026     default:
2027       llvm_unreachable("unexpected floating-point type");
2028     }
2029     ConstantFP &ZeroForNegation =
2030         *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
2031     auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
2032     Register SubByReg = MI.getOperand(1).getReg();
2033     Register ZeroReg = Zero->getOperand(0).getReg();
2034     MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
2035                           MI.getFlags());
2036     MI.eraseFromParent();
2037     return Legalized;
2038   }
2039   case TargetOpcode::G_FSUB: {
2040     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2041     // First, check if G_FNEG is marked as Lower. If so, we may
2042     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2043     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2044       return UnableToLegalize;
2045     Register Res = MI.getOperand(0).getReg();
2046     Register LHS = MI.getOperand(1).getReg();
2047     Register RHS = MI.getOperand(2).getReg();
2048     Register Neg = MRI.createGenericVirtualRegister(Ty);
2049     MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
2050     MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
2051     MI.eraseFromParent();
2052     return Legalized;
2053   }
2054   case TargetOpcode::G_FMAD:
2055     return lowerFMad(MI);
2056   case TargetOpcode::G_INTRINSIC_ROUND:
2057     return lowerIntrinsicRound(MI);
2058   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2059     Register OldValRes = MI.getOperand(0).getReg();
2060     Register SuccessRes = MI.getOperand(1).getReg();
2061     Register Addr = MI.getOperand(2).getReg();
2062     Register CmpVal = MI.getOperand(3).getReg();
2063     Register NewVal = MI.getOperand(4).getReg();
2064     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2065                                   **MI.memoperands_begin());
2066     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2067     MI.eraseFromParent();
2068     return Legalized;
2069   }
2070   case TargetOpcode::G_LOAD:
2071   case TargetOpcode::G_SEXTLOAD:
2072   case TargetOpcode::G_ZEXTLOAD: {
2073     // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2074     Register DstReg = MI.getOperand(0).getReg();
2075     Register PtrReg = MI.getOperand(1).getReg();
2076     LLT DstTy = MRI.getType(DstReg);
2077     auto &MMO = **MI.memoperands_begin();
2078 
2079     if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2080       if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2081         // This load needs splitting into power of 2 sized loads.
2082         if (DstTy.isVector())
2083           return UnableToLegalize;
2084         if (isPowerOf2_32(DstTy.getSizeInBits()))
2085           return UnableToLegalize; // Don't know what we're being asked to do.
2086 
2087         // Our strategy here is to generate anyextending loads for the smaller
2088         // types up to next power-2 result type, and then combine the two larger
2089         // result values together, before truncating back down to the non-pow-2
2090         // type.
2091         // E.g. v1 = i24 load =>
2092         // v2 = i32 load (2 byte)
2093         // v3 = i32 load (1 byte)
2094         // v4 = i32 shl v3, 16
2095         // v5 = i32 or v4, v2
2096         // v1 = i24 trunc v5
2097         // By doing this we generate the correct truncate which should get
2098         // combined away as an artifact with a matching extend.
2099         uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2100         uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2101 
2102         MachineFunction &MF = MIRBuilder.getMF();
2103         MachineMemOperand *LargeMMO =
2104             MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2105         MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2106             &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2107 
2108         LLT PtrTy = MRI.getType(PtrReg);
2109         unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2110         LLT AnyExtTy = LLT::scalar(AnyExtSize);
2111         Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2112         Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2113         auto LargeLoad =
2114             MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
2115 
2116         auto OffsetCst =
2117             MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2118         Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2119         auto SmallPtr =
2120             MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2121         auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2122                                               *SmallMMO);
2123 
2124         auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2125         auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2126         auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2127         MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2128         MI.eraseFromParent();
2129         return Legalized;
2130       }
2131       MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2132       MI.eraseFromParent();
2133       return Legalized;
2134     }
2135 
2136     if (DstTy.isScalar()) {
2137       Register TmpReg =
2138           MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2139       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2140       switch (MI.getOpcode()) {
2141       default:
2142         llvm_unreachable("Unexpected opcode");
2143       case TargetOpcode::G_LOAD:
2144         MIRBuilder.buildExtOrTrunc(TargetOpcode::G_ANYEXT, DstReg, TmpReg);
2145         break;
2146       case TargetOpcode::G_SEXTLOAD:
2147         MIRBuilder.buildSExt(DstReg, TmpReg);
2148         break;
2149       case TargetOpcode::G_ZEXTLOAD:
2150         MIRBuilder.buildZExt(DstReg, TmpReg);
2151         break;
2152       }
2153       MI.eraseFromParent();
2154       return Legalized;
2155     }
2156 
2157     return UnableToLegalize;
2158   }
2159   case TargetOpcode::G_STORE: {
2160     // Lower a non-power of 2 store into multiple pow-2 stores.
2161     // E.g. split an i24 store into an i16 store + i8 store.
2162     // We do this by first extending the stored value to the next largest power
2163     // of 2 type, and then using truncating stores to store the components.
2164     // By doing this, likewise with G_LOAD, generate an extend that can be
2165     // artifact-combined away instead of leaving behind extracts.
2166     Register SrcReg = MI.getOperand(0).getReg();
2167     Register PtrReg = MI.getOperand(1).getReg();
2168     LLT SrcTy = MRI.getType(SrcReg);
2169     MachineMemOperand &MMO = **MI.memoperands_begin();
2170     if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2171       return UnableToLegalize;
2172     if (SrcTy.isVector())
2173       return UnableToLegalize;
2174     if (isPowerOf2_32(SrcTy.getSizeInBits()))
2175       return UnableToLegalize; // Don't know what we're being asked to do.
2176 
2177     // Extend to the next pow-2.
2178     const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2179     auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2180 
2181     // Obtain the smaller value by shifting away the larger value.
2182     uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2183     uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2184     auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2185     auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2186 
2187     // Generate the PtrAdd and truncating stores.
2188     LLT PtrTy = MRI.getType(PtrReg);
2189     auto OffsetCst =
2190         MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
2191     Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2192     auto SmallPtr =
2193         MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2194 
2195     MachineFunction &MF = MIRBuilder.getMF();
2196     MachineMemOperand *LargeMMO =
2197         MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2198     MachineMemOperand *SmallMMO =
2199         MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2200     MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2201     MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2202     MI.eraseFromParent();
2203     return Legalized;
2204   }
2205   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2206   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2207   case TargetOpcode::G_CTLZ:
2208   case TargetOpcode::G_CTTZ:
2209   case TargetOpcode::G_CTPOP:
2210     return lowerBitCount(MI, TypeIdx, Ty);
2211   case G_UADDO: {
2212     Register Res = MI.getOperand(0).getReg();
2213     Register CarryOut = MI.getOperand(1).getReg();
2214     Register LHS = MI.getOperand(2).getReg();
2215     Register RHS = MI.getOperand(3).getReg();
2216 
2217     MIRBuilder.buildAdd(Res, LHS, RHS);
2218     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2219 
2220     MI.eraseFromParent();
2221     return Legalized;
2222   }
2223   case G_UADDE: {
2224     Register Res = MI.getOperand(0).getReg();
2225     Register CarryOut = MI.getOperand(1).getReg();
2226     Register LHS = MI.getOperand(2).getReg();
2227     Register RHS = MI.getOperand(3).getReg();
2228     Register CarryIn = MI.getOperand(4).getReg();
2229 
2230     Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2231     Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
2232 
2233     MIRBuilder.buildAdd(TmpRes, LHS, RHS);
2234     MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
2235     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
2236     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
2237 
2238     MI.eraseFromParent();
2239     return Legalized;
2240   }
2241   case G_USUBO: {
2242     Register Res = MI.getOperand(0).getReg();
2243     Register BorrowOut = MI.getOperand(1).getReg();
2244     Register LHS = MI.getOperand(2).getReg();
2245     Register RHS = MI.getOperand(3).getReg();
2246 
2247     MIRBuilder.buildSub(Res, LHS, RHS);
2248     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
2249 
2250     MI.eraseFromParent();
2251     return Legalized;
2252   }
2253   case G_USUBE: {
2254     Register Res = MI.getOperand(0).getReg();
2255     Register BorrowOut = MI.getOperand(1).getReg();
2256     Register LHS = MI.getOperand(2).getReg();
2257     Register RHS = MI.getOperand(3).getReg();
2258     Register BorrowIn = MI.getOperand(4).getReg();
2259 
2260     Register TmpRes = MRI.createGenericVirtualRegister(Ty);
2261     Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
2262     Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2263     Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
2264 
2265     MIRBuilder.buildSub(TmpRes, LHS, RHS);
2266     MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
2267     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
2268     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
2269     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
2270     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
2271 
2272     MI.eraseFromParent();
2273     return Legalized;
2274   }
2275   case G_UITOFP:
2276     return lowerUITOFP(MI, TypeIdx, Ty);
2277   case G_SITOFP:
2278     return lowerSITOFP(MI, TypeIdx, Ty);
2279   case G_FPTOUI:
2280     return lowerFPTOUI(MI, TypeIdx, Ty);
2281   case G_SMIN:
2282   case G_SMAX:
2283   case G_UMIN:
2284   case G_UMAX:
2285     return lowerMinMax(MI, TypeIdx, Ty);
2286   case G_FCOPYSIGN:
2287     return lowerFCopySign(MI, TypeIdx, Ty);
2288   case G_FMINNUM:
2289   case G_FMAXNUM:
2290     return lowerFMinNumMaxNum(MI);
2291   case G_UNMERGE_VALUES:
2292     return lowerUnmergeValues(MI);
2293   case TargetOpcode::G_SEXT_INREG: {
2294     assert(MI.getOperand(2).isImm() && "Expected immediate");
2295     int64_t SizeInBits = MI.getOperand(2).getImm();
2296 
2297     Register DstReg = MI.getOperand(0).getReg();
2298     Register SrcReg = MI.getOperand(1).getReg();
2299     LLT DstTy = MRI.getType(DstReg);
2300     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
2301 
2302     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
2303     MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()});
2304     MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()});
2305     MI.eraseFromParent();
2306     return Legalized;
2307   }
2308   case G_SHUFFLE_VECTOR:
2309     return lowerShuffleVector(MI);
2310   case G_DYN_STACKALLOC:
2311     return lowerDynStackAlloc(MI);
2312   case G_EXTRACT:
2313     return lowerExtract(MI);
2314   case G_INSERT:
2315     return lowerInsert(MI);
2316   case G_BSWAP:
2317     return lowerBswap(MI);
2318   case G_BITREVERSE:
2319     return lowerBitreverse(MI);
2320   case G_READ_REGISTER:
2321     return lowerReadRegister(MI);
2322   }
2323 }
2324 
fewerElementsVectorImplicitDef(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2325 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
2326     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
2327   SmallVector<Register, 2> DstRegs;
2328 
2329   unsigned NarrowSize = NarrowTy.getSizeInBits();
2330   Register DstReg = MI.getOperand(0).getReg();
2331   unsigned Size = MRI.getType(DstReg).getSizeInBits();
2332   int NumParts = Size / NarrowSize;
2333   // FIXME: Don't know how to handle the situation where the small vectors
2334   // aren't all the same size yet.
2335   if (Size % NarrowSize != 0)
2336     return UnableToLegalize;
2337 
2338   for (int i = 0; i < NumParts; ++i) {
2339     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
2340     MIRBuilder.buildUndef(TmpReg);
2341     DstRegs.push_back(TmpReg);
2342   }
2343 
2344   if (NarrowTy.isVector())
2345     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2346   else
2347     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2348 
2349   MI.eraseFromParent();
2350   return Legalized;
2351 }
2352 
2353 LegalizerHelper::LegalizeResult
fewerElementsVectorBasic(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2354 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
2355                                           LLT NarrowTy) {
2356   const unsigned Opc = MI.getOpcode();
2357   const unsigned NumOps = MI.getNumOperands() - 1;
2358   const unsigned NarrowSize = NarrowTy.getSizeInBits();
2359   const Register DstReg = MI.getOperand(0).getReg();
2360   const unsigned Flags = MI.getFlags();
2361   const LLT DstTy = MRI.getType(DstReg);
2362   const unsigned Size = DstTy.getSizeInBits();
2363   const int NumParts = Size / NarrowSize;
2364   const LLT EltTy = DstTy.getElementType();
2365   const unsigned EltSize = EltTy.getSizeInBits();
2366   const unsigned BitsForNumParts = NarrowSize * NumParts;
2367 
2368   // Check if we have any leftovers. If we do, then only handle the case where
2369   // the leftover is one element.
2370   if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
2371     return UnableToLegalize;
2372 
2373   if (BitsForNumParts != Size) {
2374     Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
2375     MIRBuilder.buildUndef(AccumDstReg);
2376 
2377     // Handle the pieces which evenly divide into the requested type with
2378     // extract/op/insert sequence.
2379     for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
2380       SmallVector<SrcOp, 4> SrcOps;
2381       for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2382         Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
2383         MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
2384         SrcOps.push_back(PartOpReg);
2385       }
2386 
2387       Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
2388       MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2389 
2390       Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
2391       MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
2392       AccumDstReg = PartInsertReg;
2393     }
2394 
2395     // Handle the remaining element sized leftover piece.
2396     SmallVector<SrcOp, 4> SrcOps;
2397     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2398       Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
2399       MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
2400                               BitsForNumParts);
2401       SrcOps.push_back(PartOpReg);
2402     }
2403 
2404     Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
2405     MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
2406     MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
2407     MI.eraseFromParent();
2408 
2409     return Legalized;
2410   }
2411 
2412   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2413 
2414   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
2415 
2416   if (NumOps >= 2)
2417     extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
2418 
2419   if (NumOps >= 3)
2420     extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
2421 
2422   for (int i = 0; i < NumParts; ++i) {
2423     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2424 
2425     if (NumOps == 1)
2426       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
2427     else if (NumOps == 2) {
2428       MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
2429     } else if (NumOps == 3) {
2430       MIRBuilder.buildInstr(Opc, {DstReg},
2431                             {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
2432     }
2433 
2434     DstRegs.push_back(DstReg);
2435   }
2436 
2437   if (NarrowTy.isVector())
2438     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2439   else
2440     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2441 
2442   MI.eraseFromParent();
2443   return Legalized;
2444 }
2445 
2446 // Handle splitting vector operations which need to have the same number of
2447 // elements in each type index, but each type index may have a different element
2448 // type.
2449 //
2450 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
2451 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2452 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2453 //
2454 // Also handles some irregular breakdown cases, e.g.
2455 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
2456 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
2457 //             s64 = G_SHL s64, s32
2458 LegalizerHelper::LegalizeResult
fewerElementsVectorMultiEltType(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTyArg)2459 LegalizerHelper::fewerElementsVectorMultiEltType(
2460   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
2461   if (TypeIdx != 0)
2462     return UnableToLegalize;
2463 
2464   const LLT NarrowTy0 = NarrowTyArg;
2465   const unsigned NewNumElts =
2466       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2467 
2468   const Register DstReg = MI.getOperand(0).getReg();
2469   LLT DstTy = MRI.getType(DstReg);
2470   LLT LeftoverTy0;
2471 
2472   // All of the operands need to have the same number of elements, so if we can
2473   // determine a type breakdown for the result type, we can for all of the
2474   // source types.
2475   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2476   if (NumParts < 0)
2477     return UnableToLegalize;
2478 
2479   SmallVector<MachineInstrBuilder, 4> NewInsts;
2480 
2481   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2482   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2483 
2484   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2485     LLT LeftoverTy;
2486     Register SrcReg = MI.getOperand(I).getReg();
2487     LLT SrcTyI = MRI.getType(SrcReg);
2488     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2489     LLT LeftoverTyI;
2490 
2491     // Split this operand into the requested typed registers, and any leftover
2492     // required to reproduce the original type.
2493     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2494                       LeftoverRegs))
2495       return UnableToLegalize;
2496 
2497     if (I == 1) {
2498       // For the first operand, create an instruction for each part and setup
2499       // the result.
2500       for (Register PartReg : PartRegs) {
2501         Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2502         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2503                                .addDef(PartDstReg)
2504                                .addUse(PartReg));
2505         DstRegs.push_back(PartDstReg);
2506       }
2507 
2508       for (Register LeftoverReg : LeftoverRegs) {
2509         Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2510         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
2511                                .addDef(PartDstReg)
2512                                .addUse(LeftoverReg));
2513         LeftoverDstRegs.push_back(PartDstReg);
2514       }
2515     } else {
2516       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2517 
2518       // Add the newly created operand splits to the existing instructions. The
2519       // odd-sized pieces are ordered after the requested NarrowTyArg sized
2520       // pieces.
2521       unsigned InstCount = 0;
2522       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2523         NewInsts[InstCount++].addUse(PartRegs[J]);
2524       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2525         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2526     }
2527 
2528     PartRegs.clear();
2529     LeftoverRegs.clear();
2530   }
2531 
2532   // Insert the newly built operations and rebuild the result register.
2533   for (auto &MIB : NewInsts)
2534     MIRBuilder.insertInstr(MIB);
2535 
2536   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2537 
2538   MI.eraseFromParent();
2539   return Legalized;
2540 }
2541 
2542 LegalizerHelper::LegalizeResult
fewerElementsVectorCasts(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2543 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
2544                                           LLT NarrowTy) {
2545   if (TypeIdx != 0)
2546     return UnableToLegalize;
2547 
2548   Register DstReg = MI.getOperand(0).getReg();
2549   Register SrcReg = MI.getOperand(1).getReg();
2550   LLT DstTy = MRI.getType(DstReg);
2551   LLT SrcTy = MRI.getType(SrcReg);
2552 
2553   LLT NarrowTy0 = NarrowTy;
2554   LLT NarrowTy1;
2555   unsigned NumParts;
2556 
2557   if (NarrowTy.isVector()) {
2558     // Uneven breakdown not handled.
2559     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2560     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2561       return UnableToLegalize;
2562 
2563     NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2564   } else {
2565     NumParts = DstTy.getNumElements();
2566     NarrowTy1 = SrcTy.getElementType();
2567   }
2568 
2569   SmallVector<Register, 4> SrcRegs, DstRegs;
2570   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2571 
2572   for (unsigned I = 0; I < NumParts; ++I) {
2573     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2574     MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
2575       .addDef(DstReg)
2576       .addUse(SrcRegs[I]);
2577 
2578     NewInst->setFlags(MI.getFlags());
2579     DstRegs.push_back(DstReg);
2580   }
2581 
2582   if (NarrowTy.isVector())
2583     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2584   else
2585     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2586 
2587   MI.eraseFromParent();
2588   return Legalized;
2589 }
2590 
2591 LegalizerHelper::LegalizeResult
fewerElementsVectorCmp(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2592 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
2593                                         LLT NarrowTy) {
2594   Register DstReg = MI.getOperand(0).getReg();
2595   Register Src0Reg = MI.getOperand(2).getReg();
2596   LLT DstTy = MRI.getType(DstReg);
2597   LLT SrcTy = MRI.getType(Src0Reg);
2598 
2599   unsigned NumParts;
2600   LLT NarrowTy0, NarrowTy1;
2601 
2602   if (TypeIdx == 0) {
2603     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2604     unsigned OldElts = DstTy.getNumElements();
2605 
2606     NarrowTy0 = NarrowTy;
2607     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2608     NarrowTy1 = NarrowTy.isVector() ?
2609       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2610       SrcTy.getElementType();
2611 
2612   } else {
2613     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2614     unsigned OldElts = SrcTy.getNumElements();
2615 
2616     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2617       NarrowTy.getNumElements();
2618     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2619                             DstTy.getScalarSizeInBits());
2620     NarrowTy1 = NarrowTy;
2621   }
2622 
2623   // FIXME: Don't know how to handle the situation where the small vectors
2624   // aren't all the same size yet.
2625   if (NarrowTy1.isVector() &&
2626       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2627     return UnableToLegalize;
2628 
2629   CmpInst::Predicate Pred
2630     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2631 
2632   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2633   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2634   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2635 
2636   for (unsigned I = 0; I < NumParts; ++I) {
2637     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2638     DstRegs.push_back(DstReg);
2639 
2640     if (MI.getOpcode() == TargetOpcode::G_ICMP)
2641       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2642     else {
2643       MachineInstr *NewCmp
2644         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2645       NewCmp->setFlags(MI.getFlags());
2646     }
2647   }
2648 
2649   if (NarrowTy1.isVector())
2650     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2651   else
2652     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2653 
2654   MI.eraseFromParent();
2655   return Legalized;
2656 }
2657 
2658 LegalizerHelper::LegalizeResult
fewerElementsVectorSelect(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2659 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
2660                                            LLT NarrowTy) {
2661   Register DstReg = MI.getOperand(0).getReg();
2662   Register CondReg = MI.getOperand(1).getReg();
2663 
2664   unsigned NumParts = 0;
2665   LLT NarrowTy0, NarrowTy1;
2666 
2667   LLT DstTy = MRI.getType(DstReg);
2668   LLT CondTy = MRI.getType(CondReg);
2669   unsigned Size = DstTy.getSizeInBits();
2670 
2671   assert(TypeIdx == 0 || CondTy.isVector());
2672 
2673   if (TypeIdx == 0) {
2674     NarrowTy0 = NarrowTy;
2675     NarrowTy1 = CondTy;
2676 
2677     unsigned NarrowSize = NarrowTy0.getSizeInBits();
2678     // FIXME: Don't know how to handle the situation where the small vectors
2679     // aren't all the same size yet.
2680     if (Size % NarrowSize != 0)
2681       return UnableToLegalize;
2682 
2683     NumParts = Size / NarrowSize;
2684 
2685     // Need to break down the condition type
2686     if (CondTy.isVector()) {
2687       if (CondTy.getNumElements() == NumParts)
2688         NarrowTy1 = CondTy.getElementType();
2689       else
2690         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2691                                 CondTy.getScalarSizeInBits());
2692     }
2693   } else {
2694     NumParts = CondTy.getNumElements();
2695     if (NarrowTy.isVector()) {
2696       // TODO: Handle uneven breakdown.
2697       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2698         return UnableToLegalize;
2699 
2700       return UnableToLegalize;
2701     } else {
2702       NarrowTy0 = DstTy.getElementType();
2703       NarrowTy1 = NarrowTy;
2704     }
2705   }
2706 
2707   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2708   if (CondTy.isVector())
2709     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2710 
2711   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2712   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2713 
2714   for (unsigned i = 0; i < NumParts; ++i) {
2715     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2716     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2717                            Src1Regs[i], Src2Regs[i]);
2718     DstRegs.push_back(DstReg);
2719   }
2720 
2721   if (NarrowTy0.isVector())
2722     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2723   else
2724     MIRBuilder.buildBuildVector(DstReg, DstRegs);
2725 
2726   MI.eraseFromParent();
2727   return Legalized;
2728 }
2729 
2730 LegalizerHelper::LegalizeResult
fewerElementsVectorPhi(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2731 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2732                                         LLT NarrowTy) {
2733   const Register DstReg = MI.getOperand(0).getReg();
2734   LLT PhiTy = MRI.getType(DstReg);
2735   LLT LeftoverTy;
2736 
2737   // All of the operands need to have the same number of elements, so if we can
2738   // determine a type breakdown for the result type, we can for all of the
2739   // source types.
2740   int NumParts, NumLeftover;
2741   std::tie(NumParts, NumLeftover)
2742     = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2743   if (NumParts < 0)
2744     return UnableToLegalize;
2745 
2746   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2747   SmallVector<MachineInstrBuilder, 4> NewInsts;
2748 
2749   const int TotalNumParts = NumParts + NumLeftover;
2750 
2751   // Insert the new phis in the result block first.
2752   for (int I = 0; I != TotalNumParts; ++I) {
2753     LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2754     Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2755     NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2756                        .addDef(PartDstReg));
2757     if (I < NumParts)
2758       DstRegs.push_back(PartDstReg);
2759     else
2760       LeftoverDstRegs.push_back(PartDstReg);
2761   }
2762 
2763   MachineBasicBlock *MBB = MI.getParent();
2764   MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2765   insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2766 
2767   SmallVector<Register, 4> PartRegs, LeftoverRegs;
2768 
2769   // Insert code to extract the incoming values in each predecessor block.
2770   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2771     PartRegs.clear();
2772     LeftoverRegs.clear();
2773 
2774     Register SrcReg = MI.getOperand(I).getReg();
2775     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2776     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2777 
2778     LLT Unused;
2779     if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2780                       LeftoverRegs))
2781       return UnableToLegalize;
2782 
2783     // Add the newly created operand splits to the existing instructions. The
2784     // odd-sized pieces are ordered after the requested NarrowTyArg sized
2785     // pieces.
2786     for (int J = 0; J != TotalNumParts; ++J) {
2787       MachineInstrBuilder MIB = NewInsts[J];
2788       MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2789       MIB.addMBB(&OpMBB);
2790     }
2791   }
2792 
2793   MI.eraseFromParent();
2794   return Legalized;
2795 }
2796 
2797 LegalizerHelper::LegalizeResult
fewerElementsVectorUnmergeValues(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2798 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
2799                                                   unsigned TypeIdx,
2800                                                   LLT NarrowTy) {
2801   if (TypeIdx != 1)
2802     return UnableToLegalize;
2803 
2804   const int NumDst = MI.getNumOperands() - 1;
2805   const Register SrcReg = MI.getOperand(NumDst).getReg();
2806   LLT SrcTy = MRI.getType(SrcReg);
2807 
2808   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2809 
2810   // TODO: Create sequence of extracts.
2811   if (DstTy == NarrowTy)
2812     return UnableToLegalize;
2813 
2814   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
2815   if (DstTy == GCDTy) {
2816     // This would just be a copy of the same unmerge.
2817     // TODO: Create extracts, pad with undef and create intermediate merges.
2818     return UnableToLegalize;
2819   }
2820 
2821   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2822   const int NumUnmerge = Unmerge->getNumOperands() - 1;
2823   const int PartsPerUnmerge = NumDst / NumUnmerge;
2824 
2825   for (int I = 0; I != NumUnmerge; ++I) {
2826     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2827 
2828     for (int J = 0; J != PartsPerUnmerge; ++J)
2829       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
2830     MIB.addUse(Unmerge.getReg(I));
2831   }
2832 
2833   MI.eraseFromParent();
2834   return Legalized;
2835 }
2836 
2837 LegalizerHelper::LegalizeResult
fewerElementsVectorBuildVector(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2838 LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
2839                                                 unsigned TypeIdx,
2840                                                 LLT NarrowTy) {
2841   assert(TypeIdx == 0 && "not a vector type index");
2842   Register DstReg = MI.getOperand(0).getReg();
2843   LLT DstTy = MRI.getType(DstReg);
2844   LLT SrcTy = DstTy.getElementType();
2845 
2846   int DstNumElts = DstTy.getNumElements();
2847   int NarrowNumElts = NarrowTy.getNumElements();
2848   int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
2849   LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
2850 
2851   SmallVector<Register, 8> ConcatOps;
2852   SmallVector<Register, 8> SubBuildVector;
2853 
2854   Register UndefReg;
2855   if (WidenedDstTy != DstTy)
2856     UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
2857 
2858   // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
2859   // necessary.
2860   //
2861   // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
2862   //   -> <2 x s16>
2863   //
2864   // %4:_(s16) = G_IMPLICIT_DEF
2865   // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
2866   // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
2867   // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
2868   // %3:_(<3 x s16>) = G_EXTRACT %7, 0
2869   for (int I = 0; I != NumConcat; ++I) {
2870     for (int J = 0; J != NarrowNumElts; ++J) {
2871       int SrcIdx = NarrowNumElts * I + J;
2872 
2873       if (SrcIdx < DstNumElts) {
2874         Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
2875         SubBuildVector.push_back(SrcReg);
2876       } else
2877         SubBuildVector.push_back(UndefReg);
2878     }
2879 
2880     auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
2881     ConcatOps.push_back(BuildVec.getReg(0));
2882     SubBuildVector.clear();
2883   }
2884 
2885   if (DstTy == WidenedDstTy)
2886     MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
2887   else {
2888     auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
2889     MIRBuilder.buildExtract(DstReg, Concat, 0);
2890   }
2891 
2892   MI.eraseFromParent();
2893   return Legalized;
2894 }
2895 
2896 LegalizerHelper::LegalizeResult
reduceLoadStoreWidth(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2897 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
2898                                       LLT NarrowTy) {
2899   // FIXME: Don't know how to handle secondary types yet.
2900   if (TypeIdx != 0)
2901     return UnableToLegalize;
2902 
2903   MachineMemOperand *MMO = *MI.memoperands_begin();
2904 
2905   // This implementation doesn't work for atomics. Give up instead of doing
2906   // something invalid.
2907   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2908       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
2909     return UnableToLegalize;
2910 
2911   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2912   Register ValReg = MI.getOperand(0).getReg();
2913   Register AddrReg = MI.getOperand(1).getReg();
2914   LLT ValTy = MRI.getType(ValReg);
2915 
2916   int NumParts = -1;
2917   int NumLeftover = -1;
2918   LLT LeftoverTy;
2919   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
2920   if (IsLoad) {
2921     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2922   } else {
2923     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2924                      NarrowLeftoverRegs)) {
2925       NumParts = NarrowRegs.size();
2926       NumLeftover = NarrowLeftoverRegs.size();
2927     }
2928   }
2929 
2930   if (NumParts == -1)
2931     return UnableToLegalize;
2932 
2933   const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2934 
2935   unsigned TotalSize = ValTy.getSizeInBits();
2936 
2937   // Split the load/store into PartTy sized pieces starting at Offset. If this
2938   // is a load, return the new registers in ValRegs. For a store, each elements
2939   // of ValRegs should be PartTy. Returns the next offset that needs to be
2940   // handled.
2941   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
2942                              unsigned Offset) -> unsigned {
2943     MachineFunction &MF = MIRBuilder.getMF();
2944     unsigned PartSize = PartTy.getSizeInBits();
2945     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2946          Offset += PartSize, ++Idx) {
2947       unsigned ByteSize = PartSize / 8;
2948       unsigned ByteOffset = Offset / 8;
2949       Register NewAddrReg;
2950 
2951       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2952 
2953       MachineMemOperand *NewMMO =
2954         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2955 
2956       if (IsLoad) {
2957         Register Dst = MRI.createGenericVirtualRegister(PartTy);
2958         ValRegs.push_back(Dst);
2959         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2960       } else {
2961         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2962       }
2963     }
2964 
2965     return Offset;
2966   };
2967 
2968   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2969 
2970   // Handle the rest of the register if this isn't an even type breakdown.
2971   if (LeftoverTy.isValid())
2972     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2973 
2974   if (IsLoad) {
2975     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2976                 LeftoverTy, NarrowLeftoverRegs);
2977   }
2978 
2979   MI.eraseFromParent();
2980   return Legalized;
2981 }
2982 
2983 LegalizerHelper::LegalizeResult
fewerElementsVector(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)2984 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
2985                                      LLT NarrowTy) {
2986   using namespace TargetOpcode;
2987 
2988   MIRBuilder.setInstr(MI);
2989   switch (MI.getOpcode()) {
2990   case G_IMPLICIT_DEF:
2991     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
2992   case G_AND:
2993   case G_OR:
2994   case G_XOR:
2995   case G_ADD:
2996   case G_SUB:
2997   case G_MUL:
2998   case G_SMULH:
2999   case G_UMULH:
3000   case G_FADD:
3001   case G_FMUL:
3002   case G_FSUB:
3003   case G_FNEG:
3004   case G_FABS:
3005   case G_FCANONICALIZE:
3006   case G_FDIV:
3007   case G_FREM:
3008   case G_FMA:
3009   case G_FMAD:
3010   case G_FPOW:
3011   case G_FEXP:
3012   case G_FEXP2:
3013   case G_FLOG:
3014   case G_FLOG2:
3015   case G_FLOG10:
3016   case G_FNEARBYINT:
3017   case G_FCEIL:
3018   case G_FFLOOR:
3019   case G_FRINT:
3020   case G_INTRINSIC_ROUND:
3021   case G_INTRINSIC_TRUNC:
3022   case G_FCOS:
3023   case G_FSIN:
3024   case G_FSQRT:
3025   case G_BSWAP:
3026   case G_BITREVERSE:
3027   case G_SDIV:
3028   case G_UDIV:
3029   case G_SREM:
3030   case G_UREM:
3031   case G_SMIN:
3032   case G_SMAX:
3033   case G_UMIN:
3034   case G_UMAX:
3035   case G_FMINNUM:
3036   case G_FMAXNUM:
3037   case G_FMINNUM_IEEE:
3038   case G_FMAXNUM_IEEE:
3039   case G_FMINIMUM:
3040   case G_FMAXIMUM:
3041     return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
3042   case G_SHL:
3043   case G_LSHR:
3044   case G_ASHR:
3045   case G_CTLZ:
3046   case G_CTLZ_ZERO_UNDEF:
3047   case G_CTTZ:
3048   case G_CTTZ_ZERO_UNDEF:
3049   case G_CTPOP:
3050   case G_FCOPYSIGN:
3051     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
3052   case G_ZEXT:
3053   case G_SEXT:
3054   case G_ANYEXT:
3055   case G_FPEXT:
3056   case G_FPTRUNC:
3057   case G_SITOFP:
3058   case G_UITOFP:
3059   case G_FPTOSI:
3060   case G_FPTOUI:
3061   case G_INTTOPTR:
3062   case G_PTRTOINT:
3063   case G_ADDRSPACE_CAST:
3064     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
3065   case G_ICMP:
3066   case G_FCMP:
3067     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
3068   case G_SELECT:
3069     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
3070   case G_PHI:
3071     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
3072   case G_UNMERGE_VALUES:
3073     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
3074   case G_BUILD_VECTOR:
3075     return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
3076   case G_LOAD:
3077   case G_STORE:
3078     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
3079   default:
3080     return UnableToLegalize;
3081   }
3082 }
3083 
3084 LegalizerHelper::LegalizeResult
narrowScalarShiftByConstant(MachineInstr & MI,const APInt & Amt,const LLT HalfTy,const LLT AmtTy)3085 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
3086                                              const LLT HalfTy, const LLT AmtTy) {
3087 
3088   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3089   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3090   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
3091 
3092   if (Amt.isNullValue()) {
3093     MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
3094     MI.eraseFromParent();
3095     return Legalized;
3096   }
3097 
3098   LLT NVT = HalfTy;
3099   unsigned NVTBits = HalfTy.getSizeInBits();
3100   unsigned VTBits = 2 * NVTBits;
3101 
3102   SrcOp Lo(Register(0)), Hi(Register(0));
3103   if (MI.getOpcode() == TargetOpcode::G_SHL) {
3104     if (Amt.ugt(VTBits)) {
3105       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3106     } else if (Amt.ugt(NVTBits)) {
3107       Lo = MIRBuilder.buildConstant(NVT, 0);
3108       Hi = MIRBuilder.buildShl(NVT, InL,
3109                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3110     } else if (Amt == NVTBits) {
3111       Lo = MIRBuilder.buildConstant(NVT, 0);
3112       Hi = InL;
3113     } else {
3114       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
3115       auto OrLHS =
3116           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
3117       auto OrRHS = MIRBuilder.buildLShr(
3118           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3119       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3120     }
3121   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3122     if (Amt.ugt(VTBits)) {
3123       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
3124     } else if (Amt.ugt(NVTBits)) {
3125       Lo = MIRBuilder.buildLShr(NVT, InH,
3126                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3127       Hi = MIRBuilder.buildConstant(NVT, 0);
3128     } else if (Amt == NVTBits) {
3129       Lo = InH;
3130       Hi = MIRBuilder.buildConstant(NVT, 0);
3131     } else {
3132       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3133 
3134       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3135       auto OrRHS = MIRBuilder.buildShl(
3136           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3137 
3138       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3139       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
3140     }
3141   } else {
3142     if (Amt.ugt(VTBits)) {
3143       Hi = Lo = MIRBuilder.buildAShr(
3144           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3145     } else if (Amt.ugt(NVTBits)) {
3146       Lo = MIRBuilder.buildAShr(NVT, InH,
3147                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
3148       Hi = MIRBuilder.buildAShr(NVT, InH,
3149                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3150     } else if (Amt == NVTBits) {
3151       Lo = InH;
3152       Hi = MIRBuilder.buildAShr(NVT, InH,
3153                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
3154     } else {
3155       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
3156 
3157       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
3158       auto OrRHS = MIRBuilder.buildShl(
3159           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
3160 
3161       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
3162       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
3163     }
3164   }
3165 
3166   MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
3167   MI.eraseFromParent();
3168 
3169   return Legalized;
3170 }
3171 
3172 // TODO: Optimize if constant shift amount.
3173 LegalizerHelper::LegalizeResult
narrowScalarShift(MachineInstr & MI,unsigned TypeIdx,LLT RequestedTy)3174 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
3175                                    LLT RequestedTy) {
3176   if (TypeIdx == 1) {
3177     Observer.changingInstr(MI);
3178     narrowScalarSrc(MI, RequestedTy, 2);
3179     Observer.changedInstr(MI);
3180     return Legalized;
3181   }
3182 
3183   Register DstReg = MI.getOperand(0).getReg();
3184   LLT DstTy = MRI.getType(DstReg);
3185   if (DstTy.isVector())
3186     return UnableToLegalize;
3187 
3188   Register Amt = MI.getOperand(2).getReg();
3189   LLT ShiftAmtTy = MRI.getType(Amt);
3190   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
3191   if (DstEltSize % 2 != 0)
3192     return UnableToLegalize;
3193 
3194   // Ignore the input type. We can only go to exactly half the size of the
3195   // input. If that isn't small enough, the resulting pieces will be further
3196   // legalized.
3197   const unsigned NewBitSize = DstEltSize / 2;
3198   const LLT HalfTy = LLT::scalar(NewBitSize);
3199   const LLT CondTy = LLT::scalar(1);
3200 
3201   if (const MachineInstr *KShiftAmt =
3202           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
3203     return narrowScalarShiftByConstant(
3204         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
3205   }
3206 
3207   // TODO: Expand with known bits.
3208 
3209   // Handle the fully general expansion by an unknown amount.
3210   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
3211 
3212   Register InL = MRI.createGenericVirtualRegister(HalfTy);
3213   Register InH = MRI.createGenericVirtualRegister(HalfTy);
3214   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
3215 
3216   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
3217   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
3218 
3219   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
3220   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
3221   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
3222 
3223   Register ResultRegs[2];
3224   switch (MI.getOpcode()) {
3225   case TargetOpcode::G_SHL: {
3226     // Short: ShAmt < NewBitSize
3227     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
3228 
3229     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
3230     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
3231     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3232 
3233     // Long: ShAmt >= NewBitSize
3234     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
3235     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
3236 
3237     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
3238     auto Hi = MIRBuilder.buildSelect(
3239         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
3240 
3241     ResultRegs[0] = Lo.getReg(0);
3242     ResultRegs[1] = Hi.getReg(0);
3243     break;
3244   }
3245   case TargetOpcode::G_LSHR:
3246   case TargetOpcode::G_ASHR: {
3247     // Short: ShAmt < NewBitSize
3248     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
3249 
3250     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
3251     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
3252     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
3253 
3254     // Long: ShAmt >= NewBitSize
3255     MachineInstrBuilder HiL;
3256     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
3257       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
3258     } else {
3259       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
3260       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
3261     }
3262     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
3263                                      {InH, AmtExcess});     // Lo from Hi part.
3264 
3265     auto Lo = MIRBuilder.buildSelect(
3266         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
3267 
3268     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
3269 
3270     ResultRegs[0] = Lo.getReg(0);
3271     ResultRegs[1] = Hi.getReg(0);
3272     break;
3273   }
3274   default:
3275     llvm_unreachable("not a shift");
3276   }
3277 
3278   MIRBuilder.buildMerge(DstReg, ResultRegs);
3279   MI.eraseFromParent();
3280   return Legalized;
3281 }
3282 
3283 LegalizerHelper::LegalizeResult
moreElementsVectorPhi(MachineInstr & MI,unsigned TypeIdx,LLT MoreTy)3284 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3285                                        LLT MoreTy) {
3286   assert(TypeIdx == 0 && "Expecting only Idx 0");
3287 
3288   Observer.changingInstr(MI);
3289   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3290     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3291     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3292     moreElementsVectorSrc(MI, MoreTy, I);
3293   }
3294 
3295   MachineBasicBlock &MBB = *MI.getParent();
3296   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3297   moreElementsVectorDst(MI, MoreTy, 0);
3298   Observer.changedInstr(MI);
3299   return Legalized;
3300 }
3301 
3302 LegalizerHelper::LegalizeResult
moreElementsVector(MachineInstr & MI,unsigned TypeIdx,LLT MoreTy)3303 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
3304                                     LLT MoreTy) {
3305   MIRBuilder.setInstr(MI);
3306   unsigned Opc = MI.getOpcode();
3307   switch (Opc) {
3308   case TargetOpcode::G_IMPLICIT_DEF:
3309   case TargetOpcode::G_LOAD: {
3310     if (TypeIdx != 0)
3311       return UnableToLegalize;
3312     Observer.changingInstr(MI);
3313     moreElementsVectorDst(MI, MoreTy, 0);
3314     Observer.changedInstr(MI);
3315     return Legalized;
3316   }
3317   case TargetOpcode::G_STORE:
3318     if (TypeIdx != 0)
3319       return UnableToLegalize;
3320     Observer.changingInstr(MI);
3321     moreElementsVectorSrc(MI, MoreTy, 0);
3322     Observer.changedInstr(MI);
3323     return Legalized;
3324   case TargetOpcode::G_AND:
3325   case TargetOpcode::G_OR:
3326   case TargetOpcode::G_XOR:
3327   case TargetOpcode::G_SMIN:
3328   case TargetOpcode::G_SMAX:
3329   case TargetOpcode::G_UMIN:
3330   case TargetOpcode::G_UMAX:
3331   case TargetOpcode::G_FMINNUM:
3332   case TargetOpcode::G_FMAXNUM:
3333   case TargetOpcode::G_FMINNUM_IEEE:
3334   case TargetOpcode::G_FMAXNUM_IEEE:
3335   case TargetOpcode::G_FMINIMUM:
3336   case TargetOpcode::G_FMAXIMUM: {
3337     Observer.changingInstr(MI);
3338     moreElementsVectorSrc(MI, MoreTy, 1);
3339     moreElementsVectorSrc(MI, MoreTy, 2);
3340     moreElementsVectorDst(MI, MoreTy, 0);
3341     Observer.changedInstr(MI);
3342     return Legalized;
3343   }
3344   case TargetOpcode::G_EXTRACT:
3345     if (TypeIdx != 1)
3346       return UnableToLegalize;
3347     Observer.changingInstr(MI);
3348     moreElementsVectorSrc(MI, MoreTy, 1);
3349     Observer.changedInstr(MI);
3350     return Legalized;
3351   case TargetOpcode::G_INSERT:
3352     if (TypeIdx != 0)
3353       return UnableToLegalize;
3354     Observer.changingInstr(MI);
3355     moreElementsVectorSrc(MI, MoreTy, 1);
3356     moreElementsVectorDst(MI, MoreTy, 0);
3357     Observer.changedInstr(MI);
3358     return Legalized;
3359   case TargetOpcode::G_SELECT:
3360     if (TypeIdx != 0)
3361       return UnableToLegalize;
3362     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
3363       return UnableToLegalize;
3364 
3365     Observer.changingInstr(MI);
3366     moreElementsVectorSrc(MI, MoreTy, 2);
3367     moreElementsVectorSrc(MI, MoreTy, 3);
3368     moreElementsVectorDst(MI, MoreTy, 0);
3369     Observer.changedInstr(MI);
3370     return Legalized;
3371   case TargetOpcode::G_UNMERGE_VALUES: {
3372     if (TypeIdx != 1)
3373       return UnableToLegalize;
3374 
3375     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3376     int NumDst = MI.getNumOperands() - 1;
3377     moreElementsVectorSrc(MI, MoreTy, NumDst);
3378 
3379     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3380     for (int I = 0; I != NumDst; ++I)
3381       MIB.addDef(MI.getOperand(I).getReg());
3382 
3383     int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
3384     for (int I = NumDst; I != NewNumDst; ++I)
3385       MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
3386 
3387     MIB.addUse(MI.getOperand(NumDst).getReg());
3388     MI.eraseFromParent();
3389     return Legalized;
3390   }
3391   case TargetOpcode::G_PHI:
3392     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
3393   default:
3394     return UnableToLegalize;
3395   }
3396 }
3397 
multiplyRegisters(SmallVectorImpl<Register> & DstRegs,ArrayRef<Register> Src1Regs,ArrayRef<Register> Src2Regs,LLT NarrowTy)3398 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
3399                                         ArrayRef<Register> Src1Regs,
3400                                         ArrayRef<Register> Src2Regs,
3401                                         LLT NarrowTy) {
3402   MachineIRBuilder &B = MIRBuilder;
3403   unsigned SrcParts = Src1Regs.size();
3404   unsigned DstParts = DstRegs.size();
3405 
3406   unsigned DstIdx = 0; // Low bits of the result.
3407   Register FactorSum =
3408       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
3409   DstRegs[DstIdx] = FactorSum;
3410 
3411   unsigned CarrySumPrevDstIdx;
3412   SmallVector<Register, 4> Factors;
3413 
3414   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
3415     // Collect low parts of muls for DstIdx.
3416     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
3417          i <= std::min(DstIdx, SrcParts - 1); ++i) {
3418       MachineInstrBuilder Mul =
3419           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
3420       Factors.push_back(Mul.getReg(0));
3421     }
3422     // Collect high parts of muls from previous DstIdx.
3423     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
3424          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
3425       MachineInstrBuilder Umulh =
3426           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
3427       Factors.push_back(Umulh.getReg(0));
3428     }
3429     // Add CarrySum from additions calculated for previous DstIdx.
3430     if (DstIdx != 1) {
3431       Factors.push_back(CarrySumPrevDstIdx);
3432     }
3433 
3434     Register CarrySum;
3435     // Add all factors and accumulate all carries into CarrySum.
3436     if (DstIdx != DstParts - 1) {
3437       MachineInstrBuilder Uaddo =
3438           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
3439       FactorSum = Uaddo.getReg(0);
3440       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
3441       for (unsigned i = 2; i < Factors.size(); ++i) {
3442         MachineInstrBuilder Uaddo =
3443             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
3444         FactorSum = Uaddo.getReg(0);
3445         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
3446         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
3447       }
3448     } else {
3449       // Since value for the next index is not calculated, neither is CarrySum.
3450       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
3451       for (unsigned i = 2; i < Factors.size(); ++i)
3452         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
3453     }
3454 
3455     CarrySumPrevDstIdx = CarrySum;
3456     DstRegs[DstIdx] = FactorSum;
3457     Factors.clear();
3458   }
3459 }
3460 
3461 LegalizerHelper::LegalizeResult
narrowScalarMul(MachineInstr & MI,LLT NarrowTy)3462 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
3463   Register DstReg = MI.getOperand(0).getReg();
3464   Register Src1 = MI.getOperand(1).getReg();
3465   Register Src2 = MI.getOperand(2).getReg();
3466 
3467   LLT Ty = MRI.getType(DstReg);
3468   if (Ty.isVector())
3469     return UnableToLegalize;
3470 
3471   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
3472   unsigned DstSize = Ty.getSizeInBits();
3473   unsigned NarrowSize = NarrowTy.getSizeInBits();
3474   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
3475     return UnableToLegalize;
3476 
3477   unsigned NumDstParts = DstSize / NarrowSize;
3478   unsigned NumSrcParts = SrcSize / NarrowSize;
3479   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
3480   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
3481 
3482   SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
3483   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
3484   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
3485   DstTmpRegs.resize(DstTmpParts);
3486   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
3487 
3488   // Take only high half of registers if this is high mul.
3489   ArrayRef<Register> DstRegs(
3490       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
3491   MIRBuilder.buildMerge(DstReg, DstRegs);
3492   MI.eraseFromParent();
3493   return Legalized;
3494 }
3495 
3496 LegalizerHelper::LegalizeResult
narrowScalarExtract(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3497 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
3498                                      LLT NarrowTy) {
3499   if (TypeIdx != 1)
3500     return UnableToLegalize;
3501 
3502   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3503 
3504   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
3505   // FIXME: add support for when SizeOp1 isn't an exact multiple of
3506   // NarrowSize.
3507   if (SizeOp1 % NarrowSize != 0)
3508     return UnableToLegalize;
3509   int NumParts = SizeOp1 / NarrowSize;
3510 
3511   SmallVector<Register, 2> SrcRegs, DstRegs;
3512   SmallVector<uint64_t, 2> Indexes;
3513   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3514 
3515   Register OpReg = MI.getOperand(0).getReg();
3516   uint64_t OpStart = MI.getOperand(2).getImm();
3517   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3518   for (int i = 0; i < NumParts; ++i) {
3519     unsigned SrcStart = i * NarrowSize;
3520 
3521     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
3522       // No part of the extract uses this subregister, ignore it.
3523       continue;
3524     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3525       // The entire subregister is extracted, forward the value.
3526       DstRegs.push_back(SrcRegs[i]);
3527       continue;
3528     }
3529 
3530     // OpSegStart is where this destination segment would start in OpReg if it
3531     // extended infinitely in both directions.
3532     int64_t ExtractOffset;
3533     uint64_t SegSize;
3534     if (OpStart < SrcStart) {
3535       ExtractOffset = 0;
3536       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
3537     } else {
3538       ExtractOffset = OpStart - SrcStart;
3539       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
3540     }
3541 
3542     Register SegReg = SrcRegs[i];
3543     if (ExtractOffset != 0 || SegSize != NarrowSize) {
3544       // A genuine extract is needed.
3545       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3546       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
3547     }
3548 
3549     DstRegs.push_back(SegReg);
3550   }
3551 
3552   Register DstReg = MI.getOperand(0).getReg();
3553   if(MRI.getType(DstReg).isVector())
3554     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3555   else
3556     MIRBuilder.buildMerge(DstReg, DstRegs);
3557   MI.eraseFromParent();
3558   return Legalized;
3559 }
3560 
3561 LegalizerHelper::LegalizeResult
narrowScalarInsert(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3562 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
3563                                     LLT NarrowTy) {
3564   // FIXME: Don't know how to handle secondary types yet.
3565   if (TypeIdx != 0)
3566     return UnableToLegalize;
3567 
3568   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
3569   uint64_t NarrowSize = NarrowTy.getSizeInBits();
3570 
3571   // FIXME: add support for when SizeOp0 isn't an exact multiple of
3572   // NarrowSize.
3573   if (SizeOp0 % NarrowSize != 0)
3574     return UnableToLegalize;
3575 
3576   int NumParts = SizeOp0 / NarrowSize;
3577 
3578   SmallVector<Register, 2> SrcRegs, DstRegs;
3579   SmallVector<uint64_t, 2> Indexes;
3580   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
3581 
3582   Register OpReg = MI.getOperand(2).getReg();
3583   uint64_t OpStart = MI.getOperand(3).getImm();
3584   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
3585   for (int i = 0; i < NumParts; ++i) {
3586     unsigned DstStart = i * NarrowSize;
3587 
3588     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
3589       // No part of the insert affects this subregister, forward the original.
3590       DstRegs.push_back(SrcRegs[i]);
3591       continue;
3592     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3593       // The entire subregister is defined by this insert, forward the new
3594       // value.
3595       DstRegs.push_back(OpReg);
3596       continue;
3597     }
3598 
3599     // OpSegStart is where this destination segment would start in OpReg if it
3600     // extended infinitely in both directions.
3601     int64_t ExtractOffset, InsertOffset;
3602     uint64_t SegSize;
3603     if (OpStart < DstStart) {
3604       InsertOffset = 0;
3605       ExtractOffset = DstStart - OpStart;
3606       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
3607     } else {
3608       InsertOffset = OpStart - DstStart;
3609       ExtractOffset = 0;
3610       SegSize =
3611         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
3612     }
3613 
3614     Register SegReg = OpReg;
3615     if (ExtractOffset != 0 || SegSize != OpSize) {
3616       // A genuine extract is needed.
3617       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3618       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
3619     }
3620 
3621     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
3622     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
3623     DstRegs.push_back(DstReg);
3624   }
3625 
3626   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
3627   Register DstReg = MI.getOperand(0).getReg();
3628   if(MRI.getType(DstReg).isVector())
3629     MIRBuilder.buildBuildVector(DstReg, DstRegs);
3630   else
3631     MIRBuilder.buildMerge(DstReg, DstRegs);
3632   MI.eraseFromParent();
3633   return Legalized;
3634 }
3635 
3636 LegalizerHelper::LegalizeResult
narrowScalarBasic(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3637 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
3638                                    LLT NarrowTy) {
3639   Register DstReg = MI.getOperand(0).getReg();
3640   LLT DstTy = MRI.getType(DstReg);
3641 
3642   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3643 
3644   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3645   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3646   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3647   LLT LeftoverTy;
3648   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3649                     Src0Regs, Src0LeftoverRegs))
3650     return UnableToLegalize;
3651 
3652   LLT Unused;
3653   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3654                     Src1Regs, Src1LeftoverRegs))
3655     llvm_unreachable("inconsistent extractParts result");
3656 
3657   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3658     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3659                                         {Src0Regs[I], Src1Regs[I]});
3660     DstRegs.push_back(Inst->getOperand(0).getReg());
3661   }
3662 
3663   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3664     auto Inst = MIRBuilder.buildInstr(
3665       MI.getOpcode(),
3666       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3667     DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3668   }
3669 
3670   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3671               LeftoverTy, DstLeftoverRegs);
3672 
3673   MI.eraseFromParent();
3674   return Legalized;
3675 }
3676 
3677 LegalizerHelper::LegalizeResult
narrowScalarSelect(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3678 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
3679                                     LLT NarrowTy) {
3680   if (TypeIdx != 0)
3681     return UnableToLegalize;
3682 
3683   Register CondReg = MI.getOperand(1).getReg();
3684   LLT CondTy = MRI.getType(CondReg);
3685   if (CondTy.isVector()) // TODO: Handle vselect
3686     return UnableToLegalize;
3687 
3688   Register DstReg = MI.getOperand(0).getReg();
3689   LLT DstTy = MRI.getType(DstReg);
3690 
3691   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3692   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3693   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3694   LLT LeftoverTy;
3695   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3696                     Src1Regs, Src1LeftoverRegs))
3697     return UnableToLegalize;
3698 
3699   LLT Unused;
3700   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3701                     Src2Regs, Src2LeftoverRegs))
3702     llvm_unreachable("inconsistent extractParts result");
3703 
3704   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3705     auto Select = MIRBuilder.buildSelect(NarrowTy,
3706                                          CondReg, Src1Regs[I], Src2Regs[I]);
3707     DstRegs.push_back(Select->getOperand(0).getReg());
3708   }
3709 
3710   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3711     auto Select = MIRBuilder.buildSelect(
3712       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3713     DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
3714   }
3715 
3716   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3717               LeftoverTy, DstLeftoverRegs);
3718 
3719   MI.eraseFromParent();
3720   return Legalized;
3721 }
3722 
3723 LegalizerHelper::LegalizeResult
lowerBitCount(MachineInstr & MI,unsigned TypeIdx,LLT Ty)3724 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3725   unsigned Opc = MI.getOpcode();
3726   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
3727   auto isSupported = [this](const LegalityQuery &Q) {
3728     auto QAction = LI.getAction(Q).Action;
3729     return QAction == Legal || QAction == Libcall || QAction == Custom;
3730   };
3731   switch (Opc) {
3732   default:
3733     return UnableToLegalize;
3734   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
3735     // This trivially expands to CTLZ.
3736     Observer.changingInstr(MI);
3737     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
3738     Observer.changedInstr(MI);
3739     return Legalized;
3740   }
3741   case TargetOpcode::G_CTLZ: {
3742     Register SrcReg = MI.getOperand(1).getReg();
3743     unsigned Len = Ty.getSizeInBits();
3744     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
3745       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3746       auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
3747                                              {Ty}, {SrcReg});
3748       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3749       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3750       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3751                                           SrcReg, MIBZero);
3752       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3753                              MIBCtlzZU);
3754       MI.eraseFromParent();
3755       return Legalized;
3756     }
3757     // for now, we do this:
3758     // NewLen = NextPowerOf2(Len);
3759     // x = x | (x >> 1);
3760     // x = x | (x >> 2);
3761     // ...
3762     // x = x | (x >>16);
3763     // x = x | (x >>32); // for 64-bit input
3764     // Upto NewLen/2
3765     // return Len - popcount(x);
3766     //
3767     // Ref: "Hacker's Delight" by Henry Warren
3768     Register Op = SrcReg;
3769     unsigned NewLen = PowerOf2Ceil(Len);
3770     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
3771       auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
3772       auto MIBOp = MIRBuilder.buildInstr(
3773           TargetOpcode::G_OR, {Ty},
3774           {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
3775                                      {Op, MIBShiftAmt})});
3776       Op = MIBOp->getOperand(0).getReg();
3777     }
3778     auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
3779     MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3780                           {MIRBuilder.buildConstant(Ty, Len), MIBPop});
3781     MI.eraseFromParent();
3782     return Legalized;
3783   }
3784   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
3785     // This trivially expands to CTTZ.
3786     Observer.changingInstr(MI);
3787     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
3788     Observer.changedInstr(MI);
3789     return Legalized;
3790   }
3791   case TargetOpcode::G_CTTZ: {
3792     Register SrcReg = MI.getOperand(1).getReg();
3793     unsigned Len = Ty.getSizeInBits();
3794     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
3795       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3796       // zero.
3797       auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
3798                                              {Ty}, {SrcReg});
3799       auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3800       auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3801       auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3802                                           SrcReg, MIBZero);
3803       MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3804                              MIBCttzZU);
3805       MI.eraseFromParent();
3806       return Legalized;
3807     }
3808     // for now, we use: { return popcount(~x & (x - 1)); }
3809     // unless the target has ctlz but not ctpop, in which case we use:
3810     // { return 32 - nlz(~x & (x-1)); }
3811     // Ref: "Hacker's Delight" by Henry Warren
3812     auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
3813     auto MIBNot =
3814         MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
3815     auto MIBTmp = MIRBuilder.buildInstr(
3816         TargetOpcode::G_AND, {Ty},
3817         {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
3818                                        {SrcReg, MIBCstNeg1})});
3819     if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
3820         isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
3821       auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
3822       MIRBuilder.buildInstr(
3823           TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3824           {MIBCstLen,
3825            MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
3826       MI.eraseFromParent();
3827       return Legalized;
3828     }
3829     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
3830     MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
3831     return Legalized;
3832   }
3833   }
3834 }
3835 
3836 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3837 // representation.
3838 LegalizerHelper::LegalizeResult
lowerU64ToF32BitOps(MachineInstr & MI)3839 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
3840   Register Dst = MI.getOperand(0).getReg();
3841   Register Src = MI.getOperand(1).getReg();
3842   const LLT S64 = LLT::scalar(64);
3843   const LLT S32 = LLT::scalar(32);
3844   const LLT S1 = LLT::scalar(1);
3845 
3846   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3847 
3848   // unsigned cul2f(ulong u) {
3849   //   uint lz = clz(u);
3850   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
3851   //   u = (u << lz) & 0x7fffffffffffffffUL;
3852   //   ulong t = u & 0xffffffffffUL;
3853   //   uint v = (e << 23) | (uint)(u >> 40);
3854   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3855   //   return as_float(v + r);
3856   // }
3857 
3858   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3859   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3860 
3861   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3862 
3863   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3864   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3865 
3866   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3867   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3868 
3869   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3870   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3871 
3872   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3873 
3874   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3875   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3876 
3877   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3878   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3879   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3880 
3881   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3882   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3883   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3884   auto One = MIRBuilder.buildConstant(S32, 1);
3885 
3886   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3887   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3888   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3889   MIRBuilder.buildAdd(Dst, V, R);
3890 
3891   return Legalized;
3892 }
3893 
3894 LegalizerHelper::LegalizeResult
lowerUITOFP(MachineInstr & MI,unsigned TypeIdx,LLT Ty)3895 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3896   Register Dst = MI.getOperand(0).getReg();
3897   Register Src = MI.getOperand(1).getReg();
3898   LLT DstTy = MRI.getType(Dst);
3899   LLT SrcTy = MRI.getType(Src);
3900 
3901   if (SrcTy == LLT::scalar(1)) {
3902     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
3903     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
3904     MIRBuilder.buildSelect(Dst, Src, True, False);
3905     MI.eraseFromParent();
3906     return Legalized;
3907   }
3908 
3909   if (SrcTy != LLT::scalar(64))
3910     return UnableToLegalize;
3911 
3912   if (DstTy == LLT::scalar(32)) {
3913     // TODO: SelectionDAG has several alternative expansions to port which may
3914     // be more reasonble depending on the available instructions. If a target
3915     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3916     // intermediate type, this is probably worse.
3917     return lowerU64ToF32BitOps(MI);
3918   }
3919 
3920   return UnableToLegalize;
3921 }
3922 
3923 LegalizerHelper::LegalizeResult
lowerSITOFP(MachineInstr & MI,unsigned TypeIdx,LLT Ty)3924 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3925   Register Dst = MI.getOperand(0).getReg();
3926   Register Src = MI.getOperand(1).getReg();
3927   LLT DstTy = MRI.getType(Dst);
3928   LLT SrcTy = MRI.getType(Src);
3929 
3930   const LLT S64 = LLT::scalar(64);
3931   const LLT S32 = LLT::scalar(32);
3932   const LLT S1 = LLT::scalar(1);
3933 
3934   if (SrcTy == S1) {
3935     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
3936     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
3937     MIRBuilder.buildSelect(Dst, Src, True, False);
3938     MI.eraseFromParent();
3939     return Legalized;
3940   }
3941 
3942   if (SrcTy != S64)
3943     return UnableToLegalize;
3944 
3945   if (DstTy == S32) {
3946     // signed cl2f(long l) {
3947     //   long s = l >> 63;
3948     //   float r = cul2f((l + s) ^ s);
3949     //   return s ? -r : r;
3950     // }
3951     Register L = Src;
3952     auto SignBit = MIRBuilder.buildConstant(S64, 63);
3953     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3954 
3955     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3956     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3957     auto R = MIRBuilder.buildUITOFP(S32, Xor);
3958 
3959     auto RNeg = MIRBuilder.buildFNeg(S32, R);
3960     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3961                                             MIRBuilder.buildConstant(S64, 0));
3962     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3963     return Legalized;
3964   }
3965 
3966   return UnableToLegalize;
3967 }
3968 
3969 LegalizerHelper::LegalizeResult
lowerFPTOUI(MachineInstr & MI,unsigned TypeIdx,LLT Ty)3970 LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3971   Register Dst = MI.getOperand(0).getReg();
3972   Register Src = MI.getOperand(1).getReg();
3973   LLT DstTy = MRI.getType(Dst);
3974   LLT SrcTy = MRI.getType(Src);
3975   const LLT S64 = LLT::scalar(64);
3976   const LLT S32 = LLT::scalar(32);
3977 
3978   if (SrcTy != S64 && SrcTy != S32)
3979     return UnableToLegalize;
3980   if (DstTy != S32 && DstTy != S64)
3981     return UnableToLegalize;
3982 
3983   // FPTOSI gives same result as FPTOUI for positive signed integers.
3984   // FPTOUI needs to deal with fp values that convert to unsigned integers
3985   // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
3986 
3987   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
3988   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
3989                                                 : APFloat::IEEEdouble(),
3990                     APInt::getNullValue(SrcTy.getSizeInBits()));
3991   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
3992 
3993   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
3994 
3995   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
3996   // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
3997   // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
3998   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
3999   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
4000   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
4001   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
4002 
4003   const LLT S1 = LLT::scalar(1);
4004 
4005   MachineInstrBuilder FCMP =
4006       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
4007   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
4008 
4009   MI.eraseFromParent();
4010   return Legalized;
4011 }
4012 
minMaxToCompare(unsigned Opc)4013 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
4014   switch (Opc) {
4015   case TargetOpcode::G_SMIN:
4016     return CmpInst::ICMP_SLT;
4017   case TargetOpcode::G_SMAX:
4018     return CmpInst::ICMP_SGT;
4019   case TargetOpcode::G_UMIN:
4020     return CmpInst::ICMP_ULT;
4021   case TargetOpcode::G_UMAX:
4022     return CmpInst::ICMP_UGT;
4023   default:
4024     llvm_unreachable("not in integer min/max");
4025   }
4026 }
4027 
4028 LegalizerHelper::LegalizeResult
lowerMinMax(MachineInstr & MI,unsigned TypeIdx,LLT Ty)4029 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4030   Register Dst = MI.getOperand(0).getReg();
4031   Register Src0 = MI.getOperand(1).getReg();
4032   Register Src1 = MI.getOperand(2).getReg();
4033 
4034   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
4035   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
4036 
4037   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
4038   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
4039 
4040   MI.eraseFromParent();
4041   return Legalized;
4042 }
4043 
4044 LegalizerHelper::LegalizeResult
lowerFCopySign(MachineInstr & MI,unsigned TypeIdx,LLT Ty)4045 LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
4046   Register Dst = MI.getOperand(0).getReg();
4047   Register Src0 = MI.getOperand(1).getReg();
4048   Register Src1 = MI.getOperand(2).getReg();
4049 
4050   const LLT Src0Ty = MRI.getType(Src0);
4051   const LLT Src1Ty = MRI.getType(Src1);
4052 
4053   const int Src0Size = Src0Ty.getScalarSizeInBits();
4054   const int Src1Size = Src1Ty.getScalarSizeInBits();
4055 
4056   auto SignBitMask = MIRBuilder.buildConstant(
4057     Src0Ty, APInt::getSignMask(Src0Size));
4058 
4059   auto NotSignBitMask = MIRBuilder.buildConstant(
4060     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
4061 
4062   auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
4063   MachineInstr *Or;
4064 
4065   if (Src0Ty == Src1Ty) {
4066     auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
4067     Or = MIRBuilder.buildOr(Dst, And0, And1);
4068   } else if (Src0Size > Src1Size) {
4069     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
4070     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
4071     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
4072     auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
4073     Or = MIRBuilder.buildOr(Dst, And0, And1);
4074   } else {
4075     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
4076     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
4077     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
4078     auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
4079     Or = MIRBuilder.buildOr(Dst, And0, And1);
4080   }
4081 
4082   // Be careful about setting nsz/nnan/ninf on every instruction, since the
4083   // constants are a nan and -0.0, but the final result should preserve
4084   // everything.
4085   if (unsigned Flags = MI.getFlags())
4086     Or->setFlags(Flags);
4087 
4088   MI.eraseFromParent();
4089   return Legalized;
4090 }
4091 
4092 LegalizerHelper::LegalizeResult
lowerFMinNumMaxNum(MachineInstr & MI)4093 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
4094   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
4095     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
4096 
4097   Register Dst = MI.getOperand(0).getReg();
4098   Register Src0 = MI.getOperand(1).getReg();
4099   Register Src1 = MI.getOperand(2).getReg();
4100   LLT Ty = MRI.getType(Dst);
4101 
4102   if (!MI.getFlag(MachineInstr::FmNoNans)) {
4103     // Insert canonicalizes if it's possible we need to quiet to get correct
4104     // sNaN behavior.
4105 
4106     // Note this must be done here, and not as an optimization combine in the
4107     // absence of a dedicate quiet-snan instruction as we're using an
4108     // omni-purpose G_FCANONICALIZE.
4109     if (!isKnownNeverSNaN(Src0, MRI))
4110       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
4111 
4112     if (!isKnownNeverSNaN(Src1, MRI))
4113       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
4114   }
4115 
4116   // If there are no nans, it's safe to simply replace this with the non-IEEE
4117   // version.
4118   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
4119   MI.eraseFromParent();
4120   return Legalized;
4121 }
4122 
lowerFMad(MachineInstr & MI)4123 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
4124   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
4125   Register DstReg = MI.getOperand(0).getReg();
4126   LLT Ty = MRI.getType(DstReg);
4127   unsigned Flags = MI.getFlags();
4128 
4129   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
4130                                   Flags);
4131   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
4132   MI.eraseFromParent();
4133   return Legalized;
4134 }
4135 
4136 LegalizerHelper::LegalizeResult
lowerIntrinsicRound(MachineInstr & MI)4137 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
4138   Register DstReg = MI.getOperand(0).getReg();
4139   Register SrcReg = MI.getOperand(1).getReg();
4140   unsigned Flags = MI.getFlags();
4141   LLT Ty = MRI.getType(DstReg);
4142   const LLT CondTy = Ty.changeElementSize(1);
4143 
4144   // result = trunc(src);
4145   // if (src < 0.0 && src != result)
4146   //   result += -1.0.
4147 
4148   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
4149   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
4150 
4151   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
4152                                   SrcReg, Zero, Flags);
4153   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
4154                                       SrcReg, Trunc, Flags);
4155   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
4156   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
4157 
4158   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal);
4159   MI.eraseFromParent();
4160   return Legalized;
4161 }
4162 
4163 LegalizerHelper::LegalizeResult
lowerUnmergeValues(MachineInstr & MI)4164 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
4165   const unsigned NumDst = MI.getNumOperands() - 1;
4166   const Register SrcReg = MI.getOperand(NumDst).getReg();
4167   LLT SrcTy = MRI.getType(SrcReg);
4168 
4169   Register Dst0Reg = MI.getOperand(0).getReg();
4170   LLT DstTy = MRI.getType(Dst0Reg);
4171 
4172 
4173   // Expand scalarizing unmerge as bitcast to integer and shift.
4174   if (!DstTy.isVector() && SrcTy.isVector() &&
4175       SrcTy.getElementType() == DstTy) {
4176     LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
4177     Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
4178 
4179     MIRBuilder.buildTrunc(Dst0Reg, Cast);
4180 
4181     const unsigned DstSize = DstTy.getSizeInBits();
4182     unsigned Offset = DstSize;
4183     for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
4184       auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
4185       auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
4186       MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
4187     }
4188 
4189     MI.eraseFromParent();
4190     return Legalized;
4191   }
4192 
4193   return UnableToLegalize;
4194 }
4195 
4196 LegalizerHelper::LegalizeResult
lowerShuffleVector(MachineInstr & MI)4197 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
4198   Register DstReg = MI.getOperand(0).getReg();
4199   Register Src0Reg = MI.getOperand(1).getReg();
4200   Register Src1Reg = MI.getOperand(2).getReg();
4201   LLT Src0Ty = MRI.getType(Src0Reg);
4202   LLT DstTy = MRI.getType(DstReg);
4203   LLT IdxTy = LLT::scalar(32);
4204 
4205   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4206 
4207   if (DstTy.isScalar()) {
4208     if (Src0Ty.isVector())
4209       return UnableToLegalize;
4210 
4211     // This is just a SELECT.
4212     assert(Mask.size() == 1 && "Expected a single mask element");
4213     Register Val;
4214     if (Mask[0] < 0 || Mask[0] > 1)
4215       Val = MIRBuilder.buildUndef(DstTy).getReg(0);
4216     else
4217       Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
4218     MIRBuilder.buildCopy(DstReg, Val);
4219     MI.eraseFromParent();
4220     return Legalized;
4221   }
4222 
4223   Register Undef;
4224   SmallVector<Register, 32> BuildVec;
4225   LLT EltTy = DstTy.getElementType();
4226 
4227   for (int Idx : Mask) {
4228     if (Idx < 0) {
4229       if (!Undef.isValid())
4230         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
4231       BuildVec.push_back(Undef);
4232       continue;
4233     }
4234 
4235     if (Src0Ty.isScalar()) {
4236       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
4237     } else {
4238       int NumElts = Src0Ty.getNumElements();
4239       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
4240       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
4241       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
4242       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
4243       BuildVec.push_back(Extract.getReg(0));
4244     }
4245   }
4246 
4247   MIRBuilder.buildBuildVector(DstReg, BuildVec);
4248   MI.eraseFromParent();
4249   return Legalized;
4250 }
4251 
4252 LegalizerHelper::LegalizeResult
lowerDynStackAlloc(MachineInstr & MI)4253 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
4254   Register Dst = MI.getOperand(0).getReg();
4255   Register AllocSize = MI.getOperand(1).getReg();
4256   unsigned Align = MI.getOperand(2).getImm();
4257 
4258   const auto &MF = *MI.getMF();
4259   const auto &TLI = *MF.getSubtarget().getTargetLowering();
4260 
4261   LLT PtrTy = MRI.getType(Dst);
4262   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
4263 
4264   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
4265   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
4266   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
4267 
4268   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
4269   // have to generate an extra instruction to negate the alloc and then use
4270   // G_PTR_ADD to add the negative offset.
4271   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
4272   if (Align) {
4273     APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
4274     AlignMask.negate();
4275     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
4276     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
4277   }
4278 
4279   SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
4280   MIRBuilder.buildCopy(SPReg, SPTmp);
4281   MIRBuilder.buildCopy(Dst, SPTmp);
4282 
4283   MI.eraseFromParent();
4284   return Legalized;
4285 }
4286 
4287 LegalizerHelper::LegalizeResult
lowerExtract(MachineInstr & MI)4288 LegalizerHelper::lowerExtract(MachineInstr &MI) {
4289   Register Dst = MI.getOperand(0).getReg();
4290   Register Src = MI.getOperand(1).getReg();
4291   unsigned Offset = MI.getOperand(2).getImm();
4292 
4293   LLT DstTy = MRI.getType(Dst);
4294   LLT SrcTy = MRI.getType(Src);
4295 
4296   if (DstTy.isScalar() &&
4297       (SrcTy.isScalar() ||
4298        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
4299     LLT SrcIntTy = SrcTy;
4300     if (!SrcTy.isScalar()) {
4301       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
4302       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
4303     }
4304 
4305     if (Offset == 0)
4306       MIRBuilder.buildTrunc(Dst, Src);
4307     else {
4308       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
4309       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
4310       MIRBuilder.buildTrunc(Dst, Shr);
4311     }
4312 
4313     MI.eraseFromParent();
4314     return Legalized;
4315   }
4316 
4317   return UnableToLegalize;
4318 }
4319 
lowerInsert(MachineInstr & MI)4320 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
4321   Register Dst = MI.getOperand(0).getReg();
4322   Register Src = MI.getOperand(1).getReg();
4323   Register InsertSrc = MI.getOperand(2).getReg();
4324   uint64_t Offset = MI.getOperand(3).getImm();
4325 
4326   LLT DstTy = MRI.getType(Src);
4327   LLT InsertTy = MRI.getType(InsertSrc);
4328 
4329   if (InsertTy.isScalar() &&
4330       (DstTy.isScalar() ||
4331        (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
4332     LLT IntDstTy = DstTy;
4333     if (!DstTy.isScalar()) {
4334       IntDstTy = LLT::scalar(DstTy.getSizeInBits());
4335       Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
4336     }
4337 
4338     Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
4339     if (Offset != 0) {
4340       auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
4341       ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
4342     }
4343 
4344     APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
4345                                        InsertTy.getSizeInBits());
4346 
4347     auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
4348     auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
4349     auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
4350 
4351     MIRBuilder.buildBitcast(Dst, Or);
4352     MI.eraseFromParent();
4353     return Legalized;
4354   }
4355 
4356   return UnableToLegalize;
4357 }
4358 
4359 LegalizerHelper::LegalizeResult
lowerSADDO_SSUBO(MachineInstr & MI)4360 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
4361   Register Dst0 = MI.getOperand(0).getReg();
4362   Register Dst1 = MI.getOperand(1).getReg();
4363   Register LHS = MI.getOperand(2).getReg();
4364   Register RHS = MI.getOperand(3).getReg();
4365   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
4366 
4367   LLT Ty = MRI.getType(Dst0);
4368   LLT BoolTy = MRI.getType(Dst1);
4369 
4370   if (IsAdd)
4371     MIRBuilder.buildAdd(Dst0, LHS, RHS);
4372   else
4373     MIRBuilder.buildSub(Dst0, LHS, RHS);
4374 
4375   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
4376 
4377   auto Zero = MIRBuilder.buildConstant(Ty, 0);
4378 
4379   // For an addition, the result should be less than one of the operands (LHS)
4380   // if and only if the other operand (RHS) is negative, otherwise there will
4381   // be overflow.
4382   // For a subtraction, the result should be less than one of the operands
4383   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
4384   // otherwise there will be overflow.
4385   auto ResultLowerThanLHS =
4386       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
4387   auto ConditionRHS = MIRBuilder.buildICmp(
4388       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
4389 
4390   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
4391   MI.eraseFromParent();
4392   return Legalized;
4393 }
4394 
4395 LegalizerHelper::LegalizeResult
lowerBswap(MachineInstr & MI)4396 LegalizerHelper::lowerBswap(MachineInstr &MI) {
4397   Register Dst = MI.getOperand(0).getReg();
4398   Register Src = MI.getOperand(1).getReg();
4399   const LLT Ty = MRI.getType(Src);
4400   unsigned SizeInBytes = Ty.getSizeInBytes();
4401   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
4402 
4403   // Swap most and least significant byte, set remaining bytes in Res to zero.
4404   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
4405   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
4406   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4407   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
4408 
4409   // Set i-th high/low byte in Res to i-th low/high byte from Src.
4410   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
4411     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
4412     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
4413     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
4414     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
4415     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
4416     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
4417     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
4418     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
4419     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
4420     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
4421     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
4422     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
4423   }
4424   Res.getInstr()->getOperand(0).setReg(Dst);
4425 
4426   MI.eraseFromParent();
4427   return Legalized;
4428 }
4429 
4430 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
SwapN(unsigned N,DstOp Dst,MachineIRBuilder & B,MachineInstrBuilder Src,APInt Mask)4431 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
4432                                  MachineInstrBuilder Src, APInt Mask) {
4433   const LLT Ty = Dst.getLLTTy(*B.getMRI());
4434   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
4435   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
4436   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
4437   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
4438   return B.buildOr(Dst, LHS, RHS);
4439 }
4440 
4441 LegalizerHelper::LegalizeResult
lowerBitreverse(MachineInstr & MI)4442 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
4443   Register Dst = MI.getOperand(0).getReg();
4444   Register Src = MI.getOperand(1).getReg();
4445   const LLT Ty = MRI.getType(Src);
4446   unsigned Size = Ty.getSizeInBits();
4447 
4448   MachineInstrBuilder BSWAP =
4449       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
4450 
4451   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
4452   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
4453   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
4454   MachineInstrBuilder Swap4 =
4455       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
4456 
4457   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
4458   //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
4459   // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
4460   MachineInstrBuilder Swap2 =
4461       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
4462 
4463   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
4464   //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
4465   // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
4466   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
4467 
4468   MI.eraseFromParent();
4469   return Legalized;
4470 }
4471 
4472 LegalizerHelper::LegalizeResult
lowerReadRegister(MachineInstr & MI)4473 LegalizerHelper::lowerReadRegister(MachineInstr &MI) {
4474   Register Dst = MI.getOperand(0).getReg();
4475   const LLT Ty = MRI.getType(Dst);
4476   const MDString *RegStr = cast<MDString>(
4477     cast<MDNode>(MI.getOperand(1).getMetadata())->getOperand(0));
4478 
4479   MachineFunction &MF = MIRBuilder.getMF();
4480   const TargetSubtargetInfo &STI = MF.getSubtarget();
4481   const TargetLowering *TLI = STI.getTargetLowering();
4482   Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Ty, MF);
4483   if (!Reg.isValid())
4484     return UnableToLegalize;
4485 
4486   MIRBuilder.buildCopy(Dst, Reg);
4487   MI.eraseFromParent();
4488   return Legalized;
4489 }
4490