1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetFrameLowering.h"
22 #include "llvm/CodeGen/TargetInstrInfo.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetSubtargetInfo.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28
29 #define DEBUG_TYPE "legalizer"
30
31 using namespace llvm;
32 using namespace LegalizeActions;
33 using namespace MIPatternMatch;
34
35 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
36 ///
37 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
38 /// with any leftover piece as type \p LeftoverTy
39 ///
40 /// Returns -1 in the first element of the pair if the breakdown is not
41 /// satisfiable.
42 static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy,LLT NarrowTy,LLT & LeftoverTy)43 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
44 assert(!LeftoverTy.isValid() && "this is an out argument");
45
46 unsigned Size = OrigTy.getSizeInBits();
47 unsigned NarrowSize = NarrowTy.getSizeInBits();
48 unsigned NumParts = Size / NarrowSize;
49 unsigned LeftoverSize = Size - NumParts * NarrowSize;
50 assert(Size > NarrowSize);
51
52 if (LeftoverSize == 0)
53 return {NumParts, 0};
54
55 if (NarrowTy.isVector()) {
56 unsigned EltSize = OrigTy.getScalarSizeInBits();
57 if (LeftoverSize % EltSize != 0)
58 return {-1, -1};
59 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
60 } else {
61 LeftoverTy = LLT::scalar(LeftoverSize);
62 }
63
64 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
65 return std::make_pair(NumParts, NumLeftover);
66 }
67
getFloatTypeForLLT(LLVMContext & Ctx,LLT Ty)68 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
69
70 if (!Ty.isScalar())
71 return nullptr;
72
73 switch (Ty.getSizeInBits()) {
74 case 16:
75 return Type::getHalfTy(Ctx);
76 case 32:
77 return Type::getFloatTy(Ctx);
78 case 64:
79 return Type::getDoubleTy(Ctx);
80 case 80:
81 return Type::getX86_FP80Ty(Ctx);
82 case 128:
83 return Type::getFP128Ty(Ctx);
84 default:
85 return nullptr;
86 }
87 }
88
LegalizerHelper(MachineFunction & MF,GISelChangeObserver & Observer,MachineIRBuilder & Builder)89 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
90 GISelChangeObserver &Observer,
91 MachineIRBuilder &Builder)
92 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
93 LI(*MF.getSubtarget().getLegalizerInfo()),
94 TLI(*MF.getSubtarget().getTargetLowering()) { }
95
LegalizerHelper(MachineFunction & MF,const LegalizerInfo & LI,GISelChangeObserver & Observer,MachineIRBuilder & B)96 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
97 GISelChangeObserver &Observer,
98 MachineIRBuilder &B)
99 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
100 TLI(*MF.getSubtarget().getTargetLowering()) { }
101
102 LegalizerHelper::LegalizeResult
legalizeInstrStep(MachineInstr & MI)103 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
104 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
105
106 MIRBuilder.setInstrAndDebugLoc(MI);
107
108 if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
109 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
110 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
111 auto Step = LI.getAction(MI, MRI);
112 switch (Step.Action) {
113 case Legal:
114 LLVM_DEBUG(dbgs() << ".. Already legal\n");
115 return AlreadyLegal;
116 case Libcall:
117 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
118 return libcall(MI);
119 case NarrowScalar:
120 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
121 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
122 case WidenScalar:
123 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
124 return widenScalar(MI, Step.TypeIdx, Step.NewType);
125 case Bitcast:
126 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
127 return bitcast(MI, Step.TypeIdx, Step.NewType);
128 case Lower:
129 LLVM_DEBUG(dbgs() << ".. Lower\n");
130 return lower(MI, Step.TypeIdx, Step.NewType);
131 case FewerElements:
132 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
133 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
134 case MoreElements:
135 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
136 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
137 case Custom:
138 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
139 return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
140 default:
141 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
142 return UnableToLegalize;
143 }
144 }
145
extractParts(Register Reg,LLT Ty,int NumParts,SmallVectorImpl<Register> & VRegs)146 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
147 SmallVectorImpl<Register> &VRegs) {
148 for (int i = 0; i < NumParts; ++i)
149 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
150 MIRBuilder.buildUnmerge(VRegs, Reg);
151 }
152
extractParts(Register Reg,LLT RegTy,LLT MainTy,LLT & LeftoverTy,SmallVectorImpl<Register> & VRegs,SmallVectorImpl<Register> & LeftoverRegs)153 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
154 LLT MainTy, LLT &LeftoverTy,
155 SmallVectorImpl<Register> &VRegs,
156 SmallVectorImpl<Register> &LeftoverRegs) {
157 assert(!LeftoverTy.isValid() && "this is an out argument");
158
159 unsigned RegSize = RegTy.getSizeInBits();
160 unsigned MainSize = MainTy.getSizeInBits();
161 unsigned NumParts = RegSize / MainSize;
162 unsigned LeftoverSize = RegSize - NumParts * MainSize;
163
164 // Use an unmerge when possible.
165 if (LeftoverSize == 0) {
166 for (unsigned I = 0; I < NumParts; ++I)
167 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
168 MIRBuilder.buildUnmerge(VRegs, Reg);
169 return true;
170 }
171
172 if (MainTy.isVector()) {
173 unsigned EltSize = MainTy.getScalarSizeInBits();
174 if (LeftoverSize % EltSize != 0)
175 return false;
176 LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
177 } else {
178 LeftoverTy = LLT::scalar(LeftoverSize);
179 }
180
181 // For irregular sizes, extract the individual parts.
182 for (unsigned I = 0; I != NumParts; ++I) {
183 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
184 VRegs.push_back(NewReg);
185 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
186 }
187
188 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
189 Offset += LeftoverSize) {
190 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
191 LeftoverRegs.push_back(NewReg);
192 MIRBuilder.buildExtract(NewReg, Reg, Offset);
193 }
194
195 return true;
196 }
197
insertParts(Register DstReg,LLT ResultTy,LLT PartTy,ArrayRef<Register> PartRegs,LLT LeftoverTy,ArrayRef<Register> LeftoverRegs)198 void LegalizerHelper::insertParts(Register DstReg,
199 LLT ResultTy, LLT PartTy,
200 ArrayRef<Register> PartRegs,
201 LLT LeftoverTy,
202 ArrayRef<Register> LeftoverRegs) {
203 if (!LeftoverTy.isValid()) {
204 assert(LeftoverRegs.empty());
205
206 if (!ResultTy.isVector()) {
207 MIRBuilder.buildMerge(DstReg, PartRegs);
208 return;
209 }
210
211 if (PartTy.isVector())
212 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
213 else
214 MIRBuilder.buildBuildVector(DstReg, PartRegs);
215 return;
216 }
217
218 unsigned PartSize = PartTy.getSizeInBits();
219 unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
220
221 Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
222 MIRBuilder.buildUndef(CurResultReg);
223
224 unsigned Offset = 0;
225 for (Register PartReg : PartRegs) {
226 Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
227 MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
228 CurResultReg = NewResultReg;
229 Offset += PartSize;
230 }
231
232 for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
233 // Use the original output register for the final insert to avoid a copy.
234 Register NewResultReg = (I + 1 == E) ?
235 DstReg : MRI.createGenericVirtualRegister(ResultTy);
236
237 MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
238 CurResultReg = NewResultReg;
239 Offset += LeftoverPartSize;
240 }
241 }
242
243 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
getUnmergeResults(SmallVectorImpl<Register> & Regs,const MachineInstr & MI)244 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
245 const MachineInstr &MI) {
246 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
247
248 const int StartIdx = Regs.size();
249 const int NumResults = MI.getNumOperands() - 1;
250 Regs.resize(Regs.size() + NumResults);
251 for (int I = 0; I != NumResults; ++I)
252 Regs[StartIdx + I] = MI.getOperand(I).getReg();
253 }
254
extractGCDType(SmallVectorImpl<Register> & Parts,LLT GCDTy,Register SrcReg)255 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
256 LLT GCDTy, Register SrcReg) {
257 LLT SrcTy = MRI.getType(SrcReg);
258 if (SrcTy == GCDTy) {
259 // If the source already evenly divides the result type, we don't need to do
260 // anything.
261 Parts.push_back(SrcReg);
262 } else {
263 // Need to split into common type sized pieces.
264 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
265 getUnmergeResults(Parts, *Unmerge);
266 }
267 }
268
extractGCDType(SmallVectorImpl<Register> & Parts,LLT DstTy,LLT NarrowTy,Register SrcReg)269 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
270 LLT NarrowTy, Register SrcReg) {
271 LLT SrcTy = MRI.getType(SrcReg);
272 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
273 extractGCDType(Parts, GCDTy, SrcReg);
274 return GCDTy;
275 }
276
buildLCMMergePieces(LLT DstTy,LLT NarrowTy,LLT GCDTy,SmallVectorImpl<Register> & VRegs,unsigned PadStrategy)277 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
278 SmallVectorImpl<Register> &VRegs,
279 unsigned PadStrategy) {
280 LLT LCMTy = getLCMType(DstTy, NarrowTy);
281
282 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
283 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
284 int NumOrigSrc = VRegs.size();
285
286 Register PadReg;
287
288 // Get a value we can use to pad the source value if the sources won't evenly
289 // cover the result type.
290 if (NumOrigSrc < NumParts * NumSubParts) {
291 if (PadStrategy == TargetOpcode::G_ZEXT)
292 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
293 else if (PadStrategy == TargetOpcode::G_ANYEXT)
294 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
295 else {
296 assert(PadStrategy == TargetOpcode::G_SEXT);
297
298 // Shift the sign bit of the low register through the high register.
299 auto ShiftAmt =
300 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
301 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
302 }
303 }
304
305 // Registers for the final merge to be produced.
306 SmallVector<Register, 4> Remerge(NumParts);
307
308 // Registers needed for intermediate merges, which will be merged into a
309 // source for Remerge.
310 SmallVector<Register, 4> SubMerge(NumSubParts);
311
312 // Once we've fully read off the end of the original source bits, we can reuse
313 // the same high bits for remaining padding elements.
314 Register AllPadReg;
315
316 // Build merges to the LCM type to cover the original result type.
317 for (int I = 0; I != NumParts; ++I) {
318 bool AllMergePartsArePadding = true;
319
320 // Build the requested merges to the requested type.
321 for (int J = 0; J != NumSubParts; ++J) {
322 int Idx = I * NumSubParts + J;
323 if (Idx >= NumOrigSrc) {
324 SubMerge[J] = PadReg;
325 continue;
326 }
327
328 SubMerge[J] = VRegs[Idx];
329
330 // There are meaningful bits here we can't reuse later.
331 AllMergePartsArePadding = false;
332 }
333
334 // If we've filled up a complete piece with padding bits, we can directly
335 // emit the natural sized constant if applicable, rather than a merge of
336 // smaller constants.
337 if (AllMergePartsArePadding && !AllPadReg) {
338 if (PadStrategy == TargetOpcode::G_ANYEXT)
339 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
340 else if (PadStrategy == TargetOpcode::G_ZEXT)
341 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
342
343 // If this is a sign extension, we can't materialize a trivial constant
344 // with the right type and have to produce a merge.
345 }
346
347 if (AllPadReg) {
348 // Avoid creating additional instructions if we're just adding additional
349 // copies of padding bits.
350 Remerge[I] = AllPadReg;
351 continue;
352 }
353
354 if (NumSubParts == 1)
355 Remerge[I] = SubMerge[0];
356 else
357 Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
358
359 // In the sign extend padding case, re-use the first all-signbit merge.
360 if (AllMergePartsArePadding && !AllPadReg)
361 AllPadReg = Remerge[I];
362 }
363
364 VRegs = std::move(Remerge);
365 return LCMTy;
366 }
367
buildWidenedRemergeToDst(Register DstReg,LLT LCMTy,ArrayRef<Register> RemergeRegs)368 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
369 ArrayRef<Register> RemergeRegs) {
370 LLT DstTy = MRI.getType(DstReg);
371
372 // Create the merge to the widened source, and extract the relevant bits into
373 // the result.
374
375 if (DstTy == LCMTy) {
376 MIRBuilder.buildMerge(DstReg, RemergeRegs);
377 return;
378 }
379
380 auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
381 if (DstTy.isScalar() && LCMTy.isScalar()) {
382 MIRBuilder.buildTrunc(DstReg, Remerge);
383 return;
384 }
385
386 if (LCMTy.isVector()) {
387 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
388 SmallVector<Register, 8> UnmergeDefs(NumDefs);
389 UnmergeDefs[0] = DstReg;
390 for (unsigned I = 1; I != NumDefs; ++I)
391 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
392
393 MIRBuilder.buildUnmerge(UnmergeDefs,
394 MIRBuilder.buildMerge(LCMTy, RemergeRegs));
395 return;
396 }
397
398 llvm_unreachable("unhandled case");
399 }
400
getRTLibDesc(unsigned Opcode,unsigned Size)401 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
402 #define RTLIBCASE_INT(LibcallPrefix) \
403 do { \
404 switch (Size) { \
405 case 32: \
406 return RTLIB::LibcallPrefix##32; \
407 case 64: \
408 return RTLIB::LibcallPrefix##64; \
409 case 128: \
410 return RTLIB::LibcallPrefix##128; \
411 default: \
412 llvm_unreachable("unexpected size"); \
413 } \
414 } while (0)
415
416 #define RTLIBCASE(LibcallPrefix) \
417 do { \
418 switch (Size) { \
419 case 32: \
420 return RTLIB::LibcallPrefix##32; \
421 case 64: \
422 return RTLIB::LibcallPrefix##64; \
423 case 80: \
424 return RTLIB::LibcallPrefix##80; \
425 case 128: \
426 return RTLIB::LibcallPrefix##128; \
427 default: \
428 llvm_unreachable("unexpected size"); \
429 } \
430 } while (0)
431
432 switch (Opcode) {
433 case TargetOpcode::G_SDIV:
434 RTLIBCASE_INT(SDIV_I);
435 case TargetOpcode::G_UDIV:
436 RTLIBCASE_INT(UDIV_I);
437 case TargetOpcode::G_SREM:
438 RTLIBCASE_INT(SREM_I);
439 case TargetOpcode::G_UREM:
440 RTLIBCASE_INT(UREM_I);
441 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
442 RTLIBCASE_INT(CTLZ_I);
443 case TargetOpcode::G_FADD:
444 RTLIBCASE(ADD_F);
445 case TargetOpcode::G_FSUB:
446 RTLIBCASE(SUB_F);
447 case TargetOpcode::G_FMUL:
448 RTLIBCASE(MUL_F);
449 case TargetOpcode::G_FDIV:
450 RTLIBCASE(DIV_F);
451 case TargetOpcode::G_FEXP:
452 RTLIBCASE(EXP_F);
453 case TargetOpcode::G_FEXP2:
454 RTLIBCASE(EXP2_F);
455 case TargetOpcode::G_FREM:
456 RTLIBCASE(REM_F);
457 case TargetOpcode::G_FPOW:
458 RTLIBCASE(POW_F);
459 case TargetOpcode::G_FMA:
460 RTLIBCASE(FMA_F);
461 case TargetOpcode::G_FSIN:
462 RTLIBCASE(SIN_F);
463 case TargetOpcode::G_FCOS:
464 RTLIBCASE(COS_F);
465 case TargetOpcode::G_FLOG10:
466 RTLIBCASE(LOG10_F);
467 case TargetOpcode::G_FLOG:
468 RTLIBCASE(LOG_F);
469 case TargetOpcode::G_FLOG2:
470 RTLIBCASE(LOG2_F);
471 case TargetOpcode::G_FCEIL:
472 RTLIBCASE(CEIL_F);
473 case TargetOpcode::G_FFLOOR:
474 RTLIBCASE(FLOOR_F);
475 case TargetOpcode::G_FMINNUM:
476 RTLIBCASE(FMIN_F);
477 case TargetOpcode::G_FMAXNUM:
478 RTLIBCASE(FMAX_F);
479 case TargetOpcode::G_FSQRT:
480 RTLIBCASE(SQRT_F);
481 case TargetOpcode::G_FRINT:
482 RTLIBCASE(RINT_F);
483 case TargetOpcode::G_FNEARBYINT:
484 RTLIBCASE(NEARBYINT_F);
485 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
486 RTLIBCASE(ROUNDEVEN_F);
487 }
488 llvm_unreachable("Unknown libcall function");
489 }
490
491 /// True if an instruction is in tail position in its caller. Intended for
492 /// legalizing libcalls as tail calls when possible.
isLibCallInTailPosition(const TargetInstrInfo & TII,MachineInstr & MI)493 static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
494 MachineInstr &MI) {
495 MachineBasicBlock &MBB = *MI.getParent();
496 const Function &F = MBB.getParent()->getFunction();
497
498 // Conservatively require the attributes of the call to match those of
499 // the return. Ignore NoAlias and NonNull because they don't affect the
500 // call sequence.
501 AttributeList CallerAttrs = F.getAttributes();
502 if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
503 .removeAttribute(Attribute::NoAlias)
504 .removeAttribute(Attribute::NonNull)
505 .hasAttributes())
506 return false;
507
508 // It's not safe to eliminate the sign / zero extension of the return value.
509 if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
510 CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
511 return false;
512
513 // Only tail call if the following instruction is a standard return.
514 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
515 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
516 return false;
517
518 return true;
519 }
520
521 LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder & MIRBuilder,const char * Name,const CallLowering::ArgInfo & Result,ArrayRef<CallLowering::ArgInfo> Args,const CallingConv::ID CC)522 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
523 const CallLowering::ArgInfo &Result,
524 ArrayRef<CallLowering::ArgInfo> Args,
525 const CallingConv::ID CC) {
526 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
527
528 CallLowering::CallLoweringInfo Info;
529 Info.CallConv = CC;
530 Info.Callee = MachineOperand::CreateES(Name);
531 Info.OrigRet = Result;
532 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
533 if (!CLI.lowerCall(MIRBuilder, Info))
534 return LegalizerHelper::UnableToLegalize;
535
536 return LegalizerHelper::Legalized;
537 }
538
539 LegalizerHelper::LegalizeResult
createLibcall(MachineIRBuilder & MIRBuilder,RTLIB::Libcall Libcall,const CallLowering::ArgInfo & Result,ArrayRef<CallLowering::ArgInfo> Args)540 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
541 const CallLowering::ArgInfo &Result,
542 ArrayRef<CallLowering::ArgInfo> Args) {
543 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
544 const char *Name = TLI.getLibcallName(Libcall);
545 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
546 return createLibcall(MIRBuilder, Name, Result, Args, CC);
547 }
548
549 // Useful for libcalls where all operands have the same type.
550 static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,unsigned Size,Type * OpType)551 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
552 Type *OpType) {
553 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
554
555 SmallVector<CallLowering::ArgInfo, 3> Args;
556 for (unsigned i = 1; i < MI.getNumOperands(); i++)
557 Args.push_back({MI.getOperand(i).getReg(), OpType});
558 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
559 Args);
560 }
561
562 LegalizerHelper::LegalizeResult
createMemLibcall(MachineIRBuilder & MIRBuilder,MachineRegisterInfo & MRI,MachineInstr & MI)563 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
564 MachineInstr &MI) {
565 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
566
567 SmallVector<CallLowering::ArgInfo, 3> Args;
568 // Add all the args, except for the last which is an imm denoting 'tail'.
569 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
570 Register Reg = MI.getOperand(i).getReg();
571
572 // Need derive an IR type for call lowering.
573 LLT OpLLT = MRI.getType(Reg);
574 Type *OpTy = nullptr;
575 if (OpLLT.isPointer())
576 OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
577 else
578 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
579 Args.push_back({Reg, OpTy});
580 }
581
582 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
583 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
584 RTLIB::Libcall RTLibcall;
585 switch (MI.getOpcode()) {
586 case TargetOpcode::G_MEMCPY:
587 RTLibcall = RTLIB::MEMCPY;
588 break;
589 case TargetOpcode::G_MEMMOVE:
590 RTLibcall = RTLIB::MEMMOVE;
591 break;
592 case TargetOpcode::G_MEMSET:
593 RTLibcall = RTLIB::MEMSET;
594 break;
595 default:
596 return LegalizerHelper::UnableToLegalize;
597 }
598 const char *Name = TLI.getLibcallName(RTLibcall);
599
600 CallLowering::CallLoweringInfo Info;
601 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
602 Info.Callee = MachineOperand::CreateES(Name);
603 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
604 Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
605 isLibCallInTailPosition(MIRBuilder.getTII(), MI);
606
607 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
608 if (!CLI.lowerCall(MIRBuilder, Info))
609 return LegalizerHelper::UnableToLegalize;
610
611 if (Info.LoweredTailCall) {
612 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
613 // We must have a return following the call (or debug insts) to get past
614 // isLibCallInTailPosition.
615 do {
616 MachineInstr *Next = MI.getNextNode();
617 assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
618 "Expected instr following MI to be return or debug inst?");
619 // We lowered a tail call, so the call is now the return from the block.
620 // Delete the old return.
621 Next->eraseFromParent();
622 } while (MI.getNextNode());
623 }
624
625 return LegalizerHelper::Legalized;
626 }
627
getConvRTLibDesc(unsigned Opcode,Type * ToType,Type * FromType)628 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
629 Type *FromType) {
630 auto ToMVT = MVT::getVT(ToType);
631 auto FromMVT = MVT::getVT(FromType);
632
633 switch (Opcode) {
634 case TargetOpcode::G_FPEXT:
635 return RTLIB::getFPEXT(FromMVT, ToMVT);
636 case TargetOpcode::G_FPTRUNC:
637 return RTLIB::getFPROUND(FromMVT, ToMVT);
638 case TargetOpcode::G_FPTOSI:
639 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
640 case TargetOpcode::G_FPTOUI:
641 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
642 case TargetOpcode::G_SITOFP:
643 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
644 case TargetOpcode::G_UITOFP:
645 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
646 }
647 llvm_unreachable("Unsupported libcall function");
648 }
649
650 static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr & MI,MachineIRBuilder & MIRBuilder,Type * ToType,Type * FromType)651 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
652 Type *FromType) {
653 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
654 return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
655 {{MI.getOperand(1).getReg(), FromType}});
656 }
657
658 LegalizerHelper::LegalizeResult
libcall(MachineInstr & MI)659 LegalizerHelper::libcall(MachineInstr &MI) {
660 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
661 unsigned Size = LLTy.getSizeInBits();
662 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
663
664 switch (MI.getOpcode()) {
665 default:
666 return UnableToLegalize;
667 case TargetOpcode::G_SDIV:
668 case TargetOpcode::G_UDIV:
669 case TargetOpcode::G_SREM:
670 case TargetOpcode::G_UREM:
671 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
672 Type *HLTy = IntegerType::get(Ctx, Size);
673 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
674 if (Status != Legalized)
675 return Status;
676 break;
677 }
678 case TargetOpcode::G_FADD:
679 case TargetOpcode::G_FSUB:
680 case TargetOpcode::G_FMUL:
681 case TargetOpcode::G_FDIV:
682 case TargetOpcode::G_FMA:
683 case TargetOpcode::G_FPOW:
684 case TargetOpcode::G_FREM:
685 case TargetOpcode::G_FCOS:
686 case TargetOpcode::G_FSIN:
687 case TargetOpcode::G_FLOG10:
688 case TargetOpcode::G_FLOG:
689 case TargetOpcode::G_FLOG2:
690 case TargetOpcode::G_FEXP:
691 case TargetOpcode::G_FEXP2:
692 case TargetOpcode::G_FCEIL:
693 case TargetOpcode::G_FFLOOR:
694 case TargetOpcode::G_FMINNUM:
695 case TargetOpcode::G_FMAXNUM:
696 case TargetOpcode::G_FSQRT:
697 case TargetOpcode::G_FRINT:
698 case TargetOpcode::G_FNEARBYINT:
699 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
700 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
701 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
702 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
703 return UnableToLegalize;
704 }
705 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
706 if (Status != Legalized)
707 return Status;
708 break;
709 }
710 case TargetOpcode::G_FPEXT:
711 case TargetOpcode::G_FPTRUNC: {
712 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
713 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
714 if (!FromTy || !ToTy)
715 return UnableToLegalize;
716 LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
717 if (Status != Legalized)
718 return Status;
719 break;
720 }
721 case TargetOpcode::G_FPTOSI:
722 case TargetOpcode::G_FPTOUI: {
723 // FIXME: Support other types
724 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
725 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
726 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
727 return UnableToLegalize;
728 LegalizeResult Status = conversionLibcall(
729 MI, MIRBuilder,
730 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
731 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
732 if (Status != Legalized)
733 return Status;
734 break;
735 }
736 case TargetOpcode::G_SITOFP:
737 case TargetOpcode::G_UITOFP: {
738 // FIXME: Support other types
739 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
740 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
741 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
742 return UnableToLegalize;
743 LegalizeResult Status = conversionLibcall(
744 MI, MIRBuilder,
745 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
746 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
747 if (Status != Legalized)
748 return Status;
749 break;
750 }
751 case TargetOpcode::G_MEMCPY:
752 case TargetOpcode::G_MEMMOVE:
753 case TargetOpcode::G_MEMSET: {
754 LegalizeResult Result = createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
755 MI.eraseFromParent();
756 return Result;
757 }
758 }
759
760 MI.eraseFromParent();
761 return Legalized;
762 }
763
narrowScalar(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)764 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
765 unsigned TypeIdx,
766 LLT NarrowTy) {
767 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
768 uint64_t NarrowSize = NarrowTy.getSizeInBits();
769
770 switch (MI.getOpcode()) {
771 default:
772 return UnableToLegalize;
773 case TargetOpcode::G_IMPLICIT_DEF: {
774 Register DstReg = MI.getOperand(0).getReg();
775 LLT DstTy = MRI.getType(DstReg);
776
777 // If SizeOp0 is not an exact multiple of NarrowSize, emit
778 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
779 // FIXME: Although this would also be legal for the general case, it causes
780 // a lot of regressions in the emitted code (superfluous COPYs, artifact
781 // combines not being hit). This seems to be a problem related to the
782 // artifact combiner.
783 if (SizeOp0 % NarrowSize != 0) {
784 LLT ImplicitTy = NarrowTy;
785 if (DstTy.isVector())
786 ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
787
788 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
789 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
790
791 MI.eraseFromParent();
792 return Legalized;
793 }
794
795 int NumParts = SizeOp0 / NarrowSize;
796
797 SmallVector<Register, 2> DstRegs;
798 for (int i = 0; i < NumParts; ++i)
799 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
800
801 if (DstTy.isVector())
802 MIRBuilder.buildBuildVector(DstReg, DstRegs);
803 else
804 MIRBuilder.buildMerge(DstReg, DstRegs);
805 MI.eraseFromParent();
806 return Legalized;
807 }
808 case TargetOpcode::G_CONSTANT: {
809 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
810 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
811 unsigned TotalSize = Ty.getSizeInBits();
812 unsigned NarrowSize = NarrowTy.getSizeInBits();
813 int NumParts = TotalSize / NarrowSize;
814
815 SmallVector<Register, 4> PartRegs;
816 for (int I = 0; I != NumParts; ++I) {
817 unsigned Offset = I * NarrowSize;
818 auto K = MIRBuilder.buildConstant(NarrowTy,
819 Val.lshr(Offset).trunc(NarrowSize));
820 PartRegs.push_back(K.getReg(0));
821 }
822
823 LLT LeftoverTy;
824 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
825 SmallVector<Register, 1> LeftoverRegs;
826 if (LeftoverBits != 0) {
827 LeftoverTy = LLT::scalar(LeftoverBits);
828 auto K = MIRBuilder.buildConstant(
829 LeftoverTy,
830 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
831 LeftoverRegs.push_back(K.getReg(0));
832 }
833
834 insertParts(MI.getOperand(0).getReg(),
835 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
836
837 MI.eraseFromParent();
838 return Legalized;
839 }
840 case TargetOpcode::G_SEXT:
841 case TargetOpcode::G_ZEXT:
842 case TargetOpcode::G_ANYEXT:
843 return narrowScalarExt(MI, TypeIdx, NarrowTy);
844 case TargetOpcode::G_TRUNC: {
845 if (TypeIdx != 1)
846 return UnableToLegalize;
847
848 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
849 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
850 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
851 return UnableToLegalize;
852 }
853
854 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
855 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
856 MI.eraseFromParent();
857 return Legalized;
858 }
859
860 case TargetOpcode::G_FREEZE:
861 return reduceOperationWidth(MI, TypeIdx, NarrowTy);
862
863 case TargetOpcode::G_ADD: {
864 // FIXME: add support for when SizeOp0 isn't an exact multiple of
865 // NarrowSize.
866 if (SizeOp0 % NarrowSize != 0)
867 return UnableToLegalize;
868 // Expand in terms of carry-setting/consuming G_ADDE instructions.
869 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
870
871 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
872 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
873 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
874
875 Register CarryIn;
876 for (int i = 0; i < NumParts; ++i) {
877 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
878 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
879
880 if (i == 0)
881 MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
882 else {
883 MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
884 Src2Regs[i], CarryIn);
885 }
886
887 DstRegs.push_back(DstReg);
888 CarryIn = CarryOut;
889 }
890 Register DstReg = MI.getOperand(0).getReg();
891 if(MRI.getType(DstReg).isVector())
892 MIRBuilder.buildBuildVector(DstReg, DstRegs);
893 else
894 MIRBuilder.buildMerge(DstReg, DstRegs);
895 MI.eraseFromParent();
896 return Legalized;
897 }
898 case TargetOpcode::G_SUB: {
899 // FIXME: add support for when SizeOp0 isn't an exact multiple of
900 // NarrowSize.
901 if (SizeOp0 % NarrowSize != 0)
902 return UnableToLegalize;
903
904 int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
905
906 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
907 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
908 extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
909
910 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
911 Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
912 MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
913 {Src1Regs[0], Src2Regs[0]});
914 DstRegs.push_back(DstReg);
915 Register BorrowIn = BorrowOut;
916 for (int i = 1; i < NumParts; ++i) {
917 DstReg = MRI.createGenericVirtualRegister(NarrowTy);
918 BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
919
920 MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
921 {Src1Regs[i], Src2Regs[i], BorrowIn});
922
923 DstRegs.push_back(DstReg);
924 BorrowIn = BorrowOut;
925 }
926 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
927 MI.eraseFromParent();
928 return Legalized;
929 }
930 case TargetOpcode::G_MUL:
931 case TargetOpcode::G_UMULH:
932 return narrowScalarMul(MI, NarrowTy);
933 case TargetOpcode::G_EXTRACT:
934 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
935 case TargetOpcode::G_INSERT:
936 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
937 case TargetOpcode::G_LOAD: {
938 auto &MMO = **MI.memoperands_begin();
939 Register DstReg = MI.getOperand(0).getReg();
940 LLT DstTy = MRI.getType(DstReg);
941 if (DstTy.isVector())
942 return UnableToLegalize;
943
944 if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
945 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
946 MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
947 MIRBuilder.buildAnyExt(DstReg, TmpReg);
948 MI.eraseFromParent();
949 return Legalized;
950 }
951
952 return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
953 }
954 case TargetOpcode::G_ZEXTLOAD:
955 case TargetOpcode::G_SEXTLOAD: {
956 bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
957 Register DstReg = MI.getOperand(0).getReg();
958 Register PtrReg = MI.getOperand(1).getReg();
959
960 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
961 auto &MMO = **MI.memoperands_begin();
962 unsigned MemSize = MMO.getSizeInBits();
963
964 if (MemSize == NarrowSize) {
965 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
966 } else if (MemSize < NarrowSize) {
967 MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
968 } else if (MemSize > NarrowSize) {
969 // FIXME: Need to split the load.
970 return UnableToLegalize;
971 }
972
973 if (ZExt)
974 MIRBuilder.buildZExt(DstReg, TmpReg);
975 else
976 MIRBuilder.buildSExt(DstReg, TmpReg);
977
978 MI.eraseFromParent();
979 return Legalized;
980 }
981 case TargetOpcode::G_STORE: {
982 const auto &MMO = **MI.memoperands_begin();
983
984 Register SrcReg = MI.getOperand(0).getReg();
985 LLT SrcTy = MRI.getType(SrcReg);
986 if (SrcTy.isVector())
987 return UnableToLegalize;
988
989 int NumParts = SizeOp0 / NarrowSize;
990 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
991 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
992 if (SrcTy.isVector() && LeftoverBits != 0)
993 return UnableToLegalize;
994
995 if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
996 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
997 auto &MMO = **MI.memoperands_begin();
998 MIRBuilder.buildTrunc(TmpReg, SrcReg);
999 MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
1000 MI.eraseFromParent();
1001 return Legalized;
1002 }
1003
1004 return reduceLoadStoreWidth(MI, 0, NarrowTy);
1005 }
1006 case TargetOpcode::G_SELECT:
1007 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1008 case TargetOpcode::G_AND:
1009 case TargetOpcode::G_OR:
1010 case TargetOpcode::G_XOR: {
1011 // Legalize bitwise operation:
1012 // A = BinOp<Ty> B, C
1013 // into:
1014 // B1, ..., BN = G_UNMERGE_VALUES B
1015 // C1, ..., CN = G_UNMERGE_VALUES C
1016 // A1 = BinOp<Ty/N> B1, C2
1017 // ...
1018 // AN = BinOp<Ty/N> BN, CN
1019 // A = G_MERGE_VALUES A1, ..., AN
1020 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1021 }
1022 case TargetOpcode::G_SHL:
1023 case TargetOpcode::G_LSHR:
1024 case TargetOpcode::G_ASHR:
1025 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1026 case TargetOpcode::G_CTLZ:
1027 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1028 case TargetOpcode::G_CTTZ:
1029 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1030 case TargetOpcode::G_CTPOP:
1031 if (TypeIdx == 1)
1032 switch (MI.getOpcode()) {
1033 case TargetOpcode::G_CTLZ:
1034 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1035 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1036 case TargetOpcode::G_CTTZ:
1037 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1038 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1039 case TargetOpcode::G_CTPOP:
1040 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1041 default:
1042 return UnableToLegalize;
1043 }
1044
1045 Observer.changingInstr(MI);
1046 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1047 Observer.changedInstr(MI);
1048 return Legalized;
1049 case TargetOpcode::G_INTTOPTR:
1050 if (TypeIdx != 1)
1051 return UnableToLegalize;
1052
1053 Observer.changingInstr(MI);
1054 narrowScalarSrc(MI, NarrowTy, 1);
1055 Observer.changedInstr(MI);
1056 return Legalized;
1057 case TargetOpcode::G_PTRTOINT:
1058 if (TypeIdx != 0)
1059 return UnableToLegalize;
1060
1061 Observer.changingInstr(MI);
1062 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1063 Observer.changedInstr(MI);
1064 return Legalized;
1065 case TargetOpcode::G_PHI: {
1066 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1067 // NarrowSize.
1068 if (SizeOp0 % NarrowSize != 0)
1069 return UnableToLegalize;
1070
1071 unsigned NumParts = SizeOp0 / NarrowSize;
1072 SmallVector<Register, 2> DstRegs(NumParts);
1073 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1074 Observer.changingInstr(MI);
1075 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1076 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1077 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1078 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1079 SrcRegs[i / 2]);
1080 }
1081 MachineBasicBlock &MBB = *MI.getParent();
1082 MIRBuilder.setInsertPt(MBB, MI);
1083 for (unsigned i = 0; i < NumParts; ++i) {
1084 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1085 MachineInstrBuilder MIB =
1086 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1087 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1088 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1089 }
1090 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1091 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1092 Observer.changedInstr(MI);
1093 MI.eraseFromParent();
1094 return Legalized;
1095 }
1096 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1097 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1098 if (TypeIdx != 2)
1099 return UnableToLegalize;
1100
1101 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1102 Observer.changingInstr(MI);
1103 narrowScalarSrc(MI, NarrowTy, OpIdx);
1104 Observer.changedInstr(MI);
1105 return Legalized;
1106 }
1107 case TargetOpcode::G_ICMP: {
1108 uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1109 if (NarrowSize * 2 != SrcSize)
1110 return UnableToLegalize;
1111
1112 Observer.changingInstr(MI);
1113 Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1114 Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1115 MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1116
1117 Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1118 Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1119 MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1120
1121 CmpInst::Predicate Pred =
1122 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1123 LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1124
1125 if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1126 MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1127 MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1128 MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1129 MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1130 MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1131 } else {
1132 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1133 MachineInstrBuilder CmpHEQ =
1134 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1135 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1136 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1137 MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1138 }
1139 Observer.changedInstr(MI);
1140 MI.eraseFromParent();
1141 return Legalized;
1142 }
1143 case TargetOpcode::G_SEXT_INREG: {
1144 if (TypeIdx != 0)
1145 return UnableToLegalize;
1146
1147 int64_t SizeInBits = MI.getOperand(2).getImm();
1148
1149 // So long as the new type has more bits than the bits we're extending we
1150 // don't need to break it apart.
1151 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1152 Observer.changingInstr(MI);
1153 // We don't lose any non-extension bits by truncating the src and
1154 // sign-extending the dst.
1155 MachineOperand &MO1 = MI.getOperand(1);
1156 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1157 MO1.setReg(TruncMIB.getReg(0));
1158
1159 MachineOperand &MO2 = MI.getOperand(0);
1160 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1161 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1162 MIRBuilder.buildSExt(MO2, DstExt);
1163 MO2.setReg(DstExt);
1164 Observer.changedInstr(MI);
1165 return Legalized;
1166 }
1167
1168 // Break it apart. Components below the extension point are unmodified. The
1169 // component containing the extension point becomes a narrower SEXT_INREG.
1170 // Components above it are ashr'd from the component containing the
1171 // extension point.
1172 if (SizeOp0 % NarrowSize != 0)
1173 return UnableToLegalize;
1174 int NumParts = SizeOp0 / NarrowSize;
1175
1176 // List the registers where the destination will be scattered.
1177 SmallVector<Register, 2> DstRegs;
1178 // List the registers where the source will be split.
1179 SmallVector<Register, 2> SrcRegs;
1180
1181 // Create all the temporary registers.
1182 for (int i = 0; i < NumParts; ++i) {
1183 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1184
1185 SrcRegs.push_back(SrcReg);
1186 }
1187
1188 // Explode the big arguments into smaller chunks.
1189 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1190
1191 Register AshrCstReg =
1192 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1193 .getReg(0);
1194 Register FullExtensionReg = 0;
1195 Register PartialExtensionReg = 0;
1196
1197 // Do the operation on each small part.
1198 for (int i = 0; i < NumParts; ++i) {
1199 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1200 DstRegs.push_back(SrcRegs[i]);
1201 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1202 assert(PartialExtensionReg &&
1203 "Expected to visit partial extension before full");
1204 if (FullExtensionReg) {
1205 DstRegs.push_back(FullExtensionReg);
1206 continue;
1207 }
1208 DstRegs.push_back(
1209 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1210 .getReg(0));
1211 FullExtensionReg = DstRegs.back();
1212 } else {
1213 DstRegs.push_back(
1214 MIRBuilder
1215 .buildInstr(
1216 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1217 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1218 .getReg(0));
1219 PartialExtensionReg = DstRegs.back();
1220 }
1221 }
1222
1223 // Gather the destination registers into the final destination.
1224 Register DstReg = MI.getOperand(0).getReg();
1225 MIRBuilder.buildMerge(DstReg, DstRegs);
1226 MI.eraseFromParent();
1227 return Legalized;
1228 }
1229 case TargetOpcode::G_BSWAP:
1230 case TargetOpcode::G_BITREVERSE: {
1231 if (SizeOp0 % NarrowSize != 0)
1232 return UnableToLegalize;
1233
1234 Observer.changingInstr(MI);
1235 SmallVector<Register, 2> SrcRegs, DstRegs;
1236 unsigned NumParts = SizeOp0 / NarrowSize;
1237 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1238
1239 for (unsigned i = 0; i < NumParts; ++i) {
1240 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1241 {SrcRegs[NumParts - 1 - i]});
1242 DstRegs.push_back(DstPart.getReg(0));
1243 }
1244
1245 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1246
1247 Observer.changedInstr(MI);
1248 MI.eraseFromParent();
1249 return Legalized;
1250 }
1251 case TargetOpcode::G_PTR_ADD:
1252 case TargetOpcode::G_PTRMASK: {
1253 if (TypeIdx != 1)
1254 return UnableToLegalize;
1255 Observer.changingInstr(MI);
1256 narrowScalarSrc(MI, NarrowTy, 2);
1257 Observer.changedInstr(MI);
1258 return Legalized;
1259 }
1260 case TargetOpcode::G_FPTOUI:
1261 case TargetOpcode::G_FPTOSI:
1262 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1263 case TargetOpcode::G_FPEXT:
1264 if (TypeIdx != 0)
1265 return UnableToLegalize;
1266 Observer.changingInstr(MI);
1267 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1268 Observer.changedInstr(MI);
1269 return Legalized;
1270 }
1271 }
1272
coerceToScalar(Register Val)1273 Register LegalizerHelper::coerceToScalar(Register Val) {
1274 LLT Ty = MRI.getType(Val);
1275 if (Ty.isScalar())
1276 return Val;
1277
1278 const DataLayout &DL = MIRBuilder.getDataLayout();
1279 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1280 if (Ty.isPointer()) {
1281 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1282 return Register();
1283 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1284 }
1285
1286 Register NewVal = Val;
1287
1288 assert(Ty.isVector());
1289 LLT EltTy = Ty.getElementType();
1290 if (EltTy.isPointer())
1291 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1292 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1293 }
1294
widenScalarSrc(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned ExtOpcode)1295 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1296 unsigned OpIdx, unsigned ExtOpcode) {
1297 MachineOperand &MO = MI.getOperand(OpIdx);
1298 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1299 MO.setReg(ExtB.getReg(0));
1300 }
1301
narrowScalarSrc(MachineInstr & MI,LLT NarrowTy,unsigned OpIdx)1302 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1303 unsigned OpIdx) {
1304 MachineOperand &MO = MI.getOperand(OpIdx);
1305 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1306 MO.setReg(ExtB.getReg(0));
1307 }
1308
widenScalarDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx,unsigned TruncOpcode)1309 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1310 unsigned OpIdx, unsigned TruncOpcode) {
1311 MachineOperand &MO = MI.getOperand(OpIdx);
1312 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1313 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1314 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1315 MO.setReg(DstExt);
1316 }
1317
narrowScalarDst(MachineInstr & MI,LLT NarrowTy,unsigned OpIdx,unsigned ExtOpcode)1318 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1319 unsigned OpIdx, unsigned ExtOpcode) {
1320 MachineOperand &MO = MI.getOperand(OpIdx);
1321 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1322 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1323 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1324 MO.setReg(DstTrunc);
1325 }
1326
moreElementsVectorDst(MachineInstr & MI,LLT WideTy,unsigned OpIdx)1327 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1328 unsigned OpIdx) {
1329 MachineOperand &MO = MI.getOperand(OpIdx);
1330 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1331 MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1332 }
1333
moreElementsVectorSrc(MachineInstr & MI,LLT MoreTy,unsigned OpIdx)1334 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1335 unsigned OpIdx) {
1336 MachineOperand &MO = MI.getOperand(OpIdx);
1337
1338 LLT OldTy = MRI.getType(MO.getReg());
1339 unsigned OldElts = OldTy.getNumElements();
1340 unsigned NewElts = MoreTy.getNumElements();
1341
1342 unsigned NumParts = NewElts / OldElts;
1343
1344 // Use concat_vectors if the result is a multiple of the number of elements.
1345 if (NumParts * OldElts == NewElts) {
1346 SmallVector<Register, 8> Parts;
1347 Parts.push_back(MO.getReg());
1348
1349 Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1350 for (unsigned I = 1; I != NumParts; ++I)
1351 Parts.push_back(ImpDef);
1352
1353 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1354 MO.setReg(Concat.getReg(0));
1355 return;
1356 }
1357
1358 Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1359 Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1360 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1361 MO.setReg(MoreReg);
1362 }
1363
bitcastSrc(MachineInstr & MI,LLT CastTy,unsigned OpIdx)1364 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1365 MachineOperand &Op = MI.getOperand(OpIdx);
1366 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1367 }
1368
bitcastDst(MachineInstr & MI,LLT CastTy,unsigned OpIdx)1369 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1370 MachineOperand &MO = MI.getOperand(OpIdx);
1371 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1372 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1373 MIRBuilder.buildBitcast(MO, CastDst);
1374 MO.setReg(CastDst);
1375 }
1376
1377 LegalizerHelper::LegalizeResult
widenScalarMergeValues(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1378 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1379 LLT WideTy) {
1380 if (TypeIdx != 1)
1381 return UnableToLegalize;
1382
1383 Register DstReg = MI.getOperand(0).getReg();
1384 LLT DstTy = MRI.getType(DstReg);
1385 if (DstTy.isVector())
1386 return UnableToLegalize;
1387
1388 Register Src1 = MI.getOperand(1).getReg();
1389 LLT SrcTy = MRI.getType(Src1);
1390 const int DstSize = DstTy.getSizeInBits();
1391 const int SrcSize = SrcTy.getSizeInBits();
1392 const int WideSize = WideTy.getSizeInBits();
1393 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1394
1395 unsigned NumOps = MI.getNumOperands();
1396 unsigned NumSrc = MI.getNumOperands() - 1;
1397 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1398
1399 if (WideSize >= DstSize) {
1400 // Directly pack the bits in the target type.
1401 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1402
1403 for (unsigned I = 2; I != NumOps; ++I) {
1404 const unsigned Offset = (I - 1) * PartSize;
1405
1406 Register SrcReg = MI.getOperand(I).getReg();
1407 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1408
1409 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1410
1411 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1412 MRI.createGenericVirtualRegister(WideTy);
1413
1414 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1415 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1416 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1417 ResultReg = NextResult;
1418 }
1419
1420 if (WideSize > DstSize)
1421 MIRBuilder.buildTrunc(DstReg, ResultReg);
1422 else if (DstTy.isPointer())
1423 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1424
1425 MI.eraseFromParent();
1426 return Legalized;
1427 }
1428
1429 // Unmerge the original values to the GCD type, and recombine to the next
1430 // multiple greater than the original type.
1431 //
1432 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1433 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1434 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1435 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1436 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1437 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1438 // %12:_(s12) = G_MERGE_VALUES %10, %11
1439 //
1440 // Padding with undef if necessary:
1441 //
1442 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1443 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1444 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1445 // %7:_(s2) = G_IMPLICIT_DEF
1446 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1447 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1448 // %10:_(s12) = G_MERGE_VALUES %8, %9
1449
1450 const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1451 LLT GCDTy = LLT::scalar(GCD);
1452
1453 SmallVector<Register, 8> Parts;
1454 SmallVector<Register, 8> NewMergeRegs;
1455 SmallVector<Register, 8> Unmerges;
1456 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1457
1458 // Decompose the original operands if they don't evenly divide.
1459 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1460 Register SrcReg = MI.getOperand(I).getReg();
1461 if (GCD == SrcSize) {
1462 Unmerges.push_back(SrcReg);
1463 } else {
1464 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1465 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1466 Unmerges.push_back(Unmerge.getReg(J));
1467 }
1468 }
1469
1470 // Pad with undef to the next size that is a multiple of the requested size.
1471 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1472 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1473 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1474 Unmerges.push_back(UndefReg);
1475 }
1476
1477 const int PartsPerGCD = WideSize / GCD;
1478
1479 // Build merges of each piece.
1480 ArrayRef<Register> Slicer(Unmerges);
1481 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1482 auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1483 NewMergeRegs.push_back(Merge.getReg(0));
1484 }
1485
1486 // A truncate may be necessary if the requested type doesn't evenly divide the
1487 // original result type.
1488 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1489 MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1490 } else {
1491 auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1492 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1493 }
1494
1495 MI.eraseFromParent();
1496 return Legalized;
1497 }
1498
/// Create a fresh \p WideTy register to stand in for the def of \p OrigReg,
/// and emit the merge/unmerge sequence (at the current insert point) that
/// writes the relevant part of the wide value back into \p OrigReg. Returns
/// the wide register; the caller rewrites the defining instruction to def it.
Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  LLT OrigTy = MRI.getType(OrigReg);
  // LCM of the two sizes: the intermediate type that both WideTy and OrigTy
  // divide evenly, so a merge up and an unmerge down are both well-formed.
  LLT LCMTy = getLCMType(WideTy, OrigTy);

  // How many WideTy pieces build the LCM value, and how many OrigTy pieces it
  // splits back into.
  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  Register UnmergeSrc = WideReg;

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  if (NumMergeParts > 1) {
    Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
    MergeParts[0] = WideReg;
    UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs. Only the first
  // result feeds OrigReg; the rest are scratch registers nothing reads.
  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
  UnmergeResults[0] = OrigReg;
  for (int I = 1; I != NumUnmergeParts; ++I)
    UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);

  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
  return WideReg;
}
1532
/// Widen the scalar results (TypeIdx 0) of a G_UNMERGE_VALUES to \p WideTy.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  // The last operand is the unmerge source; all preceding operands are defs.
  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    // The requested type covers the whole source, so extract each result by
    // shifting and truncating rather than emitting a widened unmerge.
    if (SrcTy.isPointer()) {
      // Pointers must first be reinterpreted as same-sized integers; bail on
      // non-integral address spaces where that reinterpretation is invalid.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    // Result I lives at bit offset DstSize * I of the source.
    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  //  %4:_(s192) = G_ANYEXT %0:_(s96)
  //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  //  ; unpack to GCD type, with extra dead defs
  //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  //  dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
  //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
  //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      // One G_UNMERGE_VALUES per wide piece, producing DstTy-sized results.
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    // Unpack every wide piece into GCD-sized parts, then remerge groups of
    // PartsPerRemerge parts into each requested destination.
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
1661
/// Widen a G_EXTRACT: TypeIdx 0 widens the extracted result (lowered to
/// shift + trunc for scalars); TypeIdx 1 widens the source.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                    LLT WideTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);

  LLT DstTy = MRI.getType(DstReg);
  // Bit offset of the extracted value within the source.
  unsigned Offset = MI.getOperand(2).getImm();

  if (TypeIdx == 0) {
    // Widening the scalar result: lower the extract to shift + trunc.
    if (SrcTy.isVector() || DstTy.isVector())
      return UnableToLegalize;

    SrcOp Src(SrcReg);
    if (SrcTy.isPointer()) {
      // Extracts from pointers can be handled only if they are really just
      // simple integers.
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
        return UnableToLegalize;

      LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
      SrcTy = SrcAsIntTy;
    }

    if (DstTy.isPointer())
      return UnableToLegalize;

    if (Offset == 0) {
      // Avoid a shift in the degenerate case.
      MIRBuilder.buildTrunc(DstReg,
                            MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
      MI.eraseFromParent();
      return Legalized;
    }

    // Do a shift in the source type.
    LLT ShiftTy = SrcTy;
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      Src = MIRBuilder.buildAnyExt(WideTy, Src);
      ShiftTy = WideTy;
    }

    // Shift the desired bits down to position 0 and truncate them out.
    auto LShr = MIRBuilder.buildLShr(
        ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
    MIRBuilder.buildTrunc(DstReg, LShr);
    MI.eraseFromParent();
    return Legalized;
  }

  // TypeIdx == 1: widening the source operand.
  if (SrcTy.isScalar()) {
    // Any-extending a scalar source does not move the extracted bits.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }

  if (!SrcTy.isVector())
    return UnableToLegalize;

  // Vector sources are only handled when extracting exactly one element.
  if (DstTy != SrcTy.getElementType())
    return UnableToLegalize;

  // The offset must fall on an element boundary.
  if (Offset % SrcTy.getScalarSizeInBits() != 0)
    return UnableToLegalize;

  Observer.changingInstr(MI);
  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

  // Rescale the bit offset for the widened element size.
  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
                          Offset);
  widenScalarDst(MI, WideTy.getScalarType(), 0);
  Observer.changedInstr(MI);
  return Legalized;
}
1739
1740 LegalizerHelper::LegalizeResult
widenScalarInsert(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1741 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1742 LLT WideTy) {
1743 if (TypeIdx != 0 || WideTy.isVector())
1744 return UnableToLegalize;
1745 Observer.changingInstr(MI);
1746 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1747 widenScalarDst(MI, WideTy);
1748 Observer.changedInstr(MI);
1749 return Legalized;
1750 }
1751
1752 LegalizerHelper::LegalizeResult
widenScalarAddoSubo(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1753 LegalizerHelper::widenScalarAddoSubo(MachineInstr &MI, unsigned TypeIdx,
1754 LLT WideTy) {
1755 if (TypeIdx == 1)
1756 return UnableToLegalize; // TODO
1757 unsigned Op = MI.getOpcode();
1758 unsigned Opcode = Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_SADDO
1759 ? TargetOpcode::G_ADD
1760 : TargetOpcode::G_SUB;
1761 unsigned ExtOpcode =
1762 Op == TargetOpcode::G_UADDO || Op == TargetOpcode::G_USUBO
1763 ? TargetOpcode::G_ZEXT
1764 : TargetOpcode::G_SEXT;
1765 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1766 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1767 // Do the arithmetic in the larger type.
1768 auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt});
1769 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1770 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1771 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1772 // There is no overflow if the ExtOp is the same as NewOp.
1773 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1774 // Now trunc the NewOp to the original result.
1775 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1776 MI.eraseFromParent();
1777 return Legalized;
1778 }
1779
1780 LegalizerHelper::LegalizeResult
widenScalarAddSubShlSat(MachineInstr & MI,unsigned TypeIdx,LLT WideTy)1781 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1782 LLT WideTy) {
1783 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1784 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1785 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1786 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1787 MI.getOpcode() == TargetOpcode::G_USHLSAT;
1788 // We can convert this to:
1789 // 1. Any extend iN to iM
1790 // 2. SHL by M-N
1791 // 3. [US][ADD|SUB|SHL]SAT
1792 // 4. L/ASHR by M-N
1793 //
1794 // It may be more efficient to lower this to a min and a max operation in
1795 // the higher precision arithmetic if the promoted operation isn't legal,
1796 // but this decision is up to the target's lowering request.
1797 Register DstReg = MI.getOperand(0).getReg();
1798
1799 unsigned NewBits = WideTy.getScalarSizeInBits();
1800 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1801
1802 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1803 // must not left shift the RHS to preserve the shift amount.
1804 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1805 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1806 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1807 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1808 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1809 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1810
1811 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1812 {ShiftL, ShiftR}, MI.getFlags());
1813
1814 // Use a shift that will preserve the number of sign bits when the trunc is
1815 // folded away.
1816 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1817 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1818
1819 MIRBuilder.buildTrunc(DstReg, Result);
1820 MI.eraseFromParent();
1821 return Legalized;
1822 }
1823
// Promote the scalar type of \p MI selected by \p TypeIdx to \p WideTy.
// Each opcode picks the extension kind (any/zero/sign/fp) that preserves its
// semantics for the widened sources, and truncates the result back to the
// original type where needed.
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_EXTRACT:
    return widenScalarExtract(MI, TypeIdx, WideTy);
  case TargetOpcode::G_INSERT:
    return widenScalarInsert(MI, TypeIdx, WideTy);
  case TargetOpcode::G_MERGE_VALUES:
    return widenScalarMergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_UNMERGE_VALUES:
    return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
    return widenScalarAddoSubo(MI, TypeIdx, WideTy);
  case TargetOpcode::G_SADDSAT:
  case TargetOpcode::G_SSUBSAT:
  case TargetOpcode::G_SSHLSAT:
  case TargetOpcode::G_UADDSAT:
  case TargetOpcode::G_USUBSAT:
  case TargetOpcode::G_USHLSAT:
    return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP: {
    if (TypeIdx == 0) {
      // Widening only the result type needs no compensation.
      Observer.changingInstr(MI);
      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    Register SrcReg = MI.getOperand(1).getReg();

    // First ZEXT the input.
    auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
    LLT CurTy = MRI.getType(SrcReg);
    if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
      // The count is the same in the larger type except if the original
      // value was zero. This can be handled by setting the bit just off
      // the top of the original type.
      auto TopBit =
          APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
      MIBSrc = MIRBuilder.buildOr(
          WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
    }

    // Perform the operation at the larger size.
    auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
    // This is already the correct result for CTPOP and CTTZs
    if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
        MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
      // The correct result is NewOp - (Difference in widety and current ty).
      unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
      MIBNewOp = MIRBuilder.buildSub(
          WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
    }

    MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP: {
    Observer.changingInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();

    Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);

    MI.getOperand(0).setReg(DstExt);

    // Insert the fix-up code after the widened G_BSWAP itself.
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
    // The swapped bytes land in the high part of the wide register, so shift
    // them back down before truncating.
    MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
    MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);

    MIRBuilder.buildTrunc(DstReg, ShrReg);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BITREVERSE: {
    Observer.changingInstr(MI);

    Register DstReg = MI.getOperand(0).getReg();
    LLT Ty = MRI.getType(DstReg);
    unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();

    Register DstExt = MRI.createGenericVirtualRegister(WideTy);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    MI.getOperand(0).setReg(DstExt);
    MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());

    // As with G_BSWAP, the reversed bits end up shifted up by the width
    // difference; shift them back down.
    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
    auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
    MIRBuilder.buildTrunc(DstReg, Shift);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FREEZE:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ADD:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SUB:
    // Perform operation at larger width (any extension is fine here, high bits
    // don't affect the result) and then truncate the result back to the
    // original type.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SHL:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
    // Signed operations need sign-extended inputs to be computed correctly at
    // the wider width.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    Observer.changingInstr(MI);

    if (TypeIdx == 0) {
      // The extension must match the shift kind so the bits shifted in from
      // the top are correct.
      unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
        TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

      widenScalarSrc(MI, WideTy, 1, CvtOp);
      widenScalarDst(MI, WideTy);
    } else {
      assert(TypeIdx == 1);
      // The "number of bits to shift" operand must preserve its value as an
      // unsigned integer:
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    }

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // Unsigned operations need zero-extended inputs.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_SELECT:
    Observer.changingInstr(MI);
    if (TypeIdx == 0) {
      // Perform operation at larger width (any extension is fine here, high
      // bits don't affect the result) and then truncate the result back to the
      // original type.
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideTy);
    } else {
      bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
      // Explicit extension is required here since high bits affect the result.
      widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UITOFP:
    Observer.changingInstr(MI);

    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    else
      widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);

    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_STORE: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    if (!Ty.isScalar())
      return UnableToLegalize;

    Observer.changingInstr(MI);

    // i1 values must be zero-extended so a narrowing store writes a known
    // value; otherwise any-extend is sufficient.
    unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
      TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
    widenScalarSrc(MI, WideTy, 0, ExtType);

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    // The target chooses how constants should be extended when widened.
    unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
        MRI.getType(MI.getOperand(0).getReg()));
    assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
            ExtOpc == TargetOpcode::G_ANYEXT) &&
           "Illegal Extend");
    const APInt &SrcVal = SrcMO.getCImm()->getValue();
    const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
                           ? SrcVal.sext(WideTy.getSizeInBits())
                           : SrcVal.zext(WideTy.getSizeInBits());
    Observer.changingInstr(MI);
    SrcMO.setCImm(ConstantInt::get(Ctx, Val));

    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FCONSTANT: {
    MachineOperand &SrcMO = MI.getOperand(1);
    LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
    APFloat Val = SrcMO.getFPImm()->getValueAPF();
    bool LosesInfo;
    switch (WideTy.getSizeInBits()) {
    case 32:
      Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    case 64:
      Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                  &LosesInfo);
      break;
    default:
      return UnableToLegalize;
    }

    assert(!LosesInfo && "extend should always be lossless");

    Observer.changingInstr(MI);
    SrcMO.setFPImm(ConstantFP::get(Ctx, Val));

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_IMPLICIT_DEF: {
    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_BRCOND:
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_FCMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_ICMP:
    Observer.changingInstr(MI);
    if (TypeIdx == 0)
      widenScalarDst(MI, WideTy);
    else {
      // The extension must match the predicate's signedness so the compare
      // result is unchanged at the wider width.
      unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
                               MI.getOperand(1).getPredicate()))
                               ? TargetOpcode::G_SEXT
                               : TargetOpcode::G_ZEXT;
      widenScalarSrc(MI, WideTy, 2, ExtOpcode);
      widenScalarSrc(MI, WideTy, 3, ExtOpcode);
    }
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PTR_ADD:
    assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
    Observer.changingInstr(MI);
    // The offset operand is a signed quantity.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;

  case TargetOpcode::G_PHI: {
    assert(TypeIdx == 0 && "Expecting only Idx 0");

    Observer.changingInstr(MI);
    // Each incoming value must be extended in its predecessor block, before
    // that block's terminator.
    for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
    }

    // The truncated result is materialized right after the PHI group.
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
    widenScalarDst(MI, WideTy);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
    if (TypeIdx == 0) {
      // Widen the vector's element type along with the result.
      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      Observer.changingInstr(MI);

      widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
                                     WideTy.getSizeInBits()),
                     1, TargetOpcode::G_SEXT);

      widenScalarDst(MI, WideTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx != 2)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // TODO: Probably should be zext
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx == 1) {
      // Widening the inserted element also widens the vector type.
      Observer.changingInstr(MI);

      Register VecReg = MI.getOperand(1).getReg();
      LLT VecTy = MRI.getType(VecReg);
      LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);

      widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
      widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
      widenScalarDst(MI, WideVecTy, 0);
      Observer.changedInstr(MI);
      return Legalized;
    }

    if (TypeIdx == 2) {
      Observer.changingInstr(MI);
      // TODO: Probably should be zext
      widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
      Observer.changedInstr(MI);
      return Legalized;
    }

    return UnableToLegalize;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FMAD:
  case TargetOpcode::G_FNEG:
  case TargetOpcode::G_FABS:
  case TargetOpcode::G_FCANONICALIZE:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_INTRINSIC_TRUNC:
  case TargetOpcode::G_INTRINSIC_ROUND:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    assert(TypeIdx == 0);
    Observer.changingInstr(MI);

    // FP ops: fpext all operands, fptrunc the result.
    for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);

    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_FPOWI: {
    if (TypeIdx != 0)
      return UnableToLegalize;
    // Only the FP operand is widened; the integer exponent is untouched.
    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarDst(MI, WideTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_BUILD_VECTOR: {
    Observer.changingInstr(MI);

    const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
    for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
      widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);

    // Avoid changing the result vector type if the source element type was
    // requested.
    if (TypeIdx == 1) {
      MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
    } else {
      widenScalarDst(MI, WideTy, 0);
    }

    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_SEXT_INREG:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
    widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    // The mask must be zero-extended to keep the untouched high address bits
    // set in the mask.
    widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
  }
}
2342
getUnmergePieces(SmallVectorImpl<Register> & Pieces,MachineIRBuilder & B,Register Src,LLT Ty)2343 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2344 MachineIRBuilder &B, Register Src, LLT Ty) {
2345 auto Unmerge = B.buildUnmerge(Ty, Src);
2346 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2347 Pieces.push_back(Unmerge.getReg(I));
2348 }
2349
2350 LegalizerHelper::LegalizeResult
lowerBitcast(MachineInstr & MI)2351 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2352 Register Dst = MI.getOperand(0).getReg();
2353 Register Src = MI.getOperand(1).getReg();
2354 LLT DstTy = MRI.getType(Dst);
2355 LLT SrcTy = MRI.getType(Src);
2356
2357 if (SrcTy.isVector()) {
2358 LLT SrcEltTy = SrcTy.getElementType();
2359 SmallVector<Register, 8> SrcRegs;
2360
2361 if (DstTy.isVector()) {
2362 int NumDstElt = DstTy.getNumElements();
2363 int NumSrcElt = SrcTy.getNumElements();
2364
2365 LLT DstEltTy = DstTy.getElementType();
2366 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2367 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2368
2369 // If there's an element size mismatch, insert intermediate casts to match
2370 // the result element type.
2371 if (NumSrcElt < NumDstElt) { // Source element type is larger.
2372 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2373 //
2374 // =>
2375 //
2376 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2377 // %3:_(<2 x s8>) = G_BITCAST %2
2378 // %4:_(<2 x s8>) = G_BITCAST %3
2379 // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
2380 DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2381 SrcPartTy = SrcEltTy;
2382 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2383 //
2384 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2385 //
2386 // =>
2387 //
2388 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2389 // %3:_(s16) = G_BITCAST %2
2390 // %4:_(s16) = G_BITCAST %3
2391 // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
2392 SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2393 DstCastTy = DstEltTy;
2394 }
2395
2396 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2397 for (Register &SrcReg : SrcRegs)
2398 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2399 } else
2400 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2401
2402 MIRBuilder.buildMerge(Dst, SrcRegs);
2403 MI.eraseFromParent();
2404 return Legalized;
2405 }
2406
2407 if (DstTy.isVector()) {
2408 SmallVector<Register, 8> SrcRegs;
2409 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2410 MIRBuilder.buildMerge(Dst, SrcRegs);
2411 MI.eraseFromParent();
2412 return Legalized;
2413 }
2414
2415 return UnableToLegalize;
2416 }
2417
2418 /// Figure out the bit offset into a register when coercing a vector index for
2419 /// the wide element type. This is only for the case when promoting vector to
2420 /// one with larger elements.
2421 //
2422 ///
2423 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2424 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
getBitcastWiderVectorElementOffset(MachineIRBuilder & B,Register Idx,unsigned NewEltSize,unsigned OldEltSize)2425 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2426 Register Idx,
2427 unsigned NewEltSize,
2428 unsigned OldEltSize) {
2429 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2430 LLT IdxTy = B.getMRI()->getType(Idx);
2431
2432 // Now figure out the amount we need to shift to get the target bits.
2433 auto OffsetMask = B.buildConstant(
2434 IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2435 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2436 return B.buildShl(IdxTy, OffsetIdx,
2437 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2438 }
2439
/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
/// is casting to a vector with a smaller element size, perform multiple element
/// extracts and merge the results. If this is coercing to a vector with larger
/// elements, index the bitcasted vector and extract the target element with bit
/// operations. This is intended to force the indexing in the native register
/// size for architectures that can dynamically index the register file.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                         LLT CastTy) {
  // Only the vector operand type (index 1) can be bitcast.
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Idx = MI.getOperand(2).getReg();
  LLT SrcVecTy = MRI.getType(SrcVec);
  LLT IdxTy = MRI.getType(Idx);

  LLT SrcEltTy = SrcVecTy.getElementType();
  // Treat a non-vector cast type as a 1-element vector.
  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = SrcVecTy.getNumElements();

  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);

  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
  if (NewNumElts > OldNumElts) {
    // Decreasing the vector element size
    //
    // e.g. i64 = extract_vector_elt x:v2i64, y:i32
    //  =>
    //  v4i32:castx = bitcast x:v2i64
    //
    // i64 = bitcast
    //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
    //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
    //
    if (NewNumElts % OldNumElts != 0)
      return UnableToLegalize;

    // Type of the intermediate result vector.
    const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
    LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);

    auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);

    SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
    // The first narrow element of the requested wide element starts at
    // Idx * NewEltsPerOldElt in the cast vector.
    auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);

    // Extract each narrow piece of the requested wide element.
    for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
      auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
      auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
      auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
      NewOps[I] = Elt.getReg(0);
    }

    // Reassemble the pieces and cast back to the original element type.
    auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
    MIRBuilder.buildBitcast(Dst, NewVec);
    MI.eraseFromParent();
    return Legalized;
  }

  if (NewNumElts < OldNumElts) {
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    // Increasing the vector element size.
    // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
    //
    //   =>
    //
    // %cast = G_BITCAST %vec
    // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
    // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
    // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
    // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
    // %elt_bits = G_LSHR %wide_elt, %offset_bits
    // %elt = G_TRUNC %elt_bits

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register WideElt = CastVec;
    if (CastTy.isVector()) {
      WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                     ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Shift the wide element to get the target element.
    auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
    MIRBuilder.buildTrunc(Dst, ExtractedBits);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
2551
2552 /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
2553 /// TargetReg, while preserving other bits in \p TargetReg.
2554 ///
2555 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
buildBitFieldInsert(MachineIRBuilder & B,Register TargetReg,Register InsertReg,Register OffsetBits)2556 static Register buildBitFieldInsert(MachineIRBuilder &B,
2557 Register TargetReg, Register InsertReg,
2558 Register OffsetBits) {
2559 LLT TargetTy = B.getMRI()->getType(TargetReg);
2560 LLT InsertTy = B.getMRI()->getType(InsertReg);
2561 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2562 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2563
2564 // Produce a bitmask of the value to insert
2565 auto EltMask = B.buildConstant(
2566 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2567 InsertTy.getSizeInBits()));
2568 // Shift it into position
2569 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2570 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2571
2572 // Clear out the bits in the wide element
2573 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2574
2575 // The value to insert has all zeros already, so stick it into the masked
2576 // wide element.
2577 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2578 }
2579
/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
/// is increasing the element size, perform the indexing in the target element
/// type, and use bit operations to insert at the element position. This is
/// intended for architectures that can dynamically index the register file and
/// want to force indexing in the native register size.
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
                                        LLT CastTy) {
  // Only the vector type (type index 0) may be changed by this action.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register Val = MI.getOperand(2).getReg();
  Register Idx = MI.getOperand(3).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT IdxTy = MRI.getType(Idx);

  LLT VecEltTy = VecTy.getElementType();
  // CastTy may degenerate to a scalar, which acts as a 1-element vector.
  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  const unsigned NewEltSize = NewEltTy.getSizeInBits();
  const unsigned OldEltSize = VecEltTy.getSizeInBits();

  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
  unsigned OldNumElts = VecTy.getNumElements();

  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
  if (NewNumElts < OldNumElts) {
    // Widening the elements: each wide element packs several old elements,
    // so the insert becomes extract-wide + bitfield-insert + insert-wide.
    if (NewEltSize % OldEltSize != 0)
      return UnableToLegalize;

    // This only depends on powers of 2 because we use bit tricks to figure out
    // the bit offset we need to shift to get the target element. A general
    // expansion could emit division/multiply.
    if (!isPowerOf2_32(NewEltSize / OldEltSize))
      return UnableToLegalize;

    const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
    auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);

    // Divide to get the index in the wider element type.
    auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);

    Register ExtractedElt = CastVec;
    if (CastTy.isVector()) {
      ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
                                                          ScaledIdx).getReg(0);
    }

    // Compute the bit offset into the register of the target element.
    Register OffsetBits = getBitcastWiderVectorElementOffset(
      MIRBuilder, Idx, NewEltSize, OldEltSize);

    // Splice Val into the wide element, preserving the neighboring old
    // elements that share the same wide element.
    Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
                                               Val, OffsetBits);
    if (CastTy.isVector()) {
      InsertedElt = MIRBuilder.buildInsertVectorElement(
        CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
    }

    MIRBuilder.buildBitcast(Dst, InsertedElt);
    MI.eraseFromParent();
    return Legalized;
  }

  // Narrowing (or same-size) element casts are not handled here.
  return UnableToLegalize;
}
2648
/// Lower a G_LOAD/G_SEXTLOAD/G_ZEXTLOAD either by splitting a non-power-of-2
/// load into two power-of-2 loads, or by loading at memory width and
/// extending/truncating to the destination type.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerLoad(MachineInstr &MI) {
  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
  Register DstReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  auto &MMO = **MI.memoperands_begin();

  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
    if (MI.getOpcode() == TargetOpcode::G_LOAD) {
      // This load needs splitting into power of 2 sized loads.
      if (DstTy.isVector())
        return UnableToLegalize;
      if (isPowerOf2_32(DstTy.getSizeInBits()))
        return UnableToLegalize; // Don't know what we're being asked to do.

      // Our strategy here is to generate anyextending loads for the smaller
      // types up to next power-2 result type, and then combine the two larger
      // result values together, before truncating back down to the non-pow-2
      // type.
      // E.g. v1 = i24 load =>
      // v2 = i32 zextload (2 byte)
      // v3 = i32 load (1 byte)
      // v4 = i32 shl v3, 16
      // v5 = i32 or v4, v2
      // v1 = i24 trunc v5
      // By doing this we generate the correct truncate which should get
      // combined away as an artifact with a matching extend.
      uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
      uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;

      // Derive the two MMOs from the original: the large piece at offset 0,
      // the small piece immediately after it.
      MachineFunction &MF = MIRBuilder.getMF();
      MachineMemOperand *LargeMMO =
        MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
      MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
        &MMO, LargeSplitSize / 8, SmallSplitSize / 8);

      LLT PtrTy = MRI.getType(PtrReg);
      unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
      LLT AnyExtTy = LLT::scalar(AnyExtSize);
      Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
      // Zero-extend the low piece so the later OR doesn't see garbage in the
      // high bits.
      auto LargeLoad = MIRBuilder.buildLoadInstr(
        TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);

      auto OffsetCst = MIRBuilder.buildConstant(
        LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
      Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
      auto SmallPtr =
        MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
      auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
                                            *SmallMMO);

      // Combine: (small << LargeSplitSize) | large, then truncate to DstTy.
      auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
      auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
      auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
      MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
      MI.eraseFromParent();
      return Legalized;
    }

    // Ext-load whose memory size already matches the result: a plain load
    // produces the same value.
    MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
    MI.eraseFromParent();
    return Legalized;
  }

  if (DstTy.isScalar()) {
    // Load at the memory width, then widen to the destination with the
    // extension implied by the opcode.
    Register TmpReg =
      MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
    MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Unexpected opcode");
    case TargetOpcode::G_LOAD:
      MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
      break;
    case TargetOpcode::G_SEXTLOAD:
      MIRBuilder.buildSExt(DstReg, TmpReg);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      MIRBuilder.buildZExt(DstReg, TmpReg);
      break;
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
2739
/// Lower a non-power-of-2 scalar G_STORE into two power-of-2 truncating
/// stores (e.g. an i24 store becomes an i16 store plus an i8 store).
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerStore(MachineInstr &MI) {
  // Lower a non-power of 2 store into multiple pow-2 stores.
  // E.g. split an i24 store into an i16 store + i8 store.
  // We do this by first extending the stored value to the next largest power
  // of 2 type, and then using truncating stores to store the components.
  // By doing this, likewise with G_LOAD, generate an extend that can be
  // artifact-combined away instead of leaving behind extracts.
  Register SrcReg = MI.getOperand(0).getReg();
  Register PtrReg = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  MachineMemOperand &MMO = **MI.memoperands_begin();
  // Only handle the full-width, scalar, non-power-of-2 case.
  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
    return UnableToLegalize;
  if (SrcTy.isVector())
    return UnableToLegalize;
  if (isPowerOf2_32(SrcTy.getSizeInBits()))
    return UnableToLegalize; // Don't know what we're being asked to do.

  // Extend to the next pow-2.
  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);

  // Obtain the smaller value by shifting away the larger value.
  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);

  // Generate the PtrAdd and truncating stores.
  LLT PtrTy = MRI.getType(PtrReg);
  auto OffsetCst = MIRBuilder.buildConstant(
    LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
  auto SmallPtr =
    MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));

  // Split the original MMO: large piece at offset 0, small piece after it.
  // The stores truncate implicitly because the MMO size is smaller than the
  // value type.
  MachineFunction &MF = MIRBuilder.getMF();
  MachineMemOperand *LargeMMO =
    MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
  MachineMemOperand *SmallMMO =
    MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
  MI.eraseFromParent();
  return Legalized;
}
2787
2788 LegalizerHelper::LegalizeResult
bitcast(MachineInstr & MI,unsigned TypeIdx,LLT CastTy)2789 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2790 switch (MI.getOpcode()) {
2791 case TargetOpcode::G_LOAD: {
2792 if (TypeIdx != 0)
2793 return UnableToLegalize;
2794
2795 Observer.changingInstr(MI);
2796 bitcastDst(MI, CastTy, 0);
2797 Observer.changedInstr(MI);
2798 return Legalized;
2799 }
2800 case TargetOpcode::G_STORE: {
2801 if (TypeIdx != 0)
2802 return UnableToLegalize;
2803
2804 Observer.changingInstr(MI);
2805 bitcastSrc(MI, CastTy, 0);
2806 Observer.changedInstr(MI);
2807 return Legalized;
2808 }
2809 case TargetOpcode::G_SELECT: {
2810 if (TypeIdx != 0)
2811 return UnableToLegalize;
2812
2813 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2814 LLVM_DEBUG(
2815 dbgs() << "bitcast action not implemented for vector select\n");
2816 return UnableToLegalize;
2817 }
2818
2819 Observer.changingInstr(MI);
2820 bitcastSrc(MI, CastTy, 2);
2821 bitcastSrc(MI, CastTy, 3);
2822 bitcastDst(MI, CastTy, 0);
2823 Observer.changedInstr(MI);
2824 return Legalized;
2825 }
2826 case TargetOpcode::G_AND:
2827 case TargetOpcode::G_OR:
2828 case TargetOpcode::G_XOR: {
2829 Observer.changingInstr(MI);
2830 bitcastSrc(MI, CastTy, 1);
2831 bitcastSrc(MI, CastTy, 2);
2832 bitcastDst(MI, CastTy, 0);
2833 Observer.changedInstr(MI);
2834 return Legalized;
2835 }
2836 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2837 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2838 case TargetOpcode::G_INSERT_VECTOR_ELT:
2839 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2840 default:
2841 return UnableToLegalize;
2842 }
2843 }
2844
2845 // Legalize an instruction by changing the opcode in place.
changeOpcode(MachineInstr & MI,unsigned NewOpcode)2846 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2847 Observer.changingInstr(MI);
2848 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2849 Observer.changedInstr(MI);
2850 }
2851
2852 LegalizerHelper::LegalizeResult
lower(MachineInstr & MI,unsigned TypeIdx,LLT LowerHintTy)2853 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2854 using namespace TargetOpcode;
2855
2856 switch(MI.getOpcode()) {
2857 default:
2858 return UnableToLegalize;
2859 case TargetOpcode::G_BITCAST:
2860 return lowerBitcast(MI);
2861 case TargetOpcode::G_SREM:
2862 case TargetOpcode::G_UREM: {
2863 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2864 auto Quot =
2865 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2866 {MI.getOperand(1), MI.getOperand(2)});
2867
2868 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2869 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2870 MI.eraseFromParent();
2871 return Legalized;
2872 }
2873 case TargetOpcode::G_SADDO:
2874 case TargetOpcode::G_SSUBO:
2875 return lowerSADDO_SSUBO(MI);
2876 case TargetOpcode::G_UMULH:
2877 case TargetOpcode::G_SMULH:
2878 return lowerSMULH_UMULH(MI);
2879 case TargetOpcode::G_SMULO:
2880 case TargetOpcode::G_UMULO: {
2881 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2882 // result.
2883 Register Res = MI.getOperand(0).getReg();
2884 Register Overflow = MI.getOperand(1).getReg();
2885 Register LHS = MI.getOperand(2).getReg();
2886 Register RHS = MI.getOperand(3).getReg();
2887 LLT Ty = MRI.getType(Res);
2888
2889 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2890 ? TargetOpcode::G_SMULH
2891 : TargetOpcode::G_UMULH;
2892
2893 Observer.changingInstr(MI);
2894 const auto &TII = MIRBuilder.getTII();
2895 MI.setDesc(TII.get(TargetOpcode::G_MUL));
2896 MI.RemoveOperand(1);
2897 Observer.changedInstr(MI);
2898
2899 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2900 auto Zero = MIRBuilder.buildConstant(Ty, 0);
2901
2902 // Move insert point forward so we can use the Res register if needed.
2903 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2904
2905 // For *signed* multiply, overflow is detected by checking:
2906 // (hi != (lo >> bitwidth-1))
2907 if (Opcode == TargetOpcode::G_SMULH) {
2908 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2909 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2910 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2911 } else {
2912 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2913 }
2914 return Legalized;
2915 }
2916 case TargetOpcode::G_FNEG: {
2917 Register Res = MI.getOperand(0).getReg();
2918 LLT Ty = MRI.getType(Res);
2919
2920 // TODO: Handle vector types once we are able to
2921 // represent them.
2922 if (Ty.isVector())
2923 return UnableToLegalize;
2924 auto SignMask =
2925 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
2926 Register SubByReg = MI.getOperand(1).getReg();
2927 MIRBuilder.buildXor(Res, SubByReg, SignMask);
2928 MI.eraseFromParent();
2929 return Legalized;
2930 }
2931 case TargetOpcode::G_FSUB: {
2932 Register Res = MI.getOperand(0).getReg();
2933 LLT Ty = MRI.getType(Res);
2934
2935 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
2936 // First, check if G_FNEG is marked as Lower. If so, we may
2937 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
2938 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
2939 return UnableToLegalize;
2940 Register LHS = MI.getOperand(1).getReg();
2941 Register RHS = MI.getOperand(2).getReg();
2942 Register Neg = MRI.createGenericVirtualRegister(Ty);
2943 MIRBuilder.buildFNeg(Neg, RHS);
2944 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
2945 MI.eraseFromParent();
2946 return Legalized;
2947 }
2948 case TargetOpcode::G_FMAD:
2949 return lowerFMad(MI);
2950 case TargetOpcode::G_FFLOOR:
2951 return lowerFFloor(MI);
2952 case TargetOpcode::G_INTRINSIC_ROUND:
2953 return lowerIntrinsicRound(MI);
2954 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
2955 // Since round even is the assumed rounding mode for unconstrained FP
2956 // operations, rint and roundeven are the same operation.
2957 changeOpcode(MI, TargetOpcode::G_FRINT);
2958 return Legalized;
2959 }
2960 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
2961 Register OldValRes = MI.getOperand(0).getReg();
2962 Register SuccessRes = MI.getOperand(1).getReg();
2963 Register Addr = MI.getOperand(2).getReg();
2964 Register CmpVal = MI.getOperand(3).getReg();
2965 Register NewVal = MI.getOperand(4).getReg();
2966 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
2967 **MI.memoperands_begin());
2968 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
2969 MI.eraseFromParent();
2970 return Legalized;
2971 }
2972 case TargetOpcode::G_LOAD:
2973 case TargetOpcode::G_SEXTLOAD:
2974 case TargetOpcode::G_ZEXTLOAD:
2975 return lowerLoad(MI);
2976 case TargetOpcode::G_STORE:
2977 return lowerStore(MI);
2978 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2979 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2980 case TargetOpcode::G_CTLZ:
2981 case TargetOpcode::G_CTTZ:
2982 case TargetOpcode::G_CTPOP:
2983 return lowerBitCount(MI);
2984 case G_UADDO: {
2985 Register Res = MI.getOperand(0).getReg();
2986 Register CarryOut = MI.getOperand(1).getReg();
2987 Register LHS = MI.getOperand(2).getReg();
2988 Register RHS = MI.getOperand(3).getReg();
2989
2990 MIRBuilder.buildAdd(Res, LHS, RHS);
2991 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
2992
2993 MI.eraseFromParent();
2994 return Legalized;
2995 }
2996 case G_UADDE: {
2997 Register Res = MI.getOperand(0).getReg();
2998 Register CarryOut = MI.getOperand(1).getReg();
2999 Register LHS = MI.getOperand(2).getReg();
3000 Register RHS = MI.getOperand(3).getReg();
3001 Register CarryIn = MI.getOperand(4).getReg();
3002 LLT Ty = MRI.getType(Res);
3003
3004 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3005 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3006 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3007 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3008
3009 MI.eraseFromParent();
3010 return Legalized;
3011 }
3012 case G_USUBO: {
3013 Register Res = MI.getOperand(0).getReg();
3014 Register BorrowOut = MI.getOperand(1).getReg();
3015 Register LHS = MI.getOperand(2).getReg();
3016 Register RHS = MI.getOperand(3).getReg();
3017
3018 MIRBuilder.buildSub(Res, LHS, RHS);
3019 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3020
3021 MI.eraseFromParent();
3022 return Legalized;
3023 }
3024 case G_USUBE: {
3025 Register Res = MI.getOperand(0).getReg();
3026 Register BorrowOut = MI.getOperand(1).getReg();
3027 Register LHS = MI.getOperand(2).getReg();
3028 Register RHS = MI.getOperand(3).getReg();
3029 Register BorrowIn = MI.getOperand(4).getReg();
3030 const LLT CondTy = MRI.getType(BorrowOut);
3031 const LLT Ty = MRI.getType(Res);
3032
3033 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3034 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3035 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3036
3037 auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3038 auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3039 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3040
3041 MI.eraseFromParent();
3042 return Legalized;
3043 }
3044 case G_UITOFP:
3045 return lowerUITOFP(MI);
3046 case G_SITOFP:
3047 return lowerSITOFP(MI);
3048 case G_FPTOUI:
3049 return lowerFPTOUI(MI);
3050 case G_FPTOSI:
3051 return lowerFPTOSI(MI);
3052 case G_FPTRUNC:
3053 return lowerFPTRUNC(MI);
3054 case G_FPOWI:
3055 return lowerFPOWI(MI);
3056 case G_SMIN:
3057 case G_SMAX:
3058 case G_UMIN:
3059 case G_UMAX:
3060 return lowerMinMax(MI);
3061 case G_FCOPYSIGN:
3062 return lowerFCopySign(MI);
3063 case G_FMINNUM:
3064 case G_FMAXNUM:
3065 return lowerFMinNumMaxNum(MI);
3066 case G_MERGE_VALUES:
3067 return lowerMergeValues(MI);
3068 case G_UNMERGE_VALUES:
3069 return lowerUnmergeValues(MI);
3070 case TargetOpcode::G_SEXT_INREG: {
3071 assert(MI.getOperand(2).isImm() && "Expected immediate");
3072 int64_t SizeInBits = MI.getOperand(2).getImm();
3073
3074 Register DstReg = MI.getOperand(0).getReg();
3075 Register SrcReg = MI.getOperand(1).getReg();
3076 LLT DstTy = MRI.getType(DstReg);
3077 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3078
3079 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3080 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3081 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3082 MI.eraseFromParent();
3083 return Legalized;
3084 }
3085 case G_EXTRACT_VECTOR_ELT:
3086 case G_INSERT_VECTOR_ELT:
3087 return lowerExtractInsertVectorElt(MI);
3088 case G_SHUFFLE_VECTOR:
3089 return lowerShuffleVector(MI);
3090 case G_DYN_STACKALLOC:
3091 return lowerDynStackAlloc(MI);
3092 case G_EXTRACT:
3093 return lowerExtract(MI);
3094 case G_INSERT:
3095 return lowerInsert(MI);
3096 case G_BSWAP:
3097 return lowerBswap(MI);
3098 case G_BITREVERSE:
3099 return lowerBitreverse(MI);
3100 case G_READ_REGISTER:
3101 case G_WRITE_REGISTER:
3102 return lowerReadWriteRegister(MI);
3103 case G_UADDSAT:
3104 case G_USUBSAT: {
3105 // Try to make a reasonable guess about which lowering strategy to use. The
3106 // target can override this with custom lowering and calling the
3107 // implementation functions.
3108 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3109 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3110 return lowerAddSubSatToMinMax(MI);
3111 return lowerAddSubSatToAddoSubo(MI);
3112 }
3113 case G_SADDSAT:
3114 case G_SSUBSAT: {
3115 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3116
3117 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3118 // since it's a shorter expansion. However, we would need to figure out the
3119 // preferred boolean type for the carry out for the query.
3120 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3121 return lowerAddSubSatToMinMax(MI);
3122 return lowerAddSubSatToAddoSubo(MI);
3123 }
3124 case G_SSHLSAT:
3125 case G_USHLSAT:
3126 return lowerShlSat(MI);
3127 case G_ABS: {
3128 // Expand %res = G_ABS %a into:
3129 // %v1 = G_ASHR %a, scalar_size-1
3130 // %v2 = G_ADD %a, %v1
3131 // %res = G_XOR %v2, %v1
3132 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3133 Register OpReg = MI.getOperand(1).getReg();
3134 auto ShiftAmt =
3135 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
3136 auto Shift =
3137 MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
3138 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
3139 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
3140 MI.eraseFromParent();
3141 return Legalized;
3142 }
3143 case G_SELECT:
3144 return lowerSelect(MI);
3145 }
3146 }
3147
getStackTemporaryAlignment(LLT Ty,Align MinAlign) const3148 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3149 Align MinAlign) const {
3150 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3151 // datalayout for the preferred alignment. Also there should be a target hook
3152 // for this to allow targets to reduce the alignment and ignore the
3153 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3154 // the type.
3155 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3156 }
3157
3158 MachineInstrBuilder
createStackTemporary(TypeSize Bytes,Align Alignment,MachinePointerInfo & PtrInfo)3159 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3160 MachinePointerInfo &PtrInfo) {
3161 MachineFunction &MF = MIRBuilder.getMF();
3162 const DataLayout &DL = MIRBuilder.getDataLayout();
3163 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3164
3165 unsigned AddrSpace = DL.getAllocaAddrSpace();
3166 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3167
3168 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3169 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3170 }
3171
clampDynamicVectorIndex(MachineIRBuilder & B,Register IdxReg,LLT VecTy)3172 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3173 LLT VecTy) {
3174 int64_t IdxVal;
3175 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3176 return IdxReg;
3177
3178 LLT IdxTy = B.getMRI()->getType(IdxReg);
3179 unsigned NElts = VecTy.getNumElements();
3180 if (isPowerOf2_32(NElts)) {
3181 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3182 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3183 }
3184
3185 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3186 .getReg(0);
3187 }
3188
getVectorElementPointer(Register VecPtr,LLT VecTy,Register Index)3189 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3190 Register Index) {
3191 LLT EltTy = VecTy.getElementType();
3192
3193 // Calculate the element offset and add it to the pointer.
3194 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3195 assert(EltSize * 8 == EltTy.getSizeInBits() &&
3196 "Converting bits to bytes lost precision");
3197
3198 Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3199
3200 LLT IdxTy = MRI.getType(Index);
3201 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3202 MIRBuilder.buildConstant(IdxTy, EltSize));
3203
3204 LLT PtrTy = MRI.getType(VecPtr);
3205 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3206 }
3207
fewerElementsVectorImplicitDef(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3208 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
3209 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3210 Register DstReg = MI.getOperand(0).getReg();
3211 LLT DstTy = MRI.getType(DstReg);
3212 LLT LCMTy = getLCMType(DstTy, NarrowTy);
3213
3214 unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3215
3216 auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3217 SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3218
3219 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3220 MI.eraseFromParent();
3221 return Legalized;
3222 }
3223
// Handle splitting vector operations which need to have the same number of
// elements in each type index, but each type index may have a different element
// type.
//
// e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//
// Also handles some irregular breakdown cases, e.g.
// e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
//             <2 x s64> = G_SHL <2 x s64>, <2 x s32>
//                  s64 = G_SHL s64, s32
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMultiEltType(
  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
  // The breakdown is driven by the result type; other type indices follow.
  if (TypeIdx != 0)
    return UnableToLegalize;

  const LLT NarrowTy0 = NarrowTyArg;
  const unsigned NewNumElts =
      NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;

  const Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT LeftoverTy0;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
  if (NumParts < 0)
    return UnableToLegalize;

  // One (not-yet-inserted) narrow instruction per piece of the result.
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    LLT SrcTyI = MRI.getType(SrcReg);
    // Same element count as the result pieces, but this operand's element
    // type.
    LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
    LLT LeftoverTyI;

    // Split this operand into the requested typed registers, and any leftover
    // required to reproduce the original type.
    if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    if (I == 1) {
      // For the first operand, create an instruction for each part and setup
      // the result.
      for (Register PartReg : PartRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(PartReg));
        DstRegs.push_back(PartDstReg);
      }

      for (Register LeftoverReg : LeftoverRegs) {
        Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
        NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
                               .addDef(PartDstReg)
                               .addUse(LeftoverReg));
        LeftoverDstRegs.push_back(PartDstReg);
      }
    } else {
      assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());

      // Add the newly created operand splits to the existing instructions. The
      // odd-sized pieces are ordered after the requested NarrowTyArg sized
      // pieces.
      unsigned InstCount = 0;
      for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(PartRegs[J]);
      for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
        NewInsts[InstCount++].addUse(LeftoverRegs[J]);
    }

    PartRegs.clear();
    LeftoverRegs.clear();
  }

  // Insert the newly built operations and rebuild the result register.
  for (auto &MIB : NewInsts)
    MIRBuilder.insertInstr(MIB);

  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3318
/// Split a vector cast/conversion (single source operand) into casts on
/// NarrowTy-sized pieces, then reassemble the destination.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
                                          LLT NarrowTy) {
  // The breakdown is driven by the result type.
  if (TypeIdx != 0)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);

  LLT NarrowTy0 = NarrowTy;
  LLT NarrowTy1;
  unsigned NumParts;

  if (NarrowTy.isVector()) {
    // Uneven breakdown not handled.
    NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
    if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
      return UnableToLegalize;

    // Source pieces: same element count as the result pieces, with the
    // source's element type.
    NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
  } else {
    // Scalar narrow type: fully scalarize, one part per element.
    NumParts = DstTy.getNumElements();
    NarrowTy1 = SrcTy.getElementType();
  }

  SmallVector<Register, 4> SrcRegs, DstRegs;
  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);

  for (unsigned I = 0; I < NumParts; ++I) {
    // Note: deliberately shadows the outer DstReg for the per-part result.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    MachineInstr *NewInst =
        MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});

    // Preserve FP/arithmetic flags on each narrow instruction.
    NewInst->setFlags(MI.getFlags());
    DstRegs.push_back(DstReg);
  }

  if (NarrowTy.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3366
/// Split a vector G_ICMP/G_FCMP into compares on NarrowTy-sized pieces, then
/// reassemble the boolean-vector result.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(2).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);

  unsigned NumParts;
  // NarrowTy0: per-part result type; NarrowTy1: per-part source type.
  LLT NarrowTy0, NarrowTy1;

  if (TypeIdx == 0) {
    // Narrowing the result type; derive the matching source piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = DstTy.getNumElements();

    NarrowTy0 = NarrowTy;
    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
    NarrowTy1 = NarrowTy.isVector() ?
      LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
      SrcTy.getElementType();

  } else {
    // Narrowing the source type; derive the matching result piece type.
    unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
    unsigned OldElts = SrcTy.getNumElements();

    NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
      NarrowTy.getNumElements();
    NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
                            DstTy.getScalarSizeInBits());
    NarrowTy1 = NarrowTy;
  }

  // FIXME: Don't know how to handle the situation where the small vectors
  // aren't all the same size yet.
  if (NarrowTy1.isVector() &&
      NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
    return UnableToLegalize;

  CmpInst::Predicate Pred
    = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);

  for (unsigned I = 0; I < NumParts; ++I) {
    // Note: deliberately shadows the outer DstReg for the per-part result.
    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
    DstRegs.push_back(DstReg);

    if (MI.getOpcode() == TargetOpcode::G_ICMP)
      MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
    else {
      // FCMP: preserve fast-math flags on each piece.
      MachineInstr *NewCmp
        = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
      NewCmp->setFlags(MI.getFlags());
    }
  }

  if (NarrowTy1.isVector())
    MIRBuilder.buildConcatVectors(DstReg, DstRegs);
  else
    MIRBuilder.buildBuildVector(DstReg, DstRegs);

  MI.eraseFromParent();
  return Legalized;
}
3433
3434 LegalizerHelper::LegalizeResult
fewerElementsVectorSelect(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3435 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3436 LLT NarrowTy) {
3437 Register DstReg = MI.getOperand(0).getReg();
3438 Register CondReg = MI.getOperand(1).getReg();
3439
3440 unsigned NumParts = 0;
3441 LLT NarrowTy0, NarrowTy1;
3442
3443 LLT DstTy = MRI.getType(DstReg);
3444 LLT CondTy = MRI.getType(CondReg);
3445 unsigned Size = DstTy.getSizeInBits();
3446
3447 assert(TypeIdx == 0 || CondTy.isVector());
3448
3449 if (TypeIdx == 0) {
3450 NarrowTy0 = NarrowTy;
3451 NarrowTy1 = CondTy;
3452
3453 unsigned NarrowSize = NarrowTy0.getSizeInBits();
3454 // FIXME: Don't know how to handle the situation where the small vectors
3455 // aren't all the same size yet.
3456 if (Size % NarrowSize != 0)
3457 return UnableToLegalize;
3458
3459 NumParts = Size / NarrowSize;
3460
3461 // Need to break down the condition type
3462 if (CondTy.isVector()) {
3463 if (CondTy.getNumElements() == NumParts)
3464 NarrowTy1 = CondTy.getElementType();
3465 else
3466 NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3467 CondTy.getScalarSizeInBits());
3468 }
3469 } else {
3470 NumParts = CondTy.getNumElements();
3471 if (NarrowTy.isVector()) {
3472 // TODO: Handle uneven breakdown.
3473 if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3474 return UnableToLegalize;
3475
3476 return UnableToLegalize;
3477 } else {
3478 NarrowTy0 = DstTy.getElementType();
3479 NarrowTy1 = NarrowTy;
3480 }
3481 }
3482
3483 SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3484 if (CondTy.isVector())
3485 extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3486
3487 extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3488 extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3489
3490 for (unsigned i = 0; i < NumParts; ++i) {
3491 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3492 MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3493 Src1Regs[i], Src2Regs[i]);
3494 DstRegs.push_back(DstReg);
3495 }
3496
3497 if (NarrowTy0.isVector())
3498 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3499 else
3500 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3501
3502 MI.eraseFromParent();
3503 return Legalized;
3504 }
3505
/// Split a G_PHI whose type needs fewer elements into NarrowTy-sized (plus
/// possibly one leftover-sized) G_PHIs. New narrow PHIs are created at the
/// top of the result block; the pieces of each incoming value are extracted
/// before the corresponding predecessor's terminator.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                        LLT NarrowTy) {
  const Register DstReg = MI.getOperand(0).getReg();
  LLT PhiTy = MRI.getType(DstReg);
  LLT LeftoverTy;

  // All of the operands need to have the same number of elements, so if we can
  // determine a type breakdown for the result type, we can for all of the
  // source types.
  int NumParts, NumLeftover;
  std::tie(NumParts, NumLeftover)
    = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
  if (NumParts < 0)
    return UnableToLegalize;

  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
  SmallVector<MachineInstrBuilder, 4> NewInsts;

  const int TotalNumParts = NumParts + NumLeftover;

  // Insert the new phis in the result block first. Operands are added later,
  // once the per-predecessor pieces exist.
  for (int I = 0; I != TotalNumParts; ++I) {
    // The first NumParts pieces are NarrowTy; any remainder is LeftoverTy.
    LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
    Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
    NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
                       .addDef(PartDstReg));
    if (I < NumParts)
      DstRegs.push_back(PartDstReg);
    else
      LeftoverDstRegs.push_back(PartDstReg);
  }

  // Recombine the narrow PHI results into the original wide register. This
  // must be emitted after all PHIs in the block, hence getFirstNonPHI().
  MachineBasicBlock *MBB = MI.getParent();
  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);

  SmallVector<Register, 4> PartRegs, LeftoverRegs;

  // Insert code to extract the incoming values in each predecessor block.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    PartRegs.clear();
    LeftoverRegs.clear();

    Register SrcReg = MI.getOperand(I).getReg();
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    // The extracts must dominate the edge, so place them before the
    // predecessor's terminator.
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());

    LLT Unused;
    if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
                      LeftoverRegs))
      return UnableToLegalize;

    // Add the newly created operand splits to the existing instructions. The
    // odd-sized pieces are ordered after the requested NarrowTyArg sized
    // pieces.
    for (int J = 0; J != TotalNumParts; ++J) {
      MachineInstrBuilder MIB = NewInsts[J];
      MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
      MIB.addMBB(&OpMBB);
    }
  }

  MI.eraseFromParent();
  return Legalized;
}
3572
3573 LegalizerHelper::LegalizeResult
fewerElementsVectorUnmergeValues(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3574 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3575 unsigned TypeIdx,
3576 LLT NarrowTy) {
3577 if (TypeIdx != 1)
3578 return UnableToLegalize;
3579
3580 const int NumDst = MI.getNumOperands() - 1;
3581 const Register SrcReg = MI.getOperand(NumDst).getReg();
3582 LLT SrcTy = MRI.getType(SrcReg);
3583
3584 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3585
3586 // TODO: Create sequence of extracts.
3587 if (DstTy == NarrowTy)
3588 return UnableToLegalize;
3589
3590 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3591 if (DstTy == GCDTy) {
3592 // This would just be a copy of the same unmerge.
3593 // TODO: Create extracts, pad with undef and create intermediate merges.
3594 return UnableToLegalize;
3595 }
3596
3597 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3598 const int NumUnmerge = Unmerge->getNumOperands() - 1;
3599 const int PartsPerUnmerge = NumDst / NumUnmerge;
3600
3601 for (int I = 0; I != NumUnmerge; ++I) {
3602 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3603
3604 for (int J = 0; J != PartsPerUnmerge; ++J)
3605 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3606 MIB.addUse(Unmerge.getReg(I));
3607 }
3608
3609 MI.eraseFromParent();
3610 return Legalized;
3611 }
3612
3613 // Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
3614 // a vector
3615 //
3616 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3617 // undef as necessary.
3618 //
3619 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3620 // -> <2 x s16>
3621 //
3622 // %4:_(s16) = G_IMPLICIT_DEF
3623 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3624 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3625 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3626 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3627 // %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
3628 LegalizerHelper::LegalizeResult
fewerElementsVectorMerge(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3629 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3630 LLT NarrowTy) {
3631 Register DstReg = MI.getOperand(0).getReg();
3632 LLT DstTy = MRI.getType(DstReg);
3633 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3634 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3635
3636 // Break into a common type
3637 SmallVector<Register, 16> Parts;
3638 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3639 extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3640
3641 // Build the requested new merge, padding with undef.
3642 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3643 TargetOpcode::G_ANYEXT);
3644
3645 // Pack into the original result register.
3646 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3647
3648 MI.eraseFromParent();
3649 return Legalized;
3650 }
3651
/// Legalize G_EXTRACT_VECTOR_ELT / G_INSERT_VECTOR_ELT over a wide vector by
/// operating on a single NarrowVecTy-sized subvector when the index is a
/// known constant. A variable index falls back to the full lowering.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
                                                           unsigned TypeIdx,
                                                           LLT NarrowVecTy) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  Register InsertVal;
  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;

  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
  if (IsInsert)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the trailing operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  // TODO: Handle total scalarization case.
  if (!NarrowVecTy.isVector())
    return UnableToLegalize;

  LLT VecTy = MRI.getType(SrcVec);

  // If the index is a constant, we can really break this down as you would
  // expect, and index into the target size pieces.
  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Avoid out of bounds indexing the pieces.
    // NOTE(review): a negative constant index is not caught by this check
    // (signed compared against element count) — presumably ruled out
    // earlier; confirm.
    if (IdxVal >= VecTy.getNumElements()) {
      MIRBuilder.buildUndef(DstReg);
      MI.eraseFromParent();
      return Legalized;
    }

    SmallVector<Register, 8> VecParts;
    LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);

    // Build a sequence of NarrowTy pieces in VecParts for this operand.
    LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
                                    TargetOpcode::G_ANYEXT);

    unsigned NewNumElts = NarrowVecTy.getNumElements();

    // PartIdx selects the subvector; NewIdx is the element index rebased
    // within it.
    LLT IdxTy = MRI.getType(Idx);
    int64_t PartIdx = IdxVal / NewNumElts;
    auto NewIdx =
        MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);

    if (IsInsert) {
      LLT PartTy = MRI.getType(VecParts[PartIdx]);

      // Use the adjusted index to insert into one of the subvectors.
      auto InsertPart = MIRBuilder.buildInsertVectorElement(
          PartTy, VecParts[PartIdx], InsertVal, NewIdx);
      VecParts[PartIdx] = InsertPart.getReg(0);

      // Recombine the inserted subvector with the others to reform the result
      // vector.
      buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
    } else {
      MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // With a variable index, we can't perform the operation in a smaller type, so
  // we're forced to expand this.
  //
  // TODO: We could emit a chain of compare/select to figure out which piece to
  // index.
  return lowerExtractInsertVectorElt(MI);
}
3724
/// Split a non-atomic G_LOAD/G_STORE into NarrowTy-sized loads/stores (plus
/// one leftover-sized access if the breakdown is uneven), each with its own
/// adjusted address and memory operand. Loads are recombined into the
/// original value register with insertParts.
LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
                                      LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  MachineMemOperand *MMO = *MI.memoperands_begin();

  // This implementation doesn't work for atomics. Give up instead of doing
  // something invalid.
  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
      MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
    return UnableToLegalize;

  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
  Register ValReg = MI.getOperand(0).getReg();
  Register AddrReg = MI.getOperand(1).getReg();
  LLT ValTy = MRI.getType(ValReg);

  // Only plain loads/stores are handled; extending loads and truncating
  // stores access a different number of bits than the value type.
  // FIXME: Do we need a distinct NarrowMemory legalize action?
  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
    LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
    return UnableToLegalize;
  }

  // For a load, compute the breakdown; for a store, split the value now so
  // the pieces are available to store. NumParts == -1 means no valid split.
  int NumParts = -1;
  int NumLeftover = -1;
  LLT LeftoverTy;
  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
  if (IsLoad) {
    std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
  } else {
    if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
                     NarrowLeftoverRegs)) {
      NumParts = NarrowRegs.size();
      NumLeftover = NarrowLeftoverRegs.size();
    }
  }

  if (NumParts == -1)
    return UnableToLegalize;

  LLT PtrTy = MRI.getType(AddrReg);
  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());

  unsigned TotalSize = ValTy.getSizeInBits();

  // Split the load/store into PartTy sized pieces starting at Offset. If this
  // is a load, return the new registers in ValRegs. For a store, each elements
  // of ValRegs should be PartTy. Returns the next offset that needs to be
  // handled.
  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
                             unsigned Offset) -> unsigned {
    MachineFunction &MF = MIRBuilder.getMF();
    unsigned PartSize = PartTy.getSizeInBits();
    // Walk NumParts pieces (or until the whole value is covered).
    for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
         Offset += PartSize, ++Idx) {
      unsigned ByteSize = PartSize / 8;
      unsigned ByteOffset = Offset / 8;
      Register NewAddrReg;

      // Base address plus the byte offset of this piece.
      MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);

      // Derive a memory operand for the sub-access from the original one.
      MachineMemOperand *NewMMO =
        MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);

      if (IsLoad) {
        Register Dst = MRI.createGenericVirtualRegister(PartTy);
        ValRegs.push_back(Dst);
        MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
      } else {
        MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
      }
    }

    return Offset;
  };

  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);

  // Handle the rest of the register if this isn't an even type breakdown.
  if (LeftoverTy.isValid())
    splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);

  // Recombine the loaded pieces back into the original destination register.
  if (IsLoad) {
    insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
                LeftoverTy, NarrowLeftoverRegs);
  }

  MI.eraseFromParent();
  return Legalized;
}
3818
3819 LegalizerHelper::LegalizeResult
reduceOperationWidth(MachineInstr & MI,unsigned int TypeIdx,LLT NarrowTy)3820 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
3821 LLT NarrowTy) {
3822 assert(TypeIdx == 0 && "only one type index expected");
3823
3824 const unsigned Opc = MI.getOpcode();
3825 const int NumOps = MI.getNumOperands() - 1;
3826 const Register DstReg = MI.getOperand(0).getReg();
3827 const unsigned Flags = MI.getFlags();
3828 const unsigned NarrowSize = NarrowTy.getSizeInBits();
3829 const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3830
3831 assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
3832
3833 // First of all check whether we are narrowing (changing the element type)
3834 // or reducing the vector elements
3835 const LLT DstTy = MRI.getType(DstReg);
3836 const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3837
3838 SmallVector<Register, 8> ExtractedRegs[3];
3839 SmallVector<Register, 8> Parts;
3840
3841 unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3842
3843 // Break down all the sources into NarrowTy pieces we can operate on. This may
3844 // involve creating merges to a wider type, padded with undef.
3845 for (int I = 0; I != NumOps; ++I) {
3846 Register SrcReg = MI.getOperand(I + 1).getReg();
3847 LLT SrcTy = MRI.getType(SrcReg);
3848
3849 // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3850 // For fewerElements, this is a smaller vector with the same element type.
3851 LLT OpNarrowTy;
3852 if (IsNarrow) {
3853 OpNarrowTy = NarrowScalarTy;
3854
3855 // In case of narrowing, we need to cast vectors to scalars for this to
3856 // work properly
3857 // FIXME: Can we do without the bitcast here if we're narrowing?
3858 if (SrcTy.isVector()) {
3859 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3860 SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
3861 }
3862 } else {
3863 OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
3864 }
3865
3866 LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
3867
3868 // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
3869 buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
3870 TargetOpcode::G_ANYEXT);
3871 }
3872
3873 SmallVector<Register, 8> ResultRegs;
3874
3875 // Input operands for each sub-instruction.
3876 SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
3877
3878 int NumParts = ExtractedRegs[0].size();
3879 const unsigned DstSize = DstTy.getSizeInBits();
3880 const LLT DstScalarTy = LLT::scalar(DstSize);
3881
3882 // Narrowing needs to use scalar types
3883 LLT DstLCMTy, NarrowDstTy;
3884 if (IsNarrow) {
3885 DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
3886 NarrowDstTy = NarrowScalarTy;
3887 } else {
3888 DstLCMTy = getLCMType(DstTy, NarrowTy);
3889 NarrowDstTy = NarrowTy;
3890 }
3891
3892 // We widened the source registers to satisfy merge/unmerge size
3893 // constraints. We'll have some extra fully undef parts.
3894 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
3895
3896 for (int I = 0; I != NumRealParts; ++I) {
3897 // Emit this instruction on each of the split pieces.
3898 for (int J = 0; J != NumOps; ++J)
3899 InputRegs[J] = ExtractedRegs[J][I];
3900
3901 auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
3902 ResultRegs.push_back(Inst.getReg(0));
3903 }
3904
3905 // Fill out the widened result with undef instead of creating instructions
3906 // with undef inputs.
3907 int NumUndefParts = NumParts - NumRealParts;
3908 if (NumUndefParts != 0)
3909 ResultRegs.append(NumUndefParts,
3910 MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
3911
3912 // Extract the possibly padded result. Use a scratch register if we need to do
3913 // a final bitcast, otherwise use the original result register.
3914 Register MergeDstReg;
3915 if (IsNarrow && DstTy.isVector())
3916 MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
3917 else
3918 MergeDstReg = DstReg;
3919
3920 buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
3921
3922 // Recast to vector if we narrowed a vector
3923 if (IsNarrow && DstTy.isVector())
3924 MIRBuilder.buildBitcast(DstReg, MergeDstReg);
3925
3926 MI.eraseFromParent();
3927 return Legalized;
3928 }
3929
3930 LegalizerHelper::LegalizeResult
fewerElementsVectorSextInReg(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)3931 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
3932 LLT NarrowTy) {
3933 Register DstReg = MI.getOperand(0).getReg();
3934 Register SrcReg = MI.getOperand(1).getReg();
3935 int64_t Imm = MI.getOperand(2).getImm();
3936
3937 LLT DstTy = MRI.getType(DstReg);
3938
3939 SmallVector<Register, 8> Parts;
3940 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
3941 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
3942
3943 for (Register &R : Parts)
3944 R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
3945
3946 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3947
3948 MI.eraseFromParent();
3949 return Legalized;
3950 }
3951
/// Implement the FewerElementsVector legalization action for \p MI:
/// dispatch on opcode to the specialized splitting routines. Opcodes with no
/// implementation report UnableToLegalize.
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  case G_IMPLICIT_DEF:
    return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
  // Elementwise operations with a single result type index: split the
  // sources into NarrowTy pieces and emit the op per piece.
  case G_TRUNC:
  case G_AND:
  case G_OR:
  case G_XOR:
  case G_ADD:
  case G_SUB:
  case G_MUL:
  case G_PTR_ADD:
  case G_SMULH:
  case G_UMULH:
  case G_FADD:
  case G_FMUL:
  case G_FSUB:
  case G_FNEG:
  case G_FABS:
  case G_FCANONICALIZE:
  case G_FDIV:
  case G_FREM:
  case G_FMA:
  case G_FMAD:
  case G_FPOW:
  case G_FEXP:
  case G_FEXP2:
  case G_FLOG:
  case G_FLOG2:
  case G_FLOG10:
  case G_FNEARBYINT:
  case G_FCEIL:
  case G_FFLOOR:
  case G_FRINT:
  case G_INTRINSIC_ROUND:
  case G_INTRINSIC_ROUNDEVEN:
  case G_INTRINSIC_TRUNC:
  case G_FCOS:
  case G_FSIN:
  case G_FSQRT:
  case G_BSWAP:
  case G_BITREVERSE:
  case G_SDIV:
  case G_UDIV:
  case G_SREM:
  case G_UREM:
  case G_SMIN:
  case G_SMAX:
  case G_UMIN:
  case G_UMAX:
  case G_FMINNUM:
  case G_FMAXNUM:
  case G_FMINNUM_IEEE:
  case G_FMAXNUM_IEEE:
  case G_FMINIMUM:
  case G_FMAXIMUM:
  case G_FSHL:
  case G_FSHR:
  case G_FREEZE:
  case G_SADDSAT:
  case G_SSUBSAT:
  case G_UADDSAT:
  case G_USUBSAT:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  // Operations whose operands may have distinct types (e.g. shift amount,
  // count result) need the multi-element-type handling.
  case G_SHL:
  case G_LSHR:
  case G_ASHR:
  case G_SSHLSAT:
  case G_USHLSAT:
  case G_CTLZ:
  case G_CTLZ_ZERO_UNDEF:
  case G_CTTZ:
  case G_CTTZ_ZERO_UNDEF:
  case G_CTPOP:
  case G_FCOPYSIGN:
    return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
  // Conversions: result and source element counts match, element types do
  // not.
  case G_ZEXT:
  case G_SEXT:
  case G_ANYEXT:
  case G_FPEXT:
  case G_FPTRUNC:
  case G_SITOFP:
  case G_UITOFP:
  case G_FPTOSI:
  case G_FPTOUI:
  case G_INTTOPTR:
  case G_PTRTOINT:
  case G_ADDRSPACE_CAST:
    return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
  case G_ICMP:
  case G_FCMP:
    return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
  case G_SELECT:
    return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
  case G_PHI:
    return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
  case G_UNMERGE_VALUES:
    return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
  case G_BUILD_VECTOR:
    assert(TypeIdx == 0 && "not a vector type index");
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_CONCAT_VECTORS:
    if (TypeIdx != 1) // TODO: This probably does work as expected already.
      return UnableToLegalize;
    return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
  case G_EXTRACT_VECTOR_ELT:
  case G_INSERT_VECTOR_ELT:
    return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
  case G_LOAD:
  case G_STORE:
    return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
  case G_SEXT_INREG:
    return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
  default:
    return UnableToLegalize;
  }
}
4073
/// Narrow a scalar G_SHL/G_LSHR/G_ASHR whose shift amount \p Amt is a known
/// constant, by splitting the value into two \p HalfTy halves and emitting
/// the minimal half-width shift/or sequence for the applicable amount range.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
                                             const LLT HalfTy, const LLT AmtTy) {

  // Split the input into low (InL) and high (InH) halves.
  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // Shift by zero is the identity; just re-merge the halves.
  if (Amt.isNullValue()) {
    MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
    MI.eraseFromParent();
    return Legalized;
  }

  LLT NVT = HalfTy;
  unsigned NVTBits = HalfTy.getSizeInBits();
  unsigned VTBits = 2 * NVTBits;

  SrcOp Lo(Register(0)), Hi(Register(0));
  if (MI.getOpcode() == TargetOpcode::G_SHL) {
    if (Amt.ugt(VTBits)) {
      // Shift amount exceeds the full width; produce zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Everything left in the result comes from the low half:
      // Hi = InL << (Amt - NVTBits), Lo = 0.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = MIRBuilder.buildShl(NVT, InL,
                               MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
    } else if (Amt == NVTBits) {
      // Exact half-width shift: low half moves into the high half.
      Lo = MIRBuilder.buildConstant(NVT, 0);
      Hi = InL;
    } else {
      // General case: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
      Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrLHS =
          MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
      auto OrRHS = MIRBuilder.buildLShr(
          NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
      Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
    }
  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
    if (Amt.ugt(VTBits)) {
      // Shift amount exceeds the full width; produce zero.
      Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt.ugt(NVTBits)) {
      // Everything comes from the high half: Lo = InH >> (Amt - NVTBits).
      Lo = MIRBuilder.buildLShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else if (Amt == NVTBits) {
      // Exact half-width shift: high half moves into the low half.
      Lo = InH;
      Hi = MIRBuilder.buildConstant(NVT, 0);
    } else {
      // General case: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
    }
  } else {
    // G_ASHR: like G_LSHR, but the high half fills with the sign bit.
    if (Amt.ugt(VTBits)) {
      Hi = Lo = MIRBuilder.buildAShr(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt.ugt(NVTBits)) {
      Lo = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else if (Amt == NVTBits) {
      Lo = InH;
      Hi = MIRBuilder.buildAShr(NVT, InH,
                                MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
    } else {
      // General case: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
      auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);

      auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
      auto OrRHS = MIRBuilder.buildShl(
          NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));

      Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
      Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
    }
  }

  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
  MI.eraseFromParent();

  return Legalized;
}
4161
// TODO: Optimize if constant shift amount.
/// Narrow a scalar shift to half the destination width. A narrowed shift
/// amount (TypeIdx 1) only truncates the amount operand; narrowing the value
/// (TypeIdx 0) splits it in half and expands via the constant-amount helper
/// when possible, otherwise with a compare/select tree for an unknown amount.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
                                   LLT RequestedTy) {
  // Type index 1 is the shift amount: it can be narrowed independently by
  // truncating operand 2.
  if (TypeIdx == 1) {
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, RequestedTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Amt = MI.getOperand(2).getReg();
  LLT ShiftAmtTy = MRI.getType(Amt);
  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
  // Splitting in half requires an even bit width.
  if (DstEltSize % 2 != 0)
    return UnableToLegalize;

  // Ignore the input type. We can only go to exactly half the size of the
  // input. If that isn't small enough, the resulting pieces will be further
  // legalized.
  const unsigned NewBitSize = DstEltSize / 2;
  const LLT HalfTy = LLT::scalar(NewBitSize);
  const LLT CondTy = LLT::scalar(1);

  // A known-constant amount gets the cheaper branch-free expansion.
  if (const MachineInstr *KShiftAmt =
          getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
    return narrowScalarShiftByConstant(
        MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
  }

  // TODO: Expand with known bits.

  // Handle the fully general expansion by an unknown amount.
  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);

  Register InL = MRI.createGenericVirtualRegister(HalfTy);
  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));

  // AmtExcess = Amt - NewBitSize (used on the "long" path) and
  // AmtLack = NewBitSize - Amt (used on the "short" path); only the value on
  // the selected path is meaningful.
  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);

  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);

  Register ResultRegs[2];
  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL: {
    // Short: ShAmt < NewBitSize
    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
    auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
    auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.

    auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
    // Select InH directly when Amt == 0; the short-path expression shifts by
    // AmtLack == NewBitSize there, which is out of range for HalfTy.
    auto Hi = MIRBuilder.buildSelect(
        HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR: {
    // Short: ShAmt < NewBitSize
    auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});

    auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
    auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
    auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);

    // Long: ShAmt >= NewBitSize
    MachineInstrBuilder HiL;
    if (MI.getOpcode() == TargetOpcode::G_LSHR) {
      HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
    } else {
      auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
      HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
    }
    auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
                                     {InH, AmtExcess}); // Lo from Hi part.

    // As above, Amt == 0 must bypass the short-path expression.
    auto Lo = MIRBuilder.buildSelect(
        HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));

    auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);

    ResultRegs[0] = Lo.getReg(0);
    ResultRegs[1] = Hi.getReg(0);
    break;
  }
  default:
    llvm_unreachable("not a shift");
  }

  MIRBuilder.buildMerge(DstReg, ResultRegs);
  MI.eraseFromParent();
  return Legalized;
}
4272
/// Widen the vector type of a G_PHI (MoreElements action). Each incoming
/// value is padded in its predecessor block; the result is widened and
/// narrowed back in the PHI's own block.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
                                       LLT MoreTy) {
  assert(TypeIdx == 0 && "Expecting only Idx 0");

  Observer.changingInstr(MI);
  // Widen each incoming value, emitting the padding code before the
  // corresponding predecessor's terminator so it dominates the edge.
  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
    MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
    MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
    moreElementsVectorSrc(MI, MoreTy, I);
  }

  MachineBasicBlock &MBB = *MI.getParent();
  // NOTE(review): --getFirstNonPHI() positions the insert point at the last
  // PHI in the block before emitting the result-narrowing code — presumably
  // so it lands immediately after the PHI group once inserted; confirm the
  // intended placement relative to other PHIs.
  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
  moreElementsVectorDst(MI, MoreTy, 0);
  Observer.changedInstr(MI);
  return Legalized;
}
4291
/// Implement the MoreElementsVector legalization action for \p MI: widen the
/// indicated vector type by padding operands and/or narrowing results, with
/// observer notifications around each in-place mutation.
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
                                    LLT MoreTy) {
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_LOAD: {
    // Only the result needs widening.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_STORE:
    // Only the stored value needs widening.
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  // Elementwise binary ops: pad both sources and widen the result.
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
  case TargetOpcode::G_FMINIMUM:
  case TargetOpcode::G_FMAXIMUM: {
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT:
    // Only the source (type index 1) can be widened; the extract offset is
    // unaffected by padding beyond it.
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INSERT:
  case TargetOpcode::G_FREEZE:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 1);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_SELECT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    // A vector condition would also need widening; not handled.
    if (MRI.getType(MI.getOperand(1).getReg()).isVector())
      return UnableToLegalize;

    Observer.changingInstr(MI);
    moreElementsVectorSrc(MI, MoreTy, 2);
    moreElementsVectorSrc(MI, MoreTy, 3);
    moreElementsVectorDst(MI, MoreTy, 0);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_UNMERGE_VALUES: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    // Widen the source, then rebuild the unmerge with extra dead results to
    // cover the padding.
    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
    int NumDst = MI.getNumOperands() - 1;
    moreElementsVectorSrc(MI, MoreTy, NumDst);

    auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
    for (int I = 0; I != NumDst; ++I)
      MIB.addDef(MI.getOperand(I).getReg());

    // Extra results for the padded portion of the widened source.
    int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
    for (int I = NumDst; I != NewNumDst; ++I)
      MIB.addDef(MRI.createGenericVirtualRegister(DstTy));

    MIB.addUse(MI.getOperand(NumDst).getReg());
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PHI:
    return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
  default:
    return UnableToLegalize;
  }
}
4387
/// Emit a multi-limb (schoolbook) multiplication of \p Src1Regs by
/// \p Src2Regs, writing \p DstRegs.size() limbs of the product. Limbs have
/// type \p NarrowTy and are ordered least-significant first. Src1Regs and
/// Src2Regs are assumed to hold the same number of limbs (both callers pass
/// the same part count); only as many product limbs as DstRegs holds are
/// computed.
void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
                                        ArrayRef<Register> Src1Regs,
                                        ArrayRef<Register> Src2Regs,
                                        LLT NarrowTy) {
  MachineIRBuilder &B = MIRBuilder;
  unsigned SrcParts = Src1Regs.size();
  unsigned DstParts = DstRegs.size();

  unsigned DstIdx = 0; // Low bits of the result.
  Register FactorSum =
      B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
  DstRegs[DstIdx] = FactorSum;

  // Carry accumulated while summing the factors for the previous limb; only
  // meaningful once DstIdx > 1 (guarded below).
  unsigned CarrySumPrevDstIdx;
  SmallVector<Register, 4> Factors;

  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
    // Collect low parts of muls for DstIdx: all Src1[DstIdx-i] * Src2[i]
    // whose low half lands on this limb.
    for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
         i <= std::min(DstIdx, SrcParts - 1); ++i) {
      MachineInstrBuilder Mul =
          B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
      Factors.push_back(Mul.getReg(0));
    }
    // Collect high parts of muls from previous DstIdx: the high halves of
    // products that landed one limb lower.
    for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
         i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
      MachineInstrBuilder Umulh =
          B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
      Factors.push_back(Umulh.getReg(0));
    }
    // Add CarrySum from additions calculated for previous DstIdx.
    if (DstIdx != 1) {
      Factors.push_back(CarrySumPrevDstIdx);
    }

    Register CarrySum;
    // Add all factors and accumulate all carries into CarrySum.
    if (DstIdx != DstParts - 1) {
      // Use overflowing adds so each carry can be propagated into the next
      // limb's factor list.
      MachineInstrBuilder Uaddo =
          B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
      FactorSum = Uaddo.getReg(0);
      CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i) {
        MachineInstrBuilder Uaddo =
            B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
        FactorSum = Uaddo.getReg(0);
        MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
        CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
      }
    } else {
      // Since value for the next index is not calculated, neither is
      // CarrySum; plain adds suffice for the most significant limb.
      FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
      for (unsigned i = 2; i < Factors.size(); ++i)
        FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
    }

    CarrySumPrevDstIdx = CarrySum;
    DstRegs[DstIdx] = FactorSum;
    Factors.clear();
  }
}
4450
4451 LegalizerHelper::LegalizeResult
narrowScalarMul(MachineInstr & MI,LLT NarrowTy)4452 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
4453 Register DstReg = MI.getOperand(0).getReg();
4454 Register Src1 = MI.getOperand(1).getReg();
4455 Register Src2 = MI.getOperand(2).getReg();
4456
4457 LLT Ty = MRI.getType(DstReg);
4458 if (Ty.isVector())
4459 return UnableToLegalize;
4460
4461 unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
4462 unsigned DstSize = Ty.getSizeInBits();
4463 unsigned NarrowSize = NarrowTy.getSizeInBits();
4464 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
4465 return UnableToLegalize;
4466
4467 unsigned NumDstParts = DstSize / NarrowSize;
4468 unsigned NumSrcParts = SrcSize / NarrowSize;
4469 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
4470 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
4471
4472 SmallVector<Register, 2> Src1Parts, Src2Parts;
4473 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
4474 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
4475 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
4476 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
4477
4478 // Take only high half of registers if this is high mul.
4479 ArrayRef<Register> DstRegs(
4480 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
4481 MIRBuilder.buildMerge(DstReg, DstRegs);
4482 MI.eraseFromParent();
4483 return Legalized;
4484 }
4485
4486 LegalizerHelper::LegalizeResult
narrowScalarFPTOI(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4487 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
4488 LLT NarrowTy) {
4489 if (TypeIdx != 0)
4490 return UnableToLegalize;
4491
4492 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
4493
4494 Register Src = MI.getOperand(1).getReg();
4495 LLT SrcTy = MRI.getType(Src);
4496
4497 // If all finite floats fit into the narrowed integer type, we can just swap
4498 // out the result type. This is practically only useful for conversions from
4499 // half to at least 16-bits, so just handle the one case.
4500 if (SrcTy.getScalarType() != LLT::scalar(16) ||
4501 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17 : 16))
4502 return UnableToLegalize;
4503
4504 Observer.changingInstr(MI);
4505 narrowScalarDst(MI, NarrowTy, 0,
4506 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
4507 Observer.changedInstr(MI);
4508 return Legalized;
4509 }
4510
/// Narrow G_EXTRACT by splitting the big (type index 1) source into NarrowTy
/// pieces and collecting, from each piece, only the bits the original
/// extract covers.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
                                     LLT NarrowTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
  // FIXME: add support for when SizeOp1 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp1 % NarrowSize != 0)
    return UnableToLegalize;
  int NumParts = SizeOp1 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(0).getReg();
  uint64_t OpStart = MI.getOperand(2).getImm(); // Bit offset being extracted.
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); // Width extracted.
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this piece within the original wide source.
    unsigned SrcStart = i * NarrowSize;

    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
      // No part of the extract uses this subregister, ignore it.
      continue;
    } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is extracted, forward the value.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset;
    uint64_t SegSize;
    if (OpStart < SrcStart) {
      // This piece starts inside the extracted range.
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      // The extracted range starts inside this piece.
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }

    Register SegReg = SrcRegs[i];
    if (ExtractOffset != 0 || SegSize != NarrowSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
    }

    DstRegs.push_back(SegReg);
  }

  // Reassemble the collected segments into the result register.
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else if (DstRegs.size() > 1)
    MIRBuilder.buildMerge(DstReg, DstRegs);
  else
    MIRBuilder.buildCopy(DstReg, DstRegs[0]);
  MI.eraseFromParent();
  return Legalized;
}
4577
/// Narrow G_INSERT by splitting the big (type index 0) value into NarrowTy
/// pieces and re-inserting the relevant slice of the small operand into each
/// piece it overlaps.
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
                                    LLT NarrowTy) {
  // FIXME: Don't know how to handle secondary types yet.
  if (TypeIdx != 0)
    return UnableToLegalize;

  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  // FIXME: add support for when SizeOp0 isn't an exact multiple of
  // NarrowSize.
  if (SizeOp0 % NarrowSize != 0)
    return UnableToLegalize;

  int NumParts = SizeOp0 / NarrowSize;

  SmallVector<Register, 2> SrcRegs, DstRegs;
  SmallVector<uint64_t, 2> Indexes;
  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

  Register OpReg = MI.getOperand(2).getReg();
  uint64_t OpStart = MI.getOperand(3).getImm(); // Bit offset of the insert.
  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits(); // Width inserted.
  for (int i = 0; i < NumParts; ++i) {
    // Bit offset of this piece within the wide destination.
    unsigned DstStart = i * NarrowSize;

    if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
      // No part of the insert affects this subregister, forward the original.
      DstRegs.push_back(SrcRegs[i]);
      continue;
    } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
      // The entire subregister is defined by this insert, forward the new
      // value.
      DstRegs.push_back(OpReg);
      continue;
    }

    // OpSegStart is where this destination segment would start in OpReg if it
    // extended infinitely in both directions.
    int64_t ExtractOffset, InsertOffset;
    uint64_t SegSize;
    if (OpStart < DstStart) {
      // This piece starts inside the inserted value.
      InsertOffset = 0;
      ExtractOffset = DstStart - OpStart;
      SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
    } else {
      // The inserted value starts inside this piece.
      InsertOffset = OpStart - DstStart;
      ExtractOffset = 0;
      SegSize =
          std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
    }

    Register SegReg = OpReg;
    if (ExtractOffset != 0 || SegSize != OpSize) {
      // A genuine extract is needed.
      SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
      MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
    }

    Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
    MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
    DstRegs.push_back(DstReg);
  }

  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
  // Reassemble the updated pieces into the full-width destination.
  Register DstReg = MI.getOperand(0).getReg();
  if(MRI.getType(DstReg).isVector())
    MIRBuilder.buildBuildVector(DstReg, DstRegs);
  else
    MIRBuilder.buildMerge(DstReg, DstRegs);
  MI.eraseFromParent();
  return Legalized;
}
4652
4653 LegalizerHelper::LegalizeResult
narrowScalarBasic(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4654 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
4655 LLT NarrowTy) {
4656 Register DstReg = MI.getOperand(0).getReg();
4657 LLT DstTy = MRI.getType(DstReg);
4658
4659 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
4660
4661 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4662 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
4663 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4664 LLT LeftoverTy;
4665 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
4666 Src0Regs, Src0LeftoverRegs))
4667 return UnableToLegalize;
4668
4669 LLT Unused;
4670 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
4671 Src1Regs, Src1LeftoverRegs))
4672 llvm_unreachable("inconsistent extractParts result");
4673
4674 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4675 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
4676 {Src0Regs[I], Src1Regs[I]});
4677 DstRegs.push_back(Inst.getReg(0));
4678 }
4679
4680 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4681 auto Inst = MIRBuilder.buildInstr(
4682 MI.getOpcode(),
4683 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
4684 DstLeftoverRegs.push_back(Inst.getReg(0));
4685 }
4686
4687 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4688 LeftoverTy, DstLeftoverRegs);
4689
4690 MI.eraseFromParent();
4691 return Legalized;
4692 }
4693
4694 LegalizerHelper::LegalizeResult
narrowScalarExt(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4695 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
4696 LLT NarrowTy) {
4697 if (TypeIdx != 0)
4698 return UnableToLegalize;
4699
4700 Register DstReg = MI.getOperand(0).getReg();
4701 Register SrcReg = MI.getOperand(1).getReg();
4702
4703 LLT DstTy = MRI.getType(DstReg);
4704 if (DstTy.isVector())
4705 return UnableToLegalize;
4706
4707 SmallVector<Register, 8> Parts;
4708 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4709 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
4710 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4711
4712 MI.eraseFromParent();
4713 return Legalized;
4714 }
4715
4716 LegalizerHelper::LegalizeResult
narrowScalarSelect(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4717 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
4718 LLT NarrowTy) {
4719 if (TypeIdx != 0)
4720 return UnableToLegalize;
4721
4722 Register CondReg = MI.getOperand(1).getReg();
4723 LLT CondTy = MRI.getType(CondReg);
4724 if (CondTy.isVector()) // TODO: Handle vselect
4725 return UnableToLegalize;
4726
4727 Register DstReg = MI.getOperand(0).getReg();
4728 LLT DstTy = MRI.getType(DstReg);
4729
4730 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
4731 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
4732 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
4733 LLT LeftoverTy;
4734 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
4735 Src1Regs, Src1LeftoverRegs))
4736 return UnableToLegalize;
4737
4738 LLT Unused;
4739 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
4740 Src2Regs, Src2LeftoverRegs))
4741 llvm_unreachable("inconsistent extractParts result");
4742
4743 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
4744 auto Select = MIRBuilder.buildSelect(NarrowTy,
4745 CondReg, Src1Regs[I], Src2Regs[I]);
4746 DstRegs.push_back(Select.getReg(0));
4747 }
4748
4749 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
4750 auto Select = MIRBuilder.buildSelect(
4751 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
4752 DstLeftoverRegs.push_back(Select.getReg(0));
4753 }
4754
4755 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
4756 LeftoverTy, DstLeftoverRegs);
4757
4758 MI.eraseFromParent();
4759 return Legalized;
4760 }
4761
4762 LegalizerHelper::LegalizeResult
narrowScalarCTLZ(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4763 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
4764 LLT NarrowTy) {
4765 if (TypeIdx != 1)
4766 return UnableToLegalize;
4767
4768 Register DstReg = MI.getOperand(0).getReg();
4769 Register SrcReg = MI.getOperand(1).getReg();
4770 LLT DstTy = MRI.getType(DstReg);
4771 LLT SrcTy = MRI.getType(SrcReg);
4772 unsigned NarrowSize = NarrowTy.getSizeInBits();
4773
4774 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4775 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
4776
4777 MachineIRBuilder &B = MIRBuilder;
4778 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4779 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
4780 auto C_0 = B.buildConstant(NarrowTy, 0);
4781 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4782 UnmergeSrc.getReg(1), C_0);
4783 auto LoCTLZ = IsUndef ?
4784 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
4785 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
4786 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4787 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
4788 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
4789 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
4790
4791 MI.eraseFromParent();
4792 return Legalized;
4793 }
4794
4795 return UnableToLegalize;
4796 }
4797
4798 LegalizerHelper::LegalizeResult
narrowScalarCTTZ(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4799 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
4800 LLT NarrowTy) {
4801 if (TypeIdx != 1)
4802 return UnableToLegalize;
4803
4804 Register DstReg = MI.getOperand(0).getReg();
4805 Register SrcReg = MI.getOperand(1).getReg();
4806 LLT DstTy = MRI.getType(DstReg);
4807 LLT SrcTy = MRI.getType(SrcReg);
4808 unsigned NarrowSize = NarrowTy.getSizeInBits();
4809
4810 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4811 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
4812
4813 MachineIRBuilder &B = MIRBuilder;
4814 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
4815 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
4816 auto C_0 = B.buildConstant(NarrowTy, 0);
4817 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
4818 UnmergeSrc.getReg(0), C_0);
4819 auto HiCTTZ = IsUndef ?
4820 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
4821 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
4822 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
4823 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
4824 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
4825 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
4826
4827 MI.eraseFromParent();
4828 return Legalized;
4829 }
4830
4831 return UnableToLegalize;
4832 }
4833
4834 LegalizerHelper::LegalizeResult
narrowScalarCTPOP(MachineInstr & MI,unsigned TypeIdx,LLT NarrowTy)4835 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
4836 LLT NarrowTy) {
4837 if (TypeIdx != 1)
4838 return UnableToLegalize;
4839
4840 Register DstReg = MI.getOperand(0).getReg();
4841 LLT DstTy = MRI.getType(DstReg);
4842 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4843 unsigned NarrowSize = NarrowTy.getSizeInBits();
4844
4845 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
4846 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
4847
4848 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
4849 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
4850 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
4851
4852 MI.eraseFromParent();
4853 return Legalized;
4854 }
4855
4856 return UnableToLegalize;
4857 }
4858
/// Expand the bit-counting operations G_CTLZ/G_CTTZ/G_CTPOP (and the
/// ZERO_UNDEF variants) into operations the target can handle, using the
/// zero-undef forms plus a select where supported and bit-trick expansions
/// otherwise.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
  unsigned Opc = MI.getOpcode();
  const auto &TII = MIRBuilder.getTII();
  // An op is usable inside an expansion if the target handles it in any way:
  // directly legal, via libcall, or via custom legalization.
  auto isSupported = [this](const LegalityQuery &Q) {
    auto QAction = LI.getAction(Q).Action;
    return QAction == Legal || QAction == Libcall || QAction == Custom;
  };
  switch (Opc) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    // This trivially expands to CTLZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTLZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned Len = SrcTy.getSizeInBits();

    if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero:
      // a zero input yields Len instead of the undefined count.
      auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
      auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we do this:
    // NewLen = NextPowerOf2(Len);
    // x = x | (x >> 1);
    // x = x | (x >> 2);
    // ...
    // x = x | (x >>16);
    // x = x | (x >>32); // for 64-bit input
    // Up to NewLen/2
    // return Len - popcount(x);
    //
    // Ref: "Hacker's Delight" by Henry Warren
    Register Op = SrcReg;
    unsigned NewLen = PowerOf2Ceil(Len);
    // Smear the highest set bit into every lower position...
    for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
      auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
      auto MIBOp = MIRBuilder.buildOr(
          SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
      Op = MIBOp.getReg(0);
    }
    // ...then the leading-zero count is Len minus the popcount.
    auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
    MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
                        MIBPop);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
    // This trivially expands to CTTZ.
    Observer.changingInstr(MI);
    MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_CTTZ: {
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = MI.getOperand(1).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    unsigned Len = SrcTy.getSizeInBits();
    if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
      // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
      // zero.
      auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
      auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
      auto ICmp = MIRBuilder.buildICmp(
          CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
      auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
      MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
      MI.eraseFromParent();
      return Legalized;
    }
    // for now, we use: { return popcount(~x & (x - 1)); }
    // unless the target has ctlz but not ctpop, in which case we use:
    // { return 32 - nlz(~x & (x-1)); }
    // Ref: "Hacker's Delight" by Henry Warren
    auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
    auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
    auto MIBTmp = MIRBuilder.buildAnd(
        SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
    if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
        isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
      auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
      MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
                          MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
      MI.eraseFromParent();
      return Legalized;
    }
    // Reuse this instruction as a CTPOP of the trailing-zero mask.
    MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
    MI.getOperand(1).setReg(MIBTmp.getReg(0));
    return Legalized;
  }
  case TargetOpcode::G_CTPOP: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT Ty = MRI.getType(SrcReg);
    unsigned Size = Ty.getSizeInBits();
    MachineIRBuilder &B = MIRBuilder;

    // Count set bits in blocks of 2 bits. Default approach would be
    // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
    // We use following formula instead:
    // B2Count = val - { (val >> 1) & 0x55555555 }
    // since it gives same result in blocks of 2 with one instruction less.
    auto C_1 = B.buildConstant(Ty, 1);
    auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
    APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
    auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
    auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
    auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);

    // In order to get count in blocks of 4 add values from adjacent block of 2.
    // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
    auto C_2 = B.buildConstant(Ty, 2);
    auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
    APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
    auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
    auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
    auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
    auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);

    // For count in blocks of 8 bits we don't have to mask high 4 bits before
    // addition since count value sits in range {0,...,8} and 4 bits are enough
    // to hold such binary values. After addition high 4 bits still hold count
    // of set bits in high 4 bit block, set them to zero and get 8 bit result.
    // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
    auto C_4 = B.buildConstant(Ty, 4);
    auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
    auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
    APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
    auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
    auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);

    assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
    // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
    // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
    auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
    auto ResTmp = B.buildMul(Ty, B8Count, MulMask);

    // Shift count result from 8 high bits to low bits.
    auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
    B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);

    MI.eraseFromParent();
    return Legalized;
  }
  }
}
5021
// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
// representation, following the reference routine cul2f below: compute the
// biased exponent from the leading-zero count, normalize the mantissa, and
// round to nearest-even.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);

  // unsigned cul2f(ulong u) {
  //   uint lz = clz(u);
  //   uint e = (u != 0) ? 127U + 63U - lz : 0;
  //   u = (u << lz) & 0x7fffffffffffffffUL;
  //   ulong t = u & 0xffffffffffUL;
  //   uint v = (e << 23) | (uint)(u >> 40);
  //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
  //   return as_float(v + r);
  // }

  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
  auto Zero64 = MIRBuilder.buildConstant(S64, 0);

  // lz = clz(u)
  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);

  // Biased exponent: e = 127 + 63 - lz for nonzero inputs, 0 otherwise.
  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
  auto Sub = MIRBuilder.buildSub(S32, K, LZ);

  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);

  // Shift the leading one up to the top and mask it off (it is implicit in
  // the IEEE encoding).
  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);

  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);

  // t = the low 40 bits that will be rounded away.
  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
  auto T = MIRBuilder.buildAnd(S64, U, Mask1);

  // v = (e << 23) | top 23 mantissa bits.
  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));

  // Round to nearest, ties to even: r is 1 above the halfway point, and at
  // exactly the halfway point it equals v's low bit.
  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
  auto One = MIRBuilder.buildConstant(S32, 1);

  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
  MIRBuilder.buildAdd(Dst, V, R);

  MI.eraseFromParent();
  return Legalized;
}
5080
lowerUITOFP(MachineInstr & MI)5081 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
5082 Register Dst = MI.getOperand(0).getReg();
5083 Register Src = MI.getOperand(1).getReg();
5084 LLT DstTy = MRI.getType(Dst);
5085 LLT SrcTy = MRI.getType(Src);
5086
5087 if (SrcTy == LLT::scalar(1)) {
5088 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
5089 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
5090 MIRBuilder.buildSelect(Dst, Src, True, False);
5091 MI.eraseFromParent();
5092 return Legalized;
5093 }
5094
5095 if (SrcTy != LLT::scalar(64))
5096 return UnableToLegalize;
5097
5098 if (DstTy == LLT::scalar(32)) {
5099 // TODO: SelectionDAG has several alternative expansions to port which may
5100 // be more reasonble depending on the available instructions. If a target
5101 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
5102 // intermediate type, this is probably worse.
5103 return lowerU64ToF32BitOps(MI);
5104 }
5105
5106 return UnableToLegalize;
5107 }
5108
// Lower G_SITOFP by reusing the unsigned conversion: compute |l| with the
// sign-propagating (l + s) ^ s trick, convert via G_UITOFP, and restore the
// sign with a select.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // An s1 source has only two signed values (0 and -1), so a select between
  // the two possible FP results suffices.
  if (SrcTy == S1) {
    auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
    auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
    MIRBuilder.buildSelect(Dst, Src, True, False);
    MI.eraseFromParent();
    return Legalized;
  }

  // Only the s64 source expansion is implemented.
  if (SrcTy != S64)
    return UnableToLegalize;

  if (DstTy == S32) {
    // signed cl2f(long l) {
    //   long s = l >> 63;
    //   float r = cul2f((l + s) ^ s);
    //   return s ? -r : r;
    // }
    Register L = Src;
    // s = l >> 63: all-ones for negative l, zero otherwise.
    auto SignBit = MIRBuilder.buildConstant(S64, 63);
    auto S = MIRBuilder.buildAShr(S64, L, SignBit);

    // (l + s) ^ s == |l| (two's-complement negation when s == -1).
    auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
    auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
    auto R = MIRBuilder.buildUITOFP(S32, Xor);

    // Negate the unsigned result if the input was negative.
    auto RNeg = MIRBuilder.buildFNeg(S32, R);
    auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
                                            MIRBuilder.buildConstant(S64, 0));
    MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
5154
// Lower G_FPTOUI in terms of G_FPTOSI. Values below 2^(N-1) convert directly;
// values at or above it are biased down by 2^(N-1) before the signed
// conversion and the destination's top bit is restored with a xor.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  if (SrcTy != S64 && SrcTy != S32)
    return UnableToLegalize;
  if (DstTy != S32 && DstTy != S64)
    return UnableToLegalize;

  // FPTOSI gives same result as FPTOUI for positive signed integers.
  // FPTOUI needs to deal with fp values that convert to unsigned integers
  // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.

  // 2^Exp as an integer (the destination sign bit)...
  APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
  // ...and as an FP constant of the source type.
  APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
                                                : APFloat::IEEEdouble(),
                    APInt::getNullValue(SrcTy.getSizeInBits()));
  TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);

  MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);

  MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
  // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
  // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
  MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
  MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
  MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
  MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);

  const LLT S1 = LLT::scalar(1);

  // Take the direct conversion when Src < 2^Exp; ULT also routes NaN through
  // the direct path.
  MachineInstrBuilder FCMP =
      MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
  MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);

  MI.eraseFromParent();
  return Legalized;
}
5197
// Lower G_FPTOSI by decoding the IEEE-754 fields of the source and shifting
// the significand into integer position manually.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);
  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
    return UnableToLegalize;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c

  unsigned SrcEltBits = SrcTy.getScalarSizeInBits();

  // Extract the biased exponent from bits [30:23].
  auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
  auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);

  auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
  auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);

  // Broadcast the sign bit across the whole value (all-ones for negative
  // inputs, zero otherwise), then widen it to the destination type.
  auto SignMask = MIRBuilder.buildConstant(SrcTy,
                                           APInt::getSignMask(SrcEltBits));
  auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
  auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
  auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
  Sign = MIRBuilder.buildSExt(DstTy, Sign);

  // Significand: the stored mantissa with the implicit leading 1
  // (0x00800000) or'd back in.
  auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
  auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
  auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);

  auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
  R = MIRBuilder.buildZExt(DstTy, R);

  // Unbias the exponent (bias 127) and compute the shift distances that
  // place the binary point: left by (Exponent - 23) or right by
  // (23 - Exponent).
  auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
  auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
  auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
  auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);

  auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
  auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);

  const LLT S1 = LLT::scalar(1);
  // Shift left when the exponent exceeds the mantissa width, else right.
  auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
                                    S1, Exponent, ExponentLoBit);

  R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);

  // Apply the sign: (r ^ sign) - sign negates r when sign is all-ones.
  auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
  auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);

  auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);

  // |value| < 1 (negative unbiased exponent) truncates to zero.
  auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
                                          S1, Exponent, ZeroSrcTy);

  auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
  MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);

  MI.eraseFromParent();
  return Legalized;
}
5264
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
    return UnableToLegalize;

  const unsigned ExpMask = 0x7ff;
  const unsigned ExpBiasf64 = 1023;
  const unsigned ExpBiasf16 = 15;
  const LLT S32 = LLT::scalar(32);
  const LLT S1 = LLT::scalar(1);

  // Split the f64 bits into low (U) and high (UH) 32-bit halves.
  auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
  Register U = Unmerge.getReg(0);
  Register UH = Unmerge.getReg(1);

  // E = biased f64 exponent, bits [30:20] of the high half.
  auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
  E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));

  // Subtract the fp64 exponent bias (1023) to get the real exponent and
  // add the f16 bias (15) to get the biased exponent for the f16 format.
  E = MIRBuilder.buildAdd(
      S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));

  // M = high mantissa bits shifted toward f16 position, keeping guard bits.
  auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
  M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));

  // Collapse the discarded low mantissa bits into a single sticky bit.
  auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
                                       MIRBuilder.buildConstant(S32, 0x1ff));
  MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);

  auto Zero = MIRBuilder.buildConstant(S32, 0);
  auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
  auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
  M = MIRBuilder.buildOr(S32, M, Lo40Set);

  // Inf/NaN result: (M != 0 ? 0x0200 : 0) | 0x7c00;
  // (quiet-NaN payload bit set when the source mantissa was nonzero).
  auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
  auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
  auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);

  auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
  auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);

  // Normal-number encoding: N = M | (E << 12);
  auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
  auto N = MIRBuilder.buildOr(S32, M, EShl12);

  // Denormal path: B = clamp(1-E, 0, 13);
  auto One = MIRBuilder.buildConstant(S32, 1);
  auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
  auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
  B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));

  // Make the implicit leading 1 explicit before shifting it out.
  auto SigSetHigh = MIRBuilder.buildOr(S32, M,
                                       MIRBuilder.buildConstant(S32, 0x1000));

  auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
  auto D0 = MIRBuilder.buildShl(S32, D, B);

  // If re-shifting left does not reproduce the value, bits were lost; fold
  // them into a sticky bit.
  auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
                                               D0, SigSetHigh);
  auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
  D = MIRBuilder.buildOr(S32, D, D1);

  // Choose the denormal encoding when E < 1, otherwise the normal one.
  auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
  auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);

  // Round to nearest, ties to even, based on the low three (guard) bits.
  auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
  V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));

  // Tie (low bits == 3): round up to even.
  auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 3));
  auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);

  // More than half (low bits > 5): round up.
  auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
                                       MIRBuilder.buildConstant(S32, 5));
  auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);

  V1 = MIRBuilder.buildOr(S32, V0, V1);
  V = MIRBuilder.buildAdd(S32, V, V1);

  // E > 30 overflows the f16 exponent range: clamp to infinity (0x7c00).
  auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
                                       E, MIRBuilder.buildConstant(S32, 30));
  V = MIRBuilder.buildSelect(S32, CmpEGt30,
                             MIRBuilder.buildConstant(S32, 0x7c00), V);

  // E == 1039 (0x7ff - 1023 + 15) means the source was Inf/NaN: use I.
  // NOTE(review): despite the "Gt" in the name, this is an equality compare.
  auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
                                         E, MIRBuilder.buildConstant(S32, 1039));
  V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);

  // Extract the sign bit.
  auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
  Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));

  // Insert the sign bit
  V = MIRBuilder.buildOr(S32, Sign, V);

  MIRBuilder.buildTrunc(Dst, V);
  MI.eraseFromParent();
  return Legalized;
}
5370
5371 LegalizerHelper::LegalizeResult
lowerFPTRUNC(MachineInstr & MI)5372 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
5373 Register Dst = MI.getOperand(0).getReg();
5374 Register Src = MI.getOperand(1).getReg();
5375
5376 LLT DstTy = MRI.getType(Dst);
5377 LLT SrcTy = MRI.getType(Src);
5378 const LLT S64 = LLT::scalar(64);
5379 const LLT S16 = LLT::scalar(16);
5380
5381 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
5382 return lowerFPTRUNC_F64_TO_F16(MI);
5383
5384 return UnableToLegalize;
5385 }
5386
5387 // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
5388 // multiplication tree.
lowerFPOWI(MachineInstr & MI)5389 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
5390 Register Dst = MI.getOperand(0).getReg();
5391 Register Src0 = MI.getOperand(1).getReg();
5392 Register Src1 = MI.getOperand(2).getReg();
5393 LLT Ty = MRI.getType(Dst);
5394
5395 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
5396 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
5397 MI.eraseFromParent();
5398 return Legalized;
5399 }
5400
minMaxToCompare(unsigned Opc)5401 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
5402 switch (Opc) {
5403 case TargetOpcode::G_SMIN:
5404 return CmpInst::ICMP_SLT;
5405 case TargetOpcode::G_SMAX:
5406 return CmpInst::ICMP_SGT;
5407 case TargetOpcode::G_UMIN:
5408 return CmpInst::ICMP_ULT;
5409 case TargetOpcode::G_UMAX:
5410 return CmpInst::ICMP_UGT;
5411 default:
5412 llvm_unreachable("not in integer min/max");
5413 }
5414 }
5415
lowerMinMax(MachineInstr & MI)5416 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
5417 Register Dst = MI.getOperand(0).getReg();
5418 Register Src0 = MI.getOperand(1).getReg();
5419 Register Src1 = MI.getOperand(2).getReg();
5420
5421 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
5422 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
5423
5424 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
5425 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
5426
5427 MI.eraseFromParent();
5428 return Legalized;
5429 }
5430
// Lower G_FCOPYSIGN with integer bit operations: clear the sign bit of
// Src0, isolate the sign bit of Src1 (resizing it to Src0's width when the
// types differ), and or the two together.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();

  const LLT Src0Ty = MRI.getType(Src0);
  const LLT Src1Ty = MRI.getType(Src1);

  const int Src0Size = Src0Ty.getScalarSizeInBits();
  const int Src1Size = Src1Ty.getScalarSizeInBits();

  // Mask selecting only the sign bit of Src0's type...
  auto SignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getSignMask(Src0Size));

  // ...and its complement (magnitude bits).
  auto NotSignBitMask = MIRBuilder.buildConstant(
    Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));

  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
  MachineInstr *Or;

  if (Src0Ty == Src1Ty) {
    // Same width: mask Src1's sign bit directly.
    auto And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else if (Src0Size > Src1Size) {
    // Src1 is narrower: widen it and shift its sign bit up into Src0's
    // sign position.
    auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
    auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
    auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  } else {
    // Src1 is wider: shift its sign bit down and truncate to Src0's width.
    auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
    auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
    auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
    auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
    Or = MIRBuilder.buildOr(Dst, And0, And1);
  }

  // Be careful about setting nsz/nnan/ninf on every instruction, since the
  // constants are a nan and -0.0, but the final result should preserve
  // everything.
  if (unsigned Flags = MI.getFlags())
    Or->setFlags(Flags);

  MI.eraseFromParent();
  return Legalized;
}
5478
5479 LegalizerHelper::LegalizeResult
lowerFMinNumMaxNum(MachineInstr & MI)5480 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
5481 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
5482 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
5483
5484 Register Dst = MI.getOperand(0).getReg();
5485 Register Src0 = MI.getOperand(1).getReg();
5486 Register Src1 = MI.getOperand(2).getReg();
5487 LLT Ty = MRI.getType(Dst);
5488
5489 if (!MI.getFlag(MachineInstr::FmNoNans)) {
5490 // Insert canonicalizes if it's possible we need to quiet to get correct
5491 // sNaN behavior.
5492
5493 // Note this must be done here, and not as an optimization combine in the
5494 // absence of a dedicate quiet-snan instruction as we're using an
5495 // omni-purpose G_FCANONICALIZE.
5496 if (!isKnownNeverSNaN(Src0, MRI))
5497 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
5498
5499 if (!isKnownNeverSNaN(Src1, MRI))
5500 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
5501 }
5502
5503 // If there are no nans, it's safe to simply replace this with the non-IEEE
5504 // version.
5505 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
5506 MI.eraseFromParent();
5507 return Legalized;
5508 }
5509
lowerFMad(MachineInstr & MI)5510 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
5511 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
5512 Register DstReg = MI.getOperand(0).getReg();
5513 LLT Ty = MRI.getType(DstReg);
5514 unsigned Flags = MI.getFlags();
5515
5516 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
5517 Flags);
5518 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
5519 MI.eraseFromParent();
5520 return Legalized;
5521 }
5522
// Lower G_INTRINSIC_ROUND (round half away from zero) via trunc, fabs and
// copysign.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register X = MI.getOperand(1).getReg();
  const unsigned Flags = MI.getFlags();
  const LLT Ty = MRI.getType(DstReg);
  // Compare result type: same shape as Ty with 1-bit elements.
  const LLT CondTy = Ty.changeElementSize(1);

  // round(x) =>
  //  t = trunc(x);
  //  d = fabs(x - t);
  //  o = copysign(1.0f, x);
  //  return t + (d >= 0.5 ? o : 0.0);

  auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);

  // d = |x - trunc(x)| is the fractional part's magnitude.
  auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
  auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
  auto One = MIRBuilder.buildFConstant(Ty, 1.0);
  auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
  // o = +/-1.0 with x's sign, so the adjustment moves away from zero.
  auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);

  auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
                                  Flags);
  auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);

  MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);

  MI.eraseFromParent();
  return Legalized;
}
5555
// Lower G_FFLOOR via trunc plus a conditional -1.0 correction for negative
// non-integral inputs.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFFloor(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  unsigned Flags = MI.getFlags();
  LLT Ty = MRI.getType(DstReg);
  // Compare result type: same shape as Ty with 1-bit elements.
  const LLT CondTy = Ty.changeElementSize(1);

  // result = trunc(src);
  // if (src < 0.0 && src != result)
  //   result += -1.0.

  auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
  auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);

  auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
                                  SrcReg, Zero, Flags);
  auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
                                      SrcReg, Trunc, Flags);
  auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
  // SITOFP of the i1 condition yields exactly 0.0 or -1.0 (i1 true
  // sign-extends to -1), which is the needed correction term.
  auto AddVal = MIRBuilder.buildSITOFP(Ty, And);

  MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
  MI.eraseFromParent();
  return Legalized;
}
5582
// Lower G_MERGE_VALUES by zero-extending each source part into a wide
// scalar, shifting it to its bit offset, and or'ing the parts together.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
  const unsigned NumOps = MI.getNumOperands();
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(Src0Reg);
  unsigned PartSize = SrcTy.getSizeInBits();

  // Accumulate into a scalar as wide as the destination; part 0 needs no
  // shift.
  LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);

  for (unsigned I = 2; I != NumOps; ++I) {
    const unsigned Offset = (I - 1) * PartSize;

    Register SrcReg = MI.getOperand(I).getReg();
    auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

    // Write the final or directly into DstReg when this is the last part
    // and no pointer cast is needed.
    Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
      MRI.createGenericVirtualRegister(WideTy);

    auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
    auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
    MIRBuilder.buildOr(NextResult, ResultReg, Shl);
    ResultReg = NextResult;
  }

  if (DstTy.isPointer()) {
    // Integer -> pointer casts are invalid for non-integral address spaces.
    if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
          DstTy.getAddressSpace())) {
      LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
      return UnableToLegalize;
    }

    MIRBuilder.buildIntToPtr(DstReg, ResultReg);
  }

  MI.eraseFromParent();
  return Legalized;
}
5623
// Lower G_UNMERGE_VALUES by coercing the source to a scalar integer and
// extracting each destination with a logical shift right plus trunc.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
  // Operand layout: defs [0, NumDst), source at index NumDst.
  const unsigned NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (DstTy.isPointer())
    return UnableToLegalize; // TODO

  SrcReg = coerceToScalar(SrcReg);
  if (!SrcReg)
    return UnableToLegalize;

  // Expand scalarizing unmerge as bitcast to integer and shift.
  LLT IntTy = MRI.getType(SrcReg);

  // Destination 0 is the low bits; no shift needed.
  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);

  const unsigned DstSize = DstTy.getSizeInBits();
  unsigned Offset = DstSize;
  for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
    auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
    MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
  }

  MI.eraseFromParent();
  return Legalized;
}
5653
5654 /// Lower a vector extract or insert by writing the vector to a stack temporary
5655 /// and reloading the element or vector.
5656 ///
5657 /// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
5658 /// =>
5659 /// %stack_temp = G_FRAME_INDEX
5660 /// G_STORE %vec, %stack_temp
5661 /// %idx = clamp(%idx, %vec.getNumElements())
5662 /// %element_ptr = G_PTR_ADD %stack_temp, %idx
5663 /// %dst = G_LOAD %element_ptr
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcVec = MI.getOperand(1).getReg();
  // InsertVal stays invalid for the extract case; its validity doubles as
  // the insert-vs-extract discriminator below.
  Register InsertVal;
  if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
    InsertVal = MI.getOperand(2).getReg();

  // The index is the last operand for both opcodes.
  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();

  LLT VecTy = MRI.getType(SrcVec);
  LLT EltTy = VecTy.getElementType();
  if (!EltTy.isByteSized()) { // Not implemented.
    LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
    return UnableToLegalize;
  }

  unsigned EltBytes = EltTy.getSizeInBytes();
  Align VecAlign = getStackTemporaryAlignment(VecTy);
  Align EltAlign;

  // Spill the whole vector to a stack slot.
  MachinePointerInfo PtrInfo;
  auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
                                        VecAlign, PtrInfo);
  MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);

  int64_t IdxVal;
  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
    // Constant index: precise offset and alignment are known.
    int64_t Offset = IdxVal * EltBytes;
    PtrInfo = PtrInfo.getWithOffset(Offset);
    EltAlign = commonAlignment(VecAlign, Offset);
  } else {
    // We lose information with a variable offset.
    EltAlign = getStackTemporaryAlignment(EltTy);
    PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
  }

  if (InsertVal) {
    // Write the inserted element
    MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);

    // Reload the whole vector.
    // NOTE(review): this load reads from StackTemp (slot base) but reuses
    // PtrInfo, which was rebased to the element offset above — confirm the
    // MachinePointerInfo here is intended to describe the element rather
    // than the full slot.
    MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
  } else {
    MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
  }

  MI.eraseFromParent();
  return Legalized;
}
5718
// Lower G_SHUFFLE_VECTOR by scalarizing: extract each selected source
// element and rebuild the destination with G_BUILD_VECTOR. A scalar
// destination degenerates to a copy of one source (a 1-element "select").
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();
  Register Src0Reg = MI.getOperand(1).getReg();
  Register Src1Reg = MI.getOperand(2).getReg();
  LLT Src0Ty = MRI.getType(Src0Reg);
  LLT DstTy = MRI.getType(DstReg);
  LLT IdxTy = LLT::scalar(32);

  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  if (DstTy.isScalar()) {
    if (Src0Ty.isVector())
      return UnableToLegalize;

    // This is just a SELECT.
    assert(Mask.size() == 1 && "Expected a single mask element");
    Register Val;
    // A negative (undef) or out-of-range mask element yields undef.
    if (Mask[0] < 0 || Mask[0] > 1)
      Val = MIRBuilder.buildUndef(DstTy).getReg(0);
    else
      Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
    MIRBuilder.buildCopy(DstReg, Val);
    MI.eraseFromParent();
    return Legalized;
  }

  // Lazily-created undef element, shared by all undef mask positions.
  Register Undef;
  SmallVector<Register, 32> BuildVec;
  LLT EltTy = DstTy.getElementType();

  for (int Idx : Mask) {
    if (Idx < 0) {
      if (!Undef.isValid())
        Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
      BuildVec.push_back(Undef);
      continue;
    }

    if (Src0Ty.isScalar()) {
      BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
    } else {
      // Mask indices >= NumElts refer to the second source vector.
      int NumElts = Src0Ty.getNumElements();
      Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
      int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
      auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
      auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
      BuildVec.push_back(Extract.getReg(0));
    }
  }

  MIRBuilder.buildBuildVector(DstReg, BuildVec);
  MI.eraseFromParent();
  return Legalized;
}
5774
// Lower G_DYN_STACKALLOC by adjusting the stack pointer directly:
// SP' = align-down(SP - size); the result is the new SP.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
  const auto &MF = *MI.getMF();
  const auto &TFI = *MF.getSubtarget().getFrameLowering();
  // The subtraction below assumes a downward-growing stack.
  if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
    return UnableToLegalize;

  Register Dst = MI.getOperand(0).getReg();
  Register AllocSize = MI.getOperand(1).getReg();
  Align Alignment = assumeAligned(MI.getOperand(2).getImm());

  LLT PtrTy = MRI.getType(Dst);
  LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());

  Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
  auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
  SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);

  // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
  // have to generate an extra instruction to negate the alloc and then use
  // G_PTR_ADD to add the negative offset.
  auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
  if (Alignment > Align(1)) {
    // Round down to the requested alignment by masking off the low bits.
    APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
    AlignMask.negate();
    auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
    Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
  }

  // Commit the new SP and return it as the allocated pointer.
  SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
  MIRBuilder.buildCopy(SPReg, SPTmp);
  MIRBuilder.buildCopy(Dst, SPTmp);

  MI.eraseFromParent();
  return Legalized;
}
5811
// Lower G_EXTRACT of a scalar (or of one vector element at a bit offset) as
// a shift-right of the scalar-cast source followed by a trunc.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  unsigned Offset = MI.getOperand(2).getImm();

  LLT DstTy = MRI.getType(Dst);
  LLT SrcTy = MRI.getType(Src);

  // Handled cases: scalar from scalar, or a single element out of a vector
  // whose element type matches the destination.
  if (DstTy.isScalar() &&
      (SrcTy.isScalar() ||
       (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
    LLT SrcIntTy = SrcTy;
    if (!SrcTy.isScalar()) {
      // View the vector as one wide integer.
      SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
      Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
    }

    if (Offset == 0)
      MIRBuilder.buildTrunc(Dst, Src);
    else {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
      auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
      MIRBuilder.buildTrunc(Dst, Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  return UnableToLegalize;
}
5844
// Lower G_INSERT with integer bit operations: mask out the destination bit
// range, zero-extend and shift the inserted value into place, and or.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  Register InsertSrc = MI.getOperand(2).getReg();
  uint64_t Offset = MI.getOperand(3).getImm();

  LLT DstTy = MRI.getType(Src);
  LLT InsertTy = MRI.getType(InsertSrc);

  // Vector inserts (other than a whole matching element) not handled here.
  if (InsertTy.isVector() ||
      (DstTy.isVector() && DstTy.getElementType() != InsertTy))
    return UnableToLegalize;

  // Pointer <-> integer casts are invalid for non-integral address spaces.
  const DataLayout &DL = MIRBuilder.getDataLayout();
  if ((DstTy.isPointer() &&
       DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
      (InsertTy.isPointer() &&
       DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
    LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
    return UnableToLegalize;
  }

  LLT IntDstTy = DstTy;

  // Work on a plain integer view of both operands.
  if (!DstTy.isScalar()) {
    IntDstTy = LLT::scalar(DstTy.getSizeInBits());
    Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
  }

  if (!InsertTy.isScalar()) {
    const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
    InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
  }

  // Widen the inserted value and shift it to its bit offset.
  Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
  if (Offset != 0) {
    auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
    ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
  }

  // Mask clearing the [Offset, Offset + insert size) bit range (the wrap
  // variant handles an insert that spans the full width).
  APInt MaskVal = APInt::getBitsSetWithWrap(
      DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);

  auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
  auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
  auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);

  // Cast back to the original (possibly pointer/vector) destination type.
  MIRBuilder.buildCast(Dst, Or);
  MI.eraseFromParent();
  return Legalized;
}
5896
// Lower G_SADDO/G_SSUBO: perform the plain add/sub and derive the overflow
// bit from sign comparisons of the result and operands.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
  Register Dst0 = MI.getOperand(0).getReg();
  Register Dst1 = MI.getOperand(1).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;

  LLT Ty = MRI.getType(Dst0);
  LLT BoolTy = MRI.getType(Dst1);

  if (IsAdd)
    MIRBuilder.buildAdd(Dst0, LHS, RHS);
  else
    MIRBuilder.buildSub(Dst0, LHS, RHS);

  // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.

  auto Zero = MIRBuilder.buildConstant(Ty, 0);

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  auto ResultLowerThanLHS =
      MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
  auto ConditionRHS = MIRBuilder.buildICmp(
      IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);

  // Overflow iff exactly one of the two conditions holds.
  MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
  MI.eraseFromParent();
  return Legalized;
}
5932
// Lower saturating add/sub using min/max clamping of the RHS so the plain
// add/sub cannot overflow.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  bool IsSigned;
  bool IsAdd;
  unsigned BaseOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    BaseOp = TargetOpcode::G_ADD;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    BaseOp = TargetOpcode::G_SUB;
    break;
  }

  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   hi = 0x7fffffff - smax(a, 0)
    //   lo = 0x80000000 - smin(a, 0)
    //   a + smin(smax(lo, b), hi)
    // ssub.sat(a, b) ->
    //   lo = smax(a, -1) - 0x7fffffff
    //   hi = smin(a, -1) - 0x80000000
    //   a - smin(smax(lo, b), hi)
    // TODO: AMDGPU can use a "median of 3" instruction here:
    //   a +/- med3(lo, b, hi)
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto MaxVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    MachineInstrBuilder Hi, Lo;
    if (IsAdd) {
      // [Lo, Hi] is the range of b values that cannot overflow a + b.
      auto Zero = MIRBuilder.buildConstant(Ty, 0);
      Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
      Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
    } else {
      // Same idea for subtraction, with the split at -1 instead of 0.
      auto NegOne = MIRBuilder.buildConstant(Ty, -1);
      Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
                               MaxVal);
      Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
                               MinVal);
    }
    auto RHSClamped =
        MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
  } else {
    // uadd.sat(a, b) -> a + umin(~a, b)
    // usub.sat(a, b) -> a - umin(a, b)
    Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
    auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
    MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
  }

  MI.eraseFromParent();
  return Legalized;
}
6009
/// Lower G_[US]{ADD,SUB}SAT using the corresponding overflow-reporting
/// operation (G_UADDO/G_SADDO/G_USUBO/G_SSUBO) followed by a select between
/// the raw result and the saturated value.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  Register RHS = MI.getOperand(2).getReg();
  LLT Ty = MRI.getType(Res);
  LLT BoolTy = Ty.changeElementSize(1); // Carry/overflow flag type.
  bool IsSigned;
  bool IsAdd;
  unsigned OverflowOp;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("unexpected addsat/subsat opcode");
  case TargetOpcode::G_UADDSAT:
    IsSigned = false;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_UADDO;
    break;
  case TargetOpcode::G_SADDSAT:
    IsSigned = true;
    IsAdd = true;
    OverflowOp = TargetOpcode::G_SADDO;
    break;
  case TargetOpcode::G_USUBSAT:
    IsSigned = false;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_USUBO;
    break;
  case TargetOpcode::G_SSUBSAT:
    IsSigned = true;
    IsAdd = false;
    OverflowOp = TargetOpcode::G_SSUBO;
    break;
  }

  auto OverflowRes =
      MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
  Register Tmp = OverflowRes.getReg(0); // Wrapped arithmetic result.
  Register Ov = OverflowRes.getReg(1);  // Overflow flag.
  MachineInstrBuilder Clamp;
  if (IsSigned) {
    // sadd.sat(a, b) ->
    //   {tmp, ov} = saddo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // ssub.sat(a, b) ->
    //   {tmp, ov} = ssubo(a, b)
    //   ov ? (tmp >>s 31) + 0x80000000 : r
    // On signed overflow the correct saturation value is INT_MIN or INT_MAX
    // depending on the sign of the wrapped result; the ashr-by-(BW-1) plus
    // INT_MIN trick selects between them without a second compare.
    uint64_t NumBits = Ty.getScalarSizeInBits();
    auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
    auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
    auto MinVal =
        MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
    Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
  } else {
    // uadd.sat(a, b) ->
    //   {tmp, ov} = uaddo(a, b)
    //   ov ? 0xffffffff : tmp
    // usub.sat(a, b) ->
    //   {tmp, ov} = usubo(a, b)
    //   ov ? 0 : tmp
    Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
  }
  MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);

  MI.eraseFromParent();
  return Legalized;
}
6077
6078 LegalizerHelper::LegalizeResult
lowerShlSat(MachineInstr & MI)6079 LegalizerHelper::lowerShlSat(MachineInstr &MI) {
6080 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
6081 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
6082 "Expected shlsat opcode!");
6083 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
6084 Register Res = MI.getOperand(0).getReg();
6085 Register LHS = MI.getOperand(1).getReg();
6086 Register RHS = MI.getOperand(2).getReg();
6087 LLT Ty = MRI.getType(Res);
6088 LLT BoolTy = Ty.changeElementSize(1);
6089
6090 unsigned BW = Ty.getScalarSizeInBits();
6091 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
6092 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
6093 : MIRBuilder.buildLShr(Ty, Result, RHS);
6094
6095 MachineInstrBuilder SatVal;
6096 if (IsSigned) {
6097 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
6098 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
6099 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
6100 MIRBuilder.buildConstant(Ty, 0));
6101 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
6102 } else {
6103 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
6104 }
6105 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
6106 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
6107
6108 MI.eraseFromParent();
6109 return Legalized;
6110 }
6111
6112 LegalizerHelper::LegalizeResult
lowerBswap(MachineInstr & MI)6113 LegalizerHelper::lowerBswap(MachineInstr &MI) {
6114 Register Dst = MI.getOperand(0).getReg();
6115 Register Src = MI.getOperand(1).getReg();
6116 const LLT Ty = MRI.getType(Src);
6117 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
6118 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
6119
6120 // Swap most and least significant byte, set remaining bytes in Res to zero.
6121 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
6122 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
6123 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6124 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
6125
6126 // Set i-th high/low byte in Res to i-th low/high byte from Src.
6127 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
6128 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
6129 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
6130 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
6131 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
6132 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
6133 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
6134 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
6135 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
6136 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
6137 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
6138 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
6139 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
6140 }
6141 Res.getInstr()->getOperand(0).setReg(Dst);
6142
6143 MI.eraseFromParent();
6144 return Legalized;
6145 }
6146
6147 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
SwapN(unsigned N,DstOp Dst,MachineIRBuilder & B,MachineInstrBuilder Src,APInt Mask)6148 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
6149 MachineInstrBuilder Src, APInt Mask) {
6150 const LLT Ty = Dst.getLLTTy(*B.getMRI());
6151 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
6152 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
6153 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
6154 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
6155 return B.buildOr(Dst, LHS, RHS);
6156 }
6157
/// Lower G_BITREVERSE as G_BSWAP (reverse the bytes) followed by three
/// swap-network stages that reverse the bits within each byte.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  const LLT Ty = MRI.getType(Src);
  unsigned Size = Ty.getSizeInBits();

  // Reverse byte order first; the remaining stages only permute bits inside
  // each byte, so the per-byte masks splat across the whole type.
  MachineInstrBuilder BSWAP =
      MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});

  // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
  //   [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
  // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
  MachineInstrBuilder Swap4 =
      SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));

  // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
  //   [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
  // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
  MachineInstrBuilder Swap2 =
      SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));

  // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
  //   [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
  // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
  // The final stage writes straight into Dst.
  SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));

  MI.eraseFromParent();
  return Legalized;
}
6188
6189 LegalizerHelper::LegalizeResult
lowerReadWriteRegister(MachineInstr & MI)6190 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
6191 MachineFunction &MF = MIRBuilder.getMF();
6192
6193 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
6194 int NameOpIdx = IsRead ? 1 : 0;
6195 int ValRegIndex = IsRead ? 0 : 1;
6196
6197 Register ValReg = MI.getOperand(ValRegIndex).getReg();
6198 const LLT Ty = MRI.getType(ValReg);
6199 const MDString *RegStr = cast<MDString>(
6200 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
6201
6202 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
6203 if (!PhysReg.isValid())
6204 return UnableToLegalize;
6205
6206 if (IsRead)
6207 MIRBuilder.buildCopy(ValReg, PhysReg);
6208 else
6209 MIRBuilder.buildCopy(PhysReg, ValReg);
6210
6211 MI.eraseFromParent();
6212 return Legalized;
6213 }
6214
6215 LegalizerHelper::LegalizeResult
lowerSMULH_UMULH(MachineInstr & MI)6216 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
6217 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
6218 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
6219 Register Result = MI.getOperand(0).getReg();
6220 LLT OrigTy = MRI.getType(Result);
6221 auto SizeInBits = OrigTy.getScalarSizeInBits();
6222 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
6223
6224 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
6225 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
6226 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
6227 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
6228
6229 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
6230 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
6231 MIRBuilder.buildTrunc(Result, Shifted);
6232
6233 MI.eraseFromParent();
6234 return Legalized;
6235 }
6236
lowerSelect(MachineInstr & MI)6237 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
6238 // Implement vector G_SELECT in terms of XOR, AND, OR.
6239 Register DstReg = MI.getOperand(0).getReg();
6240 Register MaskReg = MI.getOperand(1).getReg();
6241 Register Op1Reg = MI.getOperand(2).getReg();
6242 Register Op2Reg = MI.getOperand(3).getReg();
6243 LLT DstTy = MRI.getType(DstReg);
6244 LLT MaskTy = MRI.getType(MaskReg);
6245 LLT Op1Ty = MRI.getType(Op1Reg);
6246 if (!DstTy.isVector())
6247 return UnableToLegalize;
6248
6249 // Vector selects can have a scalar predicate. If so, splat into a vector and
6250 // finish for later legalization attempts to try again.
6251 if (MaskTy.isScalar()) {
6252 Register MaskElt = MaskReg;
6253 if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
6254 MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
6255 // Generate a vector splat idiom to be pattern matched later.
6256 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
6257 Observer.changingInstr(MI);
6258 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6259 Observer.changedInstr(MI);
6260 return Legalized;
6261 }
6262
6263 if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
6264 return UnableToLegalize;
6265 }
6266
6267 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
6268 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
6269 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
6270 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
6271 MI.eraseFromParent();
6272 return Legalized;
6273 }
6274