//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>
using namespace llvm;

/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}

/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull, Attribute::NoUndef})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

bool TargetLowering::parametersInCSRMatch(
    const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() == ISD::AssertZext)
      Value = Value.getOperand(0);
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
  if (IsSRet)
    IndirectType = Call->getParamStructRetType(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
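///
/// An illustrative sketch (not code from this file): a target expanding a
/// 128-bit floating-point add could write
///   SDValue Ops[2] = {LHS, RHS};
///   TargetLowering::MakeLibCallOptions CallOptions;
///   std::pair<SDValue, SDValue> Res =
///       TLI.makeLibCall(DAG, RTLIB::ADD_F128, MVT::f128, Ops, CallOptions, dl);
/// where Res.first is the call result and Res.second is the output chain.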
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}

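// An illustrative walk-through (assuming a hypothetical target where i64 is
// legal, every integer type is a safe mem-op type, and misaligned accesses
// are not fast): a 15-byte memcpy would typically yield
// MemOps = { i64, i32, i16, i8 } (8 + 4 + 2 + 1 bytes).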
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
      Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here, as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing an unaligned and overlapping load / store (or a pair of them).
      unsigned Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}

void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS,
                                         SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code when libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

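  // For example, a SETUEQ on f32 expands to two libcalls (assuming the
  // default compiler-rt/libgcc names __unordsf2 and __eqsf2), whose integer
  // results are tested against zero and OR'd together:
  //   (__unordsf2(a, b) != 0) | (__eqsf2(a, b) == 0)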
  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    [[fallthrough]];
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-PIC modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // the GOT and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
// Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
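///
/// For example (illustrative): given Op = (and X, 0xFF) where only the low
/// four bits are demanded, the constant is intersected with the demanded
/// mask, producing (and X, 0x0F).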
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
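///
/// A sketch of the effect (assuming a target that reports truncation to i16
/// and i16 zero-extension as free): an i32 ADD whose users demand only the
/// low 16 bits becomes
///   (any_extend (add (trunc x), (trunc y)))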
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified =
      SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}

// TODO: Under what circumstances can we create nodes? Constant folding?
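// For example (illustrative): querying (and X, 0xFF) with DemandedBits = 0x0F
// returns X directly, since the mask already preserves every demanded bit and
// the caller can use X without the 'and'.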
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  EVT VT = Op.getValueType();

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(VT);

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    if (VT.isScalableVector())
      return SDValue();

    // If we only want the lowest element and none of the extended bits, then
    // we can return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (IsLE && DemandedElts == 1 &&
        DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return SDValue();

    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return SDValue();

    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    // If we don't demand the inserted subvector, return the base vector.
    if (DemandedSubElts == 0)
      return Vec;
    // If this simply widens the lowest subvector, see if we can do it earlier.
    // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
    // general nodes like this.
    if (Idx == 0 && Vec.isUndef()) {
      if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
              Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
        return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                           Op.getOperand(0), NewSub, Op.getOperand(2));
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    // TODO: Probably okay to remove after audit; here to reduce change size
    // in initial enablement patch for scalable vectors
    if (VT.isScalableVector())
      return SDValue();

    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts = VT.isFixedLengthVector()
                           ? APInt::getAllOnes(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}

// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1).
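// For example (illustrative, assuming AVGFLOORU is legal for v16i8):
//   srl (add (zext v16i8 A to v16i16), (zext v16i8 B to v16i16)), 1
// can be rewritten as
//   zext (avgflooru A, B) to v16i16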
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
                                 const TargetLowering &TLI,
                                 const APInt &DemandedBits,
                                 const APInt &DemandedElts,
                                 unsigned Depth) {
  assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  // Is the right shift using an immediate value of 1?
  ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
  if (!N1C || !N1C->isOne())
    return SDValue();

  // We are looking for an avgfloor
  //   add(ext, ext)
  // or one of these as an avgceil
  //   add(add(ext, ext), 1)
  //   add(add(ext, 1), ext)
  //   add(ext, add(ext, 1))
  SDValue Add = Op.getOperand(0);
  if (Add.getOpcode() != ISD::ADD)
    return SDValue();

  SDValue ExtOpA = Add.getOperand(0);
  SDValue ExtOpB = Add.getOperand(1);
  auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
    ConstantSDNode *ConstOp;
    if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op2;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op3;
      return true;
    }
    if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
        ConstOp->isOne()) {
      ExtOpA = Op1;
      ExtOpB = Op2;
      return true;
    }
    return false;
  };
  bool IsCeil =
      (ExtOpA.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
      (ExtOpB.getOpcode() == ISD::ADD &&
       MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));

  // If the shift is signed (sra):
  //  - Needs >= 2 sign bits for both operands.
  //  - Needs >= 2 zero bits.
  // If the shift is unsigned (srl):
  //  - Needs >= 1 zero bit for both operands.
  //  - Needs 1 demanded bit zero and >= 2 sign bits.
  unsigned ShiftOpc = Op.getOpcode();
  bool IsSigned = false;
  unsigned KnownBits;
  unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
  unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
  unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
  unsigned NumZeroA =
      DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZeroB =
      DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
  unsigned NumZero = std::min(NumZeroA, NumZeroB);

  switch (ShiftOpc) {
  default:
    llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
  case ISD::SRA: {
    if (NumZero >= 2 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  case ISD::SRL: {
    if (NumZero >= 1 && NumSigned < NumZero) {
      IsSigned = false;
      KnownBits = NumZero;
      break;
    }
    if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
      IsSigned = true;
      KnownBits = NumSigned;
      break;
    }
    return SDValue();
  }
  }

  unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
                           : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);

  // Find the smallest power-2 type that is legal for this vector size and
  // operation, given the original type size and the number of known sign/zero
  // bits.
  EVT VT = Op.getValueType();
  unsigned MinWidth =
      std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
  EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
  if (VT.isVector())
    NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
  if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
    return SDValue();

  SDLoc DL(Op);
  SDValue ResultAVG =
      DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA),
                  DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB));
  return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT,
                     ResultAVG);
}

/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
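///
/// For example (illustrative): for Op = (and X, 0xFF) with
/// OriginalDemandedBits = 0x0F, every demanded bit survives the mask, so the
/// 'and' is replaced by X via TLO.CombineTo.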
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  EVT VT = Op.getValueType();
  bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  // We can't simplify target constants.
  if (Op.getOpcode() == ISD::TargetConstant)
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  bool HasMultiUse = false;
  if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
    if (Depth >= SelectionDAG::MaxRecursionDepth) {
      // Limit search depth.
      return false;
    }
    // Allow multiple uses, just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnes(BitWidth);
    DemandedElts = APInt::getAllOnes(NumElts);
    HasMultiUse = true;
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (VT.isScalableVector())
      return false;
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (VT.isScalableVector())
      return false;
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
        !DemandedSrcElts.isAllOnes()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    if (VT.isScalableVector())
      return false;
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);

    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (DemandedSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    if (VT.isScalableVector())
      return false;
    Known.Zero.setAllBits();
    Known.One.setAllBits();
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      APInt DemandedSubElts =
          DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
                               Known2, TLO, Depth + 1))
        return true;
      // Known bits are shared by every demanded subvector element.
      if (!!DemandedSubElts)
        Known = KnownBits::commonBits(Known, Known2);
    }
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    assert(!VT.isScalableVector());
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands.
    APInt DemandedLHS, DemandedRHS;
    if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
                                DemandedRHS))
      break;

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
    // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
1440 case ISD::OR: {
1441 SDValue Op0 = Op.getOperand(0);
1442 SDValue Op1 = Op.getOperand(1);
1443
1444 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1445 Depth + 1))
1446 return true;
1447 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1448 if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
1449 Known2, TLO, Depth + 1))
1450 return true;
1451 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1452
1453 // If all of the demanded bits are known zero on one side, return the other.
1454 // These bits cannot contribute to the result of the 'or'.
1455 if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
1456 return TLO.CombineTo(Op, Op0);
1457 if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
1458 return TLO.CombineTo(Op, Op1);
1459 // If the RHS is a constant, see if we can simplify it.
1460 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1461 return true;
1462 // If the operation can be done in a smaller type, do so.
1463 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1464 return true;
1465
1466 // Attempt to avoid multi-use ops if we don't need anything from them.
1467 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1468 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1469 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1470 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1471 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1472 if (DemandedOp0 || DemandedOp1) {
1473 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1474 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1475 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1476 return TLO.CombineTo(Op, NewOp);
1477 }
1478 }
1479
1480 // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
1481 // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
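// e.g. (or (and X, 0x0F), (and (or X, Y), 0xF0))
//        --> (or (and X, 0xFF), (and Y, 0xF0))
// since (X | Y) & 0xF0 == (X & 0xF0) | (Y & 0xF0).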
1482 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
1483 Op0->hasOneUse() && Op1->hasOneUse()) {
1484 // Attempt to match all commutations - m_c_Or would've been useful!
1485 for (int I = 0; I != 2; ++I) {
1486 SDValue X = Op.getOperand(I).getOperand(0);
1487 SDValue C1 = Op.getOperand(I).getOperand(1);
1488 SDValue Alt = Op.getOperand(1 - I).getOperand(0);
1489 SDValue C2 = Op.getOperand(1 - I).getOperand(1);
1490 if (Alt.getOpcode() == ISD::OR) {
1491 for (int J = 0; J != 2; ++J) {
1492 if (X == Alt.getOperand(J)) {
1493 SDValue Y = Alt.getOperand(1 - J);
1494 if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
1495 {C1, C2})) {
1496 SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
1497 SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
1498 return TLO.CombineTo(
1499 Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
1500 }
1501 }
1502 }
1503 }
1504 }
1505 }
1506
1507 Known |= Known2;
1508 break;
1509 }
1510 case ISD::XOR: {
1511 SDValue Op0 = Op.getOperand(0);
1512 SDValue Op1 = Op.getOperand(1);
1513
1514 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
1515 Depth + 1))
1516 return true;
1517 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1518 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
1519 Depth + 1))
1520 return true;
1521 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1522
1523 // If all of the demanded bits are known zero on one side, return the other.
1524 // These bits cannot contribute to the result of the 'xor'.
1525 if (DemandedBits.isSubsetOf(Known.Zero))
1526 return TLO.CombineTo(Op, Op0);
1527 if (DemandedBits.isSubsetOf(Known2.Zero))
1528 return TLO.CombineTo(Op, Op1);
1529 // If the operation can be done in a smaller type, do so.
1530 if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1531 return true;
1532
1533 // If all of the unknown bits are known to be zero on one side or the other,
1534 // turn this into an *inclusive* or.
1535 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
1536 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
1537 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));
1538
1539 ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
1540 if (C) {
1541 // If one side is a constant, and all of the set bits in the constant are
1542 // also known set on the other side, turn this into an AND, as we know
1543 // the bits will be cleared.
1544 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
1545 // NB: it is okay if more bits are known than are requested
1546 if (C->getAPIntValue() == Known2.One) {
1547 SDValue ANDC =
1548 TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
1549 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
1550 }
1551
1552 // If the RHS is a constant, see if we can change it. Don't alter a -1
1553 // constant because that's a 'not' op, and that is better for combining
1554 // and codegen.
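// e.g. for 8-bit X with DemandedBits = 0x0F: (xor X, 0x1F) flips every
// demanded bit, so it can be replaced by (not X), i.e. (xor X, 0xFF).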
1555 if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
1556 // We're flipping all demanded bits. Flip the undemanded bits too.
1557 SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
1558 return TLO.CombineTo(Op, New);
1559 }
1560
1561 unsigned Op0Opcode = Op0.getOpcode();
1562 if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
1563 if (ConstantSDNode *ShiftC =
1564 isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
1565 // Don't crash on an oversized shift. We cannot guarantee that a
1566 // bogus shift has been simplified to undef.
1567 if (ShiftC->getAPIntValue().ult(BitWidth)) {
1568 uint64_t ShiftAmt = ShiftC->getZExtValue();
1569 APInt Ones = APInt::getAllOnes(BitWidth);
1570 Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
1571 : Ones.lshr(ShiftAmt);
1572 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
1573 if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
1574 TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
1575 // If the xor constant is a demanded mask, do a 'not' before the
1576 // shift:
1577 // xor (X << ShiftC), XorC --> (not X) << ShiftC
1578 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
1579 SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
1580 return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
1581 Op0.getOperand(1)));
1582 }
1583 }
1584 }
1585 }
1586 }
1587
1588 // If we can't turn this into a 'not', try to shrink the constant.
1589 if (!C || !C->isAllOnes())
1590 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1591 return true;
1592
1593 // Attempt to avoid multi-use ops if we don't need anything from them.
1594 if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
1595 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1596 Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1597 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1598 Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
1599 if (DemandedOp0 || DemandedOp1) {
1600 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1601 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1602 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
1603 return TLO.CombineTo(Op, NewOp);
1604 }
1605 }
1606
1607 Known ^= Known2;
1608 break;
1609 }
1610 case ISD::SELECT:
1611 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
1612 Depth + 1))
1613 return true;
1614 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
1615 Depth + 1))
1616 return true;
1617 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1618 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1619
1620 // If the operands are constants, see if we can simplify them.
1621 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1622 return true;
1623
1624 // Only known if known in both the LHS and RHS.
1625 Known = KnownBits::commonBits(Known, Known2);
1626 break;
1627 case ISD::VSELECT:
1628 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
1629 Known, TLO, Depth + 1))
1630 return true;
1631 if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
1632 Known2, TLO, Depth + 1))
1633 return true;
1634 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1635 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1636
1637 // Only known if known in both the LHS and RHS.
1638 Known = KnownBits::commonBits(Known, Known2);
1639 break;
1640 case ISD::SELECT_CC:
1641 if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
1642 Depth + 1))
1643 return true;
1644 if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
1645 Depth + 1))
1646 return true;
1647 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1648 assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
1649
1650 // If the operands are constants, see if we can simplify them.
1651 if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
1652 return true;
1653
1654 // Only known if known in both the LHS and RHS.
1655 Known = KnownBits::commonBits(Known, Known2);
1656 break;
1657 case ISD::SETCC: {
1658 SDValue Op0 = Op.getOperand(0);
1659 SDValue Op1 = Op.getOperand(1);
1660 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1661 // If (1) we only need the sign-bit, (2) the setcc operands are the same
1662 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
1663 // -1, we may be able to bypass the setcc.
1664 if (DemandedBits.isSignMask() &&
1665 Op0.getScalarValueSizeInBits() == BitWidth &&
1666 getBooleanContents(Op0.getValueType()) ==
1667 BooleanContent::ZeroOrNegativeOneBooleanContent) {
1668 // If we're testing X < 0, then this compare isn't needed - just use X!
1669 // FIXME: We're limiting to integer types here, but this should also work
1670 // if we don't care about FP signed-zero. The use of SETLT with FP means
1671 // that we don't care about NaNs.
1672 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
1673 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
1674 return TLO.CombineTo(Op, Op0);
1675
1676 // TODO: Should we check for other forms of sign-bit comparisons?
1677 // Examples: X <= -1, X >= 0
1678 }
1679 if (getBooleanContents(Op0.getValueType()) ==
1680 TargetLowering::ZeroOrOneBooleanContent &&
1681 BitWidth > 1)
1682 Known.Zero.setBitsFrom(1);
1683 break;
1684 }
1685 case ISD::SHL: {
1686 SDValue Op0 = Op.getOperand(0);
1687 SDValue Op1 = Op.getOperand(1);
1688 EVT ShiftVT = Op1.getValueType();
1689
1690 if (const APInt *SA =
1691 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1692 unsigned ShAmt = SA->getZExtValue();
1693 if (ShAmt == 0)
1694 return TLO.CombineTo(Op, Op0);
1695
1696 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
1697 // single shift. We can do this if the bottom bits (which are shifted
1698 // out) are never demanded.
1699 // TODO - support non-uniform vector amounts.
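// e.g. (shl (srl X, 5), 3) where the low 3 result bits are not demanded
// --> (srl X, 2): each demanded bit i of either expression is bit i+2 of X.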
1700 if (Op0.getOpcode() == ISD::SRL) {
1701 if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
1702 if (const APInt *SA2 =
1703 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1704 unsigned C1 = SA2->getZExtValue();
1705 unsigned Opc = ISD::SHL;
1706 int Diff = ShAmt - C1;
1707 if (Diff < 0) {
1708 Diff = -Diff;
1709 Opc = ISD::SRL;
1710 }
1711 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1712 return TLO.CombineTo(
1713 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1714 }
1715 }
1716 }
1717
1718 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
1719 // are not demanded. This will likely allow the anyext to be folded away.
1720 // TODO - support non-uniform vector amounts.
1721 if (Op0.getOpcode() == ISD::ANY_EXTEND) {
1722 SDValue InnerOp = Op0.getOperand(0);
1723 EVT InnerVT = InnerOp.getValueType();
1724 unsigned InnerBits = InnerVT.getScalarSizeInBits();
1725 if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
1726 isTypeDesirableForOp(ISD::SHL, InnerVT)) {
1727 SDValue NarrowShl = TLO.DAG.getNode(
1728 ISD::SHL, dl, InnerVT, InnerOp,
1729 TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
1730 return TLO.CombineTo(
1731 Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
1732 }
1733
1734 // Repeat the SHL optimization above in cases where an extension
1735 // intervenes: (shl (anyext (shr x, c1)), c2) to
1736 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
1737 // aren't demanded (as above) and that the shifted upper c1 bits of
1738 // x aren't demanded.
1739 // TODO - support non-uniform vector amounts.
1740 if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
1741 InnerOp.hasOneUse()) {
1742 if (const APInt *SA2 =
1743 TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
1744 unsigned InnerShAmt = SA2->getZExtValue();
1745 if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
1746 DemandedBits.getActiveBits() <=
1747 (InnerBits - InnerShAmt + ShAmt) &&
1748 DemandedBits.countTrailingZeros() >= ShAmt) {
1749 SDValue NewSA =
1750 TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
1751 SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
1752 InnerOp.getOperand(0));
1753 return TLO.CombineTo(
1754 Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
1755 }
1756 }
1757 }
1758 }
1759
1760 APInt InDemandedMask = DemandedBits.lshr(ShAmt);
1761 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1762 Depth + 1))
1763 return true;
1764 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1765 Known.Zero <<= ShAmt;
1766 Known.One <<= ShAmt;
1767 // low bits known zero.
1768 Known.Zero.setLowBits(ShAmt);
1769
1770 // Attempt to avoid multi-use ops if we don't need anything from them.
1771       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1772 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1773 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1774 if (DemandedOp0) {
1775 SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
1776 return TLO.CombineTo(Op, NewOp);
1777 }
1778 }
1779
1780 // Try shrinking the operation as long as the shift amount will still be
1781 // in range.
1782 if ((ShAmt < DemandedBits.getActiveBits()) &&
1783 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
1784 return true;
1785 } else {
1786 // This is a variable shift, so we can't shift the demand mask by a known
1787 // amount. But if we are not demanding high bits, then we are not
1788 // demanding those bits from the pre-shifted operand either.
1789 if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
1790 APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
1791 if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
1792 Depth + 1)) {
1793 SDNodeFlags Flags = Op.getNode()->getFlags();
1794 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
1795 // Disable the nsw and nuw flags. We can no longer guarantee that we
1796 // won't wrap after simplification.
1797 Flags.setNoSignedWrap(false);
1798 Flags.setNoUnsignedWrap(false);
1799 Op->setFlags(Flags);
1800 }
1801 return true;
1802 }
1803 Known.resetAll();
1804 }
1805 }
1806
1807 // If we are only demanding sign bits then we can use the shift source
1808 // directly.
1809 if (const APInt *MaxSA =
1810 TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
1811 unsigned ShAmt = MaxSA->getZExtValue();
1812 unsigned NumSignBits =
1813 TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
1814 unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1815       if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
1816 return TLO.CombineTo(Op, Op0);
1817 }
1818 break;
1819 }
1820 case ISD::SRL: {
1821 SDValue Op0 = Op.getOperand(0);
1822 SDValue Op1 = Op.getOperand(1);
1823 EVT ShiftVT = Op1.getValueType();
1824
1825 // Try to match AVG patterns.
1826 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1827 DemandedElts, Depth + 1))
1828 return TLO.CombineTo(Op, AVG);
1829
1830 if (const APInt *SA =
1831 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1832 unsigned ShAmt = SA->getZExtValue();
1833 if (ShAmt == 0)
1834 return TLO.CombineTo(Op, Op0);
1835
1836 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
1837 // single shift. We can do this if the top bits (which are shifted out)
1838 // are never demanded.
1839 // TODO - support non-uniform vector amounts.
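// e.g. (srl (shl X, 5), 3) where the top 3 result bits are not demanded
// --> (shl X, 2): each demanded bit i of either expression is bit i-2 of X.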
1840 if (Op0.getOpcode() == ISD::SHL) {
1841 if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
1842 if (const APInt *SA2 =
1843 TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
1844 unsigned C1 = SA2->getZExtValue();
1845 unsigned Opc = ISD::SRL;
1846 int Diff = ShAmt - C1;
1847 if (Diff < 0) {
1848 Diff = -Diff;
1849 Opc = ISD::SHL;
1850 }
1851 SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
1852 return TLO.CombineTo(
1853 Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
1854 }
1855 }
1856 }
1857
1858 APInt InDemandedMask = (DemandedBits << ShAmt);
1859
1860 // If the shift is exact, then it does demand the low bits (and knows that
1861 // they are zero).
1862 if (Op->getFlags().hasExact())
1863 InDemandedMask.setLowBits(ShAmt);
1864
1865 // Compute the new bits that are at the top now.
1866 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1867 Depth + 1))
1868 return true;
1869 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1870 Known.Zero.lshrInPlace(ShAmt);
1871 Known.One.lshrInPlace(ShAmt);
1872 // High bits known zero.
1873 Known.Zero.setHighBits(ShAmt);
1874
1875 // Attempt to avoid multi-use ops if we don't need anything from them.
1876       if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1877 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1878 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1879 if (DemandedOp0) {
1880 SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
1881 return TLO.CombineTo(Op, NewOp);
1882 }
1883 }
1884 }
1885 break;
1886 }
1887 case ISD::SRA: {
1888 SDValue Op0 = Op.getOperand(0);
1889 SDValue Op1 = Op.getOperand(1);
1890 EVT ShiftVT = Op1.getValueType();
1891
1892 // If we only want bits that already match the signbit then we don't need
1893 // to shift.
1894 unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
1895 if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
1896 NumHiDemandedBits)
1897 return TLO.CombineTo(Op, Op0);
1898
1899 // If this is an arithmetic shift right and only the low-bit is set, we can
1900 // always convert this into a logical shr, even if the shift amount is
1901 // variable. The low bit of the shift cannot be an input sign bit unless
1902 // the shift amount is >= the size of the datatype, which is undefined.
1903 if (DemandedBits.isOne())
1904 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
1905
1906 // Try to match AVG patterns.
1907 if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
1908 DemandedElts, Depth + 1))
1909 return TLO.CombineTo(Op, AVG);
1910
1911 if (const APInt *SA =
1912 TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
1913 unsigned ShAmt = SA->getZExtValue();
1914 if (ShAmt == 0)
1915 return TLO.CombineTo(Op, Op0);
1916
1917 APInt InDemandedMask = (DemandedBits << ShAmt);
1918
1919 // If the shift is exact, then it does demand the low bits (and knows that
1920 // they are zero).
1921 if (Op->getFlags().hasExact())
1922 InDemandedMask.setLowBits(ShAmt);
1923
1924 // If any of the demanded bits are produced by the sign extension, we also
1925 // demand the input sign bit.
1926 if (DemandedBits.countLeadingZeros() < ShAmt)
1927 InDemandedMask.setSignBit();
1928
1929 if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
1930 Depth + 1))
1931 return true;
1932 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1933 Known.Zero.lshrInPlace(ShAmt);
1934 Known.One.lshrInPlace(ShAmt);
1935
1936 // If the input sign bit is known to be zero, or if none of the top bits
1937 // are demanded, turn this into an unsigned shift right.
1938 if (Known.Zero[BitWidth - ShAmt - 1] ||
1939 DemandedBits.countLeadingZeros() >= ShAmt) {
1940 SDNodeFlags Flags;
1941 Flags.setExact(Op->getFlags().hasExact());
1942 return TLO.CombineTo(
1943 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
1944 }
1945
1946 int Log2 = DemandedBits.exactLogBase2();
1947 if (Log2 >= 0) {
1948 // The bit must come from the sign.
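// e.g. for 8-bit X: if only bit 6 of (sra X, 3) is demanded, that bit is
// a copy of the sign bit, so (srl X, 8 - 1 - 6) = (srl X, 1) places X[7]
// at bit 6.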
1949 SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
1950 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
1951 }
1952
1953 if (Known.One[BitWidth - ShAmt - 1])
1954 // New bits are known one.
1955 Known.One.setHighBits(ShAmt);
1956
1957 // Attempt to avoid multi-use ops if we don't need anything from them.
1958 if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
1959 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1960 Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
1961 if (DemandedOp0) {
1962 SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
1963 return TLO.CombineTo(Op, NewOp);
1964 }
1965 }
1966 }
1967 break;
1968 }
1969 case ISD::FSHL:
1970 case ISD::FSHR: {
1971 SDValue Op0 = Op.getOperand(0);
1972 SDValue Op1 = Op.getOperand(1);
1973 SDValue Op2 = Op.getOperand(2);
1974 bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
1975
1976 if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
1977 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
1978
1979 // For fshl, 0-shift returns the 1st arg.
1980 // For fshr, 0-shift returns the 2nd arg.
1981 if (Amt == 0) {
1982 if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
1983 Known, TLO, Depth + 1))
1984 return true;
1985 break;
1986 }
1987
1988 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1989 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
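// e.g. fshl(X, Y, 8) on i32 computes (X << 8) | (Y >> 24), so we demand
// (DemandedBits >> 8) from X and (DemandedBits << 24) from Y.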
1990 APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
1991 APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
1992 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
1993 Depth + 1))
1994 return true;
1995 if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
1996 Depth + 1))
1997 return true;
1998
1999 Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
2000 Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
2001 Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2002 Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
2003 Known.One |= Known2.One;
2004 Known.Zero |= Known2.Zero;
2005
2006 // Attempt to avoid multi-use ops if we don't need anything from them.
2007 if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
2008 !DemandedElts.isAllOnes()) {
2009 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2010 Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
2011 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2012 Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
2013 if (DemandedOp0 || DemandedOp1) {
2014 DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
2015 DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
2016 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
2017 DemandedOp1, Op2);
2018 return TLO.CombineTo(Op, NewOp);
2019 }
2020 }
2021 }
2022
2023     // For pow-2 bitwidths we only demand the low bits of the shift amount, as
2023     // the amount is used modulo BitWidth.
2024 if (isPowerOf2_32(BitWidth)) {
2025 APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
2026 if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
2027 Known2, TLO, Depth + 1))
2028 return true;
2029 }
2030 break;
2031 }
2032 case ISD::ROTL:
2033 case ISD::ROTR: {
2034 SDValue Op0 = Op.getOperand(0);
2035 SDValue Op1 = Op.getOperand(1);
2036 bool IsROTL = (Op.getOpcode() == ISD::ROTL);
2037
2038     // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
2039 if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
2040 return TLO.CombineTo(Op, Op0);
2041
2042 if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
2043 unsigned Amt = SA->getAPIntValue().urem(BitWidth);
2044 unsigned RevAmt = BitWidth - Amt;
2045
2046 // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
2047 // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
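// e.g. rotl(X, 8) on i32 computes (X << 8) | (X >> 24), so the bits
// demanded from X are DemandedBits rotated right by 8.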
2048 APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
2049 if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
2050 Depth + 1))
2051 return true;
2052
2053 // rot*(x, 0) --> x
2054 if (Amt == 0)
2055 return TLO.CombineTo(Op, Op0);
2056
2057 // See if we don't demand either half of the rotated bits.
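// e.g. rotl(X, 8) on i32 where the low 8 result bits are not demanded:
// the wrapped-around (X >> 24) half is dead, leaving (shl X, 8).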
2058 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
2059 DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
2060 Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
2061 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
2062 }
2063 if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
2064 DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
2065 Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
2066 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
2067 }
2068 }
2069
2070     // For pow-2 bitwidths we only demand the low bits of the shift amount, as
2070     // the amount is used modulo BitWidth.
2071 if (isPowerOf2_32(BitWidth)) {
2072 APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
2073 if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
2074 Depth + 1))
2075 return true;
2076 }
2077 break;
2078 }
2079 case ISD::UMIN: {
2080 // Check if one arg is always less than (or equal) to the other arg.
2081 SDValue Op0 = Op.getOperand(0);
2082 SDValue Op1 = Op.getOperand(1);
2083 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2084 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2085 Known = KnownBits::umin(Known0, Known1);
2086 if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
2087 return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
2088 if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
2089 return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
2090 break;
2091 }
2092 case ISD::UMAX: {
2093 // Check if one arg is always greater than (or equal) to the other arg.
2094 SDValue Op0 = Op.getOperand(0);
2095 SDValue Op1 = Op.getOperand(1);
2096 KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
2097 KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
2098 Known = KnownBits::umax(Known0, Known1);
2099 if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
2100 return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
2101 if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
2102 return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
2103 break;
2104 }
2105 case ISD::BITREVERSE: {
2106 SDValue Src = Op.getOperand(0);
2107 APInt DemandedSrcBits = DemandedBits.reverseBits();
2108 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2109 Depth + 1))
2110 return true;
2111 Known.One = Known2.One.reverseBits();
2112 Known.Zero = Known2.Zero.reverseBits();
2113 break;
2114 }
2115 case ISD::BSWAP: {
2116 SDValue Src = Op.getOperand(0);
2117
2118 // If the only bits demanded come from one byte of the bswap result,
2119 // just shift the input byte into position to eliminate the bswap.
2120 unsigned NLZ = DemandedBits.countLeadingZeros();
2121 unsigned NTZ = DemandedBits.countTrailingZeros();
2122
2123     // Round NTZ down to the nearest byte boundary: with 11 trailing zeros we
2124     // still need all the bits down to bit 8. Likewise round NLZ down: with
2125     // 14 leading zeros, round to 8.
2126 NLZ = alignDown(NLZ, 8);
2127 NTZ = alignDown(NTZ, 8);
2128 // If we need exactly one byte, we can do this transformation.
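// e.g. i32 bswap with DemandedBits = 0x0000FF00 (NLZ = 16, NTZ = 8):
// the demanded result byte 1 is input byte 2, so emit (srl Src, 8).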
2129 if (BitWidth - NLZ - NTZ == 8) {
2130 // Replace this with either a left or right shift to get the byte into
2131 // the right place.
2132 unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
2133 if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
2134 EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
2135 unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
2136 SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
2137 SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
2138 return TLO.CombineTo(Op, NewOp);
2139 }
2140 }
2141
2142 APInt DemandedSrcBits = DemandedBits.byteSwap();
2143 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
2144 Depth + 1))
2145 return true;
2146 Known.One = Known2.One.byteSwap();
2147 Known.Zero = Known2.Zero.byteSwap();
2148 break;
2149 }
2150 case ISD::CTPOP: {
2151 // If only 1 bit is demanded, replace with PARITY as long as we're before
2152 // op legalization.
2153 // FIXME: Limit to scalars for now.
2154 if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
2155 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
2156 Op.getOperand(0)));
2157
2158 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2159 break;
2160 }
2161 case ISD::SIGN_EXTEND_INREG: {
2162 SDValue Op0 = Op.getOperand(0);
2163 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2164 unsigned ExVTBits = ExVT.getScalarSizeInBits();
2165
2166 // If we only care about the highest bit, don't bother shifting right.
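// e.g. for (sext_inreg X, i8) on i32 with only the sign mask demanded:
// (shl X, 24) already places the i8 sign bit X[7] at bit 31.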
2167 if (DemandedBits.isSignMask()) {
2168 unsigned MinSignedBits =
2169 TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
2170 bool AlreadySignExtended = ExVTBits >= MinSignedBits;
2171 // However if the input is already sign extended we expect the sign
2172 // extension to be dropped altogether later and do not simplify.
2173 if (!AlreadySignExtended) {
2174 // Compute the correct shift amount type, which must be getShiftAmountTy
2175 // for scalar types after legalization.
2176 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
2177 getShiftAmountTy(VT, DL));
2178 return TLO.CombineTo(Op,
2179 TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
2180 }
2181 }
2182
2183 // If none of the extended bits are demanded, eliminate the sextinreg.
2184 if (DemandedBits.getActiveBits() <= ExVTBits)
2185 return TLO.CombineTo(Op, Op0);
2186
2187 APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);
2188
2189 // Since the sign extended bits are demanded, we know that the sign
2190 // bit is demanded.
2191 InputDemandedBits.setBit(ExVTBits - 1);
2192
2193 if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
2194 Depth + 1))
2195 return true;
2196 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2197
2198 // If the sign bit of the input is known set or clear, then we know the
2199 // top bits of the result.
2200
2201 // If the input sign bit is known zero, convert this into a zero extension.
2202 if (Known.Zero[ExVTBits - 1])
2203 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));
2204
2205 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
2206 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
2207 Known.One.setBitsFrom(ExVTBits);
2208 Known.Zero &= Mask;
2209 } else { // Input sign bit unknown
2210 Known.Zero &= Mask;
2211 Known.One &= Mask;
2212 }
2213 break;
2214 }
2215 case ISD::BUILD_PAIR: {
2216 EVT HalfVT = Op.getOperand(0).getValueType();
2217 unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();
2218
2219 APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
2220 APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);
2221
2222 KnownBits KnownLo, KnownHi;
2223
2224 if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
2225 return true;
2226
2227 if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
2228 return true;
2229
2230 Known = KnownHi.concat(KnownLo);
2231 break;
2232 }
2233 case ISD::ZERO_EXTEND_VECTOR_INREG:
2234 if (VT.isScalableVector())
2235 return false;
2236 [[fallthrough]];
2237 case ISD::ZERO_EXTEND: {
2238 SDValue Src = Op.getOperand(0);
2239 EVT SrcVT = Src.getValueType();
2240 unsigned InBits = SrcVT.getScalarSizeInBits();
2241 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2242 bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
2243
2244 // If none of the top bits are demanded, convert this into an any_extend.
2245 if (DemandedBits.getActiveBits() <= InBits) {
2246 // If we only need the non-extended bits of the bottom element
2247 // then we can just bitcast to the result.
2248 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2249 VT.getSizeInBits() == SrcVT.getSizeInBits())
2250 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2251
2252 unsigned Opc =
2253 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2254 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2255 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2256 }
2257
2258 APInt InDemandedBits = DemandedBits.trunc(InBits);
2259 APInt InDemandedElts = DemandedElts.zext(InElts);
2260 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2261 Depth + 1))
2262 return true;
2263 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2264 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2265 Known = Known.zext(BitWidth);
2266
2267 // Attempt to avoid multi-use ops if we don't need anything from them.
2268 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2269 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2270 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2271 break;
2272 }
2273 case ISD::SIGN_EXTEND_VECTOR_INREG:
2274 if (VT.isScalableVector())
2275 return false;
2276 [[fallthrough]];
2277 case ISD::SIGN_EXTEND: {
2278 SDValue Src = Op.getOperand(0);
2279 EVT SrcVT = Src.getValueType();
2280 unsigned InBits = SrcVT.getScalarSizeInBits();
2281 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2282 bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
2283
2284 // If none of the top bits are demanded, convert this into an any_extend.
2285 if (DemandedBits.getActiveBits() <= InBits) {
2286 // If we only need the non-extended bits of the bottom element
2287 // then we can just bitcast to the result.
2288 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2289 VT.getSizeInBits() == SrcVT.getSizeInBits())
2290 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2291
2292 unsigned Opc =
2293 IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
2294 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2295 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2296 }
2297
2298 APInt InDemandedBits = DemandedBits.trunc(InBits);
2299 APInt InDemandedElts = DemandedElts.zext(InElts);
2300
2301 // Since some of the sign extended bits are demanded, we know that the sign
2302 // bit is demanded.
2303 InDemandedBits.setBit(InBits - 1);
2304
2305 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2306 Depth + 1))
2307 return true;
2308 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2309 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2310
2311 // If the sign bit is known one, the top bits match.
2312 Known = Known.sext(BitWidth);
2313
2314 // If the sign bit is known zero, convert this to a zero extend.
2315 if (Known.isNonNegative()) {
2316 unsigned Opc =
2317 IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
2318 if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
2319 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
2320 }
2321
2322 // Attempt to avoid multi-use ops if we don't need anything from them.
2323 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2324 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2325 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2326 break;
2327 }
2328 case ISD::ANY_EXTEND_VECTOR_INREG:
2329 if (VT.isScalableVector())
2330 return false;
2331 [[fallthrough]];
2332 case ISD::ANY_EXTEND: {
2333 SDValue Src = Op.getOperand(0);
2334 EVT SrcVT = Src.getValueType();
2335 unsigned InBits = SrcVT.getScalarSizeInBits();
2336 unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
2337 bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
2338
2339 // If we only need the bottom element then we can just bitcast.
2340 // TODO: Handle ANY_EXTEND?
2341 if (IsLE && IsVecInReg && DemandedElts == 1 &&
2342 VT.getSizeInBits() == SrcVT.getSizeInBits())
2343 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
2344
2345 APInt InDemandedBits = DemandedBits.trunc(InBits);
2346 APInt InDemandedElts = DemandedElts.zext(InElts);
2347 if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
2348 Depth + 1))
2349 return true;
2350 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2351 assert(Known.getBitWidth() == InBits && "Src width has changed?");
2352 Known = Known.anyext(BitWidth);
2353
2354 // Attempt to avoid multi-use ops if we don't need anything from them.
2355 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2356 Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
2357 return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
2358 break;
2359 }
2360 case ISD::TRUNCATE: {
2361 SDValue Src = Op.getOperand(0);
2362
2363 // Simplify the input, using demanded bit information, and compute the known
2364 // zero/one bits live out.
2365 unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
2366 APInt TruncMask = DemandedBits.zext(OperandBitWidth);
2367 if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
2368 Depth + 1))
2369 return true;
2370 Known = Known.trunc(BitWidth);
2371
2372 // Attempt to avoid multi-use ops if we don't need anything from them.
2373 if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
2374 Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
2375 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
2376
2377 // If the input is only used by this truncate, see if we can shrink it based
2378 // on the known demanded bits.
2379 switch (Src.getOpcode()) {
2380 default:
2381 break;
2382 case ISD::SRL:
2383 // Shrink SRL by a constant if none of the high bits shifted in are
2384 // demanded.
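// e.g. (trunc (srl X:i64, 16) to i32) where only the low 16 result bits
// are demanded --> (srl (trunc X to i32), 16).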
2385 if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
2386 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
2387 // undesirable.
2388 break;
2389
2390 if (Src.getNode()->hasOneUse()) {
2391 const APInt *ShAmtC =
2392 TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
2393 if (!ShAmtC || ShAmtC->uge(BitWidth))
2394 break;
2395 uint64_t ShVal = ShAmtC->getZExtValue();
2396
2397 APInt HighBits =
2398 APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
2399 HighBits.lshrInPlace(ShVal);
2400 HighBits = HighBits.trunc(BitWidth);
2401
2402 if (!(HighBits & DemandedBits)) {
2403 // None of the shifted in bits are needed. Add a truncate of the
2404 // shift input, then shift it.
2405 SDValue NewShAmt = TLO.DAG.getConstant(
2406 ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
2407 SDValue NewTrunc =
2408 TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
2409 return TLO.CombineTo(
2410 Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
2411 }
2412 }
2413 break;
2414 }
2415
2416 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2417 break;
2418 }
2419 case ISD::AssertZext: {
2420 // AssertZext demands all of the high bits, plus any of the low bits
2421 // demanded by its users.
2422 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2423 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
2424 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
2425 TLO, Depth + 1))
2426 return true;
2427 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
2428
2429 Known.Zero |= ~InMask;
2430 Known.One &= (~Known.Zero);
2431 break;
2432 }
2433 case ISD::EXTRACT_VECTOR_ELT: {
2434 SDValue Src = Op.getOperand(0);
2435 SDValue Idx = Op.getOperand(1);
2436 ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
2437 unsigned EltBitWidth = Src.getScalarValueSizeInBits();
2438
2439 if (SrcEltCnt.isScalable())
2440 return false;
2441
2442     // Without a constant index, demand the bits from every vector element.
2443 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2444 APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
2445 if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
2446 if (CIdx->getAPIntValue().ult(NumSrcElts))
2447 DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
2448
2449     // If BitWidth > EltBitWidth the value is any-extended, so we do not know
2450     // anything about the extended bits.
2451 APInt DemandedSrcBits = DemandedBits;
2452 if (BitWidth > EltBitWidth)
2453 DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);
2454
2455 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
2456 Depth + 1))
2457 return true;
2458
2459 // Attempt to avoid multi-use ops if we don't need anything from them.
2460 if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
2461 if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
2462 Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
2463 SDValue NewOp =
2464 TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
2465 return TLO.CombineTo(Op, NewOp);
2466 }
2467 }
2468
2469 Known = Known2;
2470 if (BitWidth > EltBitWidth)
2471 Known = Known.anyext(BitWidth);
2472 break;
2473 }
2474 case ISD::BITCAST: {
2475 if (VT.isScalableVector())
2476 return false;
2477 SDValue Src = Op.getOperand(0);
2478 EVT SrcVT = Src.getValueType();
2479 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
2480
2481 // If this is an FP->Int bitcast and if the sign bit is the only
2482 // thing demanded, turn this into a FGETSIGN.
2483 if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
2484 DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
2485 SrcVT.isFloatingPoint()) {
2486 bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
2487 bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
2488 if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
2489 SrcVT != MVT::f128) {
2490 // Cannot eliminate/lower SHL for f128 yet.
2491 EVT Ty = OpVTLegal ? VT : MVT::i32;
2492 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
2493 // place. We expect the SHL to be eliminated by other optimizations.
2494 SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
2495 unsigned OpVTSizeInBits = Op.getValueSizeInBits();
2496 if (!OpVTLegal && OpVTSizeInBits > 32)
2497 Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
2498 unsigned ShVal = Op.getValueSizeInBits() - 1;
2499 SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
2500 return TLO.CombineTo(Op,
2501 TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
2502 }
2503 }
2504
2505     // Bitcast from a vector using SimplifyDemandedBits/VectorElts.
2506 // Demand the elt/bit if any of the original elts/bits are demanded.
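// e.g. (little endian) bitcast v4i32 to v2i64: demanding the low 32 bits
// of i64 element 1 demands all of i32 source element 2.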
2507 if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
2508 unsigned Scale = BitWidth / NumSrcEltBits;
2509 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2510 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2511 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2512 for (unsigned i = 0; i != Scale; ++i) {
2513 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
2514 unsigned BitOffset = EltOffset * NumSrcEltBits;
2515 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
2516 if (!Sub.isZero()) {
2517 DemandedSrcBits |= Sub;
2518 for (unsigned j = 0; j != NumElts; ++j)
2519 if (DemandedElts[j])
2520 DemandedSrcElts.setBit((j * Scale) + i);
2521 }
2522 }
2523
2524 APInt KnownSrcUndef, KnownSrcZero;
2525 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2526 KnownSrcZero, TLO, Depth + 1))
2527 return true;
2528
2529 KnownBits KnownSrcBits;
2530 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2531 KnownSrcBits, TLO, Depth + 1))
2532 return true;
2533 } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
2534 // TODO - bigendian once we have test coverage.
2535 unsigned Scale = NumSrcEltBits / BitWidth;
2536 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
2537 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
2538 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
2539 for (unsigned i = 0; i != NumElts; ++i)
2540 if (DemandedElts[i]) {
2541 unsigned Offset = (i % Scale) * BitWidth;
2542 DemandedSrcBits.insertBits(DemandedBits, Offset);
2543 DemandedSrcElts.setBit(i / Scale);
2544 }
2545
2546 if (SrcVT.isVector()) {
2547 APInt KnownSrcUndef, KnownSrcZero;
2548 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
2549 KnownSrcZero, TLO, Depth + 1))
2550 return true;
2551 }
2552
2553 KnownBits KnownSrcBits;
2554 if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
2555 KnownSrcBits, TLO, Depth + 1))
2556 return true;
2557 }
2558
2559 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
2560 // recursive call where Known may be useful to the caller.
2561 if (Depth > 0) {
2562 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2563 return false;
2564 }
2565 break;
2566 }
2567 case ISD::MUL:
2568 if (DemandedBits.isPowerOf2()) {
2569 // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
2570 // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
2571 // odd (has LSB set), then the left-shifted low bit of X is the answer.
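// e.g. X * 24 with only bit 3 demanded: 24 = 3 << 3 with 3 odd, so bit 3
// of the product is bit 0 of X, and (shl X, 3) produces the same bit.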
2572 unsigned CTZ = DemandedBits.countTrailingZeros();
2573 ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
2574 if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
2575 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2576 SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
2577 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
2578 return TLO.CombineTo(Op, Shl);
2579 }
2580 }
2581 // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
2582 // X * X is odd iff X is odd.
2583 // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
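// (writing X = 2a + x0 with x0 = X[0]: X*X = 4*(a*a + a*x0) + x0, so
// bit 0 is x0 and bit 1 is always zero).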
2584 if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
2585 SDValue One = TLO.DAG.getConstant(1, dl, VT);
2586 SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
2587 return TLO.CombineTo(Op, And1);
2588 }
2589 [[fallthrough]];
2590 case ISD::ADD:
2591 case ISD::SUB: {
2592 // Add, Sub, and Mul don't demand any bits in positions beyond that
2593 // of the highest bit demanded of them.
2594 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
2595 SDNodeFlags Flags = Op.getNode()->getFlags();
2596 unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
2597 APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
2598 if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
2599 Depth + 1) ||
2600 SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
2601 Depth + 1) ||
2602 // See if the operation should be performed at a smaller bit width.
2603 ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
2604 if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
2605 // Disable the nsw and nuw flags. We can no longer guarantee that we
2606 // won't wrap after simplification.
2607 Flags.setNoSignedWrap(false);
2608 Flags.setNoUnsignedWrap(false);
2609 Op->setFlags(Flags);
2610 }
2611 return true;
2612 }
2613
2614 // neg x with only low bit demanded is simply x.
2615 if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
2616 isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero())
2617 return TLO.CombineTo(Op, Op1);
2618
2619 // Attempt to avoid multi-use ops if we don't need anything from them.
2620 if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
2621 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
2622 Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2623 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
2624 Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
2625 if (DemandedOp0 || DemandedOp1) {
2626 Flags.setNoSignedWrap(false);
2627 Flags.setNoUnsignedWrap(false);
2628 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
2629 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
2630 SDValue NewOp =
2631 TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
2632 return TLO.CombineTo(Op, NewOp);
2633 }
2634 }
2635
2636 // If we have a constant operand, we may be able to turn it into -1 if we
2637 // do not demand the high bits. This can make the constant smaller to
2638 // encode, allow more general folding, or match specialized instruction
2639 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
2640 // is probably not useful (and could be detrimental).
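// e.g. i16 (add X, 0x00FF) with only the low 8 bits demanded: carries
// only propagate upwards, so the constant can be replaced by -1 (0xFFFF),
// which is typically cheaper to encode.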
2641 ConstantSDNode *C = isConstOrConstSplat(Op1);
2642 APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
2643 if (C && !C->isAllOnes() && !C->isOne() &&
2644 (C->getAPIntValue() | HighMask).isAllOnes()) {
2645 SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
2646 // Disable the nsw and nuw flags. We can no longer guarantee that we
2647 // won't wrap after simplification.
2648 Flags.setNoSignedWrap(false);
2649 Flags.setNoUnsignedWrap(false);
2650 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
2651 return TLO.CombineTo(Op, NewOp);
2652 }
2653
2654     // Match a multiply with a disguised negated-power-of-2 and convert to
2655     // an equivalent shift-left amount.
2656 // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2657 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2658 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2659 return 0;
2660
2661 // Don't touch opaque constants. Also, ignore zero and power-of-2
2662 // multiplies. Those will get folded later.
2663 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2664 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2665 !MulC->getAPIntValue().isPowerOf2()) {
2666 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2667 if (UnmaskedC.isNegatedPowerOf2())
2668 return (-UnmaskedC).logBase2();
2669 }
2670 return 0;
2671 };
2672
2673 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2674 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2675 SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2676 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2677 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2678 return TLO.CombineTo(Op, Res);
2679 };
2680
2681 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2682 if (Op.getOpcode() == ISD::ADD) {
2683 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2684 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2685 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2686 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2687 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2688 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2689 }
2690 if (Op.getOpcode() == ISD::SUB) {
2691 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2692 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2693 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2694 }
2695 }
2696
2697 [[fallthrough]];
2698 }
2699 default:
2700 // We also ask the target about intrinsics (which could be specific to it).
2701 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2702 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2703       // TODO: Probably okay to remove after audit; here to reduce change size
2704       // in the initial enablement patch for scalable vectors.
2705 if (Op.getValueType().isScalableVector())
2706 break;
2707 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2708 Known, TLO, Depth))
2709 return true;
2710 break;
2711 }
2712
2713 // Just use computeKnownBits to compute output bits.
2714 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2715 break;
2716 }
2717
2718 // If we know the value of all of the demanded bits, return this as a
2719 // constant.
2720 if (!isTargetCanonicalConstantNode(Op) &&
2721 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2722 // Avoid folding to a constant if any OpaqueConstant is involved.
2723 const SDNode *N = Op.getNode();
2724 for (SDNode *Op :
2725 llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2726 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2727 if (C->isOpaque())
2728 return false;
2729 }
2730 if (VT.isInteger())
2731 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2732 if (VT.isFloatingPoint())
2733 return TLO.CombineTo(
2734 Op,
2735 TLO.DAG.getConstantFP(
2736 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2737 }
2738
2739   // A multi-use 'all demanded elts' simplify failed to find any known bits.
2740 // Try again just for the original demanded elts.
2741 // Ensure we do this AFTER constant folding above.
2742 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2743 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2744
2745 return false;
2746 }
2747
2748 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2749 const APInt &DemandedElts,
2750 DAGCombinerInfo &DCI) const {
2751 SelectionDAG &DAG = DCI.DAG;
2752 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2753 !DCI.isBeforeLegalizeOps());
2754
2755 APInt KnownUndef, KnownZero;
2756 bool Simplified =
2757 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2758 if (Simplified) {
2759 DCI.AddToWorklist(Op.getNode());
2760 DCI.CommitTargetLoweringOpt(TLO);
2761 }
2762
2763 return Simplified;
2764 }
2765
2766 /// Given a vector binary operation and known undefined elements for each input
2767 /// operand, compute whether each element of the output is undefined.
2768 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2769 const APInt &UndefOp0,
2770 const APInt &UndefOp1) {
2771 EVT VT = BO.getValueType();
2772 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2773 "Vector binop only");
2774
2775 EVT EltVT = VT.getVectorElementType();
2776 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2777 assert(UndefOp0.getBitWidth() == NumElts &&
2778 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2779
2780 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2781 const APInt &UndefVals) {
2782 if (UndefVals[Index])
2783 return DAG.getUNDEF(EltVT);
2784
2785 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2786 // Try hard to make sure that the getNode() call is not creating temporary
2787 // nodes. Ignore opaque integers because they do not constant fold.
2788 SDValue Elt = BV->getOperand(Index);
2789 auto *C = dyn_cast<ConstantSDNode>(Elt);
2790 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2791 return Elt;
2792 }
2793
2794 return SDValue();
2795 };
2796
2797 APInt KnownUndef = APInt::getZero(NumElts);
2798 for (unsigned i = 0; i != NumElts; ++i) {
2799 // If both inputs for this element are either constant or undef and match
2800 // the element type, compute the constant/undef result for this element of
2801 // the vector.
2802 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2803 // not handle FP constants. The code within getNode() should be refactored
2804 // to avoid the danger of creating a bogus temporary node here.
2805 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2806 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2807 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2808 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2809 KnownUndef.setBit(i);
2810 }
2811 return KnownUndef;
2812 }
2813
2814 bool TargetLowering::SimplifyDemandedVectorElts(
2815 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
2816 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
2817 bool AssumeSingleUse) const {
2818 EVT VT = Op.getValueType();
2819 unsigned Opcode = Op.getOpcode();
2820 APInt DemandedElts = OriginalDemandedElts;
2821 unsigned NumElts = DemandedElts.getBitWidth();
2822 assert(VT.isVector() && "Expected vector op");
2823
2824 KnownUndef = KnownZero = APInt::getZero(NumElts);
2825
2826 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
2827 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
2828 return false;
2829
2830 // TODO: For now we assume we know nothing about scalable vectors.
2831 if (VT.isScalableVector())
2832 return false;
2833
2834 assert(VT.getVectorNumElements() == NumElts &&
2835 "Mask size mismatches value type element count!");
2836
2837 // Undef operand.
2838 if (Op.isUndef()) {
2839 KnownUndef.setAllBits();
2840 return false;
2841 }
2842
2843 // If Op has other users, assume that all elements are needed.
2844 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
2845 DemandedElts.setAllBits();
2846
2847 // Not demanding any elements from Op.
2848 if (DemandedElts == 0) {
2849 KnownUndef.setAllBits();
2850 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2851 }
2852
2853 // Limit search depth.
2854 if (Depth >= SelectionDAG::MaxRecursionDepth)
2855 return false;
2856
2857 SDLoc DL(Op);
2858 unsigned EltSizeInBits = VT.getScalarSizeInBits();
2859 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
2860
2861 // Helper for demanding the specified elements and all the bits of both binary
2862 // operands.
2863 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
2864 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
2865 TLO.DAG, Depth + 1);
2866 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
2867 TLO.DAG, Depth + 1);
2868 if (NewOp0 || NewOp1) {
2869 SDValue NewOp = TLO.DAG.getNode(
2870 Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
2871 return TLO.CombineTo(Op, NewOp);
2872 }
2873 return false;
2874 };
2875
2876 switch (Opcode) {
2877 case ISD::SCALAR_TO_VECTOR: {
2878 if (!DemandedElts[0]) {
2879 KnownUndef.setAllBits();
2880 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
2881 }
2882 SDValue ScalarSrc = Op.getOperand(0);
2883 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
2884 SDValue Src = ScalarSrc.getOperand(0);
2885 SDValue Idx = ScalarSrc.getOperand(1);
2886 EVT SrcVT = Src.getValueType();
2887
2888 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
2889
2890 if (SrcEltCnt.isScalable())
2891 return false;
2892
2893 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
2894 if (isNullConstant(Idx)) {
2895 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
2896 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
2897 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
2898 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2899 TLO, Depth + 1))
2900 return true;
2901 }
2902 }
2903 KnownUndef.setHighBits(NumElts - 1);
2904 break;
2905 }
2906 case ISD::BITCAST: {
2907 SDValue Src = Op.getOperand(0);
2908 EVT SrcVT = Src.getValueType();
2909
2910 // We only handle vectors here.
2911 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
2912 if (!SrcVT.isVector())
2913 break;
2914
2915 // Fast handling of 'identity' bitcasts.
2916 unsigned NumSrcElts = SrcVT.getVectorNumElements();
2917 if (NumSrcElts == NumElts)
2918 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
2919 KnownZero, TLO, Depth + 1);
2920
2921 APInt SrcDemandedElts, SrcZero, SrcUndef;
2922
2923     // When bitcasting from a 'large element' src vector to a 'small element'
2924     // vector, we must demand a source element if any DemandedElt maps to it.
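    // For example (a sketch): a v2i64 -> v4i32 bitcast has Scale == 2; output
    // element 0 maps to source element 0 and output element 3 to source
    // element 1, so demanding output elements {0,3} demands both i64 elements.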
2925 if ((NumElts % NumSrcElts) == 0) {
2926 unsigned Scale = NumElts / NumSrcElts;
2927 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2928 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2929 TLO, Depth + 1))
2930 return true;
2931
2932 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
2933 // of the large element.
2934 // TODO - bigendian once we have test coverage.
2935 if (IsLE) {
2936 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
2937 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
2938 for (unsigned i = 0; i != NumElts; ++i)
2939 if (DemandedElts[i]) {
2940 unsigned Ofs = (i % Scale) * EltSizeInBits;
2941 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
2942 }
2943
2944 KnownBits Known;
2945 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
2946 TLO, Depth + 1))
2947 return true;
2948
2949 // The bitcast has split each wide element into a number of
2950 // narrow subelements. We have just computed the Known bits
2951 // for wide elements. See if element splitting results in
2952 // some subelements being zero. Only for demanded elements!
2953 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
2954 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
2955 .isAllOnes())
2956 continue;
2957 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
2958 unsigned Elt = Scale * SrcElt + SubElt;
2959 if (DemandedElts[Elt])
2960 KnownZero.setBit(Elt);
2961 }
2962 }
2963 }
2964
2965       // If the src element is zero/undef then all the output elements it covers
2966       // will be as well - only demanded elements are guaranteed to be correct.
2967 for (unsigned i = 0; i != NumSrcElts; ++i) {
2968 if (SrcDemandedElts[i]) {
2969 if (SrcZero[i])
2970 KnownZero.setBits(i * Scale, (i + 1) * Scale);
2971 if (SrcUndef[i])
2972 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
2973 }
2974 }
2975 }
2976
2977     // When bitcasting from a 'small element' src vector to a 'large element'
2978     // vector, we demand all the smaller source elements covered by the larger
2979     // demanded element of this vector.
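    // For example (a sketch): a v4i32 -> v2i64 bitcast has Scale == 2, so
    // demanding i64 element 1 demands i32 source elements {2,3}.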
2980 if ((NumSrcElts % NumElts) == 0) {
2981 unsigned Scale = NumSrcElts / NumElts;
2982 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
2983 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
2984 TLO, Depth + 1))
2985 return true;
2986
2987 // If all the src elements covering an output element are zero/undef, then
2988 // the output element will be as well, assuming it was demanded.
2989 for (unsigned i = 0; i != NumElts; ++i) {
2990 if (DemandedElts[i]) {
2991 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
2992 KnownZero.setBit(i);
2993 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
2994 KnownUndef.setBit(i);
2995 }
2996 }
2997 }
2998 break;
2999 }
3000 case ISD::BUILD_VECTOR: {
3001 // Check all elements and simplify any unused elements with UNDEF.
3002 if (!DemandedElts.isAllOnes()) {
3003 // Don't simplify BROADCASTS.
3004 if (llvm::any_of(Op->op_values(),
3005 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3006 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3007 bool Updated = false;
3008 for (unsigned i = 0; i != NumElts; ++i) {
3009 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3010 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3011 KnownUndef.setBit(i);
3012 Updated = true;
3013 }
3014 }
3015 if (Updated)
3016 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3017 }
3018 }
3019 for (unsigned i = 0; i != NumElts; ++i) {
3020 SDValue SrcOp = Op.getOperand(i);
3021 if (SrcOp.isUndef()) {
3022 KnownUndef.setBit(i);
3023 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3024 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3025 KnownZero.setBit(i);
3026 }
3027 }
3028 break;
3029 }
3030 case ISD::CONCAT_VECTORS: {
3031 EVT SubVT = Op.getOperand(0).getValueType();
3032 unsigned NumSubVecs = Op.getNumOperands();
3033 unsigned NumSubElts = SubVT.getVectorNumElements();
3034 for (unsigned i = 0; i != NumSubVecs; ++i) {
3035 SDValue SubOp = Op.getOperand(i);
3036 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3037 APInt SubUndef, SubZero;
3038 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3039 Depth + 1))
3040 return true;
3041 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3042 KnownZero.insertBits(SubZero, i * NumSubElts);
3043 }
3044
3045 // Attempt to avoid multi-use ops if we don't need anything from them.
3046 if (!DemandedElts.isAllOnes()) {
3047 bool FoundNewSub = false;
3048 SmallVector<SDValue, 2> DemandedSubOps;
3049 for (unsigned i = 0; i != NumSubVecs; ++i) {
3050 SDValue SubOp = Op.getOperand(i);
3051 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3052 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3053 SubOp, SubElts, TLO.DAG, Depth + 1);
3054 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3055 FoundNewSub = NewSubOp ? true : FoundNewSub;
3056 }
3057 if (FoundNewSub) {
3058 SDValue NewOp =
3059 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3060 return TLO.CombineTo(Op, NewOp);
3061 }
3062 }
3063 break;
3064 }
3065 case ISD::INSERT_SUBVECTOR: {
3066     // Demand any elements from the subvector and the remainder from the src
3067     // it's inserted into.
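    // For example (a sketch): inserting a v2i32 subvector into v8i32 at index
    // 4 takes DemandedSubElts from bits [4,6) of DemandedElts, and
    // DemandedSrcElts is DemandedElts with those two bits cleared.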
3068 SDValue Src = Op.getOperand(0);
3069 SDValue Sub = Op.getOperand(1);
3070 uint64_t Idx = Op.getConstantOperandVal(2);
3071 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3072 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3073 APInt DemandedSrcElts = DemandedElts;
3074 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3075
3076 APInt SubUndef, SubZero;
3077 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3078 Depth + 1))
3079 return true;
3080
3081 // If none of the src operand elements are demanded, replace it with undef.
3082 if (!DemandedSrcElts && !Src.isUndef())
3083 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3084 TLO.DAG.getUNDEF(VT), Sub,
3085 Op.getOperand(2)));
3086
3087 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3088 TLO, Depth + 1))
3089 return true;
3090 KnownUndef.insertBits(SubUndef, Idx);
3091 KnownZero.insertBits(SubZero, Idx);
3092
3093 // Attempt to avoid multi-use ops if we don't need anything from them.
3094 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3095 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3096 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3097 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3098 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3099 if (NewSrc || NewSub) {
3100 NewSrc = NewSrc ? NewSrc : Src;
3101 NewSub = NewSub ? NewSub : Sub;
3102 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3103 NewSub, Op.getOperand(2));
3104 return TLO.CombineTo(Op, NewOp);
3105 }
3106 }
3107 break;
3108 }
3109 case ISD::EXTRACT_SUBVECTOR: {
3110 // Offset the demanded elts by the subvector index.
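    // For example (a sketch): extracting a v2i32 subvector from v8i32 at index
    // 4 turns DemandedElts 0b11 into DemandedSrcElts 0b00110000.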
3111 SDValue Src = Op.getOperand(0);
3112 if (Src.getValueType().isScalableVector())
3113 break;
3114 uint64_t Idx = Op.getConstantOperandVal(1);
3115 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3116 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3117
3118 APInt SrcUndef, SrcZero;
3119 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3120 Depth + 1))
3121 return true;
3122 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3123 KnownZero = SrcZero.extractBits(NumElts, Idx);
3124
3125 // Attempt to avoid multi-use ops if we don't need anything from them.
3126 if (!DemandedElts.isAllOnes()) {
3127 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3128 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3129 if (NewSrc) {
3130 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3131 Op.getOperand(1));
3132 return TLO.CombineTo(Op, NewOp);
3133 }
3134 }
3135 break;
3136 }
3137 case ISD::INSERT_VECTOR_ELT: {
3138 SDValue Vec = Op.getOperand(0);
3139 SDValue Scl = Op.getOperand(1);
3140 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3141
3142 // For a legal, constant insertion index, if we don't need this insertion
3143 // then strip it, else remove it from the demanded elts.
3144 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3145 unsigned Idx = CIdx->getZExtValue();
3146 if (!DemandedElts[Idx])
3147 return TLO.CombineTo(Op, Vec);
3148
3149 APInt DemandedVecElts(DemandedElts);
3150 DemandedVecElts.clearBit(Idx);
3151 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3152 KnownZero, TLO, Depth + 1))
3153 return true;
3154
3155 KnownUndef.setBitVal(Idx, Scl.isUndef());
3156
3157 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3158 break;
3159 }
3160
3161 APInt VecUndef, VecZero;
3162 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3163 Depth + 1))
3164 return true;
3165 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3166 break;
3167 }
3168 case ISD::VSELECT: {
3169 SDValue Sel = Op.getOperand(0);
3170 SDValue LHS = Op.getOperand(1);
3171 SDValue RHS = Op.getOperand(2);
3172
3173 // Try to transform the select condition based on the current demanded
3174 // elements.
3175 APInt UndefSel, UndefZero;
3176 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, UndefZero, TLO,
3177 Depth + 1))
3178 return true;
3179
3180 // See if we can simplify either vselect operand.
3181 APInt DemandedLHS(DemandedElts);
3182 APInt DemandedRHS(DemandedElts);
3183 APInt UndefLHS, ZeroLHS;
3184 APInt UndefRHS, ZeroRHS;
3185 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3186 Depth + 1))
3187 return true;
3188 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3189 Depth + 1))
3190 return true;
3191
3192 KnownUndef = UndefLHS & UndefRHS;
3193 KnownZero = ZeroLHS & ZeroRHS;
3194
3195 // If we know that the selected element is always zero, we don't need the
3196 // select value element.
3197 APInt DemandedSel = DemandedElts & ~KnownZero;
3198 if (DemandedSel != DemandedElts)
3199 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, UndefZero, TLO,
3200 Depth + 1))
3201 return true;
3202
3203 break;
3204 }
3205 case ISD::VECTOR_SHUFFLE: {
3206 SDValue LHS = Op.getOperand(0);
3207 SDValue RHS = Op.getOperand(1);
3208 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3209
3210     // Collect demanded elements from shuffle operands.
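    // For example (a sketch): a v4i32 shuffle with mask <0,5,2,7> and all
    // result elements demanded gives DemandedLHS = {0,2} and
    // DemandedRHS = {1,3}.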
3211 APInt DemandedLHS(NumElts, 0);
3212 APInt DemandedRHS(NumElts, 0);
3213 for (unsigned i = 0; i != NumElts; ++i) {
3214 int M = ShuffleMask[i];
3215 if (M < 0 || !DemandedElts[i])
3216 continue;
3217 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3218 if (M < (int)NumElts)
3219 DemandedLHS.setBit(M);
3220 else
3221 DemandedRHS.setBit(M - NumElts);
3222 }
3223
3224 // See if we can simplify either shuffle operand.
3225 APInt UndefLHS, ZeroLHS;
3226 APInt UndefRHS, ZeroRHS;
3227 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3228 Depth + 1))
3229 return true;
3230 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3231 Depth + 1))
3232 return true;
3233
3234 // Simplify mask using undef elements from LHS/RHS.
3235 bool Updated = false;
3236 bool IdentityLHS = true, IdentityRHS = true;
3237 SmallVector<int, 32> NewMask(ShuffleMask);
3238 for (unsigned i = 0; i != NumElts; ++i) {
3239 int &M = NewMask[i];
3240 if (M < 0)
3241 continue;
3242 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3243 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3244 Updated = true;
3245 M = -1;
3246 }
3247 IdentityLHS &= (M < 0) || (M == (int)i);
3248 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3249 }
3250
3251     // Update the legal shuffle mask based on demanded elements if doing so won't
3252     // reduce it to an identity mask, which can cause premature removal of the shuffle.
3253 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3254 SDValue LegalShuffle =
3255 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3256 if (LegalShuffle)
3257 return TLO.CombineTo(Op, LegalShuffle);
3258 }
3259
3260 // Propagate undef/zero elements from LHS/RHS.
3261 for (unsigned i = 0; i != NumElts; ++i) {
3262 int M = ShuffleMask[i];
3263 if (M < 0) {
3264 KnownUndef.setBit(i);
3265 } else if (M < (int)NumElts) {
3266 if (UndefLHS[M])
3267 KnownUndef.setBit(i);
3268 if (ZeroLHS[M])
3269 KnownZero.setBit(i);
3270 } else {
3271 if (UndefRHS[M - NumElts])
3272 KnownUndef.setBit(i);
3273 if (ZeroRHS[M - NumElts])
3274 KnownZero.setBit(i);
3275 }
3276 }
3277 break;
3278 }
3279 case ISD::ANY_EXTEND_VECTOR_INREG:
3280 case ISD::SIGN_EXTEND_VECTOR_INREG:
3281 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3282 APInt SrcUndef, SrcZero;
3283 SDValue Src = Op.getOperand(0);
3284 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3285 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3286 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3287 Depth + 1))
3288 return true;
3289 KnownZero = SrcZero.zextOrTrunc(NumElts);
3290 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3291
3292 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3293 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3294 DemandedSrcElts == 1) {
3295 // aext - if we just need the bottom element then we can bitcast.
3296 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3297 }
3298
3299 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3300 // zext(undef) upper bits are guaranteed to be zero.
3301 if (DemandedElts.isSubsetOf(KnownUndef))
3302 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3303 KnownUndef.clearAllBits();
3304
3305 // zext - if we just need the bottom element then we can mask:
3306 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
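      // For example (a sketch): for a v4i32 -> v2i64 zext_inreg where only
      // source element 0 is demanded, 'c' is ANDed with <-1,0,0,0> so the new
      // mask itself zeroes the upper elements and the result can be bitcast.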
3307 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3308 Op->isOnlyUserOf(Src.getNode()) &&
3309 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3310 SDLoc DL(Op);
3311 EVT SrcVT = Src.getValueType();
3312 EVT SrcSVT = SrcVT.getScalarType();
3313 SmallVector<SDValue> MaskElts;
3314 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3315 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3316 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3317 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3318 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3319 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3320 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3321 }
3322 }
3323 }
3324 break;
3325 }
3326
3327 // TODO: There are more binop opcodes that could be handled here - MIN,
3328 // MAX, saturated math, etc.
3329 case ISD::ADD: {
3330 SDValue Op0 = Op.getOperand(0);
3331 SDValue Op1 = Op.getOperand(1);
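    // add(x,x) (i.e. x << 1) uses x twice, so x can have two uses while Op is
    // still its only user; pass AssumeSingleUse so x can be simplified anyway.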
3332 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3333 APInt UndefLHS, ZeroLHS;
3334 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3335 Depth + 1, /*AssumeSingleUse*/ true))
3336 return true;
3337 }
3338 [[fallthrough]];
3339 }
3340 case ISD::OR:
3341 case ISD::XOR:
3342 case ISD::SUB:
3343 case ISD::FADD:
3344 case ISD::FSUB:
3345 case ISD::FMUL:
3346 case ISD::FDIV:
3347 case ISD::FREM: {
3348 SDValue Op0 = Op.getOperand(0);
3349 SDValue Op1 = Op.getOperand(1);
3350
3351 APInt UndefRHS, ZeroRHS;
3352 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3353 Depth + 1))
3354 return true;
3355 APInt UndefLHS, ZeroLHS;
3356 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3357 Depth + 1))
3358 return true;
3359
3360 KnownZero = ZeroLHS & ZeroRHS;
3361 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3362
3363 // Attempt to avoid multi-use ops if we don't need anything from them.
3364 // TODO - use KnownUndef to relax the demandedelts?
3365 if (!DemandedElts.isAllOnes())
3366 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3367 return true;
3368 break;
3369 }
3370 case ISD::SHL:
3371 case ISD::SRL:
3372 case ISD::SRA:
3373 case ISD::ROTL:
3374 case ISD::ROTR: {
3375 SDValue Op0 = Op.getOperand(0);
3376 SDValue Op1 = Op.getOperand(1);
3377
3378 APInt UndefRHS, ZeroRHS;
3379 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3380 Depth + 1))
3381 return true;
3382 APInt UndefLHS, ZeroLHS;
3383 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3384 Depth + 1))
3385 return true;
3386
3387 KnownZero = ZeroLHS;
3388 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3389
3390 // Attempt to avoid multi-use ops if we don't need anything from them.
3391 // TODO - use KnownUndef to relax the demandedelts?
3392 if (!DemandedElts.isAllOnes())
3393 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3394 return true;
3395 break;
3396 }
3397 case ISD::MUL:
3398 case ISD::MULHU:
3399 case ISD::MULHS:
3400 case ISD::AND: {
3401 SDValue Op0 = Op.getOperand(0);
3402 SDValue Op1 = Op.getOperand(1);
3403
3404 APInt SrcUndef, SrcZero;
3405 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3406 Depth + 1))
3407 return true;
3408     // If we know that a demanded element was zero in Op1, we don't need to
3409     // demand it in Op0 - it's guaranteed to be zero.
3410 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3411 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3412 TLO, Depth + 1))
3413 return true;
3414
3415 KnownUndef &= DemandedElts0;
3416 KnownZero &= DemandedElts0;
3417
3418 // If every element pair has a zero/undef then just fold to zero.
3419 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3420 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3421 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3422 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3423
3424 // If either side has a zero element, then the result element is zero, even
3425 // if the other is an UNDEF.
3426 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3427 // and then handle 'and' nodes with the rest of the binop opcodes.
3428 KnownZero |= SrcZero;
3429 KnownUndef &= SrcUndef;
3430 KnownUndef &= ~KnownZero;
3431
3432 // Attempt to avoid multi-use ops if we don't need anything from them.
3433 if (!DemandedElts.isAllOnes())
3434 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3435 return true;
3436 break;
3437 }
3438 case ISD::TRUNCATE:
3439 case ISD::SIGN_EXTEND:
3440 case ISD::ZERO_EXTEND:
3441 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3442 KnownZero, TLO, Depth + 1))
3443 return true;
3444
3445 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3446 // zext(undef) upper bits are guaranteed to be zero.
3447 if (DemandedElts.isSubsetOf(KnownUndef))
3448 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3449 KnownUndef.clearAllBits();
3450 }
3451 break;
3452 default: {
3453 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3454 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3455 KnownZero, TLO, Depth))
3456 return true;
3457 } else {
3458 KnownBits Known;
3459 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3460 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3461 TLO, Depth, AssumeSingleUse))
3462 return true;
3463 }
3464 break;
3465 }
3466 }
3467 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3468
3469 // Constant fold all undef cases.
3470 // TODO: Handle zero cases as well.
3471 if (DemandedElts.isSubsetOf(KnownUndef))
3472 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3473
3474 return false;
3475 }
3476
3477 /// Determine which of the bits specified in Mask are known to be either zero or
3478 /// one and return them in the Known.
3479 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3480 KnownBits &Known,
3481 const APInt &DemandedElts,
3482 const SelectionDAG &DAG,
3483 unsigned Depth) const {
3484 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3485 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3486 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3487 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3488 "Should use MaskedValueIsZero if you don't know whether Op"
3489 " is a target node!");
3490 Known.resetAll();
3491 }
3492
3493 void TargetLowering::computeKnownBitsForTargetInstr(
3494 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3495 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3496 unsigned Depth) const {
3497 Known.resetAll();
3498 }
3499
3500 void TargetLowering::computeKnownBitsForFrameIndex(
3501 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3502 // The low bits are known zero if the pointer is aligned.
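  // For example (a sketch): a frame object with Align(16) has Log2 == 4, so
  // the low 4 bits of its address are known to be zero.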
3503 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3504 }
3505
3506 Align TargetLowering::computeKnownAlignForTargetInstr(
3507 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3508 unsigned Depth) const {
3509 return Align(1);
3510 }
3511
3512 /// This method can be implemented by targets that want to expose additional
3513 /// information about sign bits to the DAG Combiner.
3514 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3515 const APInt &,
3516 const SelectionDAG &,
3517 unsigned Depth) const {
3518 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3519 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3520 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3521 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3522 "Should use ComputeNumSignBits if you don't know whether Op"
3523 " is a target node!");
3524 return 1;
3525 }
3526
3527 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3528 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3529 const MachineRegisterInfo &MRI, unsigned Depth) const {
3530 return 1;
3531 }
3532
3533 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3534 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3535 TargetLoweringOpt &TLO, unsigned Depth) const {
3536 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3537 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3538 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3539 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3540 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3541 " is a target node!");
3542 return false;
3543 }
3544
3545 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3546 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3547 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3548 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3549 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3550 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3551 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3552 "Should use SimplifyDemandedBits if you don't know whether Op"
3553 " is a target node!");
3554 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3555 return false;
3556 }
3557
3558 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3559 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3560 SelectionDAG &DAG, unsigned Depth) const {
3561 assert(
3562 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3563 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3564 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3565 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3566 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3567 " is a target node!");
3568 return SDValue();
3569 }
3570
3571 SDValue
3572 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3573 SDValue N1, MutableArrayRef<int> Mask,
3574 SelectionDAG &DAG) const {
3575 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3576 if (!LegalMask) {
3577 std::swap(N0, N1);
3578 ShuffleVectorSDNode::commuteMask(Mask);
3579 LegalMask = isShuffleMaskLegal(Mask, VT);
3580 }
3581
3582 if (!LegalMask)
3583 return SDValue();
3584
3585 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3586 }
3587
3588 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3589 return nullptr;
3590 }
3591
3592 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3593 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3594 bool PoisonOnly, unsigned Depth) const {
3595 assert(
3596 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3597 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3598 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3599 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3600 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3601 " is a target node!");
3602 return false;
3603 }
3604
3605 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3606 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3607 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3608 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3609 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3610 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3611 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3612 "Should use canCreateUndefOrPoison if you don't know whether Op"
3613 " is a target node!");
3614 // Be conservative and return true.
3615 return true;
3616 }
3617
3618 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3619 const SelectionDAG &DAG,
3620 bool SNaN,
3621 unsigned Depth) const {
3622 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3623 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3624 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3625 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3626 "Should use isKnownNeverNaN if you don't know whether Op"
3627 " is a target node!");
3628 return false;
3629 }
3630
3631 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3632 const APInt &DemandedElts,
3633 APInt &UndefElts,
3634 const SelectionDAG &DAG,
3635 unsigned Depth) const {
3636 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3637 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3638 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3639 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3640 "Should use isSplatValue if you don't know whether Op"
3641 " is a target node!");
3642 return false;
3643 }
3644
3645 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3646 // work with truncating build vectors and vectors with elements of less than
3647 // 8 bits.
3648 bool TargetLowering::isConstTrueVal(SDValue N) const {
3649 if (!N)
3650 return false;
3651
3652 unsigned EltWidth;
3653 APInt CVal;
3654 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3655 /*AllowTruncation=*/true)) {
3656 CVal = CN->getAPIntValue();
3657 EltWidth = N.getValueType().getScalarSizeInBits();
3658 } else
3659 return false;
3660
3661 // If this is a truncating splat, truncate the splat value.
3662 // Otherwise, we may fail to match the expected values below.
3663 if (EltWidth < CVal.getBitWidth())
3664 CVal = CVal.trunc(EltWidth);
3665
3666 switch (getBooleanContents(N.getValueType())) {
3667 case UndefinedBooleanContent:
3668 return CVal[0];
3669 case ZeroOrOneBooleanContent:
3670 return CVal.isOne();
3671 case ZeroOrNegativeOneBooleanContent:
3672 return CVal.isAllOnes();
3673 }
3674
3675 llvm_unreachable("Invalid boolean contents");
3676 }
3677
3678 bool TargetLowering::isConstFalseVal(SDValue N) const {
3679 if (!N)
3680 return false;
3681
3682 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3683 if (!CN) {
3684 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3685 if (!BV)
3686 return false;
3687
3688     // We're only interested in constant splats; we don't care about undef
3689     // elements when identifying boolean constants, and getConstantSplatNode
3690     // returns null if all ops are undef.
3691 CN = BV->getConstantSplatNode();
3692 if (!CN)
3693 return false;
3694 }
3695
3696 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3697 return !CN->getAPIntValue()[0];
3698
3699 return CN->isZero();
3700 }
3701
3702 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3703 bool SExt) const {
3704 if (VT == MVT::i1)
3705 return N->isOne();
3706
3707 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3708 switch (Cnt) {
3709 case TargetLowering::ZeroOrOneBooleanContent:
3710 // An extended value of 1 is always true, unless its original type is i1,
3711 // in which case it will be sign extended to -1.
3712 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3713 case TargetLowering::UndefinedBooleanContent:
3714 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3715 return N->isAllOnes() && SExt;
3716 }
3717 llvm_unreachable("Unexpected enumeration.");
3718 }
3719
3720 /// This helper function of SimplifySetCC tries to optimize the comparison when
3721 /// either operand of the SetCC node is a bitwise-and instruction.
3722 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3723 ISD::CondCode Cond, const SDLoc &DL,
3724 DAGCombinerInfo &DCI) const {
3725 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3726 std::swap(N0, N1);
3727
3728 SelectionDAG &DAG = DCI.DAG;
3729 EVT OpVT = N0.getValueType();
3730 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3731 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3732 return SDValue();
3733
3734 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3735 // iff everything but LSB is known zero:
3736 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3737 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
3738 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3739 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3740 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3741 if (DAG.MaskedValueIsZero(N0, UpperBits))
3742 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3743 }
3744
3745 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3746 // test in a narrow type that we can truncate to with no cost. Examples:
3747 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3748 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3749 // TODO: This conservatively checks for type legality on the source and
3750 // destination types. That may inhibit optimizations, but it also
3751 // allows setcc->shift transforms that may be more beneficial.
3752 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3753 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3754 isTypeLegal(OpVT) && N0.hasOneUse()) {
3755 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3756 AndC->getAPIntValue().getActiveBits());
3757 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3758 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3759 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3760 return DAG.getSetCC(DL, VT, Trunc, Zero,
3761 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3762 }
3763 }
3764
3765 // Match these patterns in any of their permutations:
3766 // (X & Y) == Y
3767 // (X & Y) != Y
3768 SDValue X, Y;
3769 if (N0.getOperand(0) == N1) {
3770 X = N0.getOperand(1);
3771 Y = N0.getOperand(0);
3772 } else if (N0.getOperand(1) == N1) {
3773 X = N0.getOperand(0);
3774 Y = N0.getOperand(1);
3775 } else {
3776 return SDValue();
3777 }
3778
3779 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3780 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
3781 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3782 // Note that where Y is variable and is known to have at most one bit set
3783 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3784 // equivalent when Y == 0.
3785 assert(OpVT.isInteger());
3786 Cond = ISD::getSetCCInverse(Cond, OpVT);
3787 if (DCI.isBeforeLegalizeOps() ||
3788 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3789 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3790 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3791 // If the target supports an 'and-not' or 'and-complement' logic operation,
3792 // try to use that to make a comparison operation more efficient.
3793 // But don't do this transform if the mask is a single bit because there are
3794 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3795 // 'rlwinm' on PPC).
3796
3797 // Bail out if the compare operand that we want to turn into a zero is
3798 // already a zero (otherwise, infinite loop).
3799 auto *YConst = dyn_cast<ConstantSDNode>(Y);
3800 if (YConst && YConst->isZero())
3801 return SDValue();
3802
3803 // Transform this into: ~X & Y == 0.
3804 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3805 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3806 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
3807 }
3808
3809 return SDValue();
3810 }
3811
3812 /// There are multiple IR patterns that could be checking whether certain
3813 /// truncation of a signed number would be lossy or not. The pattern which is
3814 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
3815 /// We are looking for the following pattern: (KeptBits is a constant)
3816 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
3817 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
3818 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
3819 /// We will unfold it into the natural trunc+sext pattern:
3820 /// ((%x << C) a>> C) dstcond %x
3821 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
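/// For example (a sketch): for i16 %x and KeptBits == 8, this turns
/// (add %x, 128) u< 256 into ((%x << 8) a>> 8) == %x.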
3822 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
3823 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
3824 const SDLoc &DL) const {
3825 // We must be comparing with a constant.
3826 ConstantSDNode *C1;
3827 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
3828 return SDValue();
3829
3830 // N0 should be: add %x, (1 << (KeptBits-1))
3831 if (N0->getOpcode() != ISD::ADD)
3832 return SDValue();
3833
3834 // And we must be 'add'ing a constant.
3835 ConstantSDNode *C01;
3836 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
3837 return SDValue();
3838
3839 SDValue X = N0->getOperand(0);
3840 EVT XVT = X.getValueType();
3841
3842 // Validate constants ...
3843
3844 APInt I1 = C1->getAPIntValue();
3845
3846 ISD::CondCode NewCond;
3847 if (Cond == ISD::CondCode::SETULT) {
3848 NewCond = ISD::CondCode::SETEQ;
3849 } else if (Cond == ISD::CondCode::SETULE) {
3850 NewCond = ISD::CondCode::SETEQ;
3851 // But need to 'canonicalize' the constant.
3852 I1 += 1;
3853 } else if (Cond == ISD::CondCode::SETUGT) {
3854 NewCond = ISD::CondCode::SETNE;
3855 // But need to 'canonicalize' the constant.
3856 I1 += 1;
3857 } else if (Cond == ISD::CondCode::SETUGE) {
3858 NewCond = ISD::CondCode::SETNE;
3859 } else
3860 return SDValue();
3861
3862 APInt I01 = C01->getAPIntValue();
3863
3864 auto checkConstants = [&I1, &I01]() -> bool {
3865     // Both of them must be powers of two, and the setcc constant must be the bigger one.
3866 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
3867 };
3868
3869 if (checkConstants()) {
3870 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
3871 } else {
3872 // What if we invert constants? (and the target predicate)
3873 I1.negate();
3874 I01.negate();
3875 assert(XVT.isInteger());
3876 NewCond = getSetCCInverse(NewCond, XVT);
3877 if (!checkConstants())
3878 return SDValue();
3879 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
3880 }
3881
3882   // They are powers of two, so which bit is set?
3883 const unsigned KeptBits = I1.logBase2();
3884 const unsigned KeptBitsMinusOne = I01.logBase2();
3885
3886 // Magic!
3887 if (KeptBits != (KeptBitsMinusOne + 1))
3888 return SDValue();
3889 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
3890
3891 // We don't want to do this in every single case.
3892 SelectionDAG &DAG = DCI.DAG;
3893 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
3894 XVT, KeptBits))
3895 return SDValue();
3896
3897 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
3898 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
3899
3900 // Unfold into: ((%x << C) a>> C) cond %x
3901 // Where 'cond' will be either 'eq' or 'ne'.
3902 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
3903 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
3904 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
3905 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
3906
3907 return T2;
3908 }
3909
3910 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
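// For example (a sketch): (X & (0xF0 l>> Y)) == 0 becomes
// ((X << Y) & 0xF0) == 0, hoisting the constant out of the variable shift.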
3911 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
3912 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
3913 DAGCombinerInfo &DCI, const SDLoc &DL) const {
3914 assert(isConstOrConstSplat(N1C) &&
3915 isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
3916 "Should be a comparison with 0.");
3917 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
3918 "Valid only for [in]equality comparisons.");
3919
3920 unsigned NewShiftOpcode;
3921 SDValue X, C, Y;
3922
3923 SelectionDAG &DAG = DCI.DAG;
3924 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3925
3926 // Look for '(C l>>/<< Y)'.
3927 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
3928 // The shift should be one-use.
3929 if (!V.hasOneUse())
3930 return false;
3931 unsigned OldShiftOpcode = V.getOpcode();
3932 switch (OldShiftOpcode) {
3933 case ISD::SHL:
3934 NewShiftOpcode = ISD::SRL;
3935 break;
3936 case ISD::SRL:
3937 NewShiftOpcode = ISD::SHL;
3938 break;
3939 default:
3940 return false; // must be a logical shift.
3941 }
3942 // We should be shifting a constant.
3943 // FIXME: best to use isConstantOrConstantVector().
3944 C = V.getOperand(0);
3945 ConstantSDNode *CC =
3946 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3947 if (!CC)
3948 return false;
3949 Y = V.getOperand(1);
3950
3951 ConstantSDNode *XC =
3952 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
3953 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
3954 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
3955 };
3956
3957   // LHS of comparison should be a one-use 'and'.
3958 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
3959 return SDValue();
3960
3961 X = N0.getOperand(0);
3962 SDValue Mask = N0.getOperand(1);
3963
3964 // 'and' is commutative!
3965 if (!Match(Mask)) {
3966 std::swap(X, Mask);
3967 if (!Match(Mask))
3968 return SDValue();
3969 }
3970
3971 EVT VT = X.getValueType();
3972
3973 // Produce:
3974 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
3975 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
3976 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
3977 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
3978 return T2;
3979 }
3980
3981 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
3982 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
3983 /// handle the commuted versions of these patterns.
3984 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
3985 ISD::CondCode Cond, const SDLoc &DL,
3986 DAGCombinerInfo &DCI) const {
3987 unsigned BOpcode = N0.getOpcode();
3988 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
3989 "Unexpected binop");
3990 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
3991
3992 // (X + Y) == X --> Y == 0
3993 // (X - Y) == X --> Y == 0
3994 // (X ^ Y) == X --> Y == 0
3995 SelectionDAG &DAG = DCI.DAG;
3996 EVT OpVT = N0.getValueType();
3997 SDValue X = N0.getOperand(0);
3998 SDValue Y = N0.getOperand(1);
3999 if (X == N1)
4000 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4001
4002 if (Y != N1)
4003 return SDValue();
4004
4005 // (X + Y) == Y --> X == 0
4006 // (X ^ Y) == Y --> X == 0
4007 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4008 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4009
4010 // The shift would not be valid if the operands are boolean (i1).
4011 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4012 return SDValue();
4013
4014 // (X - Y) == Y --> X == Y << 1
4015 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
4016 !DCI.isBeforeLegalize());
4017 SDValue One = DAG.getConstant(1, DL, ShiftVT);
4018 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4019 if (!DCI.isCalledByLegalizer())
4020 DCI.AddToWorklist(YShl1.getNode());
4021 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4022 }
4023
4024 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4025 SDValue N0, const APInt &C1,
4026 ISD::CondCode Cond, const SDLoc &dl,
4027 SelectionDAG &DAG) {
4028 // Look through truncs that don't change the value of a ctpop.
4029 // FIXME: Add vector support? Need to be careful with setcc result type below.
4030 SDValue CTPOP = N0;
4031 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4032 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4033 CTPOP = N0.getOperand(0);
4034
4035 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4036 return SDValue();
4037
4038 EVT CTVT = CTPOP.getValueType();
4039 SDValue CTOp = CTPOP.getOperand(0);
4040
4041 // Expand a power-of-2-or-zero comparison based on ctpop:
4042 // (ctpop x) u< 2 -> (x & x-1) == 0
4043 // (ctpop x) u> 1 -> (x & x-1) != 0
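  // More generally, (ctpop x) u< N clears the lowest set bit N-1 times, e.g.
  // (ctpop x) u< 3 -> ((x & (x-1)) & ((x & (x-1)) - 1)) == 0.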
4044 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4045 // Keep the CTPOP if it is a legal vector op.
4046 if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
4047 return SDValue();
4048
4049 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4050 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4051 return SDValue();
4052 if (C1 == 0 && (Cond == ISD::SETULT))
4053 return SDValue(); // This is handled elsewhere.
4054
4055 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4056
4057 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4058 SDValue Result = CTOp;
4059 for (unsigned i = 0; i < Passes; i++) {
4060 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4061 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4062 }
4063 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4064 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4065 }
4066
4067 // Expand a power-of-2 comparison based on ctpop:
4068 // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4069 // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4070 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4071 // Keep the CTPOP if it is legal.
4072 if (TLI.isOperationLegal(ISD::CTPOP, CTVT))
4073 return SDValue();
4074
4075 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4076 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4077 assert(CTVT.isInteger());
4078 ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
4079 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4080 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4081 SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4082 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4083 unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4084 return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4085 }
4086
4087 return SDValue();
4088 }
4089
4090 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4091 ISD::CondCode Cond, const SDLoc &dl,
4092 SelectionDAG &DAG) {
4093 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4094 return SDValue();
4095
4096 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4097 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4098 return SDValue();
4099
4100 auto getRotateSource = [](SDValue X) {
4101 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4102 return X.getOperand(0);
4103 return SDValue();
4104 };
4105
4106 // Peek through a rotated value compared against 0 or -1:
4107 // (rot X, Y) == 0/-1 --> X == 0/-1
4108 // (rot X, Y) != 0/-1 --> X != 0/-1
4109 if (SDValue R = getRotateSource(N0))
4110 return DAG.getSetCC(dl, VT, R, N1, Cond);
4111
4112 // Peek through an 'or' of a rotated value compared against 0:
4113 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4114 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4115 //
4116 // TODO: Add the 'and' with -1 sibling.
4117 // TODO: Recurse through a series of 'or' ops to find the rotate.
4118 EVT OpVT = N0.getValueType();
4119 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4120 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4121 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4122 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4123 }
4124 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4125 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4126 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4127 }
4128 }
4129
4130 return SDValue();
4131 }
4132
4133 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4134 ISD::CondCode Cond, const SDLoc &dl,
4135 SelectionDAG &DAG) {
4136 // If we are testing for all-bits-clear, we might be able to do that with
4137 // less shifting since bit-order does not matter.
4138 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4139 return SDValue();
4140
4141 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4142 if (!C1 || !C1->isZero())
4143 return SDValue();
4144
4145 if (!N0.hasOneUse() ||
4146 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4147 return SDValue();
4148
4149 unsigned BitWidth = N0.getScalarValueSizeInBits();
4150 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4151 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4152 return SDValue();
4153
4154 // Canonicalize fshr as fshl to reduce pattern-matching.
4155 unsigned ShAmt = ShAmtC->getZExtValue();
4156 if (N0.getOpcode() == ISD::FSHR)
4157 ShAmt = BitWidth - ShAmt;
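  // For example (a sketch): with a bit width of 32, fshr by 8 is equivalent to
  // fshl by 24, so only the fshl form needs to be matched below.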
4158
4159 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4160 SDValue X, Y;
4161 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4162 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4163 return false;
4164 if (Or.getOperand(0) == Other) {
4165 X = Or.getOperand(0);
4166 Y = Or.getOperand(1);
4167 return true;
4168 }
4169 if (Or.getOperand(1) == Other) {
4170 X = Or.getOperand(1);
4171 Y = Or.getOperand(0);
4172 return true;
4173 }
4174 return false;
4175 };
4176
4177 EVT OpVT = N0.getValueType();
4178 EVT ShAmtVT = N0.getOperand(2).getValueType();
4179 SDValue F0 = N0.getOperand(0);
4180 SDValue F1 = N0.getOperand(1);
4181 if (matchOr(F0, F1)) {
4182 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
4183 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4184 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4185 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4186 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4187 }
4188 if (matchOr(F1, F0)) {
4189 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4190 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4191 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4192 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4193 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4194 }
4195
4196 return SDValue();
4197 }
4198
4199 /// Try to simplify a setcc built with the specified operands and cc. If it is
4200 /// unable to simplify it, return a null SDValue.
4201 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4202 ISD::CondCode Cond, bool foldBooleans,
4203 DAGCombinerInfo &DCI,
4204 const SDLoc &dl) const {
4205 SelectionDAG &DAG = DCI.DAG;
4206 const DataLayout &Layout = DAG.getDataLayout();
4207 EVT OpVT = N0.getValueType();
4208 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4209
4210 // Constant fold or commute setcc.
4211 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4212 return Fold;
4213
4214 bool N0ConstOrSplat =
4215 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4216 bool N1ConstOrSplat =
4217 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4218
4219 // Ensure that the constant occurs on the RHS and fold constant comparisons.
4220 // TODO: Handle non-splat vector constants. All undef causes trouble.
4221 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4222 // infinite loop here when we encounter one.
4223 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4224 if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
4225 (DCI.isBeforeLegalizeOps() ||
4226 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4227 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4228
4229 // If we have a subtract with the same 2 non-constant operands as this setcc
4230 // -- but in reverse order -- then try to commute the operands of this setcc
4231 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4232 // instruction on some targets.
4233 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4234 (DCI.isBeforeLegalizeOps() ||
4235 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4236 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4237 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4238 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4239
4240 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4241 return V;
4242
4243 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4244 return V;
4245
4246 if (auto *N1C = isConstOrConstSplat(N1)) {
4247 const APInt &C1 = N1C->getAPIntValue();
4248
4249 // Optimize some CTPOP cases.
4250 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4251 return V;
4252
4253 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4254 // X * Y == 0 --> (X == 0) || (Y == 0)
4255 // X * Y != 0 --> (X != 0) && (Y != 0)
4256 // TODO: This bails out if minsize is set, but if the target doesn't have a
4257 // single instruction multiply for this type, it would likely be
4258 // smaller to decompose.
    if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
        (N0->getFlags().hasNoUnsignedWrap() ||
         N0->getFlags().hasNoSignedWrap()) &&
        !Attr.hasFnAttr(Attribute::MinSize)) {
      SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
      SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
      unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
      return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
    }

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
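    // (For i32, ctlz produces a value in [0, 32], and only ctlz(0) == 32 has
    // bit 5 set, so the srl result is 1 exactly when X is zero.)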
    if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        isPowerOf2_32(N0.getScalarValueSizeInBits())) {
      if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
            ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
            // (srl (ctlz x), 5) == 0  -> X != 0
            // (srl (ctlz x), 5) != 1  -> X != 0
            Cond = ISD::SETNE;
          } else {
            // (srl (ctlz x), 5) != 0  -> X == 0
            // (srl (ctlz x), 5) == 1  -> X == 0
            Cond = ISD::SETEQ;
          }
          SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
                              Cond);
        }
      }
    }
  }

  // FIXME: Support vectors.
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // (zext x) == C --> x == (trunc C)
    // (sext x) == C --> x == (trunc C)
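    // (For example, (zext i8 %x to i32) == 200 becomes %x == (i8 200); the
    // MinBits checks below make sure no constant bits are lost in the
    // truncation.)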
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        DCI.isBeforeLegalize() && N0->hasOneUse()) {
      unsigned MinBits = N0.getValueSizeInBits();
      SDValue PreExt;
      bool Signed = false;
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
        // ZExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (N0->getOpcode() == ISD::AND) {
        // DAGCombine turns costly ZExts into ANDs
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
          if ((C->getAPIntValue()+1).isPowerOf2()) {
            MinBits = C->getAPIntValue().countTrailingOnes();
            PreExt = N0->getOperand(0);
          }
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
        // SExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
        Signed = true;
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
        // ZEXTLOAD / SEXTLOAD
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
          Signed = true;
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        }
      }

      // Figure out how many bits we need to preserve this constant.
      unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();

      // Make sure we're not losing bits from the constant.
      if (MinBits > 0 &&
          MinBits < C1.getBitWidth() &&
          MinBits >= ReqdBits) {
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
          // Will get folded away.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
          if (MinBits == 1 && C1 == 1)
            // Invert the condition.
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
        }

        // If truncating the setcc operands is not desirable, we can still
        // simplify the expression in some cases:
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, setne)    -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, seteq)    -> setcc (x, y, cc)
        // setcc (sext (setcc x, y, cc)), -1, setne)   -> setcc (x, y, inv(cc))
        // setcc (sext (setcc x, y, cc)), -1, seteq)   -> setcc (x, y, cc)
        SDValue TopSetCC = N0->getOperand(0);
        unsigned N0Opc = N0->getOpcode();
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
            TopSetCC.getOpcode() == ISD::SETCC &&
            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
            (isConstFalseVal(N1) ||
             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {

          bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
                         (!N1C->isZero() && Cond == ISD::SETNE);

          if (!Inverse)
            return TopSetCC;

          ISD::CondCode InvCond = ISD::getSetCCInverse(
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
              TopSetCC.getOperand(0).getValueType());
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
                              TopSetCC.getOperand(1),
                              InvCond);
        }
      }
    }

    // If the LHS is '(and load, const)', the RHS is 0, the test is for
    // equality or unsigned, and all 1 bits of the const are in the same
    // partial word, see if we can shorten the load.
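    // (For instance, on a little-endian target ((i32 load p) & 0xFF0000) == 0
    // can become ((i16 load p+2) & 0xFF) == 0, if the target allows the
    // narrower load.)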
    if (DCI.isBeforeLegalize() &&
        !ISD::isSignedIntSetCC(Cond) &&
        N0.getOpcode() == ISD::AND && C1 == 0 &&
        N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (Lod->isSimple() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask = N0.getConstantOperandAPInt(1);
        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          for (unsigned offset = 0; offset < origWidth / width; offset++) {
            if (Mask.isSubsetOf(newMask)) {
              if (Layout.isLittleEndian())
                bestOffset = (uint64_t)offset * (width / 8);
              else
                bestOffset = (origWidth / width - offset - 1) * (width / 8);
              bestMask = Mask.lshr(offset * (width / 8) * 8);
              bestWidth = width;
              break;
            }
            newMask <<= width;
          }
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
        if (newVT.isRound() &&
            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr =
                DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
          SDValue NewLoad =
              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                          Lod->getPointerInfo().getWithOffset(bestOffset),
                          Lod->getOriginalAlign());
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(
                                              bestMask.trunc(bestWidth),
                                              dl, newVT)),
                              DAG.getConstant(0LL, dl, newVT), Cond);
        }
      }
    }

    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
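      // (For example, (zext i8 %x to i32) is at most 0xFF, so testing it for
      // equality against 0x100 folds to false.)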
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ:
          return DAG.getConstant(0, dl, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE:
          return DAG.getConstant(1, dl, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), dl, VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
          EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);

          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                          NewConst, Cond);
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
        }
        break;
      }
      default:
        break; // TODO: Be more careful with signed comparisons.
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
               !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
                                      OpVT)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getMinSignedBits() > ExtSrcTyBits)
        return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

      assert(ExtDstTy == N0.getOperand(0).getValueType() &&
             ExtDstTy != ExtSrcTy && "Unexpected types!");
      APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
      SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
                                   DAG.getConstant(Imm, dl, ExtDstTy));
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
    } else if ((N1C->isZero() || N1C->isOne()) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
      if (N0.getOpcode() == ISD::SETCC &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
          (N0.getValueType() == MVT::i1 ||
           getBooleanContents(N0.getOperand(0).getValueType()) ==
               ZeroOrOneBooleanContent)) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
        if (DCI.isBeforeLegalizeOps() ||
            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isOneConstant(N0.getOperand(1))) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth - 1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR) {
            Val = N0.getOperand(0);
          } else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->isOne()) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          SDValue XorLHS = Op0.getOperand(0);
          SDValue XorRHS = Op0.getOperand(1);
          // Ensure that the input setccs return an i1 type or 0/1 value.
          if (Op0.getValueType() == MVT::i1 ||
              (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent &&
               getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent)) {
            // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
            Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
            return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
          }
        }
        if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::TRUNCATE, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
        if (Op0.getOpcode() == ISD::AssertZext &&
            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }

    // Given:
    //   icmp eq/ne (urem %x, %y), 0
    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
    //   icmp eq/ne %x, 0
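    // (%x is then 0 or a power of two, while %y, having at least 2 bits set,
    // is not a power of two and cannot evenly divide a power of two; so the
    // remainder is zero iff %x is zero.)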
    if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
    }

    // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
    // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
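    // ((ashr X, BW-1) replicates the sign bit across the whole value, so it
    // equals -1 exactly when X is negative.)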
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
        N1C && N1C->isAllOnes()) {
      return DAG.getSetCC(dl, VT, N0.getOperand(0),
                          DAG.getConstant(0, dl, OpVT),
                          Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
    }

    if (SDValue V =
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
      return V;
  }

  // These simplifications apply to splat vectors as well.
  // TODO: Handle more splat vector cases.
  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      // X >= MIN --> true
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      if (!VT.isVector()) { // TODO: Support this for vectors.
        // X >= C0 --> X > (C0 - 1)
        APInt C = C1 - 1;
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      // X <= MAX --> true
      if (C1 == MaxVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      // X <= C0 --> X < (C0 + 1)
      if (!VT.isVector()) { // TODO: Support this for vectors.
        APInt C = C1 + 1;
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
      if (C1 == MinVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setlt X, Max --> setne X, Max
        if (C1 == MaxVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setult X, 1, turn it into seteq X, 0
        if (C1 == MinVal + 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
      if (C1 == MaxVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setgt X, Min --> setne X, Min
        if (C1 == MinVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setugt X, Max-1, turn it into seteq X, Max
        if (C1 == MaxVal - 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
      // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
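      // (Hoisting the shift onto X leaves the AND mask as a plain constant,
      // which is more likely to fold into a target's test instruction.)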
      if (C1.isZero())
        if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
                VT, N0, N1, Cond, DCI, dl))
          return CC;

      // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
      // For example, when high 32-bits of i64 X are known clear:
      // all bits clear: (X | (Y << 32)) == 0  --> (X | Y) == 0
      // all bits set:   (X | (Y << 32)) == -1 --> (X & Y) == -1
      bool CmpZero = N1C->getAPIntValue().isZero();
      bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
      if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
        // Match or(lo,shl(hi,bw/2)) pattern.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
          unsigned EltBits = V.getScalarValueSizeInBits();
          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
            return false;
          SDValue LHS = V.getOperand(0);
          SDValue RHS = V.getOperand(1);
          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
          // The unshifted element must have zero upper bits.
          if (RHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(RHS.getOperand(1)) &&
              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(LHS, HiBits)) {
            Lo = LHS;
            Hi = RHS.getOperand(0);
            return true;
          }
          if (LHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(LHS.getOperand(1)) &&
              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(RHS, HiBits)) {
            Lo = RHS;
            Hi = LHS.getOperand(0);
            return true;
          }
          return false;
        };

        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
          unsigned EltBits = N0.getScalarValueSizeInBits();
          unsigned HalfBits = EltBits / 2;
          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
          SDValue NewN0 =
              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
        };

        SDValue Lo, Hi;
        if (IsConcat(N0, Lo, Hi))
          return MergeConcat(Lo, Hi);

        if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
          SDValue Lo0, Lo1, Hi0, Hi1;
          if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
              IsConcat(N0.getOperand(1), Lo1, Hi1)) {
            return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
                               DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
          }
        }
      }
    }

    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.
    // TODO: Support this for vectors after legalize ops.
    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
      // SETUGT X, SINTMAX  -> SETLT X, 0
      // SETUGE X, SINTMIN  -> SETLT X, 0
      if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
          (Cond == ISD::SETUGE && C1.isMinSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getConstant(0, dl, N1.getValueType()),
                            ISD::SETLT);

      // SETULT X, SINTMIN  -> SETGT X, -1
      // SETULE X, SINTMAX  -> SETGT X, -1
      if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
          (Cond == ISD::SETULE && C1.isMaxSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getAllOnesConstant(dl, N1.getValueType()),
                            ISD::SETGT);
    }
  }

  // Back to non-vector simplifications.
  // TODO: Can we do these for vector splats?
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    const APInt &C1 = N1C->getAPIntValue();
    EVT ShValTy = N0.getValueType();

    // Fold bit comparisons when we can. This will result in an
    // incorrect value when boolean false is negative one, unless
    // the bitsize is 1 in which case the false value is the same
    // in practice regardless of the representation.
    if ((VT.getSizeInBits() == 1 ||
         getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
        N0.getOpcode() == ISD::AND) {
      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        EVT ShiftTy =
            getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          unsigned ShCt = AndRHS->getAPIntValue().logBase2();
          if (AndRHS->getAPIntValue().isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(
                ISD::TRUNCATE, dl, VT,
                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                            DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
          // (X & 8) == 8  --> (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          unsigned ShCt = C1.logBase2();
          if (C1.isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(
                ISD::TRUNCATE, dl, VT,
                DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                            DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        }
      }
    }

    if (C1.getMinSignedBits() <= 64 &&
        !isLegalICmpImmediate(C1.getSExtValue())) {
      EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
      // (X & -256) == 256 -> (X >> 8) == 1
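      // (The mask keeps only bits 8 and up, so the low 8 bits of X cannot
      // affect the result; shifting them out shrinks the compare immediate.)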
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          const APInt &AndRHSC = AndRHS->getAPIntValue();
          if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
            unsigned ShiftBits = AndRHSC.countTrailingZeros();
            if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
              SDValue Shift =
                  DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
                              DAG.getConstant(ShiftBits, dl, ShiftTy));
              SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
              return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
            }
          }
        }
      } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
                 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
        bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
        // X <  0x100000000 -> (X >> 32) <  1
        // X >= 0x100000000 -> (X >> 32) >= 1
        // X <= 0x0ffffffff -> (X >> 32) <  1
        // X >  0x0ffffffff -> (X >> 32) >= 1
        unsigned ShiftBits;
        APInt NewC = C1;
        ISD::CondCode NewCond = Cond;
        if (AdjOne) {
          ShiftBits = C1.countTrailingOnes();
          NewC = NewC + 1;
          NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
        } else {
          ShiftBits = C1.countTrailingZeros();
        }
        NewC.lshrInPlace(ShiftBits);
        if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
            isLegalICmpImmediate(NewC.getSExtValue()) &&
            !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
          SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
          SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
        }
      }
    }
  }

  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
    auto *CFP = cast<ConstantFPSDNode>(N1);
    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");

    // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
    // constant if knowing that the operand is non-NaN is enough. We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);

    // setcc (fneg x), C -> setcc swap(pred) x, -C
    if (N0.getOpcode() == ISD::FNEG) {
      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
      }
    }

    // If the condition is not legal, see if we can find an equivalent one
    // which is legal.
    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
      // If the comparison was an awkward floating-point == or != and one of
      // the comparison operands is infinity or negative infinity, convert the
      // condition to a less-awkward <= or >=.
      if (CFP->getValueAPF().isInfinity()) {
        bool IsNegInf = CFP->getValueAPF().isNegative();
        ISD::CondCode NewCond = ISD::SETCC_INVALID;
        switch (Cond) {
        case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
        case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
        case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
        case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
        default: break;
        }
        if (NewCond != ISD::SETCC_INVALID &&
            isCondCodeLegal(NewCond, N0.getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0, N1, NewCond);
      }
    }
  }

  if (N0 == N1) {
    // The sext(setcc()) => setcc() optimization relies on the appropriate
    // constant being emitted.
    assert(!N0.getValueType().isInteger() &&
           "Integer types should be handled by FoldSetCC");

    bool EqTrue = ISD::isTrueWhenEqual(Cond);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2) // FP operators that are undefined on NaNs.
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    if (UOF == unsigned(EqTrue))
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond &&
        (DCI.isBeforeLegalizeOps() ||
         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }

  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1),
                              Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0),
                              Cond);
        if (isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      // If RHS is a legal immediate value for a compare instruction, we need
      // to be careful about increasing register pressure needlessly.
      bool LegalRHSImm = false;

      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
            return DAG.getSetCC(
                dl, VT, N0.getOperand(0),
                DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
                                dl, N0.getValueType()),
                Cond);

          // Turn (X^C1) == C2 --> X == C1^C2
          if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
            return DAG.getSetCC(
                dl, VT, N0.getOperand(0),
                DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
                                dl, N0.getValueType()),
                Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
            return DAG.getSetCC(
                dl, VT, N0.getOperand(1),
                DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
                                dl, N0.getValueType()),
                Cond);

        // Could RHSC fold directly into a compare?
        if (RHSC->getValueType(0).getSizeInBits() <= 64)
          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
      }

      // (X+Y) == X --> Y == 0 and similar folds.
      // Don't do this if X is an immediate that can fold into a cmp
      // instruction and X+Y has other uses. It could be an induction variable
      // chain, and the transform would increase register pressure.
      if (!LegalRHSImm || N0.hasOneUse())
        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
          return V;
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR)
      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
        return V;

    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
      return V;
  }

  // Fold remainder of division by a constant.
  if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
      N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
    // When division is cheap or optimizing for minimum size,
    // fall through to DIVREM creation by skipping this fold.
    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
      if (N0.getOpcode() == ISD::UREM) {
        if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      } else if (N0.getOpcode() == ISD::SREM) {
        if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      }
    }
  }

  // Fold away ALL boolean setcc's.
  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
    SDValue Temp;
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ: // X == Y  -> ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      N0 = DAG.getNOT(dl, Temp, OpVT);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE: // X != Y  -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y  --> X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  --> X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
      break;
    }
    if (VT.getScalarType() != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}

/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {

  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}

SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();

  if (S == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'p': // Address.
      return C_Address;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}

/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f"; // works for many targets
  return nullptr;
}

SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}

/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    ConstantSDNode *C;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementptr is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be
    // accessible while in this case the GA may be furthest from the root node,
    // which is likely an ISD::ADD.
    while (true) {
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended. Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      if (ConstraintLetter != 'n') {
        if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                   GA->getValueType(0),
                                                   Offset + GA->getOffset()));
          return;
        }
        if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
          Ops.push_back(DAG.getTargetBlockAddress(
              BA->getBlockAddress(), BA->getValueType(0),
              Offset + BA->getOffset(), BA->getTargetFlags()));
          return;
        }
        if (isa<BasicBlockSDNode>(Op)) {
          Ops.push_back(Op);
          return;
        }
      }
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}

void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
                                                    SmallVectorImpl<SDValue> &Ops,
                                                    SelectionDAG &DAG) const {
  return;
}

std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                             StringRef Constraint,
                                             MVT VT) const {
  if (Constraint.empty() || Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it. For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (const MCPhysReg &PR : *RC) {
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}

//===----------------------------------------------------------------------===//
// Constraint Selection.

/// Return true if this is an input operand that is a matching constraint
/// like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}

/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}

/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up
  // the ConstraintOperands list.
  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0;   // ResNo - The result number of the next output.
  unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
        break;
      }

      // The return value of the call is this value. As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
      break;
    case InlineAsm::isLabel:
      OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
      ++LabelNo;
      continue;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        OpTy = Call.getParamElementType(ArgNo);
        assert(OpTy && "Indirect operand must have elementtype attribute");
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpTy = IntegerType::get(OpTy->getContext(), BitSize);
          break;
        }
      }

      EVT VT = getAsmOperandValueType(DL, OpTy, true);
      OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
      ArgNo++;
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match; 0 = so-so match, up to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer and the other is floating point, or their sizes
          // differ, flag it as maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (AsmOperandInfo &cInfo : ConstraintOperands)
        if (cInfo.Type != InlineAsm::isClobber)
          cInfo.selectAlternative(bestMAIndex);
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}

/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Immediate:
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
  case TargetLowering::C_Address:
    return 3;
  }
  llvm_unreachable("Invalid constraint type");
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (const std::string &rCode : *rCodes) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, rCode.c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}

/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory, then using the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it. This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present. This prefers
///     'm' over 'r', for example.
///
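/// For example, given "imr" and a constant operand, rule 1 picks 'i'; given a
/// non-constant operand, rule 2 picks 'm', the most general of the three.
///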
ChooseConstraint(TargetLowering::AsmOperandInfo & OpInfo,const TargetLowering & TLI,SDValue Op,SelectionDAG * DAG)5669 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
5670 const TargetLowering &TLI,
5671 SDValue Op, SelectionDAG *DAG) {
5672 assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
5673 unsigned BestIdx = 0;
5674 TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
5675 int BestGenerality = -1;
5676
5677 // Loop over the options, keeping track of the most general one.
5678 for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
5679 TargetLowering::ConstraintType CType =
5680 TLI.getConstraintType(OpInfo.Codes[i]);
5681
5682 // Indirect 'other' or 'immediate' constraints are not allowed.
5683 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5684 CType == TargetLowering::C_Register ||
5685 CType == TargetLowering::C_RegisterClass))
5686 continue;
5687
5688 // If this is an 'other' or 'immediate' constraint, see if the operand is
5689 // valid for it. For example, on X86 we might have an 'rI' constraint. If
5690 // the operand is an integer in the range [0..31] we want to use I (saving a
5691 // load of a register), otherwise we must use 'r'.
5692 if ((CType == TargetLowering::C_Other ||
5693 CType == TargetLowering::C_Immediate) && Op.getNode()) {
5694 assert(OpInfo.Codes[i].size() == 1 &&
5695 "Unhandled multi-letter 'other' constraint");
5696 std::vector<SDValue> ResultOps;
5697 TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
5698 ResultOps, *DAG);
5699 if (!ResultOps.empty()) {
5700 BestType = CType;
5701 BestIdx = i;
5702 break;
5703 }
5704 }
5705
5706 // Things with matching constraints can only be registers, per gcc
5707 // documentation. This mainly affects "g" constraints.
5708 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5709 continue;
5710
5711 // This constraint letter is more general than the previous one, use it.
5712 int Generality = getConstraintGenerality(CType);
5713 if (Generality > BestGenerality) {
5714 BestType = CType;
5715 BestIdx = i;
5716 BestGenerality = Generality;
5717 }
5718 }
5719
5720 OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
5721 OpInfo.ConstraintType = BestType;
5722 }
5723
5724 /// Determines the constraint code and constraint type to use for the specific
5725 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5726 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5727 SDValue Op,
5728 SelectionDAG *DAG) const {
5729 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
5730
5731 // Single-letter constraints ('r') are very common.
5732 if (OpInfo.Codes.size() == 1) {
5733 OpInfo.ConstraintCode = OpInfo.Codes[0];
5734 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5735 } else {
5736 ChooseConstraint(OpInfo, *this, Op, DAG);
5737 }
5738
5739 // 'X' matches anything.
5740 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
5741 // Constants are handled elsewhere. For Functions, the type here is the
5742 // type of the result, which is not what we want to look at; leave them
5743 // alone.
5744 Value *v = OpInfo.CallOperandVal;
5745 if (isa<ConstantInt>(v) || isa<Function>(v)) {
5746 return;
5747 }
5748
5749 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
5750 OpInfo.ConstraintCode = "i";
5751 return;
5752 }
5753
5754 // Otherwise, try to resolve it to something we know about by looking at
5755 // the actual operand type.
5756 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
5757 OpInfo.ConstraintCode = Repl;
5758 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5759 }
5760 }
5761 }
5762
5763 /// Given an exact SDIV by a constant, create a multiplication
5764 /// with the multiplicative inverse of the constant.
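///
/// A worked example (constants assumed here; easy to re-check by hand): an
/// exact i32 divide by 6 = 3 * 2^1 first arithmetic-shifts the value right
/// by 1 (exact), then multiplies by the inverse of 3 modulo 2^32, which is
/// 0xAAAAAAAB (3 * 0xAAAAAAAB == 2^33 + 1). The Newton iteration below
/// doubles the number of correct low bits each step; modulo 2^8 it converges
/// for 3 as 3 -> 235 -> 171, and 3 * 171 == 513 == 1 (mod 2^8).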
5765 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
5766 const SDLoc &dl, SelectionDAG &DAG,
5767 SmallVectorImpl<SDNode *> &Created) {
5768 SDValue Op0 = N->getOperand(0);
5769 SDValue Op1 = N->getOperand(1);
5770 EVT VT = N->getValueType(0);
5771 EVT SVT = VT.getScalarType();
5772 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
5773 EVT ShSVT = ShVT.getScalarType();
5774
5775 bool UseSRA = false;
5776 SmallVector<SDValue, 16> Shifts, Factors;
5777
5778 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
5779 if (C->isZero())
5780 return false;
5781 APInt Divisor = C->getAPIntValue();
5782 unsigned Shift = Divisor.countTrailingZeros();
5783 if (Shift) {
5784 Divisor.ashrInPlace(Shift);
5785 UseSRA = true;
5786 }
5787 // Calculate the multiplicative inverse, using Newton's method.
5788 APInt t;
5789 APInt Factor = Divisor;
5790 while ((t = Divisor * Factor) != 1)
5791 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
5792 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
5793 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
5794 return true;
5795 };
5796
5797 // Collect all magic values from the build vector.
5798 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
5799 return SDValue();
5800
5801 SDValue Shift, Factor;
5802 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
5803 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
5804 Factor = DAG.getBuildVector(VT, dl, Factors);
5805 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
5806 assert(Shifts.size() == 1 && Factors.size() == 1 &&
5807 "Expected matchUnaryPredicate to return one element for scalable "
5808 "vectors");
5809 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
5810 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
5811 } else {
5812 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
5813 Shift = Shifts[0];
5814 Factor = Factors[0];
5815 }
5816
5817 SDValue Res = Op0;
5818
5819 // Shift the value upfront if it is even, so the LSB is one.
5820 if (UseSRA) {
5821 // TODO: For UDIV use SRL instead of SRA.
5822 SDNodeFlags Flags;
5823 Flags.setExact(true);
5824 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
5825 Created.push_back(Res.getNode());
5826 }
5827
5828 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
5829 }
5830
5831 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
5832 SelectionDAG &DAG,
5833 SmallVectorImpl<SDNode *> &Created) const {
5834 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5835 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5836 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5837 return SDValue(N, 0); // Lower SDIV as SDIV
5838 return SDValue();
5839 }
5840
5841 SDValue
5842 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
5843 SelectionDAG &DAG,
5844 SmallVectorImpl<SDNode *> &Created) const {
5845 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5846 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5847 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
5848 return SDValue(N, 0); // Lower SREM as SREM
5849 return SDValue();
5850 }
5851
5852 /// Given an ISD::SDIV node expressing a divide by constant,
5853 /// return a DAG expression to select that will generate the same value by
5854 /// multiplying by a magic number.
5855 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
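///
/// For example (constants assumed from the usual signed magic-number tables;
/// worth re-verifying against SignedDivisionByConstantInfo), a signed i32
/// divide by 7 lowers roughly to:
///   %q0 = mulhs i32 %n, 0x92492493  ; magic is negative, so the d>0/m<0 case
///   %q1 = add i32 %q0, %n           ; NumeratorFactor == 1
///   %q2 = ashr i32 %q1, 2           ; magics.ShiftAmount
///   %sb = lshr i32 %q2, 31          ; extract the sign bit
///   %q  = add i32 %q2, %sb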
5856 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
5857 bool IsAfterLegalization,
5858 SmallVectorImpl<SDNode *> &Created) const {
5859 SDLoc dl(N);
5860 EVT VT = N->getValueType(0);
5861 EVT SVT = VT.getScalarType();
5862 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
5863 EVT ShSVT = ShVT.getScalarType();
5864 unsigned EltBits = VT.getScalarSizeInBits();
5865 EVT MulVT;
5866
5867 // Check to see if we can do this.
5868 // FIXME: We should be more aggressive here.
5869 if (!isTypeLegal(VT)) {
5870 // Limit this to simple scalars for now.
5871 if (VT.isVector() || !VT.isSimple())
5872 return SDValue();
5873
5874 // If this type will be promoted to a large enough type with a legal
5875 // multiply operation, we can go ahead and do this transform.
5876 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
5877 return SDValue();
5878
5879 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
5880 if (MulVT.getSizeInBits() < (2 * EltBits) ||
5881 !isOperationLegal(ISD::MUL, MulVT))
5882 return SDValue();
5883 }
5884
5885 // If the sdiv has an 'exact' bit we can use a simpler lowering.
5886 if (N->getFlags().hasExact())
5887 return BuildExactSDIV(*this, N, dl, DAG, Created);
5888
5889 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
5890
5891 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
5892 if (C->isZero())
5893 return false;
5894
5895 const APInt &Divisor = C->getAPIntValue();
5896 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
5897 int NumeratorFactor = 0;
5898 int ShiftMask = -1;
5899
5900 if (Divisor.isOne() || Divisor.isAllOnes()) {
5901 // If d is +1/-1, we just multiply the numerator by +1/-1.
5902 NumeratorFactor = Divisor.getSExtValue();
5903 magics.Magic = 0;
5904 magics.ShiftAmount = 0;
5905 ShiftMask = 0;
5906 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
5907 // If d > 0 and m < 0, add the numerator.
5908 NumeratorFactor = 1;
5909 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
5910 // If d < 0 and m > 0, subtract the numerator.
5911 NumeratorFactor = -1;
5912 }
5913
5914 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
5915 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
5916 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
5917 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
5918 return true;
5919 };
5920
5921 SDValue N0 = N->getOperand(0);
5922 SDValue N1 = N->getOperand(1);
5923
5924 // Collect the shifts / magic values from each element.
5925 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
5926 return SDValue();
5927
5928 SDValue MagicFactor, Factor, Shift, ShiftMask;
5929 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
5930 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
5931 Factor = DAG.getBuildVector(VT, dl, Factors);
5932 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
5933 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
5934 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
5935 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
5936 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
5937 "Expected matchUnaryPredicate to return one element for scalable "
5938 "vectors");
5939 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
5940 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
5941 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
5942 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
5943 } else {
5944 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
5945 MagicFactor = MagicFactors[0];
5946 Factor = Factors[0];
5947 Shift = Shifts[0];
5948 ShiftMask = ShiftMasks[0];
5949 }
5950
5951 // Multiply the numerator (operand 0) by the magic value.
5952 // FIXME: We should support doing a MUL in a wider type.
5953 auto GetMULHS = [&](SDValue X, SDValue Y) {
5954     // If the type isn't legal, use a wider mul of the type calculated
5955 // earlier.
5956 if (!isTypeLegal(VT)) {
5957 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
5958 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
5959 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
5960 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
5961 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
5962 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
5963 }
5964
5965 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
5966 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
5967 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
5968 SDValue LoHi =
5969 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
5970 return SDValue(LoHi.getNode(), 1);
5971 }
5972 return SDValue();
5973 };
5974
5975 SDValue Q = GetMULHS(N0, MagicFactor);
5976 if (!Q)
5977 return SDValue();
5978
5979 Created.push_back(Q.getNode());
5980
5981 // (Optionally) Add/subtract the numerator using Factor.
5982 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
5983 Created.push_back(Factor.getNode());
5984 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
5985 Created.push_back(Q.getNode());
5986
5987 // Shift right algebraic by shift value.
5988 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
5989 Created.push_back(Q.getNode());
5990
5991 // Extract the sign bit, mask it and add it to the quotient.
5992 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
5993 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
5994 Created.push_back(T.getNode());
5995 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
5996 Created.push_back(T.getNode());
5997 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
5998 }
5999
6000 /// Given an ISD::UDIV node expressing a divide by constant,
6001 /// return a DAG expression to select that will generate the same value by
6002 /// multiplying by a magic number.
6003 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
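///
/// For example (constants assumed from the usual unsigned magic-number
/// tables; worth re-verifying against UnsignedDivisionByConstantInfo), an
/// unsigned i32 divide by 7 needs the NPQ fixup (magics.IsAdd) and lowers
/// roughly to:
///   %q0  = mulhu i32 %n, 0x24924925 ; magic
///   %npq = sub i32 %n, %q0
///   %npq = lshr i32 %npq, 1         ; NPQ acts as SRL-by-1
///   %q1  = add i32 %npq, %q0
///   %q   = lshr i32 %q1, 2          ; magics.PostShift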
6004 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6005 bool IsAfterLegalization,
6006 SmallVectorImpl<SDNode *> &Created) const {
6007 SDLoc dl(N);
6008 EVT VT = N->getValueType(0);
6009 EVT SVT = VT.getScalarType();
6010 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6011 EVT ShSVT = ShVT.getScalarType();
6012 unsigned EltBits = VT.getScalarSizeInBits();
6013 EVT MulVT;
6014
6015 // Check to see if we can do this.
6016 // FIXME: We should be more aggressive here.
6017 if (!isTypeLegal(VT)) {
6018 // Limit this to simple scalars for now.
6019 if (VT.isVector() || !VT.isSimple())
6020 return SDValue();
6021
6022 // If this type will be promoted to a large enough type with a legal
6023 // multiply operation, we can go ahead and do this transform.
6024 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6025 return SDValue();
6026
6027 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6028 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6029 !isOperationLegal(ISD::MUL, MulVT))
6030 return SDValue();
6031 }
6032
6033 SDValue N0 = N->getOperand(0);
6034 SDValue N1 = N->getOperand(1);
6035
6036 // Try to use leading zeros of the dividend to reduce the multiplier and
6037 // avoid expensive fixups.
6038 // TODO: Support vectors.
6039 unsigned LeadingZeros = 0;
6040 if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6041 assert(!isOneConstant(N1) && "Unexpected divisor");
6042 LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6043     // UnsignedDivisionByConstantInfo doesn't work correctly if the leading
6044     // zeros in the dividend exceed the leading zeros of the divisor.
6045 LeadingZeros =
6046 std::min(LeadingZeros,
6047 cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros());
6048 }
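  // For example (an illustrative assumption, not a measured result): if N0 is
  // known to be a zero-extended i8 value, it has at least 24 leading zero
  // bits, which lets UnsignedDivisionByConstantInfo choose a smaller magic
  // constant that avoids the more expensive NPQ fixup path below.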
6049
6050 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6051 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6052
6053 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6054 if (C->isZero())
6055 return false;
6056 const APInt& Divisor = C->getAPIntValue();
6057
6058 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6059
6060 // Magic algorithm doesn't work for division by 1. We need to emit a select
6061 // at the end.
6062 if (Divisor.isOne()) {
6063 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6064 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6065 } else {
6066 UnsignedDivisionByConstantInfo magics =
6067 UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6068
6069 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6070
6071 assert(magics.PreShift < Divisor.getBitWidth() &&
6072 "We shouldn't generate an undefined shift!");
6073 assert(magics.PostShift < Divisor.getBitWidth() &&
6074 "We shouldn't generate an undefined shift!");
6075 assert((!magics.IsAdd || magics.PreShift == 0) &&
6076 "Unexpected pre-shift");
6077 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6078 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6079 NPQFactor = DAG.getConstant(
6080 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6081 : APInt::getZero(EltBits),
6082 dl, SVT);
6083 UseNPQ |= magics.IsAdd;
6084 UsePreShift |= magics.PreShift != 0;
6085 UsePostShift |= magics.PostShift != 0;
6086 }
6087
6088 PreShifts.push_back(PreShift);
6089 MagicFactors.push_back(MagicFactor);
6090 NPQFactors.push_back(NPQFactor);
6091 PostShifts.push_back(PostShift);
6092 return true;
6093 };
6094
6095 // Collect the shifts/magic values from each element.
6096 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6097 return SDValue();
6098
6099 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6100 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6101 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6102 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6103 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6104 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6105 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6106 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6107 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6108 "Expected matchUnaryPredicate to return one for scalable vectors");
6109 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6110 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6111 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6112 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6113 } else {
6114 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6115 PreShift = PreShifts[0];
6116 MagicFactor = MagicFactors[0];
6117 PostShift = PostShifts[0];
6118 }
6119
6120 SDValue Q = N0;
6121 if (UsePreShift) {
6122 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6123 Created.push_back(Q.getNode());
6124 }
6125
6126 // FIXME: We should support doing a MUL in a wider type.
6127 auto GetMULHU = [&](SDValue X, SDValue Y) {
6128     // If the type isn't legal, use a wider mul of the type calculated
6129 // earlier.
6130 if (!isTypeLegal(VT)) {
6131 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6132 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6133 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6134 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6135 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6136 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6137 }
6138
6139 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6140 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6141 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6142 SDValue LoHi =
6143 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6144 return SDValue(LoHi.getNode(), 1);
6145 }
6146 return SDValue(); // No mulhu or equivalent
6147 };
6148
6149 // Multiply the numerator (operand 0) by the magic value.
6150 Q = GetMULHU(Q, MagicFactor);
6151 if (!Q)
6152 return SDValue();
6153
6154 Created.push_back(Q.getNode());
6155
6156 if (UseNPQ) {
6157 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6158 Created.push_back(NPQ.getNode());
6159
6160     // For vectors we might have a mix of non-NPQ/NPQ paths, so MULHU by
6161     // NPQFactor (2^(W-1) on NPQ lanes, acting as a SRL-by-1; 0 otherwise).
6162 if (VT.isVector())
6163 NPQ = GetMULHU(NPQ, NPQFactor);
6164 else
6165 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6166
6167 Created.push_back(NPQ.getNode());
6168
6169 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6170 Created.push_back(Q.getNode());
6171 }
6172
6173 if (UsePostShift) {
6174 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6175 Created.push_back(Q.getNode());
6176 }
6177
6178 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6179
6180 SDValue One = DAG.getConstant(1, dl, VT);
6181 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6182 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6183 }
6184
6185 /// If all values in Values that *don't* match the predicate are the same
6186 /// 'splat' value, then replace all values with that splat value.
6187 /// Else, if AlternativeReplacement was provided, then replace all values that
6188 /// do match the predicate with the AlternativeReplacement value.
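/// For example (hypothetical i32 lanes): with Values == <5, 0, 5, 5> and
/// Predicate == isNullConstant, every non-matching value is the same '5', so
/// the result is <5, 5, 5, 5>. With Values == <5, 0, 7, 5> no such splat
/// exists, so the '0' stays (or becomes AlternativeReplacement if provided).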
6189 static void
6190 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6191 std::function<bool(SDValue)> Predicate,
6192 SDValue AlternativeReplacement = SDValue()) {
6193 SDValue Replacement;
6194 // Is there a value for which the Predicate does *NOT* match? What is it?
6195 auto SplatValue = llvm::find_if_not(Values, Predicate);
6196 if (SplatValue != Values.end()) {
6197 // Does Values consist only of SplatValue's and values matching Predicate?
6198 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6199 return Value == *SplatValue || Predicate(Value);
6200 })) // Then we shall replace values matching predicate with SplatValue.
6201 Replacement = *SplatValue;
6202 }
6203 if (!Replacement) {
6204 // Oops, we did not find the "baseline" splat value.
6205 if (!AlternativeReplacement)
6206 return; // Nothing to do.
6207 // Let's replace with provided value then.
6208 Replacement = AlternativeReplacement;
6209 }
6210 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6211 }
6212
6213 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6214 /// where the divisor is constant and the comparison target is zero,
6215 /// return a DAG expression that will generate the same comparison result
6216 /// using only multiplications, additions and shifts/rotations.
6217 /// Ref: "Hacker's Delight" 10-17.
6218 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6219 SDValue CompTargetNode,
6220 ISD::CondCode Cond,
6221 DAGCombinerInfo &DCI,
6222 const SDLoc &DL) const {
6223 SmallVector<SDNode *, 5> Built;
6224 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6225 DCI, DL, Built)) {
6226 for (SDNode *N : Built)
6227 DCI.AddToWorklist(N);
6228 return Folded;
6229 }
6230
6231 return SDValue();
6232 }
6233
6234 SDValue
6235 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6236 SDValue CompTargetNode, ISD::CondCode Cond,
6237 DAGCombinerInfo &DCI, const SDLoc &DL,
6238 SmallVectorImpl<SDNode *> &Created) const {
6239 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6240 // - D must be constant, with D = D0 * 2^K where D0 is odd
6241 // - P is the multiplicative inverse of D0 modulo 2^W
6242 // - Q = floor(((2^W) - 1) / D)
6243 // where W is the width of the common type of N and D.
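  //
  // A worked i32 example (constants assumed here; easy to re-check by hand):
  // for D = 6 = 3 * 2^1 we get K = 1, P = inv(3) mod 2^32 = 0xAAAAAAAB and
  // Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, so:
  //   (seteq (urem N, 6), 0) -> (setule (rotr (mul N, 0xAAAAAAAB), 1),
  //                                     0x2AAAAAAA)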
6244 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6245 "Only applicable for (in)equality comparisons.");
6246
6247 SelectionDAG &DAG = DCI.DAG;
6248
6249 EVT VT = REMNode.getValueType();
6250 EVT SVT = VT.getScalarType();
6251 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6252 EVT ShSVT = ShVT.getScalarType();
6253
6254 // If MUL is unavailable, we cannot proceed in any case.
6255 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6256 return SDValue();
6257
6258 bool ComparingWithAllZeros = true;
6259 bool AllComparisonsWithNonZerosAreTautological = true;
6260 bool HadTautologicalLanes = false;
6261 bool AllLanesAreTautological = true;
6262 bool HadEvenDivisor = false;
6263 bool AllDivisorsArePowerOfTwo = true;
6264 bool HadTautologicalInvertedLanes = false;
6265 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6266
6267 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6268 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6269 if (CDiv->isZero())
6270 return false;
6271
6272 const APInt &D = CDiv->getAPIntValue();
6273 const APInt &Cmp = CCmp->getAPIntValue();
6274
6275 ComparingWithAllZeros &= Cmp.isZero();
6276
6277     // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6278     // if C2 is not less than C1, the comparison is always false.
6279     // But we will only be able to produce the comparison that will give the
6280     // opposite tautological answer. So this lane would need to be fixed up.
6281 bool TautologicalInvertedLane = D.ule(Cmp);
6282 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6283
6284 // If all lanes are tautological (either all divisors are ones, or divisor
6285 // is not greater than the constant we are comparing with),
6286 // we will prefer to avoid the fold.
6287 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6288 HadTautologicalLanes |= TautologicalLane;
6289 AllLanesAreTautological &= TautologicalLane;
6290
6291     // If we are comparing with non-zero, we'll need to subtract said
6292     // comparison value from the LHS. But there is no point in doing that if
6293     // every lane where we are comparing with non-zero is tautological.
6294 if (!Cmp.isZero())
6295 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6296
6297 // Decompose D into D0 * 2^K
6298 unsigned K = D.countTrailingZeros();
6299 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6300 APInt D0 = D.lshr(K);
6301
6302 // D is even if it has trailing zeros.
6303 HadEvenDivisor |= (K != 0);
6304 // D is a power-of-two if D0 is one.
6305 // If all divisors are power-of-two, we will prefer to avoid the fold.
6306 AllDivisorsArePowerOfTwo &= D0.isOne();
6307
6308 // P = inv(D0, 2^W)
6309 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6310 unsigned W = D.getBitWidth();
6311 APInt P = D0.zext(W + 1)
6312 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
6313 .trunc(W);
6314 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6315 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6316
6317 // Q = floor((2^W - 1) u/ D)
6318 // R = ((2^W - 1) u% D)
6319 APInt Q, R;
6320 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6321
6322 // If we are comparing with zero, then that comparison constant is okay,
6323 // else it may need to be one less than that.
6324 if (Cmp.ugt(R))
6325 Q -= 1;
6326
6327 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6328 "We are expecting that K is always less than all-ones for ShSVT");
6329
6330 // If the lane is tautological the result can be constant-folded.
6331 if (TautologicalLane) {
6332       // Set P and K to bogus values so we can try to splat them.
6333 P = 0;
6334 K = -1;
6335       // And ensure that the comparison constant is tautological:
6336       // it will always compare true/false.
6337 Q = -1;
6338 }
6339
6340 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6341 KAmts.push_back(
6342 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6343 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6344 return true;
6345 };
6346
6347 SDValue N = REMNode.getOperand(0);
6348 SDValue D = REMNode.getOperand(1);
6349
6350 // Collect the values from each element.
6351 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6352 return SDValue();
6353
6354 // If all lanes are tautological, the result can be constant-folded.
6355 if (AllLanesAreTautological)
6356 return SDValue();
6357
6358   // If this is a urem by a power-of-two, avoid the fold since it can be
6359   // best implemented as a bit test.
6360 if (AllDivisorsArePowerOfTwo)
6361 return SDValue();
6362
6363 SDValue PVal, KVal, QVal;
6364 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6365 if (HadTautologicalLanes) {
6366 // Try to turn PAmts into a splat, since we don't care about the values
6367       // that are currently '0'. If we can't, just keep the '0's.
6368 turnVectorIntoSplatVector(PAmts, isNullConstant);
6369 // Try to turn KAmts into a splat, since we don't care about the values
6370       // that are currently '-1'. If we can't, change them to '0's.
6371 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6372 DAG.getConstant(0, DL, ShSVT));
6373 }
6374
6375 PVal = DAG.getBuildVector(VT, DL, PAmts);
6376 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6377 QVal = DAG.getBuildVector(VT, DL, QAmts);
6378 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6379 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6380 "Expected matchBinaryPredicate to return one element for "
6381 "SPLAT_VECTORs");
6382 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6383 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6384 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6385 } else {
6386 PVal = PAmts[0];
6387 KVal = KAmts[0];
6388 QVal = QAmts[0];
6389 }
6390
6391 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6392 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6393 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6394 assert(CompTargetNode.getValueType() == N.getValueType() &&
6395 "Expecting that the types on LHS and RHS of comparisons match.");
6396 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6397 }
6398
6399 // (mul N, P)
6400 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6401 Created.push_back(Op0.getNode());
6402
6403 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6404 // divisors as a performance improvement, since rotating by 0 is a no-op.
6405 if (HadEvenDivisor) {
6406 // We need ROTR to do this.
6407 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6408 return SDValue();
6409 // UREM: (rotr (mul N, P), K)
6410 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6411 Created.push_back(Op0.getNode());
6412 }
6413
6414 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6415 SDValue NewCC =
6416 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6417 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6418 if (!HadTautologicalInvertedLanes)
6419 return NewCC;
6420
6421 // If any lanes previously compared always-false, the NewCC will give
6422 // always-true result for them, so we need to fixup those lanes.
6423 // Or the other way around for inequality predicate.
6424 assert(VT.isVector() && "Can/should only get here for vectors.");
6425 Created.push_back(NewCC.getNode());
6426
6427   // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6428   // if C2 is not less than C1, the comparison is always false.
6429   // But we have produced the comparison that will give the
6430   // opposite tautological answer. So these lanes would need to be fixed up.
6431 SDValue TautologicalInvertedChannels =
6432 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6433 Created.push_back(TautologicalInvertedChannels.getNode());
6434
6435 // NOTE: we avoid letting illegal types through even if we're before legalize
6436   // ops; legalization has a hard time producing good code for this.
6437 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6438 // If we have a vector select, let's replace the comparison results in the
6439 // affected lanes with the correct tautological result.
6440 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6441 DL, SETCCVT, SETCCVT);
6442 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6443 Replacement, NewCC);
6444 }
6445
6446 // Else, we can just invert the comparison result in the appropriate lanes.
6447 //
6448   // NOTE: see the VSELECT note above.
6449 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6450 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6451 TautologicalInvertedChannels);
6452
6453 return SDValue(); // Don't know how to lower.
6454 }
6455
6456 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6457 /// where the divisor is constant and the comparison target is zero,
6458 /// return a DAG expression that will generate the same comparison result
6459 /// using only multiplications, additions and shifts/rotations.
6460 /// Ref: "Hacker's Delight" 10-17.
6461 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6462 SDValue CompTargetNode,
6463 ISD::CondCode Cond,
6464 DAGCombinerInfo &DCI,
6465 const SDLoc &DL) const {
6466 SmallVector<SDNode *, 7> Built;
6467 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6468 DCI, DL, Built)) {
6469 assert(Built.size() <= 7 && "Max size prediction failed.");
6470 for (SDNode *N : Built)
6471 DCI.AddToWorklist(N);
6472 return Folded;
6473 }
6474
6475 return SDValue();
6476 }
6477
6478 SDValue
6479 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6480 SDValue CompTargetNode, ISD::CondCode Cond,
6481 DAGCombinerInfo &DCI, const SDLoc &DL,
6482 SmallVectorImpl<SDNode *> &Created) const {
6483 // Fold:
6484 // (seteq/ne (srem N, D), 0)
6485 // To:
6486 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6487 //
6488 // - D must be constant, with D = D0 * 2^K where D0 is odd
6489 // - P is the multiplicative inverse of D0 modulo 2^W
6490 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6491 // - Q = floor((2 * A) / (2^K))
6492 // where W is the width of the common type of N and D.
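  //
  // A worked i32 example (constants assumed here; easy to re-check by hand):
  // for D = 3 we get D0 = 3, K = 0, P = inv(3) mod 2^32 = 0xAAAAAAAB,
  // A = floor((2^31 - 1) / 3) = 0x2AAAAAAA (no low bits to clear) and
  // Q = floor((2 * A) / 2^0) = 0x55555554, so:
  //   (seteq (srem N, 3), 0)
  //     -> (setule (add (mul N, 0xAAAAAAAB), 0x2AAAAAAA), 0x55555554)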
6493 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6494 "Only applicable for (in)equality comparisons.");
6495
6496 SelectionDAG &DAG = DCI.DAG;
6497
6498 EVT VT = REMNode.getValueType();
6499 EVT SVT = VT.getScalarType();
6500 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6501 EVT ShSVT = ShVT.getScalarType();
6502
6503   // If we are after ops legalization, and MUL is unavailable, we cannot
6504   // proceed.
6505 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6506 return SDValue();
6507
6508 // TODO: Could support comparing with non-zero too.
6509 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6510 if (!CompTarget || !CompTarget->isZero())
6511 return SDValue();
6512
6513 bool HadIntMinDivisor = false;
6514 bool HadOneDivisor = false;
6515 bool AllDivisorsAreOnes = true;
6516 bool HadEvenDivisor = false;
6517 bool NeedToApplyOffset = false;
6518 bool AllDivisorsArePowerOfTwo = true;
6519 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6520
6521 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6522 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6523 if (C->isZero())
6524 return false;
6525
6526 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6527
6528 // WARNING: this fold is only valid for positive divisors!
6529 APInt D = C->getAPIntValue();
6530 if (D.isNegative())
6531 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6532
6533 HadIntMinDivisor |= D.isMinSignedValue();
6534
6535 // If all divisors are ones, we will prefer to avoid the fold.
6536 HadOneDivisor |= D.isOne();
6537 AllDivisorsAreOnes &= D.isOne();
6538
6539 // Decompose D into D0 * 2^K
6540 unsigned K = D.countTrailingZeros();
6541 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6542 APInt D0 = D.lshr(K);
6543
6544 if (!D.isMinSignedValue()) {
6545 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6546       // we don't care about this lane in this fold; we'll special-handle it.
6547 HadEvenDivisor |= (K != 0);
6548 }
6549
6550 // D is a power-of-two if D0 is one. This includes INT_MIN.
6551 // If all divisors are power-of-two, we will prefer to avoid the fold.
6552 AllDivisorsArePowerOfTwo &= D0.isOne();
6553
6554 // P = inv(D0, 2^W)
6555 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6556 unsigned W = D.getBitWidth();
6557 APInt P = D0.zext(W + 1)
6558 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
6559 .trunc(W);
6560 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6561 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6562
6563 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6564 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
6565 A.clearLowBits(K);
6566
6567 if (!D.isMinSignedValue()) {
6568       // If the divisor is INT_MIN, then we don't care about this lane in
6569       // this fold; we'll special-handle it.
6570 NeedToApplyOffset |= A != 0;
6571 }
6572
6573 // Q = floor((2 * A) / (2^K))
6574 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
6575
6576 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
6577 "We are expecting that A is always less than all-ones for SVT");
6578 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6579 "We are expecting that K is always less than all-ones for ShSVT");
6580
6581 // If the divisor is 1 the result can be constant-folded. Likewise, we
6582 // don't care about INT_MIN lanes, those can be set to undef if appropriate.
6583 if (D.isOne()) {
6584       // Set P, A and K to bogus values so we can try to splat them.
6585 P = 0;
6586 A = -1;
6587 K = -1;
6588
6589 // x ?% 1 == 0 <--> true <--> x u<= -1
6590 Q = -1;
6591 }
6592
6593 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6594 AAmts.push_back(DAG.getConstant(A, DL, SVT));
6595 KAmts.push_back(
6596 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6597 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6598 return true;
6599 };
6600
6601 SDValue N = REMNode.getOperand(0);
6602 SDValue D = REMNode.getOperand(1);
6603
6604 // Collect the values from each element.
6605 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
6606 return SDValue();
6607
6608   // If this is a srem by one, avoid the fold since it can be constant-folded.
6609 if (AllDivisorsAreOnes)
6610 return SDValue();
6611
6612   // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
6613   // since it can be best implemented as a bit test.
6614 if (AllDivisorsArePowerOfTwo)
6615 return SDValue();
6616
6617 SDValue PVal, AVal, KVal, QVal;
6618 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6619 if (HadOneDivisor) {
6620 // Try to turn PAmts into a splat, since we don't care about the values
6621       // that are currently '0'. If we can't, just keep the '0's.
6622 turnVectorIntoSplatVector(PAmts, isNullConstant);
6623 // Try to turn AAmts into a splat, since we don't care about the
6624       // values that are currently '-1'. If we can't, change them to '0's.
6625 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
6626 DAG.getConstant(0, DL, SVT));
6627 // Try to turn KAmts into a splat, since we don't care about the values
6628       // that are currently '-1'. If we can't, change them to '0's.
6629 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6630 DAG.getConstant(0, DL, ShSVT));
6631 }
6632
6633 PVal = DAG.getBuildVector(VT, DL, PAmts);
6634 AVal = DAG.getBuildVector(VT, DL, AAmts);
6635 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6636 QVal = DAG.getBuildVector(VT, DL, QAmts);
6637 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6638 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
6639 QAmts.size() == 1 &&
6640 "Expected matchUnaryPredicate to return one element for scalable "
6641 "vectors");
6642 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6643 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
6644 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6645 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6646 } else {
6647 assert(isa<ConstantSDNode>(D) && "Expected a constant");
6648 PVal = PAmts[0];
6649 AVal = AAmts[0];
6650 KVal = KAmts[0];
6651 QVal = QAmts[0];
6652 }
6653
6654 // (mul N, P)
6655 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6656 Created.push_back(Op0.getNode());
6657
6658 if (NeedToApplyOffset) {
6659 // We need ADD to do this.
6660 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
6661 return SDValue();
6662
6663 // (add (mul N, P), A)
6664 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
6665 Created.push_back(Op0.getNode());
6666 }
6667
6668 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6669 // divisors as a performance improvement, since rotating by 0 is a no-op.
6670 if (HadEvenDivisor) {
6671 // We need ROTR to do this.
6672 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6673 return SDValue();
6674 // SREM: (rotr (add (mul N, P), A), K)
6675 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6676 Created.push_back(Op0.getNode());
6677 }
6678
6679 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
6680 SDValue Fold =
6681 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6682 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6683
6684 // If we didn't have lanes with INT_MIN divisor, then we're done.
6685 if (!HadIntMinDivisor)
6686 return Fold;
6687
6688 // That fold is only valid for positive divisors. Which effectively means,
6689 // it is invalid for INT_MIN divisors. So if we have such a lane,
6690 // we must fix-up results for said lanes.
6691 assert(VT.isVector() && "Can/should only get here for vectors.");
6692
6693 // NOTE: we avoid letting illegal types through even if we're before legalize
6694   // ops; legalization has a hard time producing good code for the code that
6695 // follows.
6696 if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
6697 !isOperationLegalOrCustom(ISD::AND, VT) ||
6698 !isOperationLegalOrCustom(Cond, VT) ||
6699 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
6700 return SDValue();
6701
6702 Created.push_back(Fold.getNode());
6703
6704 SDValue IntMin = DAG.getConstant(
6705 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
6706 SDValue IntMax = DAG.getConstant(
6707 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
6708 SDValue Zero =
6709 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
6710
6711 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
6712 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
6713 Created.push_back(DivisorIsIntMin.getNode());
6714
6715 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
6716 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
6717 Created.push_back(Masked.getNode());
6718 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
6719 Created.push_back(MaskedIsZero.getNode());
6720
6721 // To produce final result we need to blend 2 vectors: 'SetCC' and
6722 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
6723 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
6724 // constant-folded, select can get lowered to a shuffle with constant mask.
6725 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
6726 MaskedIsZero, Fold);
6727
6728 return Blended;
6729 }
6730
6731 bool TargetLowering::
6732 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
6733 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
6734 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
6735 "be a constant integer");
6736 return true;
6737 }
6738
6739 return false;
6740 }
6741
6742 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
6743 const DenormalMode &Mode) const {
6744 SDLoc DL(Op);
6745 EVT VT = Op.getValueType();
6746 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6747 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
6748   // Test for a denormal input, which would make the estimate wrong.
6749 if (Mode.Input == DenormalMode::IEEE) {
6750 // This is specifically a check for the handling of denormal inputs,
6751 // not the result.
6752
6753 // Test = fabs(X) < SmallestNormal
6754 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
6755 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
6756 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
6757 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
6758 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
6759 }
6760 // Test = X == 0.0
6761 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
6762 }
6763
6764 SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
6765 bool LegalOps, bool OptForSize,
6766 NegatibleCost &Cost,
6767 unsigned Depth) const {
6768 // fneg is removable even if it has multiple uses.
6769 if (Op.getOpcode() == ISD::FNEG) {
6770 Cost = NegatibleCost::Cheaper;
6771 return Op.getOperand(0);
6772 }
6773
6774 // Don't recurse exponentially.
6775 if (Depth > SelectionDAG::MaxRecursionDepth)
6776 return SDValue();
6777
6778 // Pre-increment recursion depth for use in recursive calls.
6779 ++Depth;
6780 const SDNodeFlags Flags = Op->getFlags();
6781 const TargetOptions &Options = DAG.getTarget().Options;
6782 EVT VT = Op.getValueType();
6783 unsigned Opcode = Op.getOpcode();
6784
6785 // Don't allow anything with multiple uses unless we know it is free.
6786 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
6787 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
6788 isFPExtFree(VT, Op.getOperand(0).getValueType());
6789 if (!IsFreeExtend)
6790 return SDValue();
6791 }
6792
6793 auto RemoveDeadNode = [&](SDValue N) {
6794 if (N && N.getNode()->use_empty())
6795 DAG.RemoveDeadNode(N.getNode());
6796 };
6797
6798 SDLoc DL(Op);
6799
6800 // Because getNegatedExpression can delete nodes we need a handle to keep
6801 // temporary nodes alive in case the recursion manages to create an identical
6802 // node.
6803 std::list<HandleSDNode> Handles;
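  // For example (illustration only): negating (fmul X, (fneg Y)) recurses
  // into (fneg Y), which hits the ISD::FNEG fast path above and yields Y at
  // NegatibleCost::Cheaper; the result is then (fmul X, Y), and RemoveDeadNode
  // cleans up whichever negated candidate was not used.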
6804
6805 switch (Opcode) {
6806 case ISD::ConstantFP: {
6807 // Don't invert constant FP values after legalization unless the target says
6808 // the negated constant is legal.
6809 bool IsOpLegal =
6810 isOperationLegal(ISD::ConstantFP, VT) ||
6811 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
6812 OptForSize);
6813
6814 if (LegalOps && !IsOpLegal)
6815 break;
6816
6817 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
6818 V.changeSign();
6819 SDValue CFP = DAG.getConstantFP(V, DL, VT);
6820
6821     // If we already have the use of the negated floating constant, it is free
6822     // to negate it even if it has multiple uses.
6823 if (!Op.hasOneUse() && CFP.use_empty())
6824 break;
6825 Cost = NegatibleCost::Neutral;
6826 return CFP;
6827 }
6828 case ISD::BUILD_VECTOR: {
6829 // Only permit BUILD_VECTOR of constants.
6830 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
6831 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
6832 }))
6833 break;
6834
6835 bool IsOpLegal =
6836 (isOperationLegal(ISD::ConstantFP, VT) &&
6837 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
6838 llvm::all_of(Op->op_values(), [&](SDValue N) {
6839 return N.isUndef() ||
6840 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
6841 OptForSize);
6842 });
6843
6844 if (LegalOps && !IsOpLegal)
6845 break;
6846
6847 SmallVector<SDValue, 4> Ops;
6848 for (SDValue C : Op->op_values()) {
6849 if (C.isUndef()) {
6850 Ops.push_back(C);
6851 continue;
6852 }
6853 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
6854 V.changeSign();
6855 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
6856 }
6857 Cost = NegatibleCost::Neutral;
6858 return DAG.getBuildVector(VT, DL, Ops);
6859 }
6860 case ISD::FADD: {
6861 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
6862 break;
6863
6864 // After operation legalization, it might not be legal to create new FSUBs.
6865 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
6866 break;
6867 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
6868
6869 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
6870 NegatibleCost CostX = NegatibleCost::Expensive;
6871 SDValue NegX =
6872 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
6873 // Prevent this node from being deleted by the next call.
6874 if (NegX)
6875 Handles.emplace_back(NegX);
6876
6877 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
6878 NegatibleCost CostY = NegatibleCost::Expensive;
6879 SDValue NegY =
6880 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
6881
6882 // We're done with the handles.
6883 Handles.clear();
6884
6885     // Negate X if its cost is less than or equal to Y's cost.
6886 if (NegX && (CostX <= CostY)) {
6887 Cost = CostX;
6888 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
6889 if (NegY != N)
6890 RemoveDeadNode(NegY);
6891 return N;
6892 }
6893
6894 // Negate the Y if it is not expensive.
6895 if (NegY) {
6896 Cost = CostY;
6897 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
6898 if (NegX != N)
6899 RemoveDeadNode(NegX);
6900 return N;
6901 }
6902 break;
6903 }
6904 case ISD::FSUB: {
6905 // We can't turn -(A-B) into B-A when we honor signed zeros.
6906 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
6907 break;
6908
6909 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
6910 // fold (fneg (fsub 0, Y)) -> Y
6911 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
6912 if (C->isZero()) {
6913 Cost = NegatibleCost::Cheaper;
6914 return Y;
6915 }
6916
6917 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
6918 Cost = NegatibleCost::Neutral;
6919 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
6920 }
6921 case ISD::FMUL:
6922 case ISD::FDIV: {
6923 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
6924
6925 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
6926 NegatibleCost CostX = NegatibleCost::Expensive;
6927 SDValue NegX =
6928 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
6929 // Prevent this node from being deleted by the next call.
6930 if (NegX)
6931 Handles.emplace_back(NegX);
6932
6933 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
6934 NegatibleCost CostY = NegatibleCost::Expensive;
6935 SDValue NegY =
6936 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
6937
6938 // We're done with the handles.
6939 Handles.clear();
6940
6941     // Negate X if its cost is less than or equal to Y's cost.
6942 if (NegX && (CostX <= CostY)) {
6943 Cost = CostX;
6944 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
6945 if (NegY != N)
6946 RemoveDeadNode(NegY);
6947 return N;
6948 }
6949
6950 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
6951 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
6952 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
6953 break;
6954
6955 // Negate the Y if it is not expensive.
6956 if (NegY) {
6957 Cost = CostY;
6958 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
6959 if (NegX != N)
6960 RemoveDeadNode(NegX);
6961 return N;
6962 }
6963 break;
6964 }
6965 case ISD::FMA:
6966 case ISD::FMAD: {
6967 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
6968 break;
6969
6970 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
6971 NegatibleCost CostZ = NegatibleCost::Expensive;
6972 SDValue NegZ =
6973 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
6974     // Give up if we fail to negate Z.
6975 if (!NegZ)
6976 break;
6977
6978 // Prevent this node from being deleted by the next two calls.
6979 Handles.emplace_back(NegZ);
6980
6981 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
6982 NegatibleCost CostX = NegatibleCost::Expensive;
6983 SDValue NegX =
6984 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
6985 // Prevent this node from being deleted by the next call.
6986 if (NegX)
6987 Handles.emplace_back(NegX);
6988
6989 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
6990 NegatibleCost CostY = NegatibleCost::Expensive;
6991 SDValue NegY =
6992 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
6993
6994 // We're done with the handles.
6995 Handles.clear();
6996
6997     // Negate X if its cost is less than or equal to Y's cost.
6998 if (NegX && (CostX <= CostY)) {
6999 Cost = std::min(CostX, CostZ);
7000 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7001 if (NegY != N)
7002 RemoveDeadNode(NegY);
7003 return N;
7004 }
7005
7006 // Negate the Y if it is not expensive.
7007 if (NegY) {
7008 Cost = std::min(CostY, CostZ);
7009 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7010 if (NegX != N)
7011 RemoveDeadNode(NegX);
7012 return N;
7013 }
7014 break;
7015 }
7016
7017 case ISD::FP_EXTEND:
7018 case ISD::FSIN:
7019 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7020 OptForSize, Cost, Depth))
7021 return DAG.getNode(Opcode, DL, VT, NegV);
7022 break;
7023 case ISD::FP_ROUND:
7024 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7025 OptForSize, Cost, Depth))
7026 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7027 break;
7028 case ISD::SELECT:
7029 case ISD::VSELECT: {
7030 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7031 // iff at least one cost is cheaper and the other is neutral/cheaper
7032 SDValue LHS = Op.getOperand(1);
7033 NegatibleCost CostLHS = NegatibleCost::Expensive;
7034 SDValue NegLHS =
7035 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7036 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7037 RemoveDeadNode(NegLHS);
7038 break;
7039 }
7040
7041 // Prevent this node from being deleted by the next call.
7042 Handles.emplace_back(NegLHS);
7043
7044 SDValue RHS = Op.getOperand(2);
7045 NegatibleCost CostRHS = NegatibleCost::Expensive;
7046 SDValue NegRHS =
7047 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7048
7049 // We're done with the handles.
7050 Handles.clear();
7051
7052 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7053 (CostLHS != NegatibleCost::Cheaper &&
7054 CostRHS != NegatibleCost::Cheaper)) {
7055 RemoveDeadNode(NegLHS);
7056 RemoveDeadNode(NegRHS);
7057 break;
7058 }
7059
7060 Cost = std::min(CostLHS, CostRHS);
7061 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7062 }
7063 }
7064
7065 return SDValue();
7066 }
7067
7068 //===----------------------------------------------------------------------===//
7069 // Legalization Utilities
7070 //===----------------------------------------------------------------------===//
7071
7072 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7073 SDValue LHS, SDValue RHS,
7074 SmallVectorImpl<SDValue> &Result,
7075 EVT HiLoVT, SelectionDAG &DAG,
7076 MulExpansionKind Kind, SDValue LL,
7077 SDValue LH, SDValue RL, SDValue RH) const {
7078 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7079 Opcode == ISD::SMUL_LOHI);
7080
7081 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7082 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7083 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7084 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7085 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7086 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7087 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7088 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7089
7090 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7091 return false;
7092
7093 unsigned OuterBitSize = VT.getScalarSizeInBits();
7094 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7095
7096 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7097 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7098 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
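  // The expansion below is schoolbook multiplication on half-width digits:
  // with I = InnerBitSize, LHS = LH * 2^I + LL and RHS = RH * 2^I + RL, so
  //   LHS * RHS = LL*RL + (LL*RH + LH*RL) * 2^I + LH*RH * 2^(2*I)
  // where every partial product is a half-width MUL/MULH pair.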
7099
7100 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7101 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7102 bool Signed) -> bool {
7103 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7104 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7105 Hi = SDValue(Lo.getNode(), 1);
7106 return true;
7107 }
7108 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7109 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7110 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7111 return true;
7112 }
7113 return false;
7114 };
7115
7116 SDValue Lo, Hi;
7117
7118 if (!LL.getNode() && !RL.getNode() &&
7119 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7120 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7121 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7122 }
7123
7124 if (!LL.getNode())
7125 return false;
7126
7127 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7128 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7129 DAG.MaskedValueIsZero(RHS, HighMask)) {
7130 // The inputs are both zero-extended.
7131 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7132 Result.push_back(Lo);
7133 Result.push_back(Hi);
7134 if (Opcode != ISD::MUL) {
7135 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7136 Result.push_back(Zero);
7137 Result.push_back(Zero);
7138 }
7139 return true;
7140 }
7141 }
7142
7143 if (!VT.isVector() && Opcode == ISD::MUL &&
7144 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7145 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7146 // The input values are both sign-extended.
7147 // TODO non-MUL case?
7148 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7149 Result.push_back(Lo);
7150 Result.push_back(Hi);
7151 return true;
7152 }
7153 }
7154
7155 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7156 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7157
7158 if (!LH.getNode() && !RH.getNode() &&
7159 isOperationLegalOrCustom(ISD::SRL, VT) &&
7160 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7161 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7162 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7163 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7164 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7165 }
7166
7167 if (!LH.getNode())
7168 return false;
7169
7170 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7171 return false;
7172
7173 Result.push_back(Lo);
7174
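  // For intuition, with n = InnerBitSize, write LHS = LH * 2^n + LL and
  // RHS = RH * 2^n + RL. Then:
  //   LHS * RHS = LL*RL + (LL*RH + LH*RL) * 2^n + LH*RH * 2^(2n)
  // A plain MUL only needs the low 2n bits, so the LH*RH term and any carries
  // out of the middle partial products can be dropped, which is what the MUL
  // path below does.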
7175 if (Opcode == ISD::MUL) {
7176 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7177 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7178 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7179 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7180 Result.push_back(Hi);
7181 return true;
7182 }
7183
7184 // Compute the full width result.
7185 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7186 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7187 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7188 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7189 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7190 };
7191
7192 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7193 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7194 return false;
7195
7196 // This is effectively the add part of a multiply-add of half-sized operands,
7197 // so it cannot overflow.
7198 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7199
7200 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7201 return false;
7202
7203 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7204 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7205
7206 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7207 isOperationLegalOrCustom(ISD::ADDE, VT));
7208 if (UseGlue)
7209 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7210 Merge(Lo, Hi));
7211 else
7212 Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
7213 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7214
7215 SDValue Carry = Next.getValue(1);
7216 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7217 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7218
7219 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7220 return false;
7221
7222 if (UseGlue)
7223 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7224 Carry);
7225 else
7226 Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7227 Zero, Carry);
7228
7229 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7230
7231 if (Opcode == ISD::SMUL_LOHI) {
7232 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7233 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7234 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7235
7236 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7237 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7238 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7239 }
7240
7241 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7242 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7243 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7244 return true;
7245 }
7246
7247 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7248 SelectionDAG &DAG, MulExpansionKind Kind,
7249 SDValue LL, SDValue LH, SDValue RL,
7250 SDValue RH) const {
7251 SmallVector<SDValue, 2> Result;
7252 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7253 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7254 DAG, Kind, LL, LH, RL, RH);
7255 if (Ok) {
7256 assert(Result.size() == 2);
7257 Lo = Result[0];
7258 Hi = Result[1];
7259 }
7260 return Ok;
7261 }
7262
7263 // Optimize unsigned division or remainder by constants for types twice as large
7264 // as a legal VT.
7265 //
7266 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder can be
7267 // computed as:
7268 //   Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7269 //   Remainder = Sum % Constant
7270 //
7271 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7272 //
7273 // For division, we can compute the remainder using the algorithm described
7274 // above, subtract it from the dividend to get an exact multiple of Constant.
7275 // Then multiply that exact multiple by the multiplicative inverse modulo
7276 // (1 << (BitWidth / 2)) to get the quotient.
7277
7278 // If Constant is even, we can shift right the dividend and the divisor by the
7279 // number of trailing zeros in Constant before applying the remainder algorithm.
7280 // If we're after the quotient, we can subtract this value from the shifted
7281 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7282 // If we want the remainder, we shift the value left by the number of trailing
7283 // zeros and add the bits that were shifted out of the dividend.
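// As an illustrative C sketch (rem64 is a hypothetical helper, not part of
// this file): for a 64-bit dividend split into 32-bit halves and an odd
// divisor D with (1 << 32) % D == 1 (e.g. D == 3 or D == 5), the remainder
// reduces to a single half-width urem:
//
//   uint32_t rem64(uint32_t Lo, uint32_t Hi, uint32_t D) {
//     uint32_t Sum = Lo + Hi;     // may wrap
//     uint32_t Carry = Sum < Lo;  // detect the wrap
//     Sum += Carry;               // 2^32 == 1 (mod D), so fold the carry in
//     return Sum % D;             // Hi*2^32 + Lo == Hi + Lo (mod D)
//   }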
7284 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7285 SmallVectorImpl<SDValue> &Result,
7286 EVT HiLoVT, SelectionDAG &DAG,
7287 SDValue LL, SDValue LH) const {
7288 unsigned Opcode = N->getOpcode();
7289 EVT VT = N->getValueType(0);
7290
7291 // TODO: Support signed division/remainder.
7292 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7293 return false;
7294 assert(
7295 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7296 "Unexpected opcode");
7297
7298 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7299 if (!CN)
7300 return false;
7301
7302 APInt Divisor = CN->getAPIntValue();
7303 unsigned BitWidth = Divisor.getBitWidth();
7304 unsigned HBitWidth = BitWidth / 2;
7305 assert(VT.getScalarSizeInBits() == BitWidth &&
7306 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7307
7308 // Divisor needs to be less than (1 << HBitWidth).
7309 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7310 if (Divisor.uge(HalfMaxPlus1))
7311 return false;
7312
7313 // We depend on the UREM-by-constant optimization in DAGCombiner, which
7314 // requires a high multiply.
7315 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7316 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7317 return false;
7318
7319 // Don't expand if optimizing for size.
7320 if (DAG.shouldOptForSize())
7321 return false;
7322
7323 // Early out for 0 or 1 divisors.
7324 if (Divisor.ule(1))
7325 return false;
7326
7327 // If the divisor is even, shift it until it becomes odd.
7328 unsigned TrailingZeros = 0;
7329 if (!Divisor[0]) {
7330 TrailingZeros = Divisor.countTrailingZeros();
7331 Divisor.lshrInPlace(TrailingZeros);
7332 }
7333
7334 SDLoc dl(N);
7335 SDValue Sum;
7336 SDValue PartialRem;
7337
7338 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7339 // then add in the carry.
7340 // TODO: If we can't split it in half, we might be able to split into 3 or
7341 // more pieces using a smaller bit width.
7342 if (HalfMaxPlus1.urem(Divisor).isOneValue()) {
7343 assert(!LL == !LH && "Expected both input halves or no input halves!");
7344 if (!LL) {
7345 LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
7346 DAG.getIntPtrConstant(0, dl));
7347 LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
7348 DAG.getIntPtrConstant(1, dl));
7349 }
7350
7351 // Shift the input by the number of TrailingZeros in the divisor. The
7352 // shifted out bits will be added to the remainder later.
7353 if (TrailingZeros) {
7354 // Save the shifted off bits if we need the remainder.
7355 if (Opcode != ISD::UDIV) {
7356 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7357 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7358 DAG.getConstant(Mask, dl, HiLoVT));
7359 }
7360
7361 LL = DAG.getNode(
7362 ISD::OR, dl, HiLoVT,
7363 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7364 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7365 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7366 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7367 HiLoVT, dl)));
7368 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7369 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7370 }
7371
7372 // Use addcarry if we can, otherwise use a compare to detect overflow.
7373 EVT SetCCType =
7374 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7375 if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
7376 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7377 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7378 Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
7379 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7380 } else {
7381 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7382 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7383 // If the boolean for the target is 0 or 1, we can add the setcc result
7384 // directly.
7385 if (getBooleanContents(HiLoVT) ==
7386 TargetLoweringBase::ZeroOrOneBooleanContent)
7387 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7388 else
7389 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7390 DAG.getConstant(0, dl, HiLoVT));
7391 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7392 }
7393 }
7394
7395 // If we didn't find a sum, we can't do the expansion.
7396 if (!Sum)
7397 return false;
7398
7399 // Perform a HiLoVT urem on the Sum using the truncated divisor.
7400 SDValue RemL =
7401 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7402 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7403 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7404
7405 if (Opcode != ISD::UREM) {
7406 // Subtract the remainder from the shifted dividend.
7407 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7408 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7409
7410 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7411
7412 // Multiply by the multiplicative inverse of the divisor modulo
7413 // (1 << BitWidth).
7414 APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
7415 APInt MulFactor = Divisor.zext(BitWidth + 1);
7416 MulFactor = MulFactor.multiplicativeInverse(Mod);
7417 MulFactor = MulFactor.trunc(BitWidth);
7418
7419 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7420 DAG.getConstant(MulFactor, dl, VT));
7421
7422 // Split the quotient into low and high parts.
7423 SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
7424 DAG.getIntPtrConstant(0, dl));
7425 SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
7426 DAG.getIntPtrConstant(1, dl));
7427 Result.push_back(QuotL);
7428 Result.push_back(QuotH);
7429 }
7430
7431 if (Opcode != ISD::UDIV) {
7432 // If we shifted the input, shift the remainder left and add the bits we
7433 // shifted off the input.
7434 if (TrailingZeros) {
7435 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7436 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7437 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7438 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7439 }
7440 Result.push_back(RemL);
7441 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7442 }
7443
7444 return true;
7445 }
7446
7447 // Check that (every element of) Z is undef or not an exact multiple of BW.
7448 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7449 return ISD::matchUnaryPredicate(
7450 Z,
7451 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7452 true);
7453 }
7454
7455 static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
7456 EVT VT = Node->getValueType(0);
7457 SDValue ShX, ShY;
7458 SDValue ShAmt, InvShAmt;
7459 SDValue X = Node->getOperand(0);
7460 SDValue Y = Node->getOperand(1);
7461 SDValue Z = Node->getOperand(2);
7462 SDValue Mask = Node->getOperand(3);
7463 SDValue VL = Node->getOperand(4);
7464
7465 unsigned BW = VT.getScalarSizeInBits();
7466 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7467 SDLoc DL(SDValue(Node, 0));
7468
7469 EVT ShVT = Z.getValueType();
7470 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7471 // fshl: X << C | Y >> (BW - C)
7472 // fshr: X << (BW - C) | Y >> C
7473 // where C = Z % BW is not zero
7474 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7475 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7476 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7477 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7478 VL);
7479 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
7480 VL);
7481 } else {
7482 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7483 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
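    // Splitting the wide shift into 'shift by 1, then by BW - 1 - (Z % BW)'
    // keeps every shift amount strictly below BW even when Z % BW == 0, so no
    // out-of-range shift amount is ever produced.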
7484 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
7485 if (isPowerOf2_32(BW)) {
7486 // Z % BW -> Z & (BW - 1)
7487 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
7488 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7489 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
7490 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
7491 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
7492 } else {
7493 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7494 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7495 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
7496 }
7497
7498 SDValue One = DAG.getConstant(1, DL, ShVT);
7499 if (IsFSHL) {
7500 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
7501 SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
7502 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
7503 } else {
7504 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
7505 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
7506 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
7507 }
7508 }
7509 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
7510 }
7511
7512 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7513 SelectionDAG &DAG) const {
7514 if (Node->isVPOpcode())
7515 return expandVPFunnelShift(Node, DAG);
7516
7517 EVT VT = Node->getValueType(0);
7518
7519 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7520 !isOperationLegalOrCustom(ISD::SRL, VT) ||
7521 !isOperationLegalOrCustom(ISD::SUB, VT) ||
7522 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7523 return SDValue();
7524
7525 SDValue X = Node->getOperand(0);
7526 SDValue Y = Node->getOperand(1);
7527 SDValue Z = Node->getOperand(2);
7528
7529 unsigned BW = VT.getScalarSizeInBits();
7530 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7531 SDLoc DL(SDValue(Node, 0));
7532
7533 EVT ShVT = Z.getValueType();
7534
7535 // If a funnel shift in the other direction is more supported, use it.
7536 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7537 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7538 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7539 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7540 // fshl X, Y, Z -> fshr X, Y, -Z
7541 // fshr X, Y, Z -> fshl X, Y, -Z
7542 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7543 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
7544 } else {
7545 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7546 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7547 SDValue One = DAG.getConstant(1, DL, ShVT);
7548 if (IsFSHL) {
7549 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7550 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
7551 } else {
7552 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7553 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
7554 }
7555 Z = DAG.getNOT(DL, Z, ShVT);
7556 }
7557 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
7558 }
7559
7560 SDValue ShX, ShY;
7561 SDValue ShAmt, InvShAmt;
7562 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7563 // fshl: X << C | Y >> (BW - C)
7564 // fshr: X << (BW - C) | Y >> C
7565 // where C = Z % BW is not zero
7566 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7567 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7568 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
7569 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
7570 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
7571 } else {
7572 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7573 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7574 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
7575 if (isPowerOf2_32(BW)) {
7576 // Z % BW -> Z & (BW - 1)
7577 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
7578 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7579 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
7580 } else {
7581 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7582 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7583 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
7584 }
7585
7586 SDValue One = DAG.getConstant(1, DL, ShVT);
7587 if (IsFSHL) {
7588 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
7589 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
7590 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
7591 } else {
7592 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
7593 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
7594 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
7595 }
7596 }
7597 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
7598 }
7599
7600 // TODO: Merge with expandFunnelShift.
7601 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7602 SelectionDAG &DAG) const {
7603 EVT VT = Node->getValueType(0);
7604 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7605 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7606 SDValue Op0 = Node->getOperand(0);
7607 SDValue Op1 = Node->getOperand(1);
7608 SDLoc DL(SDValue(Node, 0));
7609
7610 EVT ShVT = Op1.getValueType();
7611 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7612
7613 // If a rotate in the other direction is more supported, use it.
7614 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7615 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7616 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
7617 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7618 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
7619 }
7620
7621 if (!AllowVectorOps && VT.isVector() &&
7622 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7623 !isOperationLegalOrCustom(ISD::SRL, VT) ||
7624 !isOperationLegalOrCustom(ISD::SUB, VT) ||
7625 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
7626 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
7627 return SDValue();
7628
7629 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
7630 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
7631 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
7632 SDValue ShVal;
7633 SDValue HsVal;
7634 if (isPowerOf2_32(EltSizeInBits)) {
7635 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7636 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
7637 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7638 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
7639 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
7640 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
7641 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
7642 } else {
7643 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7644 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
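    // As with the funnel-shift expansion, pre-shifting by 1 keeps both shift
    // amounts strictly below w even when c % w == 0; the naive form
    // 'x >> (w - c)' would need an out-of-range shift by w in that case.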
7645 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
7646 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
7647 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
7648 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
7649 SDValue One = DAG.getConstant(1, DL, ShVT);
7650 HsVal =
7651 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
7652 }
7653 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
7654 }
7655
7656 void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
7657 SelectionDAG &DAG) const {
7658 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
7659 EVT VT = Node->getValueType(0);
7660 unsigned VTBits = VT.getScalarSizeInBits();
7661 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
7662
7663 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
7664 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
7665 SDValue ShOpLo = Node->getOperand(0);
7666 SDValue ShOpHi = Node->getOperand(1);
7667 SDValue ShAmt = Node->getOperand(2);
7668 EVT ShAmtVT = ShAmt.getValueType();
7669 EVT ShAmtCCVT =
7670 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
7671 SDLoc dl(Node);
7672
7673 // ISD::FSHL and ISD::FSHR have defined overflow behavior, but ISD::SHL and
7674 // ISD::SRA/SRL nodes don't. Insert an AND to be safe; it's usually optimized
7675 // away during isel.
7676 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
7677 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
7678 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
7679 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
7680 : DAG.getConstant(0, dl, VT);
7681
7682 SDValue Tmp2, Tmp3;
7683 if (IsSHL) {
7684 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
7685 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
7686 } else {
7687 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
7688 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
7689 }
7690
7691 // If the shift amount is greater than or equal to the width of a part, we
7692 // don't use the result from the FSHL/FSHR. Insert a test and select the
7693 // appropriate values for large shift amounts.
7694 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
7695 DAG.getConstant(VTBits, dl, ShAmtVT));
7696 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
7697 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
7698
7699 if (IsSHL) {
7700 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
7701 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
7702 } else {
7703 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
7704 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
7705 }
7706 }
7707
7708 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
7709 SelectionDAG &DAG) const {
7710 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
7711 SDValue Src = Node->getOperand(OpNo);
7712 EVT SrcVT = Src.getValueType();
7713 EVT DstVT = Node->getValueType(0);
7714 SDLoc dl(SDValue(Node, 0));
7715
7716 // FIXME: Only f32 to i64 conversions are supported.
7717 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
7718 return false;
7719
7720 if (Node->isStrictFPOpcode())
7721 // When a NaN is converted to an integer a trap is allowed. We can't
7722 // use this expansion here because it would eliminate that trap. Other
7723 // traps are also allowed and cannot be eliminated. See
7724 // IEEE 754-2008 sec 5.8.
7725 return false;
7726
7727 // Expand f32 -> i64 conversion
7728 // This algorithm comes from compiler-rt's implementation of fixsfdi:
7729 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
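  // In C terms, the expansion is roughly (an illustrative sketch for in-range
  // inputs; 'bit_cast' stands for a bitwise reinterpret and the shift of the
  // sign bit is arithmetic):
  //
  //   int64_t fixsfdi(float x) {
  //     uint32_t Bits = bit_cast<uint32_t>(x);
  //     int32_t Exp   = (int32_t)((Bits & 0x7F800000) >> 23) - 127;
  //     if (Exp < 0) return 0;                              // |x| < 1
  //     int64_t Sign  = (int32_t)(Bits & 0x80000000) >> 31; // 0 or -1
  //     int64_t R     = (Bits & 0x007FFFFF) | 0x00800000;   // implicit bit
  //     R = Exp > 23 ? R << (Exp - 23) : R >> (23 - Exp);   // align mantissa
  //     return (R ^ Sign) - Sign;                           // apply the sign
  //   }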
7730 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
7731 EVT IntVT = SrcVT.changeTypeToInteger();
7732 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
7733
7734 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
7735 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
7736 SDValue Bias = DAG.getConstant(127, dl, IntVT);
7737 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
7738 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
7739 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
7740
7741 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
7742
7743 SDValue ExponentBits = DAG.getNode(
7744 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
7745 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
7746 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
7747
7748 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
7749 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
7750 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
7751 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
7752
7753 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
7754 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
7755 DAG.getConstant(0x00800000, dl, IntVT));
7756
7757 R = DAG.getZExtOrTrunc(R, dl, DstVT);
7758
7759 R = DAG.getSelectCC(
7760 dl, Exponent, ExponentLoBit,
7761 DAG.getNode(ISD::SHL, dl, DstVT, R,
7762 DAG.getZExtOrTrunc(
7763 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
7764 dl, IntShVT)),
7765 DAG.getNode(ISD::SRL, dl, DstVT, R,
7766 DAG.getZExtOrTrunc(
7767 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
7768 dl, IntShVT)),
7769 ISD::SETGT);
7770
7771 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
7772 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
7773
7774 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
7775 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
7776 return true;
7777 }
7778
7779 bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
7780 SDValue &Chain,
7781 SelectionDAG &DAG) const {
7782 SDLoc dl(SDValue(Node, 0));
7783 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
7784 SDValue Src = Node->getOperand(OpNo);
7785
7786 EVT SrcVT = Src.getValueType();
7787 EVT DstVT = Node->getValueType(0);
7788 EVT SetCCVT =
7789 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7790 EVT DstSetCCVT =
7791 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
7792
7793 // Only expand vector types if we have the appropriate vector bit operations.
7794 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
7795 ISD::FP_TO_SINT;
7796 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
7797 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
7798 return false;
7799
7800 // If the maximum float value is smaller than the signed integer range,
7801 // the destination signmask can't be represented by the float, so we can
7802 // just use FP_TO_SINT directly.
7803 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
7804 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
7805 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
7806 if (APFloat::opOverflow &
7807 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
7808 if (Node->isStrictFPOpcode()) {
7809 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
7810 { Node->getOperand(0), Src });
7811 Chain = Result.getValue(1);
7812 } else
7813 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
7814 return true;
7815 }
7816
7817 // Don't expand it if there isn't a cheap fsub instruction.
7818 if (!isOperationLegalOrCustom(
7819 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
7820 return false;
7821
7822 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
7823 SDValue Sel;
7824
7825 if (Node->isStrictFPOpcode()) {
7826 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
7827 Node->getOperand(0), /*IsSignaling*/ true);
7828 Chain = Sel.getValue(1);
7829 } else {
7830 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
7831 }
7832
7833 bool Strict = Node->isStrictFPOpcode() ||
7834 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
7835
7836 if (Strict) {
7837 // Expand based on the maximum range of FP_TO_SINT: if the value exceeds the
7838 // signmask, offset it first (the result should then be fully representable).
7839 // Sel = Src < 0x8000000000000000
7840 // FltOfs = select Sel, 0, 0x8000000000000000
7841 // IntOfs = select Sel, 0, 0x8000000000000000
7842 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
7843
7844 // TODO: Should any fast-math-flags be set for the FSUB?
7845 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
7846 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
7847 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
7848 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
7849 DAG.getConstant(0, dl, DstVT),
7850 DAG.getConstant(SignMask, dl, DstVT));
7851 SDValue SInt;
7852 if (Node->isStrictFPOpcode()) {
7853 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
7854 { Chain, Src, FltOfs });
7855 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
7856 { Val.getValue(1), Val });
7857 Chain = SInt.getValue(1);
7858 } else {
7859 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
7860 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
7861 }
7862 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
7863 } else {
7864 // Expand based on maximum range of FP_TO_SINT:
7865 // True = fp_to_sint(Src)
7866 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
7867 // Result = select (Src < 0x8000000000000000), True, False
7868
7869 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
7870 // TODO: Should any fast-math-flags be set for the FSUB?
7871 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
7872 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
7873 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
7874 DAG.getConstant(SignMask, dl, DstVT));
7875 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
7876 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
7877 }
7878 return true;
7879 }
7880
7881 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
7882 SDValue &Chain,
7883 SelectionDAG &DAG) const {
7884 // This transform is not correct for converting 0 when the rounding mode is
7885 // set to round toward negative infinity, as it will produce -0.0. So disable
7886 // it under strictfp.
7887 if (Node->isStrictFPOpcode())
7888 return false;
7889
7890 SDValue Src = Node->getOperand(0);
7891 EVT SrcVT = Src.getValueType();
7892 EVT DstVT = Node->getValueType(0);
7893
7894 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
7895 return false;
7896
7897 // Only expand vector types if we have the appropriate vector bit operations.
7898 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
7899 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
7900 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
7901 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
7902 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
7903 return false;
7904
7905 SDLoc dl(SDValue(Node, 0));
7906 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
7907
7908 // Implementation of unsigned i64 to f64 following the algorithm in
7909 // __floatundidf in compiler-rt. This implementation performs rounding
7910 // correctly in all rounding modes with the exception of converting 0
7911 // when rounding toward negative infinity. In that case the fsub will produce
7912 // -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
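  // In C terms, the trick is (an illustrative sketch; 'bit_cast' stands for a
  // bitwise reinterpret):
  //
  //   double u64_to_f64(uint64_t x) {
  //     // 2^84 + (x >> 32): the high half lands in the low mantissa bits.
  //     double Hi = bit_cast<double>((x >> 32)        | 0x4530000000000000);
  //     // 2^52 + (x & 0xFFFFFFFF): same idea with the 2^52 bit pattern.
  //     double Lo = bit_cast<double>((x & 0xFFFFFFFF) | 0x4330000000000000);
  //     // Subtracting (2^84 + 2^52) exactly yields hi*2^32 - 2^52; adding Lo
  //     // (== 2^52 + lo) then reconstructs hi*2^32 + lo with one rounding.
  //     return (Hi - bit_cast<double>(0x4530000000100000)) + Lo;
  //   }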
7913 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
7914 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
7915 BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
7916 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
7917 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
7918 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
7919
7920 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
7921 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
7922 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
7923 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
7924 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
7925 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
7926 SDValue HiSub =
7927 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
7928 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
7929 return true;
7930 }
7931
7932 SDValue
7933 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
7934 SelectionDAG &DAG) const {
7935 unsigned Opcode = Node->getOpcode();
7936 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
7937 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
7938 "Wrong opcode");
7939
7940 if (Node->getFlags().hasNoNaNs()) {
7941 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
7942 SDValue Op1 = Node->getOperand(0);
7943 SDValue Op2 = Node->getOperand(1);
7944 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
7945 // Copy FMF flags, but always set the no-signed-zeros flag
7946 // as this is implied by the FMINNUM/FMAXNUM semantics.
7947 SDNodeFlags Flags = Node->getFlags();
7948 Flags.setNoSignedZeros(true);
7949 SelCC->setFlags(Flags);
7950 return SelCC;
7951 }
7952
7953 return SDValue();
7954 }
7955
7956 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
7957 SelectionDAG &DAG) const {
7958 SDLoc dl(Node);
7959 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
7960 ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7961 EVT VT = Node->getValueType(0);
7962
7963 if (VT.isScalableVector())
7964 report_fatal_error(
7965 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
7966
7967 if (isOperationLegalOrCustom(NewOp, VT)) {
7968 SDValue Quiet0 = Node->getOperand(0);
7969 SDValue Quiet1 = Node->getOperand(1);
7970
7971 if (!Node->getFlags().hasNoNaNs()) {
7972 // Insert canonicalizes if it's possible we need to quiet to get correct
7973 // sNaN behavior.
7974 if (!DAG.isKnownNeverSNaN(Quiet0)) {
7975 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
7976 Node->getFlags());
7977 }
7978 if (!DAG.isKnownNeverSNaN(Quiet1)) {
7979 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
7980 Node->getFlags());
7981 }
7982 }
7983
7984 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
7985 }
7986
7987 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
7988 // instead if there are no NaNs.
7989 if (Node->getFlags().hasNoNaNs()) {
7990 unsigned IEEE2018Op =
7991 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
7992 if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
7993 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
7994 Node->getOperand(1), Node->getFlags());
7995 }
7996 }
7997
7998 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
7999 return SelCC;
8000
8001 return SDValue();
8002 }
8003
8004 SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8005 unsigned Test, SDNodeFlags Flags,
8006 const SDLoc &DL,
8007 SelectionDAG &DAG) const {
8008 EVT OperandVT = Op.getValueType();
8009 assert(OperandVT.isFloatingPoint());
8010
8011 // Degenerate cases.
8012 if (Test == 0)
8013 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8014 if ((Test & fcAllFlags) == fcAllFlags)
8015 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8016
8017 // PPC double double is a pair of doubles, of which the higher part determines
8018 // the value class.
8019 if (OperandVT == MVT::ppcf128) {
8020 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8021 DAG.getConstant(1, DL, MVT::i32));
8022 OperandVT = MVT::f64;
8023 }
8024
8025 // Some checks may be represented as the inversion of a simpler check, for
8026 // example "inf|normal|subnormal|zero" => !"nan".
8027 bool IsInverted = false;
8028 if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
8029 IsInverted = true;
8030 Test = InvertedCheck;
8031 }
8032
8033 // Floating-point type properties.
8034 EVT ScalarFloatVT = OperandVT.getScalarType();
8035 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8036 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8037 bool IsF80 = (ScalarFloatVT == MVT::f80);
8038
8039 // Some checks can be implemented using float comparisons, if floating point
8040 // exceptions are ignored.
8041 if (Flags.hasNoFPExcept() &&
8042 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8043 if (Test == fcZero)
8044 return DAG.getSetCC(DL, ResultVT, Op,
8045 DAG.getConstantFP(0.0, DL, OperandVT),
8046 IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8047 if (Test == fcNan)
8048 return DAG.getSetCC(DL, ResultVT, Op, Op,
8049 IsInverted ? ISD::SETO : ISD::SETUO);
8050 }
8051
8052 // In the general case use integer operations.
8053 unsigned BitSize = OperandVT.getScalarSizeInBits();
8054 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8055 if (OperandVT.isVector())
8056 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8057 OperandVT.getVectorElementCount());
8058 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8059
8060 // Various masks.
8061 APInt SignBit = APInt::getSignMask(BitSize);
8062 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8063 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8064 const unsigned ExplicitIntBitInF80 = 63;
8065 APInt ExpMask = Inf;
8066 if (IsF80)
8067 ExpMask.clearBit(ExplicitIntBitInF80);
8068 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8069 APInt QNaNBitMask =
8070 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8071 APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());
8072
8073 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8074 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8075 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8076 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8077 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8078 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8079
8080 SDValue Res;
8081 const auto appendResult = [&](SDValue PartialRes) {
8082 if (PartialRes) {
8083 if (Res)
8084 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8085 else
8086 Res = PartialRes;
8087 }
8088 };
8089
8090 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8091 const auto getIntBitIsSet = [&]() -> SDValue {
8092 if (!IntBitIsSetV) {
8093 APInt IntBitMask(BitSize, 0);
8094 IntBitMask.setBit(ExplicitIntBitInF80);
8095 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8096 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8097 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8098 }
8099 return IntBitIsSetV;
8100 };
8101
8102 // Split the value into sign bit and absolute value.
8103 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8104 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8105 DAG.getConstant(0, DL, IntVT), ISD::SETLT);
8106
8107 // Tests that involve more than one class should be processed first.
8108 SDValue PartialRes;
8109
8110 if (IsF80)
8111 ; // Detect finite numbers of f80 by checking individual classes because
8112 // they have different settings of the explicit integer bit.
8113 else if ((Test & fcFinite) == fcFinite) {
8114 // finite(V) ==> abs(V) < exp_mask
8115 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8116 Test &= ~fcFinite;
8117 } else if ((Test & fcFinite) == fcPosFinite) {
8118 // finite(V) && V > 0 ==> V < exp_mask
8119 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8120 Test &= ~fcPosFinite;
8121 } else if ((Test & fcFinite) == fcNegFinite) {
8122 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8123 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8124 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8125 Test &= ~fcNegFinite;
8126 }
8127 appendResult(PartialRes);
8128
8129 // Check for individual classes.
8130
8131 if (unsigned PartialCheck = Test & fcZero) {
8132 if (PartialCheck == fcPosZero)
8133 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8134 else if (PartialCheck == fcZero)
8135 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8136 else // ISD::fcNegZero
8137 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8138 appendResult(PartialRes);
8139 }
8140
8141 if (unsigned PartialCheck = Test & fcInf) {
8142 if (PartialCheck == fcPosInf)
8143 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8144 else if (PartialCheck == fcInf)
8145 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8146 else { // ISD::fcNegInf
8147 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8148 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8149 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8150 }
8151 appendResult(PartialRes);
8152 }
8153
8154 if (unsigned PartialCheck = Test & fcNan) {
8155 APInt InfWithQnanBit = Inf | QNaNBitMask;
8156 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8157 if (PartialCheck == fcNan) {
8158 // isnan(V) ==> abs(V) > int(inf)
8159 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8160 if (IsF80) {
8161 // Recognize unsupported values as NaNs for compatibility with glibc.
8162 // For such values, (exp(V)==0) == int_bit.
8163 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8164 SDValue ExpIsZero =
8165 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8166 SDValue IsPseudo =
8167 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8168 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8169 }
8170 } else if (PartialCheck == fcQNan) {
8171 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8172 PartialRes =
8173 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8174 } else { // ISD::fcSNan
8175 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8176 // abs(V) < (unsigned(Inf) | quiet_bit)
8177 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8178 SDValue IsNotQnan =
8179 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8180 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8181 }
8182 appendResult(PartialRes);
8183 }
8184
8185 if (unsigned PartialCheck = Test & fcSubnormal) {
8186 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8187 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
8188 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8189 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8190 SDValue VMinusOneV =
8191 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8192 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8193 if (PartialCheck == fcNegSubnormal)
8194 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8195 appendResult(PartialRes);
8196 }
8197
8198 if (unsigned PartialCheck = Test & fcNormal) {
8199 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
8200 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8201 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8202 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8203 APInt ExpLimit = ExpMask - ExpLSB;
8204 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8205 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8206 if (PartialCheck == fcNegNormal)
8207 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8208 else if (PartialCheck == fcPosNormal) {
8209 SDValue PosSignV =
8210 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8211 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8212 }
8213 if (IsF80)
8214 PartialRes =
8215 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8216 appendResult(PartialRes);
8217 }
8218
8219 if (!Res)
8220 return DAG.getConstant(IsInverted, DL, ResultVT);
8221 if (IsInverted)
8222 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8223 return Res;
8224 }
8225
8226 // Only expand vector types if we have the appropriate vector bit operations.
8227 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8228 assert(VT.isVector() && "Expected vector type");
8229 unsigned Len = VT.getScalarSizeInBits();
8230 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8231 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
8232 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
8233 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8234 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
8235 }
8236
8237 SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8238 SDLoc dl(Node);
8239 EVT VT = Node->getValueType(0);
8240 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8241 SDValue Op = Node->getOperand(0);
8242 unsigned Len = VT.getScalarSizeInBits();
8243 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8244
8245 // TODO: Add support for irregular type lengths.
8246 if (!(Len <= 128 && Len % 8 == 0))
8247 return SDValue();
8248
8249 // Only expand vector types if we have the appropriate vector bit operations.
8250 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8251 return SDValue();
8252
8253 // This is the "best" algorithm from
8254 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
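  // Concretely, for a 32-bit scalar the steps below are, in C (illustrative):
  //
  //   v = v - ((v >> 1) & 0x55555555);                // 2-bit partial sums
  //   v = (v & 0x33333333) + ((v >> 2) & 0x33333333); // 4-bit partial sums
  //   v = (v + (v >> 4)) & 0x0F0F0F0F;                // 8-bit partial sums
  //   v = (v * 0x01010101) >> 24;                     // sum the four bytes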
8255 SDValue Mask55 =
8256 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8257 SDValue Mask33 =
8258 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8259 SDValue Mask0F =
8260 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8261
8262 // v = v - ((v >> 1) & 0x55555555...)
8263 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8264 DAG.getNode(ISD::AND, dl, VT,
8265 DAG.getNode(ISD::SRL, dl, VT, Op,
8266 DAG.getConstant(1, dl, ShVT)),
8267 Mask55));
8268 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8269 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8270 DAG.getNode(ISD::AND, dl, VT,
8271 DAG.getNode(ISD::SRL, dl, VT, Op,
8272 DAG.getConstant(2, dl, ShVT)),
8273 Mask33));
8274 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8275 Op = DAG.getNode(ISD::AND, dl, VT,
8276 DAG.getNode(ISD::ADD, dl, VT, Op,
8277 DAG.getNode(ISD::SRL, dl, VT, Op,
8278 DAG.getConstant(4, dl, ShVT))),
8279 Mask0F);
8280
8281 if (Len <= 8)
8282 return Op;
8283
8284 // Avoid the multiply if we only have 2 bytes to add.
8285 // TODO: Only doing this for scalars because vectors weren't as obviously
8286 // improved.
8287 if (Len == 16 && !VT.isVector()) {
8288 // v = (v + (v >> 8)) & 0x00FF;
8289 return DAG.getNode(ISD::AND, dl, VT,
8290 DAG.getNode(ISD::ADD, dl, VT, Op,
8291 DAG.getNode(ISD::SRL, dl, VT, Op,
8292 DAG.getConstant(8, dl, ShVT))),
8293 DAG.getConstant(0xFF, dl, VT));
8294 }
8295
8296 // v = (v * 0x01010101...) >> (Len - 8)
8297 SDValue Mask01 =
8298 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8299 return DAG.getNode(ISD::SRL, dl, VT,
8300 DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
8301 DAG.getConstant(Len - 8, dl, ShVT));
8302 }
8303
8304 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8305 SDLoc dl(Node);
8306 EVT VT = Node->getValueType(0);
8307 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8308 SDValue Op = Node->getOperand(0);
8309 SDValue Mask = Node->getOperand(1);
8310 SDValue VL = Node->getOperand(2);
8311 unsigned Len = VT.getScalarSizeInBits();
8312 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8313
8314 // TODO: Add support for irregular type lengths.
8315 if (!(Len <= 128 && Len % 8 == 0))
8316 return SDValue();
8317
8318 // This is the same algorithm as in expandCTPOP, from
8319 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
8320 SDValue Mask55 =
8321 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8322 SDValue Mask33 =
8323 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8324 SDValue Mask0F =
8325 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8326
8327 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8328
8329 // v = v - ((v >> 1) & 0x55555555...)
8330 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8331 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8332 DAG.getConstant(1, dl, ShVT), Mask, VL),
8333 Mask55, Mask, VL);
8334 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8335
8336 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8337 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8338 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8339 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8340 DAG.getConstant(2, dl, ShVT), Mask, VL),
8341 Mask33, Mask, VL);
8342 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8343
8344 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8345 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8346 Mask, VL);
8347 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8348 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8349
8350 if (Len <= 8)
8351 return Op;
8352
8353 // v = (v * 0x01010101...) >> (Len - 8)
8354 SDValue Mask01 =
8355 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8356 return DAG.getNode(ISD::VP_LSHR, dl, VT,
8357 DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
8358 DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
8359 }
8360
8361 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8362 SDLoc dl(Node);
8363 EVT VT = Node->getValueType(0);
8364 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8365 SDValue Op = Node->getOperand(0);
8366 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8367
8368 // If the non-ZERO_UNDEF version is supported we can use that instead.
8369 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8370 isOperationLegalOrCustom(ISD::CTLZ, VT))
8371 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
8372
8373 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8374 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
8375 EVT SetCCVT =
8376 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8377 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
8378 SDValue Zero = DAG.getConstant(0, dl, VT);
8379 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
8380 return DAG.getSelect(dl, VT, SrcIsZero,
8381 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
8382 }
8383
8384 // Only expand vector types if we have the appropriate vector bit operations.
8385 // This includes the operations needed to expand CTPOP if it isn't supported.
8386 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8387 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
8388 !canExpandVectorCTPOP(*this, VT)) ||
8389 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8390 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8391 return SDValue();
8392
8393 // for now, we do this:
8394 // x = x | (x >> 1);
8395 // x = x | (x >> 2);
8396 // ...
8397 // x = x | (x >>16);
8398 // x = x | (x >>32); // for 64-bit input
8399 // return popcount(~x);
8400 //
8401 // Ref: "Hacker's Delight" by Henry Warren
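  // E.g. for an 8-bit x = 0b00010110, the ORs smear the top set bit downward,
  // giving x = 0b00011111; then ~x = 0b11100000 and popcount(~x) = 3, which
  // is exactly ctlz(0b00010110).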
8402 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8403 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8404 Op = DAG.getNode(ISD::OR, dl, VT, Op,
8405 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
8406 }
8407 Op = DAG.getNOT(dl, Op, VT);
8408 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
8409 }
8410
8411 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8412 SDLoc dl(Node);
8413 EVT VT = Node->getValueType(0);
8414 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8415 SDValue Op = Node->getOperand(0);
8416 SDValue Mask = Node->getOperand(1);
8417 SDValue VL = Node->getOperand(2);
8418 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8419
8420 // do this:
8421 // x = x | (x >> 1);
8422 // x = x | (x >> 2);
8423 // ...
8424 // x = x | (x >>16);
8425 // x = x | (x >>32); // for 64-bit input
8426 // return popcount(~x);
8427 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8428 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8429 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
8430 DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
8431 VL);
8432 }
8433 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
8434 VL);
8435 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
8436 }
8437
8438 SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
8439 const SDLoc &DL, EVT VT, SDValue Op,
8440 unsigned BitWidth) const {
8441 if (BitWidth != 32 && BitWidth != 64)
8442 return SDValue();
8443 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8444 : APInt(64, 0x0218A392CD3D5DBFULL);
8445 const DataLayout &TD = DAG.getDataLayout();
8446 MachinePointerInfo PtrInfo =
8447 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
8448 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
8449 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
8450 SDValue Lookup = DAG.getNode(
8451 ISD::SRL, DL, VT,
8452 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
8453 DAG.getConstant(DeBruijn, DL, VT)),
8454 DAG.getConstant(ShiftAmt, DL, VT));
8455 Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
8456
8457 SmallVector<uint8_t> Table(BitWidth, 0);
8458 for (unsigned i = 0; i < BitWidth; i++) {
8459 APInt Shl = DeBruijn.shl(i);
8460 APInt Lshr = Shl.lshr(ShiftAmt);
8461 Table[Lshr.getZExtValue()] = i;
8462 }
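  // The lookup works because Op & -Op isolates the lowest set bit, 2^k, so
  // the multiply shifts DeBruijn left by k and the SRL keeps a
  // Log2(BitWidth)-bit window that is unique for each k; Table maps that
  // window back to k. E.g. for 32 bits, (0x077CB531 << k) >> 27 visits each
  // of the 32 table slots exactly once as k runs from 0 to 31.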
8463
8464 // Create a ConstantArray in Constant Pool
8465 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
8466 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
8467 TD.getPrefTypeAlign(CA->getType()));
8468 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8469 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
8470 PtrInfo, MVT::i8);
8471 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
8472 return ExtLoad;
8473
8474 EVT SetCCVT =
8475 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8476 SDValue Zero = DAG.getConstant(0, DL, VT);
8477 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
8478 return DAG.getSelect(DL, VT, SrcIsZero,
8479 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
8480 }
8481
8482 SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8483 SDLoc dl(Node);
8484 EVT VT = Node->getValueType(0);
8485 SDValue Op = Node->getOperand(0);
8486 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8487
8488 // If the non-ZERO_UNDEF version is supported we can use that instead.
8489 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
8490 isOperationLegalOrCustom(ISD::CTTZ, VT))
8491 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
8492
8493 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8494 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
8495 EVT SetCCVT =
8496 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8497 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
8498 SDValue Zero = DAG.getConstant(0, dl, VT);
8499 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
8500 return DAG.getSelect(dl, VT, SrcIsZero,
8501 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
8502 }
8503
8504 // Only expand vector types if we have the appropriate vector bit operations.
8505 // This includes the operations needed to expand CTPOP if it isn't supported.
8506 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8507 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
8508 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
8509 !canExpandVectorCTPOP(*this, VT)) ||
8510 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8511 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
8512 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
8513 return SDValue();
8514
8515 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8516 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
8517 !isOperationLegal(ISD::CTLZ, VT))
8518 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
8519 return V;
8520
8521   // For now, expand to: { return popcount(~x & (x - 1)); }
8522   // unless the target has CTLZ but not CTPOP, in which case expand to:
8523   // { return bitwidth - ctlz(~x & (x - 1)); }
8524   // Ref: "Hacker's Delight" by Henry S. Warren, Jr.
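// Illustrative example: for x = 0b101000, x - 1 = 0b100111 and
// ~x = 0b010111, so ~x & (x - 1) = 0b000111 and its popcount, 3, is the
// number of trailing zeroes of x.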
8525 SDValue Tmp = DAG.getNode(
8526 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
8527 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
8528
8529 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
8530 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
8531 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
8532 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
8533 }
8534
8535 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
8536 }
8537
8538 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8539 SDValue Op = Node->getOperand(0);
8540 SDValue Mask = Node->getOperand(1);
8541 SDValue VL = Node->getOperand(2);
8542 SDLoc dl(Node);
8543 EVT VT = Node->getValueType(0);
8544
8545 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
8546 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
8547 DAG.getConstant(-1, dl, VT), Mask, VL);
8548 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
8549 DAG.getConstant(1, dl, VT), Mask, VL);
8550 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
8551 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
8552 }
8553
8554 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
8555 bool IsNegative) const {
8556 SDLoc dl(N);
8557 EVT VT = N->getValueType(0);
8558 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8559 SDValue Op = N->getOperand(0);
8560
8561 // abs(x) -> smax(x,sub(0,x))
8562 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
8563 isOperationLegal(ISD::SMAX, VT)) {
8564 SDValue Zero = DAG.getConstant(0, dl, VT);
8565 return DAG.getNode(ISD::SMAX, dl, VT, Op,
8566 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8567 }
8568
8569 // abs(x) -> umin(x,sub(0,x))
8570 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
8571 isOperationLegal(ISD::UMIN, VT)) {
8572 SDValue Zero = DAG.getConstant(0, dl, VT);
8573 Op = DAG.getFreeze(Op);
8574 return DAG.getNode(ISD::UMIN, dl, VT, Op,
8575 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8576 }
8577
8578 // 0 - abs(x) -> smin(x, sub(0,x))
8579 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
8580 isOperationLegal(ISD::SMIN, VT)) {
8581 Op = DAG.getFreeze(Op);
8582 SDValue Zero = DAG.getConstant(0, dl, VT);
8583 return DAG.getNode(ISD::SMIN, dl, VT, Op,
8584 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8585 }
8586
8587 // Only expand vector types if we have the appropriate vector operations.
8588 if (VT.isVector() &&
8589 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
8590 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
8591 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
8592 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
8593 return SDValue();
8594
8595 Op = DAG.getFreeze(Op);
8596 SDValue Shift =
8597 DAG.getNode(ISD::SRA, dl, VT, Op,
8598 DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
8599 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
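// Shift is 0 when Op is non-negative and all-ones when Op is negative, so
// Xor is Op or ~Op respectively; Xor - Shift then gives Op - 0 = Op, or
// ~Op - (-1) = ~Op + 1 = -Op.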
8600
8601 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
8602 if (!IsNegative)
8603 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
8604
8605 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
8606 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
8607 }
8608
8609 SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
8610 SDLoc dl(N);
8611 EVT VT = N->getValueType(0);
8612 SDValue Op = N->getOperand(0);
8613
8614 if (!VT.isSimple())
8615 return SDValue();
8616
8617 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
8618 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
8619 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
8620 default:
8621 return SDValue();
8622 case MVT::i16:
8623 // Use a rotate by 8. This can be further expanded if necessary.
8624 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
8625 case MVT::i32:
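// With bytes labelled A B C D from most to least significant, Tmp4..Tmp1
// below each hold one byte of the reversed value: Tmp4 = D000, Tmp3 = 0C00,
// Tmp2 = 00B0, Tmp1 = 000A; OR-ing them together yields D C B A.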
8626 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
8627 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
8628 DAG.getConstant(0xFF00, dl, VT));
8629 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
8630 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
8631 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
8632 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
8633 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
8634 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
8635 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
8636 case MVT::i64:
8637 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
8638 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
8639 DAG.getConstant(255ULL<<8, dl, VT));
8640 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
8641 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
8642 DAG.getConstant(255ULL<<16, dl, VT));
8643 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
8644 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
8645 DAG.getConstant(255ULL<<24, dl, VT));
8646 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
8647 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
8648 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
8649 DAG.getConstant(255ULL<<24, dl, VT));
8650 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
8651 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
8652 DAG.getConstant(255ULL<<16, dl, VT));
8653 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
8654 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
8655 DAG.getConstant(255ULL<<8, dl, VT));
8656 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
8657 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
8658 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
8659 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
8660 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
8661 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
8662 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
8663 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
8664 }
8665 }
8666
8667 SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
8668 SDLoc dl(N);
8669 EVT VT = N->getValueType(0);
8670 SDValue Op = N->getOperand(0);
8671 SDValue Mask = N->getOperand(1);
8672 SDValue EVL = N->getOperand(2);
8673
8674 if (!VT.isSimple())
8675 return SDValue();
8676
8677 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
8678 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
8679 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
8680 default:
8681 return SDValue();
8682 case MVT::i16:
8683 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
8684 Mask, EVL);
8685 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
8686 Mask, EVL);
8687 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
8688 case MVT::i32:
8689 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
8690 Mask, EVL);
8691 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
8692 Mask, EVL);
8693 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
8694 Mask, EVL);
8695 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
8696 Mask, EVL);
8697 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
8698 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
8699 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
8700 Mask, EVL);
8701 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
8702 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
8703 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
8704 case MVT::i64:
8705 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
8706 Mask, EVL);
8707 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
8708 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
8709 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
8710 Mask, EVL);
8711 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
8712 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
8713 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
8714 Mask, EVL);
8715 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
8716 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
8717 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
8718 Mask, EVL);
8719 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
8720 Mask, EVL);
8721 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
8722 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
8723 Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
8724 Mask, EVL);
8725 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
8726 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
8727 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
8728 Mask, EVL);
8729 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
8730 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
8731 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
8732 Mask, EVL);
8733 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
8734 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
8735 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
8736 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
8737 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
8738 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
8739 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
8740 }
8741 }
8742
8743 SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
8744 SDLoc dl(N);
8745 EVT VT = N->getValueType(0);
8746 SDValue Op = N->getOperand(0);
8747 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
8748 unsigned Sz = VT.getScalarSizeInBits();
8749
8750 SDValue Tmp, Tmp2, Tmp3;
8751
8752   // If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
8753   // i2 pairs, and finally the i1 pairs.
8754 // TODO: We can easily support i4/i2 legal types if any target ever does.
8755 if (Sz >= 8 && isPowerOf2_32(Sz)) {
8756 // Create the masks - repeating the pattern every byte.
8757 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
8758 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
8759 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
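// Illustrative example on a single byte abcdefgh: swapping nibbles gives
// efghabcd, swapping the 2-bit pairs gives ghefcdab, and swapping adjacent
// bits gives hgfedcba, the fully reversed byte. The BSWAP below extends the
// same idea to wider scalars by reversing the byte order first.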
8760
8761 // BSWAP if the type is wider than a single byte.
8762 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
8763
8764 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
8765 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
8766 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
8767 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
8768 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
8769 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8770
8771 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
8772 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
8773 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
8774 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
8775 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
8776 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8777
8778 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
8779 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
8780 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
8781 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
8782 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
8783 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8784 return Tmp;
8785 }
8786
8787 Tmp = DAG.getConstant(0, dl, VT);
8788 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
8789 if (I < J)
8790 Tmp2 =
8791 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
8792 else
8793 Tmp2 =
8794 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
8795
8796 APInt Shift(Sz, 1);
8797 Shift <<= J;
8798 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
8799 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
8800 }
8801
8802 return Tmp;
8803 }
8804
8805 SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
8806 assert(N->getOpcode() == ISD::VP_BITREVERSE);
8807
8808 SDLoc dl(N);
8809 EVT VT = N->getValueType(0);
8810 SDValue Op = N->getOperand(0);
8811 SDValue Mask = N->getOperand(1);
8812 SDValue EVL = N->getOperand(2);
8813 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
8814 unsigned Sz = VT.getScalarSizeInBits();
8815
8816 SDValue Tmp, Tmp2, Tmp3;
8817
8818   // If we can, perform BSWAP first and then mask+swap the i4 pairs, then the
8819   // i2 pairs, and finally the i1 pairs.
8820 // TODO: We can easily support i4/i2 legal types if any target ever does.
8821 if (Sz >= 8 && isPowerOf2_32(Sz)) {
8822 // Create the masks - repeating the pattern every byte.
8823 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
8824 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
8825 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
8826
8827 // BSWAP if the type is wider than a single byte.
8828 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
8829
8830 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
8831 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
8832 Mask, EVL);
8833 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
8834 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
8835 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
8836 Mask, EVL);
8837 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
8838 Mask, EVL);
8839 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
8840
8841 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
8842 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
8843 Mask, EVL);
8844 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
8845 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
8846 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
8847 Mask, EVL);
8848 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
8849 Mask, EVL);
8850 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
8851
8852 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
8853 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
8854 Mask, EVL);
8855 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
8856 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
8857 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
8858 Mask, EVL);
8859 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
8860 Mask, EVL);
8861 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
8862 return Tmp;
8863 }
8864 return SDValue();
8865 }
8866
8867 std::pair<SDValue, SDValue>
8868 TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
8869 SelectionDAG &DAG) const {
8870 SDLoc SL(LD);
8871 SDValue Chain = LD->getChain();
8872 SDValue BasePTR = LD->getBasePtr();
8873 EVT SrcVT = LD->getMemoryVT();
8874 EVT DstVT = LD->getValueType(0);
8875 ISD::LoadExtType ExtType = LD->getExtensionType();
8876
8877 if (SrcVT.isScalableVector())
8878 report_fatal_error("Cannot scalarize scalable vector loads");
8879
8880 unsigned NumElem = SrcVT.getVectorNumElements();
8881
8882 EVT SrcEltVT = SrcVT.getScalarType();
8883 EVT DstEltVT = DstVT.getScalarType();
8884
8885 // A vector must always be stored in memory as-is, i.e. without any padding
8886   // between the elements, since various code depends on it, e.g. in the
8887 // handling of a bitcast of a vector type to int, which may be done with a
8888 // vector store followed by an integer load. A vector that does not have
8889 // elements that are byte-sized must therefore be stored as an integer
8890 // built out of the extracted vector elements.
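// For example, a v4i3 vector occupies 12 bits, so below it is loaded as a
// single i12 (any-extended to its 16-bit store size) and each i3 element is
// then recovered with a shift by 3 * Idx followed by a mask of 0b111.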
8891 if (!SrcEltVT.isByteSized()) {
8892 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
8893 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
8894
8895 unsigned NumSrcBits = SrcVT.getSizeInBits();
8896 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
8897
8898 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
8899 SDValue SrcEltBitMask = DAG.getConstant(
8900 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
8901
8902 // Load the whole vector and avoid masking off the top bits as it makes
8903 // the codegen worse.
8904 SDValue Load =
8905 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
8906 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
8907 LD->getMemOperand()->getFlags(), LD->getAAInfo());
8908
8909 SmallVector<SDValue, 8> Vals;
8910 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
8911 unsigned ShiftIntoIdx =
8912 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
8913 SDValue ShiftAmount =
8914 DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
8915 LoadVT, SL, /*LegalTypes=*/false);
8916 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
8917 SDValue Elt =
8918 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
8919 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
8920
8921 if (ExtType != ISD::NON_EXTLOAD) {
8922 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
8923 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
8924 }
8925
8926 Vals.push_back(Scalar);
8927 }
8928
8929 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
8930 return std::make_pair(Value, Load.getValue(1));
8931 }
8932
8933 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
8934 assert(SrcEltVT.isByteSized());
8935
8936 SmallVector<SDValue, 8> Vals;
8937 SmallVector<SDValue, 8> LoadChains;
8938
8939 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
8940 SDValue ScalarLoad =
8941 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
8942 LD->getPointerInfo().getWithOffset(Idx * Stride),
8943 SrcEltVT, LD->getOriginalAlign(),
8944 LD->getMemOperand()->getFlags(), LD->getAAInfo());
8945
8946 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
8947
8948 Vals.push_back(ScalarLoad.getValue(0));
8949 LoadChains.push_back(ScalarLoad.getValue(1));
8950 }
8951
8952 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
8953 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
8954
8955 return std::make_pair(Value, NewChain);
8956 }
8957
8958 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
8959 SelectionDAG &DAG) const {
8960 SDLoc SL(ST);
8961
8962 SDValue Chain = ST->getChain();
8963 SDValue BasePtr = ST->getBasePtr();
8964 SDValue Value = ST->getValue();
8965 EVT StVT = ST->getMemoryVT();
8966
8967 if (StVT.isScalableVector())
8968 report_fatal_error("Cannot scalarize scalable vector stores");
8969
8970 // The type of the data we want to save
8971 EVT RegVT = Value.getValueType();
8972 EVT RegSclVT = RegVT.getScalarType();
8973
8974 // The type of data as saved in memory.
8975 EVT MemSclVT = StVT.getScalarType();
8976
8977 unsigned NumElem = StVT.getVectorNumElements();
8978
8979 // A vector must always be stored in memory as-is, i.e. without any padding
8980   // between the elements, since various code depends on it, e.g. in the
8981 // handling of a bitcast of a vector type to int, which may be done with a
8982 // vector store followed by an integer load. A vector that does not have
8983 // elements that are byte-sized must therefore be stored as an integer
8984 // built out of the extracted vector elements.
8985 if (!MemSclVT.isByteSized()) {
8986 unsigned NumBits = StVT.getSizeInBits();
8987 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
8988
8989 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
8990
8991 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
8992 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
8993 DAG.getVectorIdxConstant(Idx, SL));
8994 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
8995 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
8996 unsigned ShiftIntoIdx =
8997 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
8998 SDValue ShiftAmount =
8999 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9000 SDValue ShiftedElt =
9001 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9002 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9003 }
9004
9005 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9006 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9007 ST->getAAInfo());
9008 }
9009
9010   // Store stride in bytes.
9011 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9012 assert(Stride && "Zero stride!");
9013 // Extract each of the elements from the original vector and save them into
9014 // memory individually.
9015 SmallVector<SDValue, 8> Stores;
9016 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9017 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9018 DAG.getVectorIdxConstant(Idx, SL));
9019
9020 SDValue Ptr =
9021 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
9022
9023 // This scalar TruncStore may be illegal, but we legalize it later.
9024 SDValue Store = DAG.getTruncStore(
9025 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9026 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9027 ST->getAAInfo());
9028
9029 Stores.push_back(Store);
9030 }
9031
9032 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9033 }
9034
9035 std::pair<SDValue, SDValue>
9036 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
9037 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9038 "unaligned indexed loads not implemented!");
9039 SDValue Chain = LD->getChain();
9040 SDValue Ptr = LD->getBasePtr();
9041 EVT VT = LD->getValueType(0);
9042 EVT LoadedVT = LD->getMemoryVT();
9043 SDLoc dl(LD);
9044 auto &MF = DAG.getMachineFunction();
9045
9046 if (VT.isFloatingPoint() || VT.isVector()) {
9047 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9048 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9049 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9050 LoadedVT.isVector()) {
9051 // Scalarize the load and let the individual components be handled.
9052 return scalarizeVectorLoad(LD, DAG);
9053 }
9054
9055 // Expand to a (misaligned) integer load of the same size,
9056 // then bitconvert to floating point or vector.
9057 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9058 LD->getMemOperand());
9059 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9060 if (LoadedVT != VT)
9061 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9062 ISD::ANY_EXTEND, dl, VT, Result);
9063
9064 return std::make_pair(Result, newLoad.getValue(1));
9065 }
9066
9067     // Copy the value to an (aligned) stack slot using (unaligned) integer
9068     // loads and stores, then do an (aligned) load from the stack slot.
9069 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9070 unsigned LoadedBytes = LoadedVT.getStoreSize();
9071 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9072 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9073
9074 // Make sure the stack slot is also aligned for the register type.
9075 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9076 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
9077 SmallVector<SDValue, 8> Stores;
9078 SDValue StackPtr = StackBase;
9079 unsigned Offset = 0;
9080
9081 EVT PtrVT = Ptr.getValueType();
9082 EVT StackPtrVT = StackPtr.getValueType();
9083
9084 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9085 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9086
9087     // Do all but one of the copies using the full register width.
9088 for (unsigned i = 1; i < NumRegs; i++) {
9089 // Load one integer register's worth from the original location.
9090 SDValue Load = DAG.getLoad(
9091 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9092 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9093 LD->getAAInfo());
9094 // Follow the load with a store to the stack slot. Remember the store.
9095 Stores.push_back(DAG.getStore(
9096 Load.getValue(1), dl, Load, StackPtr,
9097 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9098 // Increment the pointers.
9099 Offset += RegBytes;
9100
9101 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9102 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9103 }
9104
9105 // The last copy may be partial. Do an extending load.
9106 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9107 8 * (LoadedBytes - Offset));
9108 SDValue Load =
9109 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9110 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9111 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9112 LD->getAAInfo());
9113 // Follow the load with a store to the stack slot. Remember the store.
9114 // On big-endian machines this requires a truncating store to ensure
9115 // that the bits end up in the right place.
9116 Stores.push_back(DAG.getTruncStore(
9117 Load.getValue(1), dl, Load, StackPtr,
9118 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9119
9120 // The order of the stores doesn't matter - say it with a TokenFactor.
9121 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9122
9123 // Finally, perform the original load only redirected to the stack slot.
9124 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9125 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9126 LoadedVT);
9127
9128 // Callers expect a MERGE_VALUES node.
9129 return std::make_pair(Load, TF);
9130 }
9131
9132 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9133 "Unaligned load of unsupported type.");
9134
9135 // Compute the new VT that is half the size of the old one. This is an
9136 // integer MVT.
9137 unsigned NumBits = LoadedVT.getSizeInBits();
9138   EVT NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
9140 NumBits >>= 1;
9141
9142 Align Alignment = LD->getOriginalAlign();
9143 unsigned IncrementSize = NumBits / 8;
9144 ISD::LoadExtType HiExtType = LD->getExtensionType();
9145
9146 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9147 if (HiExtType == ISD::NON_EXTLOAD)
9148 HiExtType = ISD::ZEXTLOAD;
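// For example, an unaligned i32 load becomes two i16 loads: on a
// little-endian target, Lo is zero-extended from Ptr, Hi is loaded from
// Ptr + 2, and the result is (Hi << 16) | Lo.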
9149
9150 // Load the value in two parts
9151 SDValue Lo, Hi;
9152 if (DAG.getDataLayout().isLittleEndian()) {
9153 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9154 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9155 LD->getAAInfo());
9156
9157 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
9158 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9159 LD->getPointerInfo().getWithOffset(IncrementSize),
9160 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9161 LD->getAAInfo());
9162 } else {
9163 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9164 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9165 LD->getAAInfo());
9166
9167 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
9168 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9169 LD->getPointerInfo().getWithOffset(IncrementSize),
9170 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9171 LD->getAAInfo());
9172 }
9173
9174   // Aggregate the two parts.
9175 SDValue ShiftAmount =
9176 DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
9177 DAG.getDataLayout()));
9178 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9179 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9180
9181 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9182 Hi.getValue(1));
9183
9184 return std::make_pair(Result, TF);
9185 }
9186
9187 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9188 SelectionDAG &DAG) const {
9189 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9190 "unaligned indexed stores not implemented!");
9191 SDValue Chain = ST->getChain();
9192 SDValue Ptr = ST->getBasePtr();
9193 SDValue Val = ST->getValue();
9194 EVT VT = Val.getValueType();
9195 Align Alignment = ST->getOriginalAlign();
9196 auto &MF = DAG.getMachineFunction();
9197 EVT StoreMemVT = ST->getMemoryVT();
9198
9199 SDLoc dl(ST);
9200 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9201 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
9202 if (isTypeLegal(intVT)) {
9203 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
9204 StoreMemVT.isVector()) {
9205 // Scalarize the store and let the individual components be handled.
9206 SDValue Result = scalarizeVectorStore(ST, DAG);
9207 return Result;
9208 }
9209 // Expand to a bitconvert of the value to the integer type of the
9210 // same size, then a (misaligned) int store.
9211 // FIXME: Does not handle truncating floating point stores!
9212 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
9213 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
9214 Alignment, ST->getMemOperand()->getFlags());
9215 return Result;
9216 }
9217     // Do an (aligned) store to a stack slot, then copy from the stack slot
9218 // to the final destination using (unaligned) integer loads and stores.
9219 MVT RegVT = getRegisterType(
9220 *DAG.getContext(),
9221 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
9222 EVT PtrVT = Ptr.getValueType();
9223 unsigned StoredBytes = StoreMemVT.getStoreSize();
9224 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9225 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9226
9227 // Make sure the stack slot is also aligned for the register type.
9228 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
9229 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
9230
9231 // Perform the original store, only redirected to the stack slot.
9232 SDValue Store = DAG.getTruncStore(
9233 Chain, dl, Val, StackPtr,
9234 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
9235
9236 EVT StackPtrVT = StackPtr.getValueType();
9237
9238 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9239 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9240 SmallVector<SDValue, 8> Stores;
9241 unsigned Offset = 0;
9242
9243     // Do all but one of the copies using the full register width.
9244 for (unsigned i = 1; i < NumRegs; i++) {
9245 // Load one integer register's worth from the stack slot.
9246 SDValue Load = DAG.getLoad(
9247 RegVT, dl, Store, StackPtr,
9248 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
9249 // Store it to the final location. Remember the store.
9250 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
9251 ST->getPointerInfo().getWithOffset(Offset),
9252 ST->getOriginalAlign(),
9253 ST->getMemOperand()->getFlags()));
9254 // Increment the pointers.
9255 Offset += RegBytes;
9256 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9257 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9258 }
9259
9260 // The last store may be partial. Do a truncating store. On big-endian
9261 // machines this requires an extending load from the stack slot to ensure
9262 // that the bits are in the right place.
9263 EVT LoadMemVT =
9264 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
9265
9266 // Load from the stack slot.
9267 SDValue Load = DAG.getExtLoad(
9268 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
9269 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
9270
9271 Stores.push_back(
9272 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
9273 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
9274 ST->getOriginalAlign(),
9275 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
9276 // The order of the stores doesn't matter - say it with a TokenFactor.
9277 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9278 return Result;
9279 }
9280
9281 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9282 "Unaligned store of unknown type.");
9283   // Get the half-sized VT.
9284 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
9285 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9286 unsigned IncrementSize = NumBits / 8;
9287
9288 // Divide the stored value in two parts.
9289 SDValue ShiftAmount = DAG.getConstant(
9290 NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
9291 SDValue Lo = Val;
9292 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
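// For example, an unaligned i32 store becomes two i16 truncating stores:
// Lo holds the low half of Val, Hi = Val >> 16 holds the high half, and on
// a little-endian target they are stored at Ptr and Ptr + 2 respectively.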
9293
9294 // Store the two parts
9295 SDValue Store1, Store2;
9296 Store1 = DAG.getTruncStore(Chain, dl,
9297 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9298 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
9299 ST->getMemOperand()->getFlags());
9300
9301 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
9302 Store2 = DAG.getTruncStore(
9303 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9304 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
9305 ST->getMemOperand()->getFlags(), ST->getAAInfo());
9306
9307 SDValue Result =
9308 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9309 return Result;
9310 }
9311
9312 SDValue
9313 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
9314 const SDLoc &DL, EVT DataVT,
9315 SelectionDAG &DAG,
9316 bool IsCompressedMemory) const {
9317 SDValue Increment;
9318 EVT AddrVT = Addr.getValueType();
9319 EVT MaskVT = Mask.getValueType();
9320 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9321 "Incompatible types of Data and Mask");
9322 if (IsCompressedMemory) {
9323 if (DataVT.isScalableVector())
9324 report_fatal_error(
9325 "Cannot currently handle compressed memory with scalable vectors");
9326     // Increment the pointer according to the number of '1's in the mask.
9327 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
9328 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
9329 if (MaskIntVT.getSizeInBits() < 32) {
9330 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9331 MaskIntVT = MVT::i32;
9332 }
9333
9334 // Count '1's with POPCNT.
9335 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
9336 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
9337 // Scale is an element size in bytes.
9338 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
9339 AddrVT);
9340 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
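// For example, a compressed store with mask 0b1011 writes three elements,
// so the address advances by 3 * (element size in bytes).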
9341 } else if (DataVT.isScalableVector()) {
9342 Increment = DAG.getVScale(DL, AddrVT,
9343 APInt(AddrVT.getFixedSizeInBits(),
9344 DataVT.getStoreSize().getKnownMinValue()));
9345 } else
9346 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
9347
9348 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
9349 }
9350
9351 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
9352 EVT VecVT, const SDLoc &dl,
9353 ElementCount SubEC) {
9354 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9355 "Cannot index a scalable vector within a fixed-width vector");
9356
9357 unsigned NElts = VecVT.getVectorMinNumElements();
9358 unsigned NumSubElts = SubEC.getKnownMinValue();
9359 EVT IdxVT = Idx.getValueType();
9360
9361 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
9362     // If this is a constant index and we know that the value plus the number
9363     // of elements in the subvector minus one is less than the minimum number
9364     // of elements, then it's safe to return Idx.
9365 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
9366 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9367 return Idx;
9368 SDValue VS =
9369 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
9370 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9371 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
9372 DAG.getConstant(NumSubElts, dl, IdxVT));
9373 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
9374 }
9375 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
9376 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
9377 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
9378 DAG.getConstant(Imm, dl, IdxVT));
9379 }
9380 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9381 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
9382 DAG.getConstant(MaxIndex, dl, IdxVT));
9383 }
9384
9385 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9386 SDValue VecPtr, EVT VecVT,
9387 SDValue Index) const {
9388 return getVectorSubVecPointer(
9389 DAG, VecPtr, VecVT,
9390 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
9391 Index);
9392 }
9393
9394 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
9395 SDValue VecPtr, EVT VecVT,
9396 EVT SubVecVT,
9397 SDValue Index) const {
9398 SDLoc dl(Index);
9399 // Make sure the index type is big enough to compute in.
9400 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
9401
9402 EVT EltVT = VecVT.getVectorElementType();
9403
9404 // Calculate the element offset and add it to the pointer.
9405 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
9406 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
9407 "Converting bits to bytes lost precision");
9408 assert(SubVecVT.getVectorElementType() == EltVT &&
9409 "Sub-vector must be a vector with matching element type");
9410 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
9411 SubVecVT.getVectorElementCount());
9412
9413 EVT IdxVT = Index.getValueType();
9414 if (SubVecVT.isScalableVector())
9415 Index =
9416 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
9417 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
9418
9419 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
9420 DAG.getConstant(EltSize, dl, IdxVT));
9421 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
9422 }
9423
9424 //===----------------------------------------------------------------------===//
9425 // Implementation of Emulated TLS Model
9426 //===----------------------------------------------------------------------===//
9427
9428 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
9429 SelectionDAG &DAG) const {
9430   // Access to the address of TLS variable xyz is lowered to a function call:
9431 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
9432 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9433 PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
9434 SDLoc dl(GA);
9435
9436 ArgListTy Args;
9437 ArgListEntry Entry;
9438 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
9439 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
9440 StringRef EmuTlsVarName(NameString);
9441 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
9442   assert(EmuTlsVar && "Cannot find EmuTlsVar");
9443 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
9444 Entry.Ty = VoidPtrType;
9445 Args.push_back(Entry);
9446
9447 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
9448
9449 TargetLowering::CallLoweringInfo CLI(DAG);
9450 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
9451 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
9452 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9453
9454   // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
9455   // calls. At least for X86 targets; maybe good for other targets too?
9456 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9457   MFI.setAdjustsStack(true); // Is this only for the X86 target?
9458 MFI.setHasCalls(true);
9459
9460 assert((GA->getOffset() == 0) &&
9461 "Emulated TLS must have zero offset in GlobalAddressSDNode");
9462 return CallResult.first;
9463 }
9464
9465 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
9466 SelectionDAG &DAG) const {
9467 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9468 if (!isCtlzFast())
9469 return SDValue();
9470 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
9471 SDLoc dl(Op);
9472 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9473 if (C->isZero() && CC == ISD::SETEQ) {
9474 EVT VT = Op.getOperand(0).getValueType();
9475 SDValue Zext = Op.getOperand(0);
9476 if (VT.bitsLT(MVT::i32)) {
9477 VT = MVT::i32;
9478 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
9479 }
9480 unsigned Log2b = Log2_32(VT.getSizeInBits());
9481 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
9482 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9483 DAG.getConstant(Log2b, dl, MVT::i32));
9484 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9485 }
9486 }
9487 return SDValue();
9488 }
9489
9490 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
9491 SDValue Op0 = Node->getOperand(0);
9492 SDValue Op1 = Node->getOperand(1);
9493 EVT VT = Op0.getValueType();
9494 unsigned Opcode = Node->getOpcode();
9495 SDLoc DL(Node);
9496
9497 // umin(x,y) -> sub(x,usubsat(x,y))
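// (usubsat(x, y) == x - umin(x, y), so subtracting it from x leaves
// umin(x, y); e.g. for x = 5, y = 3: 5 - usubsat(5, 3) = 5 - 2 = 3.)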
9498 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
9499 isOperationLegal(ISD::USUBSAT, VT)) {
9500 return DAG.getNode(ISD::SUB, DL, VT, Op0,
9501 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
9502 }
9503
9504 // umax(x,y) -> add(x,usubsat(y,x))
9505 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
9506 isOperationLegal(ISD::USUBSAT, VT)) {
9507 return DAG.getNode(ISD::ADD, DL, VT, Op0,
9508 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
9509 }
9510
9511 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
9512 ISD::CondCode CC;
9513 switch (Opcode) {
9514 default: llvm_unreachable("How did we get here?");
9515 case ISD::SMAX: CC = ISD::SETGT; break;
9516 case ISD::SMIN: CC = ISD::SETLT; break;
9517 case ISD::UMAX: CC = ISD::SETUGT; break;
9518 case ISD::UMIN: CC = ISD::SETULT; break;
9519 }
9520
9521 // FIXME: Should really try to split the vector in case it's legal on a
9522 // subvector.
9523 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9524 return DAG.UnrollVectorOp(Node);
9525
9526 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9527 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
9528 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
9529 }
9530
9531 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
9532 unsigned Opcode = Node->getOpcode();
9533 SDValue LHS = Node->getOperand(0);
9534 SDValue RHS = Node->getOperand(1);
9535 EVT VT = LHS.getValueType();
9536 SDLoc dl(Node);
9537
9538 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
9539 assert(VT.isInteger() && "Expected operands to be integers");
9540
9541 // usub.sat(a, b) -> umax(a, b) - b
9542 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
9543 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
9544 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
9545 }
9546
9547 // uadd.sat(a, b) -> umin(a, ~b) + b
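// (If a <= ~b the sum cannot wrap and umin yields a, giving the exact
// a + b; otherwise umin yields ~b, and ~b + b is all-ones, the saturated
// result.)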
9548 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
9549 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
9550 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
9551 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
9552 }
9553
9554 unsigned OverflowOp;
9555 switch (Opcode) {
9556 case ISD::SADDSAT:
9557 OverflowOp = ISD::SADDO;
9558 break;
9559 case ISD::UADDSAT:
9560 OverflowOp = ISD::UADDO;
9561 break;
9562 case ISD::SSUBSAT:
9563 OverflowOp = ISD::SSUBO;
9564 break;
9565 case ISD::USUBSAT:
9566 OverflowOp = ISD::USUBO;
9567 break;
9568 default:
9569 llvm_unreachable("Expected method to receive signed or unsigned saturation "
9570 "addition or subtraction node.");
9571 }
9572
9573 // FIXME: Should really try to split the vector in case it's legal on a
9574 // subvector.
9575 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9576 return DAG.UnrollVectorOp(Node);
9577
9578 unsigned BitWidth = LHS.getScalarValueSizeInBits();
9579 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9580 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
9581 SDValue SumDiff = Result.getValue(0);
9582 SDValue Overflow = Result.getValue(1);
9583 SDValue Zero = DAG.getConstant(0, dl, VT);
9584 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
9585
9586 if (Opcode == ISD::UADDSAT) {
9587 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9588 // (LHS + RHS) | OverflowMask
9589 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
9590 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
9591 }
9592 // Overflow ? 0xffff.... : (LHS + RHS)
9593 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
9594 }
9595
9596 if (Opcode == ISD::USUBSAT) {
9597 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9598 // (LHS - RHS) & ~OverflowMask
9599 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
9600 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
9601 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
9602 }
9603 // Overflow ? 0 : (LHS - RHS)
9604 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
9605 }
9606
9607   // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
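// On signed overflow the wrapped SumDiff has the opposite sign of the true
// result, so SumDiff >> (BW - 1) is all-ones for positive overflow and zero
// for negative overflow; XOR-ing with MinVal then yields SatMax or SatMin
// respectively.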
9608 APInt MinVal = APInt::getSignedMinValue(BitWidth);
9609 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
9610 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
9611 DAG.getConstant(BitWidth - 1, dl, VT));
9612 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
9613 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
9614 }
9615
9616 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
9617 unsigned Opcode = Node->getOpcode();
9618 bool IsSigned = Opcode == ISD::SSHLSAT;
9619 SDValue LHS = Node->getOperand(0);
9620 SDValue RHS = Node->getOperand(1);
9621 EVT VT = LHS.getValueType();
9622 SDLoc dl(Node);
9623
9624 assert((Node->getOpcode() == ISD::SSHLSAT ||
9625 Node->getOpcode() == ISD::USHLSAT) &&
9626 "Expected a SHLSAT opcode");
9627 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
9628 assert(VT.isInteger() && "Expected operands to be integers");
9629
9630 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9631 return DAG.UnrollVectorOp(Node);
9632
9633 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
9634
9635 unsigned BW = VT.getScalarSizeInBits();
9636 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9637 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
9638 SDValue Orig =
9639 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
9640
9641 SDValue SatVal;
9642 if (IsSigned) {
9643 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
9644 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
9645 SDValue Cond =
9646 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
9647 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
9648 } else {
9649 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
9650 }
9651 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
9652 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
9653 }
9654
9655 SDValue
9656 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
9657 assert((Node->getOpcode() == ISD::SMULFIX ||
9658 Node->getOpcode() == ISD::UMULFIX ||
9659 Node->getOpcode() == ISD::SMULFIXSAT ||
9660 Node->getOpcode() == ISD::UMULFIXSAT) &&
9661 "Expected a fixed point multiplication opcode");
9662
9663 SDLoc dl(Node);
9664 SDValue LHS = Node->getOperand(0);
9665 SDValue RHS = Node->getOperand(1);
9666 EVT VT = LHS.getValueType();
9667 unsigned Scale = Node->getConstantOperandVal(2);
9668 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
9669 Node->getOpcode() == ISD::UMULFIXSAT);
9670 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
9671 Node->getOpcode() == ISD::SMULFIXSAT);
9672 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9673 unsigned VTSize = VT.getScalarSizeInBits();
9674
9675 if (!Scale) {
9676 // [us]mul.fix(a, b, 0) -> mul(a, b)
9677 if (!Saturating) {
9678 if (isOperationLegalOrCustom(ISD::MUL, VT))
9679 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
9680 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
9681 SDValue Result =
9682 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
9683 SDValue Product = Result.getValue(0);
9684 SDValue Overflow = Result.getValue(1);
9685 SDValue Zero = DAG.getConstant(0, dl, VT);
9686
9687 APInt MinVal = APInt::getSignedMinValue(VTSize);
9688 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
9689 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
9690 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
9691       // XOR the inputs; if the resulting sign bit is 0 the product will be
9692       // positive, else negative.
9693 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
9694 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
9695 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
9696 return DAG.getSelect(dl, VT, Overflow, Result, Product);
9697 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
9698 SDValue Result =
9699 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
9700 SDValue Product = Result.getValue(0);
9701 SDValue Overflow = Result.getValue(1);
9702
9703 APInt MaxVal = APInt::getMaxValue(VTSize);
9704 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
9705 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
9706 }
9707 }
9708
9709 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
9710 "Expected scale to be less than the number of bits if signed or at "
9711 "most the number of bits if unsigned.");
9712 assert(LHS.getValueType() == RHS.getValueType() &&
9713 "Expected both operands to be the same type");
9714
9715 // Get the upper and lower bits of the result.
9716 SDValue Lo, Hi;
9717 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
9718 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
9719 if (isOperationLegalOrCustom(LoHiOp, VT)) {
9720 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
9721 Lo = Result.getValue(0);
9722 Hi = Result.getValue(1);
9723 } else if (isOperationLegalOrCustom(HiOp, VT)) {
9724 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
9725 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
9726 } else if (VT.isVector()) {
9727 return SDValue();
9728 } else {
9729 report_fatal_error("Unable to expand fixed point multiplication.");
9730 }
9731
9732 if (Scale == VTSize)
9733     // Result is just the top half since we'd be shifting by the width of the
9734     // operand. Overflow is impossible, so this works for both UMULFIX and
9735     // UMULFIXSAT.
9736 return Hi;
9737
9738 // The result will need to be shifted right by the scale since both operands
9739   // are scaled. The result is given to us in two halves, so we only want part of
9740 // both in the result.
9741 EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
9742 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
9743 DAG.getConstant(Scale, dl, ShiftTy));
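// For example, multiplying two i8 Q4.4 operands (Scale == 4) produces a
// Q8.8 value in the 16-bit Hi:Lo pair; funnel-shifting Hi:Lo right by 4
// keeps the middle eight bits, i.e. the product back in Q4.4 form.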
9744 if (!Saturating)
9745 return Result;
9746
9747 if (!Signed) {
9748 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
9749 // widened multiplication) aren't all zeroes.
9750
9751 // Saturate to max if ((Hi >> Scale) != 0),
9752 // which is the same as if (Hi > ((1 << Scale) - 1))
9753 APInt MaxVal = APInt::getMaxValue(VTSize);
9754 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
9755 dl, VT);
9756 Result = DAG.getSelectCC(dl, Hi, LowMask,
9757 DAG.getConstant(MaxVal, dl, VT), Result,
9758 ISD::SETUGT);
9759
9760 return Result;
9761 }
9762
9763 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
9764 // widened multiplication) aren't all ones or all zeroes.
9765
9766 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
9767 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
9768
9769 if (Scale == 0) {
9770 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
9771 DAG.getConstant(VTSize - 1, dl, ShiftTy));
9772 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
9773     // Saturate to SatMin if the wide product is negative, and SatMax if the
9774     // wide product is positive ...
9775 SDValue Zero = DAG.getConstant(0, dl, VT);
9776 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
9777 ISD::SETLT);
9778 // ... but only if we overflowed.
9779 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
9780 }
9781
9782   // We handled Scale == 0 above, so all the bits to examine are in Hi.
9783
9784   // Saturate to max if ((Hi >> (Scale - 1)) > 0),
9785   // which is the same as if (Hi > ((1 << (Scale - 1)) - 1)).
9786 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
9787 dl, VT);
9788 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
9789   // Saturate to min if ((Hi >> (Scale - 1)) < -1),
9790   // which is the same as if (Hi < (-1 << (Scale - 1))).
9791 SDValue HighMask =
9792 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
9793 dl, VT);
9794 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
9795 return Result;
9796 }
9797
9798 SDValue
9799 TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
9800 SDValue LHS, SDValue RHS,
9801 unsigned Scale, SelectionDAG &DAG) const {
9802 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
9803 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
9804 "Expected a fixed point division opcode");
9805
9806 EVT VT = LHS.getValueType();
9807 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
9808 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
9809 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9810
9811 // If there is enough room in the type to upscale the LHS or downscale the
9812 // RHS before the division, we can perform it in this type without having to
9813 // resize. For signed operations, the LHS headroom is the number of
9814   // redundant sign bits, and for unsigned ones it is the number of leading
9815   // zeroes. The headroom for the RHS is the number of trailing zeroes.
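// For example, an i16 UDIVFIX with Scale == 4 whose LHS has at least four
// leading zeroes can shift the LHS up by 4 and divide directly; if the LHS
// has only two spare bits but the RHS has two trailing zero bits, shifting
// each side by 2 achieves the same overall scale.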
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true
  // integer division overflow; that is, when you have MIN / -EPS. However,
  // this is undefined behavior, and if we emit divisions that can take such
  // values it may cause undesired behavior (arithmetic exceptions on x86,
  // for example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of Scale.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
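    // For example, -7 sdiv 2 truncates to -3 with remainder -1; since the
    // remainder is nonzero and the quotient is negative, we adjust to
    // -3 - 1 = -4 == floor(-3.5).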
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}

void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  SDValue SetCC;
  if (IsAdd && isOneConstant(RHS)) {
    // Special case: uaddo X, 1 overflowed if X+1 is 0. This potentially
    // reduces the live range of X. We assume comparing with 0 is cheap.
    // The general case (X + C) < C is not necessarily beneficial. Although we
    // reduce the live range of X, we may introduce the materialization of
    // constant C.
    SetCC =
        DAG.getSetCC(dl, SetCCType, Result,
                     DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
  } else {
    ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
    SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  }
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}

void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
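  // For example, for i8 the addition 100 + 100 wraps to -56: the result is
  // less than LHS but RHS is not negative, so the XOR below is true and we
  // report overflow. For 100 + (-50) = 50 both conditions hold, the XOR is
  // false, and there is no overflow.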
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is the same as umulo(x, signed_min).
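      // In that case we must use a logical shift right to undo the multiply:
      // for i8, smulo(1, -128) does not overflow (1 * -128 == -128), and
      // (0x80 u>> 7) == 1 recovers the LHS, whereas an arithmetic shift
      // would yield -1 and falsely signal overflow.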
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
                              DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                                          dl, VT, Result, ShiftAmt),
                              LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT =
      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT =
        EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt =
        DAG.getConstant(VT.getScalarSizeInBits(), dl,
                        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of the
      // low part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus having to be split
    // into two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
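  // Signed overflow occurred iff the top half is not the sign-extension of
  // the bottom half; unsigned overflow iff the top half is nonzero. For
  // example, i8 smulo(100, 2) gives the wide product 200 = 0x00C8: the
  // bottom half 0xC8 is negative, so its replicated sign bit is 0xFF, which
  // differs from the top half 0x00, and we report overflow.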
  if (isSigned) {
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
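  // For example, a v8i32 add reduction becomes an add of the two v4i32
  // halves, then of the two v2i32 halves of that, as long as the add is
  // legal at each narrower vector type; any remaining elements are then
  // reduced with scalar ops below.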
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());

  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X - X / Y * Y
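    // This holds for truncating division: e.g. -7 srem 3 gives
    // -7 - (-2 * 3) = -1, matching the sign of the dividend.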
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}

SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate.
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width no wider than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

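  // For example, for a signed i8 saturation from f32 the bounds -128.0 and
  // 127.0 are both exactly representable, but for unsigned i32 from f32 the
  // bound 4294967295 is not (the nearest f32 below it is 4294967040), so the
  // compare-and-select path further below must be used.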
  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert the clamped value to an integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory as follows:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr
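  // For example, splice(V1, V2, 1) yields <V1[1], ..., V1[n-1], V2[0]> and
  // splice(V1, V2, -2) yields <V1[n-2], V1[n-1], V2[0], ..., V2[n-3]>, where
  // n is the runtime number of elements in each source vector.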

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2).
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2).
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result.
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes =
        DAG.getVScale(DL, PtrVT,
                      APInt(PtrVT.getFixedSizeInBits(),
                            VT.getStoreSize().getKnownMinValue()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result.
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, SDValue Mask,
                                           SDValue EVL, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
  bool IsNonVP = !EVL;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      [[fallthrough]];
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
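      // That is, SETONE(x, y) == SETOGT(x, y) || SETOLT(x, y): the operands
      // compare ordered and unequal. SETUEQ is exactly the negation of that
      // disjunction, hence the NeedInvert below.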
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      [[fallthrough]];
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fall through if we are an unsigned integer.
      [[fallthrough]];
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
      }
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS).
      if (IsNonVP) {
        SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
        SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
      } else {
        SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
        SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
      }
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    if (IsNonVP)
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
    else {
      // Transform the binary opcode to the VP equivalent.
      assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
      Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
    }
    RHS = SDValue();
    CC = SDValue();
    return true;
  }
  }
  return false;
}
