1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This implements the TargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Analysis/VectorUtils.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/CodeGenCommonISel.h"
18 #include "llvm/CodeGen/MachineFrameInfo.h"
19 #include "llvm/CodeGen/MachineFunction.h"
20 #include "llvm/CodeGen/MachineJumpTableInfo.h"
21 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
22 #include "llvm/CodeGen/MachineRegisterInfo.h"
23 #include "llvm/CodeGen/SelectionDAG.h"
24 #include "llvm/CodeGen/TargetRegisterInfo.h"
25 #include "llvm/IR/DataLayout.h"
26 #include "llvm/IR/DerivedTypes.h"
27 #include "llvm/IR/GlobalVariable.h"
28 #include "llvm/IR/LLVMContext.h"
29 #include "llvm/MC/MCAsmInfo.h"
30 #include "llvm/MC/MCExpr.h"
31 #include "llvm/Support/DivisionByConstantInfo.h"
32 #include "llvm/Support/ErrorHandling.h"
33 #include "llvm/Support/KnownBits.h"
34 #include "llvm/Support/MathExtras.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include <cctype>
37 using namespace llvm;
38
39 /// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
41 : TargetLoweringBase(tm) {}
42
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
44 return nullptr;
45 }
46
bool TargetLowering::isPositionIndependent() const {
48 return getTargetMachine().isPositionIndependent();
49 }
50
51 /// Check whether a given call node is in tail position within its function. If
52 /// so, it sets Chain to the input chain of the tail call.
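/// For example (illustrative), in
/// \code
///   %r = tail call i32 @g(i32 %x)
///   ret i32 %r
/// \endcode
/// the call to @g is in tail call position, provided the return attributes
/// of @g and of the caller are compatible.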
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
54 SDValue &Chain) const {
55 const Function &F = DAG.getMachineFunction().getFunction();
56
57 // First, check if tail calls have been disabled in this function.
58 if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
59 return false;
60
61 // Conservatively require the attributes of the call to match those of
// the return. Ignore the following attributes because they don't affect the
63 // call sequence.
64 AttrBuilder CallerAttrs(F.getContext(), F.getAttributes().getRetAttrs());
65 for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
66 Attribute::DereferenceableOrNull, Attribute::NoAlias,
67 Attribute::NonNull, Attribute::NoUndef})
68 CallerAttrs.removeAttribute(Attr);
69
70 if (CallerAttrs.hasAttributes())
71 return false;
72
73 // It's not safe to eliminate the sign / zero extension of the return value.
74 if (CallerAttrs.contains(Attribute::ZExt) ||
75 CallerAttrs.contains(Attribute::SExt))
76 return false;
77
78 // Check if the only use is a function return node.
79 return isUsedByReturnOnly(Node, Chain);
80 }
81
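// Illustrative context (not from the original source): some arguments, e.g.
// swiftself, are passed in callee-saved registers. A tail call that receives
// such an argument is only safe if the outgoing value is exactly the caller's
// own incoming live-in for that same physical register, which is what this
// predicate verifies.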
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
83 const uint32_t *CallerPreservedMask,
84 const SmallVectorImpl<CCValAssign> &ArgLocs,
85 const SmallVectorImpl<SDValue> &OutVals) const {
86 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
87 const CCValAssign &ArgLoc = ArgLocs[I];
88 if (!ArgLoc.isRegLoc())
89 continue;
90 MCRegister Reg = ArgLoc.getLocReg();
// Only look at callee-saved registers.
92 if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
93 continue;
94 // Check that we pass the value used for the caller.
95 // (We look for a CopyFromReg reading a virtual register that is used
96 // for the function live-in value of register Reg)
97 SDValue Value = OutVals[I];
98 if (Value->getOpcode() == ISD::AssertZext)
99 Value = Value.getOperand(0);
100 if (Value->getOpcode() != ISD::CopyFromReg)
101 return false;
102 Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
103 if (MRI.getLiveInPhysReg(ArgReg) != Reg)
104 return false;
105 }
106 return true;
107 }
108
109 /// Set CallLoweringInfo attribute flags based on a call instruction
110 /// and called function attributes.
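/// For example (illustrative), for a call argument such as
/// \code
///   call void @f(ptr sret(%S) %p)
/// \endcode
/// this sets IsSRet and records %S as the argument's IndirectType.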
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
112 unsigned ArgIdx) {
113 IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
114 IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
115 IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
116 IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
117 IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
118 IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
119 IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
120 IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
121 IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
122 IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
123 IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
124 IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
125 Alignment = Call->getParamStackAlign(ArgIdx);
126 IndirectType = nullptr;
127 assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 &&
128 "multiple ABI attributes?");
129 if (IsByVal) {
130 IndirectType = Call->getParamByValType(ArgIdx);
131 if (!Alignment)
132 Alignment = Call->getParamAlign(ArgIdx);
133 }
134 if (IsPreallocated)
135 IndirectType = Call->getParamPreallocatedType(ArgIdx);
136 if (IsInAlloca)
137 IndirectType = Call->getParamInAllocaType(ArgIdx);
138 if (IsSRet)
139 IndirectType = Call->getParamStructRetType(ArgIdx);
140 }
141
142 /// Generate a libcall taking the given operands as arguments and returning a
143 /// result of type RetVT.
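/// For example (illustrative, not exercised here), expanding an f32 frem via
/// fmodf could look like:
/// \code
///   SDValue Ops[2] = {LHS, RHS};
///   TargetLowering::MakeLibCallOptions CallOptions;
///   std::pair<SDValue, SDValue> Res =
///       makeLibCall(DAG, RTLIB::REM_F32, MVT::f32, Ops, CallOptions, dl);
///   // Res.first is the call result, Res.second is the output chain.
/// \endcode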
144 std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
146 ArrayRef<SDValue> Ops,
147 MakeLibCallOptions CallOptions,
148 const SDLoc &dl,
149 SDValue InChain) const {
150 if (!InChain)
151 InChain = DAG.getEntryNode();
152
153 TargetLowering::ArgListTy Args;
154 Args.reserve(Ops.size());
155
156 TargetLowering::ArgListEntry Entry;
157 for (unsigned i = 0; i < Ops.size(); ++i) {
158 SDValue NewOp = Ops[i];
159 Entry.Node = NewOp;
160 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
161 Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
162 CallOptions.IsSExt);
163 Entry.IsZExt = !Entry.IsSExt;
164
165 if (CallOptions.IsSoften &&
166 !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
167 Entry.IsSExt = Entry.IsZExt = false;
168 }
169 Args.push_back(Entry);
170 }
171
172 if (LC == RTLIB::UNKNOWN_LIBCALL)
173 report_fatal_error("Unsupported library call operation!");
174 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
175 getPointerTy(DAG.getDataLayout()));
176
177 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
178 TargetLowering::CallLoweringInfo CLI(DAG);
179 bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
180 bool zeroExtend = !signExtend;
181
182 if (CallOptions.IsSoften &&
183 !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
184 signExtend = zeroExtend = false;
185 }
186
187 CLI.setDebugLoc(dl)
188 .setChain(InChain)
189 .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
190 .setNoReturn(CallOptions.DoesNotReturn)
191 .setDiscardResult(!CallOptions.IsReturnValueUsed)
192 .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
193 .setSExtResult(signExtend)
194 .setZExtResult(zeroExtend);
195 return LowerCallTo(CLI);
196 }
197
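// Illustrative walk-through (not from the original source): on a 64-bit
// target where i64 is legal, a 13-byte memcpy is typically broken into
// MemOps = {i64, i32, i8}; if overlapping accesses are allowed and misaligned
// accesses are fast, it instead becomes {i64, i64} with the second operation
// overlapping the first by three bytes.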
bool TargetLowering::findOptimalMemOpLowering(
199 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
200 unsigned SrcAS, const AttributeList &FuncAttributes) const {
201 if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() &&
202 Op.getSrcAlign() < Op.getDstAlign())
203 return false;
204
205 EVT VT = getOptimalMemOpType(Op, FuncAttributes);
206
207 if (VT == MVT::Other) {
208 // Use the largest integer type whose alignment constraints are satisfied.
// We only need to check DstAlign here as SrcAlign is always greater than
// or equal to DstAlign (or zero).
211 VT = MVT::i64;
212 if (Op.isFixedDstAlign())
213 while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
214 !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
215 VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
216 assert(VT.isInteger());
217
218 // Find the largest legal integer type.
219 MVT LVT = MVT::i64;
220 while (!isTypeLegal(LVT))
221 LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
222 assert(LVT.isInteger());
223
224 // If the type we've chosen is larger than the largest legal integer type
225 // then use that instead.
226 if (VT.bitsGT(LVT))
227 VT = LVT;
228 }
229
230 unsigned NumMemOps = 0;
231 uint64_t Size = Op.size();
232 while (Size) {
233 unsigned VTSize = VT.getSizeInBits() / 8;
234 while (VTSize > Size) {
// For now, only use non-vector loads / stores for the left-over pieces.
236 EVT NewVT = VT;
237 unsigned NewVTSize;
238
239 bool Found = false;
240 if (VT.isVector() || VT.isFloatingPoint()) {
241 NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
242 if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
243 isSafeMemOpType(NewVT.getSimpleVT()))
244 Found = true;
245 else if (NewVT == MVT::i64 &&
246 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
247 isSafeMemOpType(MVT::f64)) {
248 // i64 is usually not legal on 32-bit targets, but f64 may be.
249 NewVT = MVT::f64;
250 Found = true;
251 }
252 }
253
254 if (!Found) {
255 do {
256 NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
257 if (NewVT == MVT::i8)
258 break;
259 } while (!isSafeMemOpType(NewVT.getSimpleVT()));
260 }
261 NewVTSize = NewVT.getSizeInBits() / 8;
262
263 // If the new VT cannot cover all of the remaining bits, then consider
264 // issuing a (or a pair of) unaligned and overlapping load / store.
265 unsigned Fast;
266 if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
267 allowsMisalignedMemoryAccesses(
268 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
269 MachineMemOperand::MONone, &Fast) &&
270 Fast)
271 VTSize = Size;
272 else {
273 VT = NewVT;
274 VTSize = NewVTSize;
275 }
276 }
277
278 if (++NumMemOps > Limit)
279 return false;
280
281 MemOps.push_back(VT);
282 Size -= VTSize;
283 }
284
285 return true;
286 }
287
288 /// Soften the operands of a comparison. This code is shared among BR_CC,
289 /// SELECT_CC, and SETCC handlers.
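/// For example (illustrative), on a soft-float target an f32 SETOLT becomes a
/// call to the comparison libcall followed by an integer compare of its
/// result, roughly:
/// \code
///   (setcc olt, a, b)  -->  (setcc lt, (call __ltsf2, a, b), 0)
/// \endcode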
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
291 SDValue &NewLHS, SDValue &NewRHS,
292 ISD::CondCode &CCCode,
293 const SDLoc &dl, const SDValue OldLHS,
294 const SDValue OldRHS) const {
295 SDValue Chain;
296 return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
297 OldRHS, Chain);
298 }
299
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
301 SDValue &NewLHS, SDValue &NewRHS,
302 ISD::CondCode &CCCode,
303 const SDLoc &dl, const SDValue OldLHS,
304 const SDValue OldRHS,
305 SDValue &Chain,
306 bool IsSignaling) const {
307 // FIXME: Currently we cannot really respect all IEEE predicates due to libgcc
308 // not supporting it. We can update this code when libgcc provides such
309 // functions.
310
311 assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
312 && "Unsupported setcc type!");
313
314 // Expand into one or more soft-fp libcall(s).
315 RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
316 bool ShouldInvertCC = false;
317 switch (CCCode) {
318 case ISD::SETEQ:
319 case ISD::SETOEQ:
320 LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
321 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
322 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
323 break;
324 case ISD::SETNE:
325 case ISD::SETUNE:
326 LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
327 (VT == MVT::f64) ? RTLIB::UNE_F64 :
328 (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
329 break;
330 case ISD::SETGE:
331 case ISD::SETOGE:
332 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
333 (VT == MVT::f64) ? RTLIB::OGE_F64 :
334 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
335 break;
336 case ISD::SETLT:
337 case ISD::SETOLT:
338 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
339 (VT == MVT::f64) ? RTLIB::OLT_F64 :
340 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
341 break;
342 case ISD::SETLE:
343 case ISD::SETOLE:
344 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
345 (VT == MVT::f64) ? RTLIB::OLE_F64 :
346 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
347 break;
348 case ISD::SETGT:
349 case ISD::SETOGT:
350 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
351 (VT == MVT::f64) ? RTLIB::OGT_F64 :
352 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
353 break;
354 case ISD::SETO:
355 ShouldInvertCC = true;
356 [[fallthrough]];
357 case ISD::SETUO:
358 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
359 (VT == MVT::f64) ? RTLIB::UO_F64 :
360 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
361 break;
362 case ISD::SETONE:
363 // SETONE = O && UNE
364 ShouldInvertCC = true;
365 [[fallthrough]];
366 case ISD::SETUEQ:
367 LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
368 (VT == MVT::f64) ? RTLIB::UO_F64 :
369 (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
370 LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
371 (VT == MVT::f64) ? RTLIB::OEQ_F64 :
372 (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
373 break;
374 default:
375 // Invert CC for unordered comparisons
376 ShouldInvertCC = true;
377 switch (CCCode) {
378 case ISD::SETULT:
379 LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
380 (VT == MVT::f64) ? RTLIB::OGE_F64 :
381 (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
382 break;
383 case ISD::SETULE:
384 LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
385 (VT == MVT::f64) ? RTLIB::OGT_F64 :
386 (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
387 break;
388 case ISD::SETUGT:
389 LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
390 (VT == MVT::f64) ? RTLIB::OLE_F64 :
391 (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
392 break;
393 case ISD::SETUGE:
394 LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
395 (VT == MVT::f64) ? RTLIB::OLT_F64 :
396 (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
397 break;
398 default: llvm_unreachable("Do not know how to soften this setcc!");
399 }
400 }
401
// Use the target-specific return value for comparison lib calls.
403 EVT RetVT = getCmpLibcallReturnType();
404 SDValue Ops[2] = {NewLHS, NewRHS};
405 TargetLowering::MakeLibCallOptions CallOptions;
406 EVT OpsVT[2] = { OldLHS.getValueType(),
407 OldRHS.getValueType() };
408 CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
409 auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
410 NewLHS = Call.first;
411 NewRHS = DAG.getConstant(0, dl, RetVT);
412
413 CCCode = getCmpLibcallCC(LC1);
414 if (ShouldInvertCC) {
415 assert(RetVT.isInteger());
416 CCCode = getSetCCInverse(CCCode, RetVT);
417 }
418
419 if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
420 // Update Chain.
421 Chain = Call.second;
422 } else {
423 EVT SetCCVT =
424 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
425 SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
426 auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
427 CCCode = getCmpLibcallCC(LC2);
428 if (ShouldInvertCC)
429 CCCode = getSetCCInverse(CCCode, RetVT);
430 NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
431 if (Chain)
432 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
433 Call2.second);
434 NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
435 Tmp.getValueType(), Tmp, NewLHS);
436 NewRHS = SDValue();
437 }
438 }
439
440 /// Return the entry encoding for a jump table in the current function. The
441 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
// In non-PIC modes, just use the address of a block.
444 if (!isPositionIndependent())
445 return MachineJumpTableInfo::EK_BlockAddress;
446
447 // In PIC mode, if the target supports a GPRel32 directive, use it.
448 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
449 return MachineJumpTableInfo::EK_GPRel32BlockAddress;
450
451 // Otherwise, use a label difference.
452 return MachineJumpTableInfo::EK_LabelDifference32;
453 }
454
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
456 SelectionDAG &DAG) const {
457 // If our PIC model is GP relative, use the global offset table as the base.
458 unsigned JTEncoding = getJumpTableEncoding();
459
460 if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
461 (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
462 return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));
463
464 return Table;
465 }
466
467 /// This returns the relocation base for the given PIC jumptable, the same as
468 /// getPICJumpTableRelocBase, but as an MCExpr.
469 const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
472 // The normal PIC reloc base is the label at the start of the jump table.
473 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
474 }
475
SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
477 SDValue Addr, int JTI,
478 SelectionDAG &DAG) const {
479 SDValue Chain = Value;
480 // Jump table debug info is only needed if CodeView is enabled.
481 if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) {
482 Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl);
483 }
484 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr);
485 }
486
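// Illustrative (not from the original source): when this returns true, a node
// such as (add (GlobalAddress @g), 16) can be folded into a single
// GlobalAddress with offset 16, which is only sound for direct, non-PIC
// references to DSO-local globals.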
487 bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
489 const TargetMachine &TM = getTargetMachine();
490 const GlobalValue *GV = GA->getGlobal();
491
// If the address is not even local to this DSO, we will have to load it
// from a GOT and then add the offset.
494 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
495 return false;
496
497 // If the code is position independent we will have to add a base register.
498 if (isPositionIndependent())
499 return false;
500
501 // Otherwise we can do it.
502 return true;
503 }
504
505 //===----------------------------------------------------------------------===//
506 // Optimization Methods
507 //===----------------------------------------------------------------------===//
508
509 /// If the specified instruction has a constant integer operand and there are
510 /// bits set in that constant that are not demanded, then clear those bits and
511 /// return true.
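/// For example (illustrative), if only the low 8 bits of (and X, 0x1FF) are
/// demanded, the constant is shrunk to give (and X, 0xFF).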
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
513 const APInt &DemandedBits,
514 const APInt &DemandedElts,
515 TargetLoweringOpt &TLO) const {
516 SDLoc DL(Op);
517 unsigned Opcode = Op.getOpcode();
518
// Early-out if we've ended up calling an undemanded node; leave this to
// constant folding.
521 if (DemandedBits.isZero() || DemandedElts.isZero())
522 return false;
523
524 // Do target-specific constant optimization.
525 if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
526 return TLO.New.getNode();
527
528 // FIXME: ISD::SELECT, ISD::SELECT_CC
529 switch (Opcode) {
530 default:
531 break;
532 case ISD::XOR:
533 case ISD::AND:
534 case ISD::OR: {
535 auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
536 if (!Op1C || Op1C->isOpaque())
537 return false;
538
539 // If this is a 'not' op, don't touch it because that's a canonical form.
540 const APInt &C = Op1C->getAPIntValue();
541 if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
542 return false;
543
544 if (!C.isSubsetOf(DemandedBits)) {
545 EVT VT = Op.getValueType();
546 SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
547 SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
548 return TLO.CombineTo(Op, NewOp);
549 }
550
551 break;
552 }
553 }
554
555 return false;
556 }
557
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
559 const APInt &DemandedBits,
560 TargetLoweringOpt &TLO) const {
561 EVT VT = Op.getValueType();
562 APInt DemandedElts = VT.isVector()
563 ? APInt::getAllOnes(VT.getVectorNumElements())
564 : APInt(1, 1);
565 return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
566 }
567
568 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
569 /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast,
570 /// but it could be generalized for targets with other types of implicit
571 /// widening casts.
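/// For example (illustrative), if only the low 16 bits of an i32 'or' are
/// demanded, and i16 <-> i32 casts are free on the target:
/// \code
///   (or x, y) --> (any_extend (or (trunc x), (trunc y)))
/// \endcode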
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
573 const APInt &DemandedBits,
574 TargetLoweringOpt &TLO) const {
575 assert(Op.getNumOperands() == 2 &&
576 "ShrinkDemandedOp only supports binary operators!");
577 assert(Op.getNode()->getNumValues() == 1 &&
578 "ShrinkDemandedOp only supports nodes with one result!");
579
580 EVT VT = Op.getValueType();
581 SelectionDAG &DAG = TLO.DAG;
582 SDLoc dl(Op);
583
584 // Early return, as this function cannot handle vector types.
585 if (VT.isVector())
586 return false;
587
588 // Don't do this if the node has another user, which may require the
589 // full value.
590 if (!Op.getNode()->hasOneUse())
591 return false;
592
593 // Search for the smallest integer type with free casts to and from
594 // Op's type. For expedience, just check power-of-2 integer types.
595 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
596 unsigned DemandedSize = DemandedBits.getActiveBits();
597 for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
598 SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
599 EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
600 if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
601 // We found a type with free casts.
602 SDValue X = DAG.getNode(
603 Op.getOpcode(), dl, SmallVT,
604 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
605 DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
606 assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
607 SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
608 return TLO.CombineTo(Op, Z);
609 }
610 }
611 return false;
612 }
613
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
615 DAGCombinerInfo &DCI) const {
616 SelectionDAG &DAG = DCI.DAG;
617 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
618 !DCI.isBeforeLegalizeOps());
619 KnownBits Known;
620
621 bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
622 if (Simplified) {
623 DCI.AddToWorklist(Op.getNode());
624 DCI.CommitTargetLoweringOpt(TLO);
625 }
626 return Simplified;
627 }
628
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
630 const APInt &DemandedElts,
631 DAGCombinerInfo &DCI) const {
632 SelectionDAG &DAG = DCI.DAG;
633 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
634 !DCI.isBeforeLegalizeOps());
635 KnownBits Known;
636
637 bool Simplified =
638 SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO);
639 if (Simplified) {
640 DCI.AddToWorklist(Op.getNode());
641 DCI.CommitTargetLoweringOpt(TLO);
642 }
643 return Simplified;
644 }
645
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
647 KnownBits &Known,
648 TargetLoweringOpt &TLO,
649 unsigned Depth,
650 bool AssumeSingleUse) const {
651 EVT VT = Op.getValueType();
652
653 // Since the number of lanes in a scalable vector is unknown at compile time,
654 // we track one bit which is implicitly broadcast to all lanes. This means
655 // that all lanes in a scalable vector are considered demanded.
656 APInt DemandedElts = VT.isFixedLengthVector()
657 ? APInt::getAllOnes(VT.getVectorNumElements())
658 : APInt(1, 1);
659 return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
660 AssumeSingleUse);
661 }
662
663 // TODO: Under what circumstances can we create nodes? Constant folding?
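// For example (illustrative): if Op = (or X, Y) has several users but every
// demanded bit is known zero in Y, this returns X for use in this one
// context, without rewriting Op itself, since other users may still need the
// full (or X, Y).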
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
665 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
666 SelectionDAG &DAG, unsigned Depth) const {
667 EVT VT = Op.getValueType();
668
669 // Limit search depth.
670 if (Depth >= SelectionDAG::MaxRecursionDepth)
671 return SDValue();
672
673 // Ignore UNDEFs.
674 if (Op.isUndef())
675 return SDValue();
676
677 // Not demanding any bits/elts from Op.
678 if (DemandedBits == 0 || DemandedElts == 0)
679 return DAG.getUNDEF(VT);
680
681 bool IsLE = DAG.getDataLayout().isLittleEndian();
682 unsigned NumElts = DemandedElts.getBitWidth();
683 unsigned BitWidth = DemandedBits.getBitWidth();
684 KnownBits LHSKnown, RHSKnown;
685 switch (Op.getOpcode()) {
686 case ISD::BITCAST: {
687 if (VT.isScalableVector())
688 return SDValue();
689
690 SDValue Src = peekThroughBitcasts(Op.getOperand(0));
691 EVT SrcVT = Src.getValueType();
692 EVT DstVT = Op.getValueType();
693 if (SrcVT == DstVT)
694 return Src;
695
696 unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
697 unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
698 if (NumSrcEltBits == NumDstEltBits)
699 if (SDValue V = SimplifyMultipleUseDemandedBits(
700 Src, DemandedBits, DemandedElts, DAG, Depth + 1))
701 return DAG.getBitcast(DstVT, V);
702
703 if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) {
704 unsigned Scale = NumDstEltBits / NumSrcEltBits;
705 unsigned NumSrcElts = SrcVT.getVectorNumElements();
706 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
707 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
708 for (unsigned i = 0; i != Scale; ++i) {
709 unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
710 unsigned BitOffset = EltOffset * NumSrcEltBits;
711 APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
712 if (!Sub.isZero()) {
713 DemandedSrcBits |= Sub;
714 for (unsigned j = 0; j != NumElts; ++j)
715 if (DemandedElts[j])
716 DemandedSrcElts.setBit((j * Scale) + i);
717 }
718 }
719
720 if (SDValue V = SimplifyMultipleUseDemandedBits(
721 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
722 return DAG.getBitcast(DstVT, V);
723 }
724
725 // TODO - bigendian once we have test coverage.
726 if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) {
727 unsigned Scale = NumSrcEltBits / NumDstEltBits;
728 unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
729 APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
730 APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
731 for (unsigned i = 0; i != NumElts; ++i)
732 if (DemandedElts[i]) {
733 unsigned Offset = (i % Scale) * NumDstEltBits;
734 DemandedSrcBits.insertBits(DemandedBits, Offset);
735 DemandedSrcElts.setBit(i / Scale);
736 }
737
738 if (SDValue V = SimplifyMultipleUseDemandedBits(
739 Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
740 return DAG.getBitcast(DstVT, V);
741 }
742
743 break;
744 }
745 case ISD::AND: {
746 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
747 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
748
749 // If all of the demanded bits are known 1 on one side, return the other.
750 // These bits cannot contribute to the result of the 'and' in this
751 // context.
752 if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
753 return Op.getOperand(0);
754 if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
755 return Op.getOperand(1);
756 break;
757 }
758 case ISD::OR: {
759 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
760 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
761
762 // If all of the demanded bits are known zero on one side, return the
763 // other. These bits cannot contribute to the result of the 'or' in this
764 // context.
765 if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
766 return Op.getOperand(0);
767 if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
768 return Op.getOperand(1);
769 break;
770 }
771 case ISD::XOR: {
772 LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
773 RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
774
775 // If all of the demanded bits are known zero on one side, return the
776 // other.
777 if (DemandedBits.isSubsetOf(RHSKnown.Zero))
778 return Op.getOperand(0);
779 if (DemandedBits.isSubsetOf(LHSKnown.Zero))
780 return Op.getOperand(1);
781 break;
782 }
783 case ISD::SHL: {
784 // If we are only demanding sign bits then we can use the shift source
785 // directly.
786 if (const APInt *MaxSA =
787 DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
788 SDValue Op0 = Op.getOperand(0);
789 unsigned ShAmt = MaxSA->getZExtValue();
790 unsigned NumSignBits =
791 DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
792 unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
793 if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
794 return Op0;
795 }
796 break;
797 }
798 case ISD::SETCC: {
799 SDValue Op0 = Op.getOperand(0);
800 SDValue Op1 = Op.getOperand(1);
801 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
802 // If (1) we only need the sign-bit, (2) the setcc operands are the same
803 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
804 // -1, we may be able to bypass the setcc.
805 if (DemandedBits.isSignMask() &&
806 Op0.getScalarValueSizeInBits() == BitWidth &&
807 getBooleanContents(Op0.getValueType()) ==
808 BooleanContent::ZeroOrNegativeOneBooleanContent) {
809 // If we're testing X < 0, then this compare isn't needed - just use X!
810 // FIXME: We're limiting to integer types here, but this should also work
811 // if we don't care about FP signed-zero. The use of SETLT with FP means
812 // that we don't care about NaNs.
813 if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
814 (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
815 return Op0;
816 }
817 break;
818 }
819 case ISD::SIGN_EXTEND_INREG: {
820 // If none of the extended bits are demanded, eliminate the sextinreg.
821 SDValue Op0 = Op.getOperand(0);
822 EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
823 unsigned ExBits = ExVT.getScalarSizeInBits();
824 if (DemandedBits.getActiveBits() <= ExBits &&
825 shouldRemoveRedundantExtend(Op))
826 return Op0;
827 // If the input is already sign extended, just drop the extension.
828 unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
829 if (NumSignBits >= (BitWidth - ExBits + 1))
830 return Op0;
831 break;
832 }
833 case ISD::ANY_EXTEND_VECTOR_INREG:
834 case ISD::SIGN_EXTEND_VECTOR_INREG:
835 case ISD::ZERO_EXTEND_VECTOR_INREG: {
836 if (VT.isScalableVector())
837 return SDValue();
838
// If we only want the lowest element and none of the extended bits, then
// we can return the bitcasted source vector.
841 SDValue Src = Op.getOperand(0);
842 EVT SrcVT = Src.getValueType();
843 EVT DstVT = Op.getValueType();
844 if (IsLE && DemandedElts == 1 &&
845 DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
846 DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
847 return DAG.getBitcast(DstVT, Src);
848 }
849 break;
850 }
851 case ISD::INSERT_VECTOR_ELT: {
852 if (VT.isScalableVector())
853 return SDValue();
854
855 // If we don't demand the inserted element, return the base vector.
856 SDValue Vec = Op.getOperand(0);
857 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
858 EVT VecVT = Vec.getValueType();
859 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
860 !DemandedElts[CIdx->getZExtValue()])
861 return Vec;
862 break;
863 }
864 case ISD::INSERT_SUBVECTOR: {
865 if (VT.isScalableVector())
866 return SDValue();
867
868 SDValue Vec = Op.getOperand(0);
869 SDValue Sub = Op.getOperand(1);
870 uint64_t Idx = Op.getConstantOperandVal(2);
871 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
872 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
873 // If we don't demand the inserted subvector, return the base vector.
874 if (DemandedSubElts == 0)
875 return Vec;
876 break;
877 }
878 case ISD::VECTOR_SHUFFLE: {
879 assert(!VT.isScalableVector());
880 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
881
882 // If all the demanded elts are from one operand and are inline,
883 // then we can use the operand directly.
884 bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
885 for (unsigned i = 0; i != NumElts; ++i) {
886 int M = ShuffleMask[i];
887 if (M < 0 || !DemandedElts[i])
888 continue;
889 AllUndef = false;
890 IdentityLHS &= (M == (int)i);
891 IdentityRHS &= ((M - NumElts) == i);
892 }
893
894 if (AllUndef)
895 return DAG.getUNDEF(Op.getValueType());
896 if (IdentityLHS)
897 return Op.getOperand(0);
898 if (IdentityRHS)
899 return Op.getOperand(1);
900 break;
901 }
902 default:
903 // TODO: Probably okay to remove after audit; here to reduce change size
904 // in initial enablement patch for scalable vectors
905 if (VT.isScalableVector())
906 return SDValue();
907
908 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
909 if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
910 Op, DemandedBits, DemandedElts, DAG, Depth))
911 return V;
912 break;
913 }
914 return SDValue();
915 }
916
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
918 SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
919 unsigned Depth) const {
920 EVT VT = Op.getValueType();
921 // Since the number of lanes in a scalable vector is unknown at compile time,
922 // we track one bit which is implicitly broadcast to all lanes. This means
923 // that all lanes in a scalable vector are considered demanded.
924 APInt DemandedElts = VT.isFixedLengthVector()
925 ? APInt::getAllOnes(VT.getVectorNumElements())
926 : APInt(1, 1);
927 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
928 Depth);
929 }
930
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
932 SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
933 unsigned Depth) const {
934 APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
935 return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
936 Depth);
937 }
938
// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1),
// or ext(avgceil(A, B)) from shr(add(add(ext(A), ext(B)), 1), 1).
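// For example (illustrative, with i8 A and B zero-extended to i16):
//   (srl (add (zext A), (zext B)), 1)          --> (zext (avgflooru A, B))
//   (srl (add (add (zext A), (zext B)), 1), 1) --> (zext (avgceilu A, B))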
static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
942 const TargetLowering &TLI,
943 const APInt &DemandedBits,
944 const APInt &DemandedElts,
945 unsigned Depth) {
946 assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) &&
947 "SRL or SRA node is required here!");
948 // Is the right shift using an immediate value of 1?
949 ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
950 if (!N1C || !N1C->isOne())
951 return SDValue();
952
953 // We are looking for an avgfloor
954 // add(ext, ext)
// or one of these as an avgceil
956 // add(add(ext, ext), 1)
957 // add(add(ext, 1), ext)
958 // add(ext, add(ext, 1))
959 SDValue Add = Op.getOperand(0);
960 if (Add.getOpcode() != ISD::ADD)
961 return SDValue();
962
963 SDValue ExtOpA = Add.getOperand(0);
964 SDValue ExtOpB = Add.getOperand(1);
965 SDValue Add2;
966 auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
967 ConstantSDNode *ConstOp;
968 if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
969 ConstOp->isOne()) {
970 ExtOpA = Op1;
971 ExtOpB = Op3;
972 Add2 = A;
973 return true;
974 }
975 if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
976 ConstOp->isOne()) {
977 ExtOpA = Op1;
978 ExtOpB = Op2;
979 Add2 = A;
980 return true;
981 }
982 return false;
983 };
984 bool IsCeil =
985 (ExtOpA.getOpcode() == ISD::ADD &&
986 MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
987 (ExtOpB.getOpcode() == ISD::ADD &&
988 MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
989
990 // If the shift is signed (sra):
//  - Needs >= 2 sign bits for both operands.
992 // - Needs >= 2 zero bits.
993 // If the shift is unsigned (srl):
994 // - Needs >= 1 zero bit for both operands.
995 // - Needs 1 demanded bit zero and >= 2 sign bits.
996 unsigned ShiftOpc = Op.getOpcode();
997 bool IsSigned = false;
998 unsigned KnownBits;
999 unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth);
1000 unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth);
1001 unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1;
1002 unsigned NumZeroA =
1003 DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros();
1004 unsigned NumZeroB =
1005 DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros();
1006 unsigned NumZero = std::min(NumZeroA, NumZeroB);
1007
1008 switch (ShiftOpc) {
1009 default:
1010 llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG");
1011 case ISD::SRA: {
1012 if (NumZero >= 2 && NumSigned < NumZero) {
1013 IsSigned = false;
1014 KnownBits = NumZero;
1015 break;
1016 }
1017 if (NumSigned >= 1) {
1018 IsSigned = true;
1019 KnownBits = NumSigned;
1020 break;
1021 }
1022 return SDValue();
1023 }
1024 case ISD::SRL: {
1025 if (NumZero >= 1 && NumSigned < NumZero) {
1026 IsSigned = false;
1027 KnownBits = NumZero;
1028 break;
1029 }
1030 if (NumSigned >= 1 && DemandedBits.isSignBitClear()) {
1031 IsSigned = true;
1032 KnownBits = NumSigned;
1033 break;
1034 }
1035 return SDValue();
1036 }
1037 }
1038
1039 unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU)
1040 : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU);
1041
// Find the smallest power-of-2 type that is legal for this vector size and
1043 // operation, given the original type size and the number of known sign/zero
1044 // bits.
1045 EVT VT = Op.getValueType();
1046 unsigned MinWidth =
1047 std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
1048 EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
1049 if (VT.isVector())
1050 NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
1051 if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
1052 // If we could not transform, and (both) adds are nuw/nsw, we can use the
1053 // larger type size to do the transform.
1054 if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
1055 return SDValue();
1056 if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0),
1057 Add.getOperand(1)) &&
1058 (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0),
1059 Add2.getOperand(1))))
1060 NVT = VT;
1061 else
1062 return SDValue();
1063 }
1064
1065 SDLoc DL(Op);
1066 SDValue ResultAVG =
1067 DAG.getNode(AVGOpc, DL, NVT, DAG.getExtOrTrunc(IsSigned, ExtOpA, DL, NVT),
1068 DAG.getExtOrTrunc(IsSigned, ExtOpB, DL, NVT));
1069 return DAG.getExtOrTrunc(IsSigned, ResultAVG, DL, VT);
1070 }
1071
1072 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
1073 /// result of Op are ever used downstream. If we can use this information to
1074 /// simplify Op, create a new simplified DAG node and return true, returning the
1075 /// original and new nodes in Old and New. Otherwise, analyze the expression and
1076 /// return a mask of Known bits for the expression (used to simplify the
1077 /// caller). The Known bits may only be accurate for those bits in the
1078 /// OriginalDemandedBits and OriginalDemandedElts.
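/// For example (illustrative): given Op = (and X, 0xFF) with
/// OriginalDemandedBits = 0xF, every demanded bit is known one in the mask,
/// so the 'and' contributes nothing and Op can be replaced by X.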
bool TargetLowering::SimplifyDemandedBits(
1080 SDValue Op, const APInt &OriginalDemandedBits,
1081 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
1082 unsigned Depth, bool AssumeSingleUse) const {
1083 unsigned BitWidth = OriginalDemandedBits.getBitWidth();
1084 assert(Op.getScalarValueSizeInBits() == BitWidth &&
1085 "Mask size mismatches value type size!");
1086
1087 // Don't know anything.
1088 Known = KnownBits(BitWidth);
1089
1090 EVT VT = Op.getValueType();
1091 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
1092 unsigned NumElts = OriginalDemandedElts.getBitWidth();
1093 assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) &&
1094 "Unexpected vector size");
1095
1096 APInt DemandedBits = OriginalDemandedBits;
1097 APInt DemandedElts = OriginalDemandedElts;
1098 SDLoc dl(Op);
1099 auto &DL = TLO.DAG.getDataLayout();
1100
1101 // Undef operand.
1102 if (Op.isUndef())
1103 return false;
1104
1105 // We can't simplify target constants.
1106 if (Op.getOpcode() == ISD::TargetConstant)
1107 return false;
1108
1109 if (Op.getOpcode() == ISD::Constant) {
1110 // We know all of the bits for a constant!
1111 Known = KnownBits::makeConstant(Op->getAsAPIntVal());
1112 return false;
1113 }
1114
1115 if (Op.getOpcode() == ISD::ConstantFP) {
1116 // We know all of the bits for a floating point constant!
1117 Known = KnownBits::makeConstant(
1118 cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
1119 return false;
1120 }
1121
1122 // Other users may use these bits.
1123 bool HasMultiUse = false;
1124 if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) {
1125 if (Depth >= SelectionDAG::MaxRecursionDepth) {
1126 // Limit search depth.
1127 return false;
1128 }
1129 // Allow multiple uses, just set the DemandedBits/Elts to all bits.
1130 DemandedBits = APInt::getAllOnes(BitWidth);
1131 DemandedElts = APInt::getAllOnes(NumElts);
1132 HasMultiUse = true;
1133 } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
1134 // Not demanding any bits/elts from Op.
1135 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1136 } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
1137 // Limit search depth.
1138 return false;
1139 }
1140
1141 KnownBits Known2;
1142 switch (Op.getOpcode()) {
1143 case ISD::SCALAR_TO_VECTOR: {
1144 if (VT.isScalableVector())
1145 return false;
1146 if (!DemandedElts[0])
1147 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
1148
1149 KnownBits SrcKnown;
1150 SDValue Src = Op.getOperand(0);
1151 unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
1152 APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth);
1153 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
1154 return true;
1155
1156 // Upper elements are undef, so only get the knownbits if we just demand
1157 // the bottom element.
1158 if (DemandedElts == 1)
1159 Known = SrcKnown.anyextOrTrunc(BitWidth);
1160 break;
1161 }
1162 case ISD::BUILD_VECTOR:
1163 // Collect the known bits that are shared by every demanded element.
1164 // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
1165 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1166 return false; // Don't fall through, will infinitely loop.
1167 case ISD::SPLAT_VECTOR: {
1168 SDValue Scl = Op.getOperand(0);
1169 APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits());
1170 KnownBits KnownScl;
1171 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1172 return true;
1173
1174 // Implicitly truncate the bits to match the official semantics of
1175 // SPLAT_VECTOR.
1176 Known = KnownScl.trunc(BitWidth);
1177 break;
1178 }
1179 case ISD::LOAD: {
1180 auto *LD = cast<LoadSDNode>(Op);
1181 if (getTargetConstantFromLoad(LD)) {
1182 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
1183 return false; // Don't fall through, will infinitely loop.
1184 }
1185 if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
1186 // If this is a ZEXTLoad and we are looking at the loaded value.
1187 EVT MemVT = LD->getMemoryVT();
1188 unsigned MemBits = MemVT.getScalarSizeInBits();
1189 Known.Zero.setBitsFrom(MemBits);
1190 return false; // Don't fall through, will infinitely loop.
1191 }
1192 break;
1193 }
1194 case ISD::INSERT_VECTOR_ELT: {
1195 if (VT.isScalableVector())
1196 return false;
1197 SDValue Vec = Op.getOperand(0);
1198 SDValue Scl = Op.getOperand(1);
1199 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1200 EVT VecVT = Vec.getValueType();
1201
1202 // If index isn't constant, assume we need all vector elements AND the
1203 // inserted element.
1204 APInt DemandedVecElts(DemandedElts);
1205 if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
1206 unsigned Idx = CIdx->getZExtValue();
1207 DemandedVecElts.clearBit(Idx);
1208
1209 // Inserted element is not required.
1210 if (!DemandedElts[Idx])
1211 return TLO.CombineTo(Op, Vec);
1212 }
1213
1214 KnownBits KnownScl;
1215 unsigned NumSclBits = Scl.getScalarValueSizeInBits();
1216 APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
1217 if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
1218 return true;
1219
1220 Known = KnownScl.anyextOrTrunc(BitWidth);
1221
1222 KnownBits KnownVec;
1223 if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
1224 Depth + 1))
1225 return true;
1226
1227 if (!!DemandedVecElts)
1228 Known = Known.intersectWith(KnownVec);
1229
1230 return false;
1231 }
1232 case ISD::INSERT_SUBVECTOR: {
1233 if (VT.isScalableVector())
1234 return false;
// Demand any elements from the subvector and the remainder from the src
// it's inserted into.
1237 SDValue Src = Op.getOperand(0);
1238 SDValue Sub = Op.getOperand(1);
1239 uint64_t Idx = Op.getConstantOperandVal(2);
1240 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
1241 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
1242 APInt DemandedSrcElts = DemandedElts;
1243 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
1244
1245 KnownBits KnownSub, KnownSrc;
1246 if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
1247 Depth + 1))
1248 return true;
1249 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
1250 Depth + 1))
1251 return true;
1252
1253 Known.Zero.setAllBits();
1254 Known.One.setAllBits();
1255 if (!!DemandedSubElts)
1256 Known = Known.intersectWith(KnownSub);
1257 if (!!DemandedSrcElts)
1258 Known = Known.intersectWith(KnownSrc);
1259
1260 // Attempt to avoid multi-use src if we don't need anything from it.
1261 if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
1262 !DemandedSrcElts.isAllOnes()) {
1263 SDValue NewSub = SimplifyMultipleUseDemandedBits(
1264 Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
1265 SDValue NewSrc = SimplifyMultipleUseDemandedBits(
1266 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1267 if (NewSub || NewSrc) {
1268 NewSub = NewSub ? NewSub : Sub;
1269 NewSrc = NewSrc ? NewSrc : Src;
1270 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
1271 Op.getOperand(2));
1272 return TLO.CombineTo(Op, NewOp);
1273 }
1274 }
1275 break;
1276 }
1277 case ISD::EXTRACT_SUBVECTOR: {
1278 if (VT.isScalableVector())
1279 return false;
1280 // Offset the demanded elts by the subvector index.
1281 SDValue Src = Op.getOperand(0);
1282 if (Src.getValueType().isScalableVector())
1283 break;
1284 uint64_t Idx = Op.getConstantOperandVal(1);
1285 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1286 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
1287
1288 if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO,
1289 Depth + 1))
1290 return true;
1291
1292 // Attempt to avoid multi-use src if we don't need anything from it.
1293 if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
1294 SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1295 Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
1296 if (DemandedSrc) {
1297 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc,
1298 Op.getOperand(1));
1299 return TLO.CombineTo(Op, NewOp);
1300 }
1301 }
1302 break;
1303 }
1304 case ISD::CONCAT_VECTORS: {
1305 if (VT.isScalableVector())
1306 return false;
1307 Known.Zero.setAllBits();
1308 Known.One.setAllBits();
1309 EVT SubVT = Op.getOperand(0).getValueType();
1310 unsigned NumSubVecs = Op.getNumOperands();
1311 unsigned NumSubElts = SubVT.getVectorNumElements();
1312 for (unsigned i = 0; i != NumSubVecs; ++i) {
1313 APInt DemandedSubElts =
1314 DemandedElts.extractBits(NumSubElts, i * NumSubElts);
1315 if (SimplifyDemandedBits(Op.getOperand(i), DemandedBits, DemandedSubElts,
1316 Known2, TLO, Depth + 1))
1317 return true;
1318 // Known bits are shared by every demanded subvector element.
1319 if (!!DemandedSubElts)
1320 Known = Known.intersectWith(Known2);
1321 }
1322 break;
1323 }
1324 case ISD::VECTOR_SHUFFLE: {
1325 assert(!VT.isScalableVector());
1326 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1327
// Collect demanded elements from shuffle operands.
1329 APInt DemandedLHS, DemandedRHS;
1330 if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS,
1331 DemandedRHS))
1332 break;
1333
1334 if (!!DemandedLHS || !!DemandedRHS) {
1335 SDValue Op0 = Op.getOperand(0);
1336 SDValue Op1 = Op.getOperand(1);
1337
1338 Known.Zero.setAllBits();
1339 Known.One.setAllBits();
1340 if (!!DemandedLHS) {
1341 if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
1342 Depth + 1))
1343 return true;
1344 Known = Known.intersectWith(Known2);
1345 }
1346 if (!!DemandedRHS) {
1347 if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
1348 Depth + 1))
1349 return true;
1350 Known = Known.intersectWith(Known2);
1351 }
1352
1353 // Attempt to avoid multi-use ops if we don't need anything from them.
1354 SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
1355 Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
1356 SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
1357 Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
1358 if (DemandedOp0 || DemandedOp1) {
1359 Op0 = DemandedOp0 ? DemandedOp0 : Op0;
1360 Op1 = DemandedOp1 ? DemandedOp1 : Op1;
1361 SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
1362 return TLO.CombineTo(Op, NewOp);
1363 }
1364 }
1365 break;
1366 }
1367 case ISD::AND: {
1368 SDValue Op0 = Op.getOperand(0);
1369 SDValue Op1 = Op.getOperand(1);
1370
1371 // If the RHS is a constant, check to see if the LHS would be zero without
1372 // using the bits from the RHS. Below, we use knowledge about the RHS to
1373 // simplify the LHS, here we're using information from the LHS to simplify
1374 // the RHS.
1375 if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
1376 // Do not increment Depth here; that can cause an infinite loop.
1377 KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
1378 // If the LHS already has zeros where RHSC does, this 'and' is dead.
1379 if ((LHSKnown.Zero & DemandedBits) ==
1380 (~RHSC->getAPIntValue() & DemandedBits))
1381 return TLO.CombineTo(Op, Op0);
1382
1383 // If any of the set bits in the RHS are known zero on the LHS, shrink
1384 // the constant.
1385 if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
1386 DemandedElts, TLO))
1387 return true;
1388
1389 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
1390 // constant, but if this 'and' is only clearing bits that were just set by
1391 // the xor, then this 'and' can be eliminated by shrinking the mask of
1392 // the xor. For example, for a 32-bit X:
1393 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
1394 if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
1395 LHSKnown.One == ~RHSC->getAPIntValue()) {
1396 SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
1397 return TLO.CombineTo(Op, Xor);
1398 }
1399 }
1400
1401 // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I)
1402 // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits).
    if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() &&
        (Op0.getOperand(0).isUndef() ||
         ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) &&
        Op0->hasOneUse()) {
      unsigned NumSubElts =
          Op0.getOperand(1).getValueType().getVectorNumElements();
      unsigned SubIdx = Op0.getConstantOperandVal(2);
      APInt DemandedSub =
          APInt::getBitsSet(NumElts, SubIdx, SubIdx + NumSubElts);
      KnownBits KnownSubMask =
          TLO.DAG.computeKnownBits(Op1, DemandedSub & DemandedElts, Depth + 1);
      if (DemandedBits.isSubsetOf(KnownSubMask.One)) {
        SDValue NewAnd =
            TLO.DAG.getNode(ISD::AND, dl, VT, Op0.getOperand(0), Op1);
        SDValue NewInsert =
            TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, NewAnd,
                            Op0.getOperand(1), Op0.getOperand(2));
        return TLO.CombineTo(Op, NewInsert);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1)) {
      if (Flags.hasDisjoint()) {
        Flags.setDisjoint(false);
        Op->setFlags(Flags);
      }
      return true;
    }
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1)) {
      if (Flags.hasDisjoint()) {
        Flags.setDisjoint(false);
        Op->setFlags(Flags);
      }
      return true;
    }
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2))
    // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks.
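    // Worked instance of the identity (illustrative, 8-bit):
    //   (or (and X, 0xF0), (and (or X, Y), 0x0F))
    //     == (X & 0xF0) | (X & 0x0F) | (Y & 0x0F)
    //     == (or (and X, 0xFF), (and Y, 0x0F))
    // X contributes through the merged mask C1|C2, Y only through C2.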
    if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND &&
        Op0->hasOneUse() && Op1->hasOneUse()) {
      // Attempt to match all commutations - m_c_Or would've been useful!
      for (int I = 0; I != 2; ++I) {
        SDValue X = Op.getOperand(I).getOperand(0);
        SDValue C1 = Op.getOperand(I).getOperand(1);
        SDValue Alt = Op.getOperand(1 - I).getOperand(0);
        SDValue C2 = Op.getOperand(1 - I).getOperand(1);
        if (Alt.getOpcode() == ISD::OR) {
          for (int J = 0; J != 2; ++J) {
            if (X == Alt.getOperand(J)) {
              SDValue Y = Alt.getOperand(1 - J);
              if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT,
                                                               {C1, C2})) {
                SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12);
                SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2);
                return TLO.CombineTo(
                    Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY));
              }
            }
          }
        }
      }
    }

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
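      // e.g. (illustrative, 8-bit): if Op0 is known to have ones exactly
      // where C = 0x0C does, then (Op0 ^ 0x0C) clears those two bits, which
      // is the same as (Op0 & ~0x0C) == (Op0 & 0xF3).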
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
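        // e.g. (illustrative): with DemandedBits = 0x0F and C = 0x0F, only
        // the low nibble is observed, so (X ^ 0x0F) can be widened to
        // (X ^ 0xFF) == NOT(X) without changing any demanded bit.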
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }

      unsigned Op0Opcode = Op0.getOpcode();
      if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) {
        if (ConstantSDNode *ShiftC =
                isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
          // Don't crash on an oversized shift. We cannot guarantee that a
          // bogus shift has been simplified to undef.
          if (ShiftC->getAPIntValue().ult(BitWidth)) {
            uint64_t ShiftAmt = ShiftC->getZExtValue();
            APInt Ones = APInt::getAllOnes(BitWidth);
            Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt)
                                         : Ones.lshr(ShiftAmt);
            const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
            if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) &&
                TLI.isDesirableToCommuteXorWithShift(Op.getNode())) {
              // If the xor constant is a demanded mask, do a 'not' before the
              // shift:
              // xor (X << ShiftC), XorC --> (not X) << ShiftC
              // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
              SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT);
              return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not,
                                                       Op0.getOperand(1)));
            }
          }
        }
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnes())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  case ISD::VSELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, DemandedElts,
                             Known, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = Known.intersectWith(Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
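      // e.g. (illustrative, i32): ((X >>u 4) << 6) with the low 6 result
      // bits undemanded is just (X << 2); the bits the srl zeroed out all
      // land in the undemanded low region.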
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          SDValue NarrowShl = TLO.DAG.getNode(
              ISD::SHL, dl, InnerVT, InnerOp,
              TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
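        // e.g. (illustrative): (shl (anyext (srl i16 X, 2)), 8) to i32 with
        // only bits 8-21 demanded becomes (shl (anyext X), 6); the 2 bits
        // dropped by the srl stay below the demanded range and its
        // shifted-in zeros stay above it.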
        if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countr_zero() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1)) {
        SDNodeFlags Flags = Op.getNode()->getFlags();
        if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
          // Disable the nsw and nuw flags. We can no longer guarantee that we
          // won't wrap after simplification.
          Flags.setNoSignedWrap(false);
          Flags.setNoUnsignedWrap(false);
          Op->setFlags(Flags);
        }
        return true;
      }
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;

      // Narrow shift to lower half - similar to ShrinkDemandedOp.
      // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K))
      // Only do this if we demand the upper half so the knownbits are correct.
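      // e.g. (illustrative): (shl i64 X, 4) with all of bits 32-63 demanded
      // can become (zext (shl (trunc X to i32), 4) to i64) when the wide
      // result's top half is already known zero (the IsNUW check below),
      // since the zext then reproduces those zeros exactly.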
      unsigned HalfWidth = BitWidth / 2;
      if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth &&
          DemandedBits.countLeadingOnes() >= HalfWidth) {
        EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth);
        if (isNarrowingProfitable(VT, HalfVT) &&
            isTypeDesirableForOp(ISD::SHL, HalfVT) &&
            isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
            (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) {
          // If we're demanding the upper bits at all, we must ensure
          // that the upper bits of the shift result are known to be zero,
          // which is equivalent to the narrow shift being NUW.
          if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) {
            bool IsNSW = Known.countMinSignBits() > HalfWidth;
            SDNodeFlags Flags;
            Flags.setNoSignedWrap(IsNSW);
            Flags.setNoUnsignedWrap(IsNUW);
            SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
            SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
                ShAmt, HalfVT, dl, TLO.LegalTypes());
            SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp,
                                               NewShiftAmt, Flags);
            SDValue NewExt =
                TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift);
            return TLO.CombineTo(Op, NewExt);
          }
        }
      }
    } else {
      // This is a variable shift, so we can't shift the demand mask by a known
      // amount. But if we are not demanding high bits, then we are not
      // demanding those bits from the pre-shifted operand either.
      if (unsigned CTLZ = DemandedBits.countl_zero()) {
        APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
        if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
                                 Depth + 1)) {
          SDNodeFlags Flags = Op.getNode()->getFlags();
          if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
            // Disable the nsw and nuw flags. We can no longer guarantee that we
            // won't wrap after simplification.
            Flags.setNoSignedWrap(false);
            Flags.setNoUnsignedWrap(false);
            Op->setFlags(Flags);
          }
          return true;
        }
        Known.resetAll();
      }
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // Try to match AVG patterns.
    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
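      // e.g. (illustrative, i32): ((X << 4) >>u 6) with the top 6 result
      // bits undemanded is just (X >>u 2); the bits the shl zeroed out all
      // land in the undemanded high region.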
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Narrow shift to lower half - similar to ShrinkDemandedOp.
      // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
      if ((BitWidth % 2) == 0 && !VT.isVector()) {
        APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2);
        EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
        if (isNarrowingProfitable(VT, HalfVT) &&
            isTypeDesirableForOp(ISD::SRL, HalfVT) &&
            isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
            (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) &&
            ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
             TLO.DAG.MaskedValueIsZero(Op0, HiBits))) {
          SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
          SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
              ShAmt, HalfVT, dl, TLO.LegalTypes());
          SDValue NewShift =
              TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    } else {
      // Use generic knownbits computation as it has support for non-uniform
      // shift amounts.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
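    // e.g. (illustrative, i8): bit 0 of (X >>s N) equals bit 0 of
    // (X >>u N) for any in-range N, since an input sign bit can only reach
    // bit 0 via an out-of-range (undefined) shift amount.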
    if (DemandedBits.isOne())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    // Try to match AVG patterns.
    if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits,
                                        DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, AVG);

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target
      // supports sext_inreg.
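      // e.g. (illustrative, i32): (sra (shl X, 24), 24) sign-extends the
      // low byte of X, i.e. sext_inreg X, i8, which many targets can do in
      // a single instruction.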
      if (Op0.getOpcode() == ISD::SHL) {
        if (const APInt *InnerSA =
                TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
          unsigned LowBits = BitWidth - ShAmt;
          EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits);
          if (VT.isVector())
            ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT,
                                     VT.getVectorElementCount());

          if (*InnerSA == ShAmt) {
            if (!TLO.LegalOperations() ||
                getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal)
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT,
                                      Op0.getOperand(0),
                                      TLO.DAG.getValueType(ExtVT)));

            // Even if we can't convert to sext_inreg, we might be able to
            // remove this shift pair if the input is already sign extended.
            unsigned NumSignBits =
                TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts);
            if (NumSignBits > ShAmt)
              return TLO.CombineTo(Op, Op0.getOperand(0));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countl_zero() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countl_zero() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
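      // e.g. (illustrative, i8 fshl with Amt = 3): the result is
      // (Op0 << 3) | (Op1 >> 5), so result bit 6 comes from Op0 bit 3 and
      // result bit 1 from Op1 bit 6; Demanded0/Demanded1 below map the
      // demanded result bits back onto each operand accordingly.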
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known = Known.unionWith(Known2);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
          !DemandedElts.isAllOnes()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1);
        SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
            Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0 || DemandedOp1) {
          DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0;
          DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1;
          SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0,
                                          DemandedOp1, Op2);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
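    // e.g. (illustrative): for i32 the funnel-shift amount acts modulo 32,
    // so only the low 5 bits of Op2 can affect the result and the rest may
    // be simplified away.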
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    bool IsROTL = (Op.getOpcode() == ISD::ROTL);

    // If we're rotating a 0/-1 value, then it stays a 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);
      unsigned RevAmt = BitWidth - Amt;

      // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt))
      // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt)
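      // e.g. (illustrative, i8 rotl with Amt = 3): the result is
      // (Op0 << 3) | (Op0 >> 5); if the low 3 result bits are undemanded,
      // the wrapped-around bits never matter and the rotate can relax to a
      // plain shl (see below).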
      APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;

      // rot*(x, 0) --> x
      if (Amt == 0)
        return TLO.CombineTo(Op, Op0);

      // See if we don't demand either half of the rotated bits.
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
          DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
      }
      if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
          DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
        Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
      }
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX: {
    unsigned Opc = Op.getOpcode();
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If we're only demanding signbits, then we can simplify to OR/AND node.
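    // e.g. (illustrative): if both inputs are known all-sign-bits (each 0
    // or -1), then smin(X, Y) == (X | Y) and smax(X, Y) == (X & Y), since
    // -1 wins the signed min and 0 wins the signed max; the unsigned ops
    // swap the roles.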
    unsigned BitOp =
        (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND;
    unsigned NumSignBits =
        std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1),
                 TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1));
    unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero();
    if (NumSignBits >= NumDemandedUpperBits)
      return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1));

    // Check if one arg is always less/greater than (or equal) to the other arg.
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    switch (Opc) {
    case ISD::SMIN:
      if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1))
        return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1);
      if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1))
        return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1);
      Known = KnownBits::smin(Known0, Known1);
      break;
    case ISD::SMAX:
      if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1))
        return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1);
      if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1))
        return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1);
      Known = KnownBits::smax(Known0, Known1);
      break;
    case ISD::UMIN:
      if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1))
        return TLO.CombineTo(Op, *IsULE ? Op0 : Op1);
      if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1))
        return TLO.CombineTo(Op, *IsULT ? Op0 : Op1);
      Known = KnownBits::umin(Known0, Known1);
      break;
    case ISD::UMAX:
      if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
        return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1);
      if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
        return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1);
      Known = KnownBits::umax(Known0, Known1);
      break;
    }
    break;
  }
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);

    // If the only bits demanded come from one byte of the bswap result,
    // just shift the input byte into position to eliminate the bswap.
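    // e.g. (illustrative, i32): if only bits 7:0 of (bswap X) are demanded,
    // they are exactly bits 31:24 of X, so the bswap can be replaced by
    // (srl X, 24).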
    unsigned NLZ = DemandedBits.countl_zero();
    unsigned NTZ = DemandedBits.countr_zero();

    // Round NTZ down to the next byte. If we have 11 trailing zeros, then
    // we need all the bits down to bit 8. Likewise, round NLZ. If we
    // have 14 leading zeros, round to 8.
    NLZ = alignDown(NLZ, 8);
    NTZ = alignDown(NTZ, 8);
    // If we need exactly one byte, we can do this transformation.
    if (BitWidth - NLZ - NTZ == 8) {
      // Replace this with either a left or right shift to get the byte into
      // the right place.
      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
      if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
        EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
        SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
        SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
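    // e.g. (illustrative, i32): if only bit 31 of (sext_inreg X, i8) is
    // demanded, that bit is just bit 7 of X, so (shl X, 24) produces it
    // without materializing the full sign extension.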
    if (DemandedBits.isSignMask()) {
      unsigned MinSignedBits =
          TLO.DAG.ComputeMaxSignificantBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = ExVTBits >= MinSignedBits;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
                                               getShiftAmountTy(VT, DL));
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known = KnownHi.concat(KnownLo);
    break;
  }
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ZERO_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    SDNodeFlags Flags = Op->getFlags();
    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1)) {
      if (Flags.hasNonNeg()) {
        Flags.setNonNeg(false);
        Op->setFlags(Flags);
      }
      return true;
    }
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::SIGN_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    APInt InDemandedElts = DemandedElts.zext(InElts);
    APInt InDemandedBits = DemandedBits.trunc(InBits);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsLE && IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      // Don't lose an all signbits 0/-1 splat on targets with 0/-1 booleans.
      if (getBooleanContents(VT) != ZeroOrNegativeOneBooleanContent ||
          TLO.DAG.ComputeNumSignBits(Src, InDemandedElts, Depth + 1) !=
              InBits) {
        unsigned Opc =
            IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
        if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
          return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
      }
    }

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) {
        SDNodeFlags Flags;
        if (!IsVecInReg)
          Flags.setNonNeg(true);
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src, Flags));
      }
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
    if (VT.isScalableVector())
      return false;
    [[fallthrough]];
  case ISD::ANY_EXTEND: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts =
        SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsLE && IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zext(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    switch (Src.getOpcode()) {
    default:
      break;
    case ISD::SRL:
      // Shrink SRL by a constant if none of the high bits shifted in are
      // demanded.
      if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
        // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
        // undesirable.
        break;

      if (Src.getNode()->hasOneUse()) {
        const APInt *ShAmtC =
            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
        if (!ShAmtC || ShAmtC->uge(BitWidth))
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed. Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getConstant(
              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
      }
      break;
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    Known.Zero |= ~InMask;
    Known.One &= (~Known.Zero);
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    if (VT.isScalableVector())
      return false;
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned EltOffset = IsLE ? i : (Scale - 1 - i);
        unsigned BitOffset = EltOffset * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset);
        if (!Sub.isZero()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) {
      // TODO - bigendian once we have test coverage.
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
        if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
                Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
          SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }

    // If this is a bitcast, let computeKnownBits handle it. Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
  case ISD::MUL:
    if (DemandedBits.isPowerOf2()) {
      // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
      // If we demand exactly one bit N and we have "X * (C' << N)" where C' is
      // odd (has LSB set), then the left-shifted low bit of X is the answer.
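      // e.g. (illustrative): with DemandedBits = 0b100 and C = 20 = 5 << 2,
      // bit 2 of X * 20 is bit 0 of X * 5, which is bit 0 of X because 5 is
      // odd; so (X << 2) already produces the demanded bit.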
      unsigned CTZ = DemandedBits.countr_zero();
      ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
      if (C && C->getAPIntValue().countr_zero() == CTZ) {
        EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
        SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
        SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
        return TLO.CombineTo(Op, Shl);
      }
    }
    // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because:
    // X * X is odd iff X is odd.
    // 'Quadratic Reciprocity': X * X -> 0 for bit[1]
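    // Why bit 1 is always clear (illustrative derivation): write X = 2A + b
    // with b = X[0]; then X * X = 4A^2 + 4Ab + b^2 = 4(A^2 + Ab) + b, so
    // bit 0 is b and bit 1 is zero.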
    if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) {
      SDValue One = TLO.DAG.getConstant(1, dl, VT);
      SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One);
      return TLO.CombineTo(Op, And1);
    }
    [[fallthrough]];
  case ISD::ADD:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countl_zero();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    KnownBits KnownOp0, KnownOp1;
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op->setFlags(Flags);
      }
      return true;
    }

    // neg x with only low bit demanded is simply x.
    if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() &&
        isNullConstant(Op0))
      return TLO.CombineTo(Op, Op1);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnes() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnes()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    // Match a multiply with a disguised negated-power-of-2 and convert to an
    // equivalent shift-left amount.
    // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
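    // e.g. (illustrative): if MulC == -8 once the bits above the demanded
    // range are ignored via HighMask, then X * -8 + Op1 == Op1 - (X << 3),
    // trading the multiply for a shift.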
2839 auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned {
2840 if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse())
2841 return 0;
2842
2843 // Don't touch opaque constants. Also, ignore zero and power-of-2
2844 // multiplies. Those will get folded later.
2845 ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1));
2846 if (MulC && !MulC->isOpaque() && !MulC->isZero() &&
2847 !MulC->getAPIntValue().isPowerOf2()) {
2848 APInt UnmaskedC = MulC->getAPIntValue() | HighMask;
2849 if (UnmaskedC.isNegatedPowerOf2())
2850 return (-UnmaskedC).logBase2();
2851 }
2852 return 0;
2853 };
2854
2855 auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) {
2856 EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
2857 SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy);
2858 SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC);
2859 SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl);
2860 return TLO.CombineTo(Op, Res);
2861 };
2862
2863 if (isOperationLegalOrCustom(ISD::SHL, VT)) {
2864 if (Op.getOpcode() == ISD::ADD) {
2865 // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC))
2866 if (unsigned ShAmt = getShiftLeftAmt(Op0))
2867 return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt);
2868 // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC))
2869 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2870 return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt);
2871 }
2872 if (Op.getOpcode() == ISD::SUB) {
2873 // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC))
2874 if (unsigned ShAmt = getShiftLeftAmt(Op1))
2875 return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt);
2876 }
2877 }
2878
2879 if (Op.getOpcode() == ISD::MUL) {
2880 Known = KnownBits::mul(KnownOp0, KnownOp1);
2881 } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
2882 Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
2883 Flags.hasNoSignedWrap(), KnownOp0,
2884 KnownOp1);
2885 }
2886 break;
2887 }
2888 default:
2889 // We also ask the target about intrinsics (which could be specific to it).
2890 if (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
2891 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
2892 // TODO: Probably okay to remove after audit; here to reduce change size
2893 // in initial enablement patch for scalable vectors
2894 if (Op.getValueType().isScalableVector())
2895 break;
2896 if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
2897 Known, TLO, Depth))
2898 return true;
2899 break;
2900 }
2901
2902 // Just use computeKnownBits to compute output bits.
2903 Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
2904 break;
2905 }
2906
2907 // If we know the value of all of the demanded bits, return this as a
2908 // constant.
2909 if (!isTargetCanonicalConstantNode(Op) &&
2910 DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
2911 // Avoid folding to a constant if any OpaqueConstant is involved.
2912 const SDNode *N = Op.getNode();
2913 for (SDNode *Op :
2914 llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
2915 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
2916 if (C->isOpaque())
2917 return false;
2918 }
2919 if (VT.isInteger())
2920 return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
2921 if (VT.isFloatingPoint())
2922 return TLO.CombineTo(
2923 Op,
2924 TLO.DAG.getConstantFP(
2925 APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
2926 }
2927
2928 // A multi-use 'all demanded elts' simplification failed to find any known bits.
2929 // Try again just for the original demanded elts.
2930 // Ensure we do this AFTER constant folding above.
2931 if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes())
2932 Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth);
2933
2934 return false;
2935 }
2936
2937 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
2938 const APInt &DemandedElts,
2939 DAGCombinerInfo &DCI) const {
2940 SelectionDAG &DAG = DCI.DAG;
2941 TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
2942 !DCI.isBeforeLegalizeOps());
2943
2944 APInt KnownUndef, KnownZero;
2945 bool Simplified =
2946 SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
2947 if (Simplified) {
2948 DCI.AddToWorklist(Op.getNode());
2949 DCI.CommitTargetLoweringOpt(TLO);
2950 }
2951
2952 return Simplified;
2953 }
2954
2955 /// Given a vector binary operation and known undefined elements for each input
2956 /// operand, compute whether each element of the output is undefined.
2957 static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
2958 const APInt &UndefOp0,
2959 const APInt &UndefOp1) {
2960 EVT VT = BO.getValueType();
2961 assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
2962 "Vector binop only");
2963
2964 EVT EltVT = VT.getVectorElementType();
2965 unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1;
2966 assert(UndefOp0.getBitWidth() == NumElts &&
2967 UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
2968
2969 auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
2970 const APInt &UndefVals) {
2971 if (UndefVals[Index])
2972 return DAG.getUNDEF(EltVT);
2973
2974 if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
2975 // Try hard to make sure that the getNode() call is not creating temporary
2976 // nodes. Ignore opaque integers because they do not constant fold.
2977 SDValue Elt = BV->getOperand(Index);
2978 auto *C = dyn_cast<ConstantSDNode>(Elt);
2979 if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
2980 return Elt;
2981 }
2982
2983 return SDValue();
2984 };
2985
2986 APInt KnownUndef = APInt::getZero(NumElts);
2987 for (unsigned i = 0; i != NumElts; ++i) {
2988 // If both inputs for this element are either constant or undef and match
2989 // the element type, compute the constant/undef result for this element of
2990 // the vector.
2991 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
2992 // not handle FP constants. The code within getNode() should be refactored
2993 // to avoid the danger of creating a bogus temporary node here.
2994 SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
2995 SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
2996 if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
2997 if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
2998 KnownUndef.setBit(i);
2999 }
3000 return KnownUndef;
3001 }
3002
3003 bool TargetLowering::SimplifyDemandedVectorElts(
3004 SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
3005 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
3006 bool AssumeSingleUse) const {
3007 EVT VT = Op.getValueType();
3008 unsigned Opcode = Op.getOpcode();
3009 APInt DemandedElts = OriginalDemandedElts;
3010 unsigned NumElts = DemandedElts.getBitWidth();
3011 assert(VT.isVector() && "Expected vector op");
3012
3013 KnownUndef = KnownZero = APInt::getZero(NumElts);
3014
3015 const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo();
3016 if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO))
3017 return false;
3018
3019 // TODO: For now we assume we know nothing about scalable vectors.
3020 if (VT.isScalableVector())
3021 return false;
3022
3023 assert(VT.getVectorNumElements() == NumElts &&
3024 "Mask size mismatches value type element count!");
3025
3026 // Undef operand.
3027 if (Op.isUndef()) {
3028 KnownUndef.setAllBits();
3029 return false;
3030 }
3031
3032 // If Op has other users, assume that all elements are needed.
3033 if (!AssumeSingleUse && !Op.getNode()->hasOneUse())
3034 DemandedElts.setAllBits();
3035
3036 // Not demanding any elements from Op.
3037 if (DemandedElts == 0) {
3038 KnownUndef.setAllBits();
3039 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3040 }
3041
3042 // Limit search depth.
3043 if (Depth >= SelectionDAG::MaxRecursionDepth)
3044 return false;
3045
3046 SDLoc DL(Op);
3047 unsigned EltSizeInBits = VT.getScalarSizeInBits();
3048 bool IsLE = TLO.DAG.getDataLayout().isLittleEndian();
3049
3050 // Helper for demanding the specified elements and all the bits of both binary
3051 // operands.
3052 auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
3053 SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
3054 TLO.DAG, Depth + 1);
3055 SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
3056 TLO.DAG, Depth + 1);
3057 if (NewOp0 || NewOp1) {
3058 SDValue NewOp =
3059 TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0,
3060 NewOp1 ? NewOp1 : Op1, Op->getFlags());
3061 return TLO.CombineTo(Op, NewOp);
3062 }
3063 return false;
3064 };
3065
3066 switch (Opcode) {
3067 case ISD::SCALAR_TO_VECTOR: {
3068 if (!DemandedElts[0]) {
3069 KnownUndef.setAllBits();
3070 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3071 }
3072 SDValue ScalarSrc = Op.getOperand(0);
3073 if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
3074 SDValue Src = ScalarSrc.getOperand(0);
3075 SDValue Idx = ScalarSrc.getOperand(1);
3076 EVT SrcVT = Src.getValueType();
3077
3078 ElementCount SrcEltCnt = SrcVT.getVectorElementCount();
3079
3080 if (SrcEltCnt.isScalable())
3081 return false;
3082
3083 unsigned NumSrcElts = SrcEltCnt.getFixedValue();
3084 if (isNullConstant(Idx)) {
3085 APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
3086 APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
3087 APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
3088 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3089 TLO, Depth + 1))
3090 return true;
3091 }
3092 }
3093 KnownUndef.setHighBits(NumElts - 1);
3094 break;
3095 }
3096 case ISD::BITCAST: {
3097 SDValue Src = Op.getOperand(0);
3098 EVT SrcVT = Src.getValueType();
3099
3100 // We only handle vectors here.
3101 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
3102 if (!SrcVT.isVector())
3103 break;
3104
3105 // Fast handling of 'identity' bitcasts.
3106 unsigned NumSrcElts = SrcVT.getVectorNumElements();
3107 if (NumSrcElts == NumElts)
3108 return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
3109 KnownZero, TLO, Depth + 1);
3110
3111 APInt SrcDemandedElts, SrcZero, SrcUndef;
3112
3113 // Bitcast from a 'large element' src vector to a 'small element' vector: we
3114 // must demand a source element if any DemandedElt maps to it.
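// For example, a v2i64 -> v8i16 bitcast has Scale == 4; demanding i16 element
// 5 requires demanding i64 source element 1.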
3115 if ((NumElts % NumSrcElts) == 0) {
3116 unsigned Scale = NumElts / NumSrcElts;
3117 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3118 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3119 TLO, Depth + 1))
3120 return true;
3121
3122 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
3123 // of the large element.
3124 // TODO - bigendian once we have test coverage.
3125 if (IsLE) {
3126 unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
3127 APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
3128 for (unsigned i = 0; i != NumElts; ++i)
3129 if (DemandedElts[i]) {
3130 unsigned Ofs = (i % Scale) * EltSizeInBits;
3131 SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
3132 }
3133
3134 KnownBits Known;
3135 if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
3136 TLO, Depth + 1))
3137 return true;
3138
3139 // The bitcast has split each wide element into a number of
3140 // narrow subelements. We have just computed the Known bits
3141 // for wide elements. See if element splitting results in
3142 // some subelements being zero. Only for demanded elements!
3143 for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) {
3144 if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits)
3145 .isAllOnes())
3146 continue;
3147 for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) {
3148 unsigned Elt = Scale * SrcElt + SubElt;
3149 if (DemandedElts[Elt])
3150 KnownZero.setBit(Elt);
3151 }
3152 }
3153 }
3154
3155 // If the src element is zero/undef then all the output elements covering it
3156 // will be as well - only the demanded elements are guaranteed to be correct.
3157 for (unsigned i = 0; i != NumSrcElts; ++i) {
3158 if (SrcDemandedElts[i]) {
3159 if (SrcZero[i])
3160 KnownZero.setBits(i * Scale, (i + 1) * Scale);
3161 if (SrcUndef[i])
3162 KnownUndef.setBits(i * Scale, (i + 1) * Scale);
3163 }
3164 }
3165 }
3166
3167 // Bitcast from a 'small element' src vector to a 'large element' vector: we
3168 // demand all the smaller source elements covered by the larger demanded
3169 // element of this vector.
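// For example, a v8i16 -> v2i64 bitcast has Scale == 4; demanding i64 element
// 1 requires demanding i16 source elements 4 through 7.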
3170 if ((NumSrcElts % NumElts) == 0) {
3171 unsigned Scale = NumSrcElts / NumElts;
3172 SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
3173 if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
3174 TLO, Depth + 1))
3175 return true;
3176
3177 // If all the src elements covering an output element are zero/undef, then
3178 // the output element will be as well, assuming it was demanded.
3179 for (unsigned i = 0; i != NumElts; ++i) {
3180 if (DemandedElts[i]) {
3181 if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
3182 KnownZero.setBit(i);
3183 if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
3184 KnownUndef.setBit(i);
3185 }
3186 }
3187 }
3188 break;
3189 }
3190 case ISD::BUILD_VECTOR: {
3191 // Check all elements and simplify any unused elements with UNDEF.
3192 if (!DemandedElts.isAllOnes()) {
3193 // Don't simplify BROADCASTS.
3194 if (llvm::any_of(Op->op_values(),
3195 [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
3196 SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
3197 bool Updated = false;
3198 for (unsigned i = 0; i != NumElts; ++i) {
3199 if (!DemandedElts[i] && !Ops[i].isUndef()) {
3200 Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
3201 KnownUndef.setBit(i);
3202 Updated = true;
3203 }
3204 }
3205 if (Updated)
3206 return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
3207 }
3208 }
3209 for (unsigned i = 0; i != NumElts; ++i) {
3210 SDValue SrcOp = Op.getOperand(i);
3211 if (SrcOp.isUndef()) {
3212 KnownUndef.setBit(i);
3213 } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
3214 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
3215 KnownZero.setBit(i);
3216 }
3217 }
3218 break;
3219 }
3220 case ISD::CONCAT_VECTORS: {
3221 EVT SubVT = Op.getOperand(0).getValueType();
3222 unsigned NumSubVecs = Op.getNumOperands();
3223 unsigned NumSubElts = SubVT.getVectorNumElements();
3224 for (unsigned i = 0; i != NumSubVecs; ++i) {
3225 SDValue SubOp = Op.getOperand(i);
3226 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3227 APInt SubUndef, SubZero;
3228 if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
3229 Depth + 1))
3230 return true;
3231 KnownUndef.insertBits(SubUndef, i * NumSubElts);
3232 KnownZero.insertBits(SubZero, i * NumSubElts);
3233 }
3234
3235 // Attempt to avoid multi-use ops if we don't need anything from them.
3236 if (!DemandedElts.isAllOnes()) {
3237 bool FoundNewSub = false;
3238 SmallVector<SDValue, 2> DemandedSubOps;
3239 for (unsigned i = 0; i != NumSubVecs; ++i) {
3240 SDValue SubOp = Op.getOperand(i);
3241 APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
3242 SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts(
3243 SubOp, SubElts, TLO.DAG, Depth + 1);
3244 DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp);
3245 FoundNewSub = NewSubOp ? true : FoundNewSub;
3246 }
3247 if (FoundNewSub) {
3248 SDValue NewOp =
3249 TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps);
3250 return TLO.CombineTo(Op, NewOp);
3251 }
3252 }
3253 break;
3254 }
3255 case ISD::INSERT_SUBVECTOR: {
3256 // Demand any elements from the subvector and the remainder from the src it
3257 // is inserted into.
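// For example, inserting a v2i32 subvector at index 2 of a v8i32 source:
// DemandedSubElts takes demanded-elt bits 2..3, and DemandedSrcElts is the
// original mask with bits 2..3 cleared.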
3258 SDValue Src = Op.getOperand(0);
3259 SDValue Sub = Op.getOperand(1);
3260 uint64_t Idx = Op.getConstantOperandVal(2);
3261 unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
3262 APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
3263 APInt DemandedSrcElts = DemandedElts;
3264 DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
3265
3266 APInt SubUndef, SubZero;
3267 if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
3268 Depth + 1))
3269 return true;
3270
3271 // If none of the src operand elements are demanded, replace it with undef.
3272 if (!DemandedSrcElts && !Src.isUndef())
3273 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
3274 TLO.DAG.getUNDEF(VT), Sub,
3275 Op.getOperand(2)));
3276
3277 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
3278 TLO, Depth + 1))
3279 return true;
3280 KnownUndef.insertBits(SubUndef, Idx);
3281 KnownZero.insertBits(SubZero, Idx);
3282
3283 // Attempt to avoid multi-use ops if we don't need anything from them.
3284 if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
3285 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3286 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3287 SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
3288 Sub, DemandedSubElts, TLO.DAG, Depth + 1);
3289 if (NewSrc || NewSub) {
3290 NewSrc = NewSrc ? NewSrc : Src;
3291 NewSub = NewSub ? NewSub : Sub;
3292 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3293 NewSub, Op.getOperand(2));
3294 return TLO.CombineTo(Op, NewOp);
3295 }
3296 }
3297 break;
3298 }
3299 case ISD::EXTRACT_SUBVECTOR: {
3300 // Offset the demanded elts by the subvector index.
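// For example, extracting a v4i32 subvector at index 2 from a v8i32 source
// maps demanded elements 0..3 onto source elements 2..5.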
3301 SDValue Src = Op.getOperand(0);
3302 if (Src.getValueType().isScalableVector())
3303 break;
3304 uint64_t Idx = Op.getConstantOperandVal(1);
3305 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3306 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
3307
3308 APInt SrcUndef, SrcZero;
3309 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3310 Depth + 1))
3311 return true;
3312 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
3313 KnownZero = SrcZero.extractBits(NumElts, Idx);
3314
3315 // Attempt to avoid multi-use ops if we don't need anything from them.
3316 if (!DemandedElts.isAllOnes()) {
3317 SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
3318 Src, DemandedSrcElts, TLO.DAG, Depth + 1);
3319 if (NewSrc) {
3320 SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
3321 Op.getOperand(1));
3322 return TLO.CombineTo(Op, NewOp);
3323 }
3324 }
3325 break;
3326 }
3327 case ISD::INSERT_VECTOR_ELT: {
3328 SDValue Vec = Op.getOperand(0);
3329 SDValue Scl = Op.getOperand(1);
3330 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3331
3332 // For a legal, constant insertion index, if we don't need this insertion
3333 // then strip it, else remove it from the demanded elts.
3334 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
3335 unsigned Idx = CIdx->getZExtValue();
3336 if (!DemandedElts[Idx])
3337 return TLO.CombineTo(Op, Vec);
3338
3339 APInt DemandedVecElts(DemandedElts);
3340 DemandedVecElts.clearBit(Idx);
3341 if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
3342 KnownZero, TLO, Depth + 1))
3343 return true;
3344
3345 KnownUndef.setBitVal(Idx, Scl.isUndef());
3346
3347 KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
3348 break;
3349 }
3350
3351 APInt VecUndef, VecZero;
3352 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
3353 Depth + 1))
3354 return true;
3355 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
3356 break;
3357 }
3358 case ISD::VSELECT: {
3359 SDValue Sel = Op.getOperand(0);
3360 SDValue LHS = Op.getOperand(1);
3361 SDValue RHS = Op.getOperand(2);
3362
3363 // Try to transform the select condition based on the current demanded
3364 // elements.
3365 APInt UndefSel, ZeroSel;
3366 if (SimplifyDemandedVectorElts(Sel, DemandedElts, UndefSel, ZeroSel, TLO,
3367 Depth + 1))
3368 return true;
3369
3370 // See if we can simplify either vselect operand.
3371 APInt DemandedLHS(DemandedElts);
3372 APInt DemandedRHS(DemandedElts);
3373 APInt UndefLHS, ZeroLHS;
3374 APInt UndefRHS, ZeroRHS;
3375 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3376 Depth + 1))
3377 return true;
3378 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3379 Depth + 1))
3380 return true;
3381
3382 KnownUndef = UndefLHS & UndefRHS;
3383 KnownZero = ZeroLHS & ZeroRHS;
3384
3385 // If we know that the selected element is always zero, we don't need the
3386 // select value element.
3387 APInt DemandedSel = DemandedElts & ~KnownZero;
3388 if (DemandedSel != DemandedElts)
3389 if (SimplifyDemandedVectorElts(Sel, DemandedSel, UndefSel, ZeroSel, TLO,
3390 Depth + 1))
3391 return true;
3392
3393 break;
3394 }
3395 case ISD::VECTOR_SHUFFLE: {
3396 SDValue LHS = Op.getOperand(0);
3397 SDValue RHS = Op.getOperand(1);
3398 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
3399
3400 // Collect demanded elements from the shuffle operands.
3401 APInt DemandedLHS(NumElts, 0);
3402 APInt DemandedRHS(NumElts, 0);
3403 for (unsigned i = 0; i != NumElts; ++i) {
3404 int M = ShuffleMask[i];
3405 if (M < 0 || !DemandedElts[i])
3406 continue;
3407 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
3408 if (M < (int)NumElts)
3409 DemandedLHS.setBit(M);
3410 else
3411 DemandedRHS.setBit(M - NumElts);
3412 }
3413
3414 // See if we can simplify either shuffle operand.
3415 APInt UndefLHS, ZeroLHS;
3416 APInt UndefRHS, ZeroRHS;
3417 if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO,
3418 Depth + 1))
3419 return true;
3420 if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO,
3421 Depth + 1))
3422 return true;
3423
3424 // Simplify mask using undef elements from LHS/RHS.
3425 bool Updated = false;
3426 bool IdentityLHS = true, IdentityRHS = true;
3427 SmallVector<int, 32> NewMask(ShuffleMask);
3428 for (unsigned i = 0; i != NumElts; ++i) {
3429 int &M = NewMask[i];
3430 if (M < 0)
3431 continue;
3432 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
3433 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
3434 Updated = true;
3435 M = -1;
3436 }
3437 IdentityLHS &= (M < 0) || (M == (int)i);
3438 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
3439 }
3440
3441 // Update legal shuffle masks based on demanded elements if it won't reduce
3442 // to Identity which can cause premature removal of the shuffle mask.
3443 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
3444 SDValue LegalShuffle =
3445 buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG);
3446 if (LegalShuffle)
3447 return TLO.CombineTo(Op, LegalShuffle);
3448 }
3449
3450 // Propagate undef/zero elements from LHS/RHS.
3451 for (unsigned i = 0; i != NumElts; ++i) {
3452 int M = ShuffleMask[i];
3453 if (M < 0) {
3454 KnownUndef.setBit(i);
3455 } else if (M < (int)NumElts) {
3456 if (UndefLHS[M])
3457 KnownUndef.setBit(i);
3458 if (ZeroLHS[M])
3459 KnownZero.setBit(i);
3460 } else {
3461 if (UndefRHS[M - NumElts])
3462 KnownUndef.setBit(i);
3463 if (ZeroRHS[M - NumElts])
3464 KnownZero.setBit(i);
3465 }
3466 }
3467 break;
3468 }
3469 case ISD::ANY_EXTEND_VECTOR_INREG:
3470 case ISD::SIGN_EXTEND_VECTOR_INREG:
3471 case ISD::ZERO_EXTEND_VECTOR_INREG: {
3472 APInt SrcUndef, SrcZero;
3473 SDValue Src = Op.getOperand(0);
3474 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
3475 APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts);
3476 if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
3477 Depth + 1))
3478 return true;
3479 KnownZero = SrcZero.zextOrTrunc(NumElts);
3480 KnownUndef = SrcUndef.zextOrTrunc(NumElts);
3481
3482 if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
3483 Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
3484 DemandedSrcElts == 1) {
3485 // aext - if we just need the bottom element then we can bitcast.
3486 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
3487 }
3488
3489 if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
3490 // zext(undef) upper bits are guaranteed to be zero.
3491 if (DemandedElts.isSubsetOf(KnownUndef))
3492 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3493 KnownUndef.clearAllBits();
3494
3495 // zext - if we just need the bottom element then we can mask:
3496 // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
3497 if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND &&
3498 Op->isOnlyUserOf(Src.getNode()) &&
3499 Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
3500 SDLoc DL(Op);
3501 EVT SrcVT = Src.getValueType();
3502 EVT SrcSVT = SrcVT.getScalarType();
3503 SmallVector<SDValue> MaskElts;
3504 MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
3505 MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
3506 SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
3507 if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
3508 ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
3509 Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
3510 return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
3511 }
3512 }
3513 }
3514 break;
3515 }
3516
3517 // TODO: There are more binop opcodes that could be handled here - MIN,
3518 // MAX, saturated math, etc.
3519 case ISD::ADD: {
3520 SDValue Op0 = Op.getOperand(0);
3521 SDValue Op1 = Op.getOperand(1);
3522 if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) {
3523 APInt UndefLHS, ZeroLHS;
3524 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3525 Depth + 1, /*AssumeSingleUse*/ true))
3526 return true;
3527 }
3528 [[fallthrough]];
3529 }
3530 case ISD::OR:
3531 case ISD::XOR:
3532 case ISD::SUB:
3533 case ISD::FADD:
3534 case ISD::FSUB:
3535 case ISD::FMUL:
3536 case ISD::FDIV:
3537 case ISD::FREM: {
3538 SDValue Op0 = Op.getOperand(0);
3539 SDValue Op1 = Op.getOperand(1);
3540
3541 APInt UndefRHS, ZeroRHS;
3542 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3543 Depth + 1))
3544 return true;
3545 APInt UndefLHS, ZeroLHS;
3546 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3547 Depth + 1))
3548 return true;
3549
3550 KnownZero = ZeroLHS & ZeroRHS;
3551 KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
3552
3553 // Attempt to avoid multi-use ops if we don't need anything from them.
3554 // TODO - use KnownUndef to relax the demandedelts?
3555 if (!DemandedElts.isAllOnes())
3556 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3557 return true;
3558 break;
3559 }
3560 case ISD::SHL:
3561 case ISD::SRL:
3562 case ISD::SRA:
3563 case ISD::ROTL:
3564 case ISD::ROTR: {
3565 SDValue Op0 = Op.getOperand(0);
3566 SDValue Op1 = Op.getOperand(1);
3567
3568 APInt UndefRHS, ZeroRHS;
3569 if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
3570 Depth + 1))
3571 return true;
3572 APInt UndefLHS, ZeroLHS;
3573 if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
3574 Depth + 1))
3575 return true;
3576
3577 KnownZero = ZeroLHS;
3578 KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
3579
3580 // Attempt to avoid multi-use ops if we don't need anything from them.
3581 // TODO - use KnownUndef to relax the demandedelts?
3582 if (!DemandedElts.isAllOnes())
3583 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3584 return true;
3585 break;
3586 }
3587 case ISD::MUL:
3588 case ISD::MULHU:
3589 case ISD::MULHS:
3590 case ISD::AND: {
3591 SDValue Op0 = Op.getOperand(0);
3592 SDValue Op1 = Op.getOperand(1);
3593
3594 APInt SrcUndef, SrcZero;
3595 if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
3596 Depth + 1))
3597 return true;
3598 // If we know that a demanded element was zero in Op1 we don't need to
3599 // demand it in Op0 - it's guaranteed to be zero.
3600 APInt DemandedElts0 = DemandedElts & ~SrcZero;
3601 if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero,
3602 TLO, Depth + 1))
3603 return true;
3604
3605 KnownUndef &= DemandedElts0;
3606 KnownZero &= DemandedElts0;
3607
3608 // If every element pair has a zero/undef then just fold to zero.
3609 // fold (and x, undef) -> 0 / (and x, 0) -> 0
3610 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0
3611 if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef))
3612 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3613
3614 // If either side has a zero element, then the result element is zero, even
3615 // if the other is an UNDEF.
3616 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
3617 // and then handle 'and' nodes with the rest of the binop opcodes.
3618 KnownZero |= SrcZero;
3619 KnownUndef &= SrcUndef;
3620 KnownUndef &= ~KnownZero;
3621
3622 // Attempt to avoid multi-use ops if we don't need anything from them.
3623 if (!DemandedElts.isAllOnes())
3624 if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
3625 return true;
3626 break;
3627 }
3628 case ISD::TRUNCATE:
3629 case ISD::SIGN_EXTEND:
3630 case ISD::ZERO_EXTEND:
3631 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
3632 KnownZero, TLO, Depth + 1))
3633 return true;
3634
3635 if (Op.getOpcode() == ISD::ZERO_EXTEND) {
3636 // zext(undef) upper bits are guaranteed to be zero.
3637 if (DemandedElts.isSubsetOf(KnownUndef))
3638 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
3639 KnownUndef.clearAllBits();
3640 }
3641 break;
3642 default: {
3643 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
3644 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
3645 KnownZero, TLO, Depth))
3646 return true;
3647 } else {
3648 KnownBits Known;
3649 APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
3650 if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
3651 TLO, Depth, AssumeSingleUse))
3652 return true;
3653 }
3654 break;
3655 }
3656 }
3657 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
3658
3659 // Constant fold all undef cases.
3660 // TODO: Handle zero cases as well.
3661 if (DemandedElts.isSubsetOf(KnownUndef))
3662 return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
3663
3664 return false;
3665 }
3666
3667 /// Determine which of the bits specified in Mask are known to be either zero or
3668 /// one and return them in the Known.
3669 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
3670 KnownBits &Known,
3671 const APInt &DemandedElts,
3672 const SelectionDAG &DAG,
3673 unsigned Depth) const {
3674 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3675 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3676 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3677 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3678 "Should use MaskedValueIsZero if you don't know whether Op"
3679 " is a target node!");
3680 Known.resetAll();
3681 }
3682
3683 void TargetLowering::computeKnownBitsForTargetInstr(
3684 GISelKnownBits &Analysis, Register R, KnownBits &Known,
3685 const APInt &DemandedElts, const MachineRegisterInfo &MRI,
3686 unsigned Depth) const {
3687 Known.resetAll();
3688 }
3689
3690 void TargetLowering::computeKnownBitsForFrameIndex(
3691 const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
3692 // The low bits are known zero if the pointer is aligned.
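// For example, a frame object with 16-byte alignment has Log2(16) == 4
// known-zero low bits.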
3693 Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
3694 }
3695
3696 Align TargetLowering::computeKnownAlignForTargetInstr(
3697 GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
3698 unsigned Depth) const {
3699 return Align(1);
3700 }
3701
3702 /// This method can be implemented by targets that want to expose additional
3703 /// information about sign bits to the DAG Combiner.
3704 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3705 const APInt &,
3706 const SelectionDAG &,
3707 unsigned Depth) const {
3708 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3709 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3710 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3711 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3712 "Should use ComputeNumSignBits if you don't know whether Op"
3713 " is a target node!");
3714 return 1;
3715 }
3716
3717 unsigned TargetLowering::computeNumSignBitsForTargetInstr(
3718 GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
3719 const MachineRegisterInfo &MRI, unsigned Depth) const {
3720 return 1;
3721 }
3722
3723 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
3724 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
3725 TargetLoweringOpt &TLO, unsigned Depth) const {
3726 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3727 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3728 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3729 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3730 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
3731 " is a target node!");
3732 return false;
3733 }
3734
3735 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
3736 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3737 KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
3738 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3739 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3740 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3741 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3742 "Should use SimplifyDemandedBits if you don't know whether Op"
3743 " is a target node!");
3744 computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
3745 return false;
3746 }
3747
3748 SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
3749 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3750 SelectionDAG &DAG, unsigned Depth) const {
3751 assert(
3752 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3753 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3754 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3755 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3756 "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
3757 " is a target node!");
3758 return SDValue();
3759 }
3760
3761 SDValue
3762 TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3763 SDValue N1, MutableArrayRef<int> Mask,
3764 SelectionDAG &DAG) const {
3765 bool LegalMask = isShuffleMaskLegal(Mask, VT);
3766 if (!LegalMask) {
3767 std::swap(N0, N1);
3768 ShuffleVectorSDNode::commuteMask(Mask);
3769 LegalMask = isShuffleMaskLegal(Mask, VT);
3770 }
3771
3772 if (!LegalMask)
3773 return SDValue();
3774
3775 return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
3776 }
3777
3778 const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
3779 return nullptr;
3780 }
3781
3782 bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
3783 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3784 bool PoisonOnly, unsigned Depth) const {
3785 assert(
3786 (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3787 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3788 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3789 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3790 "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
3791 " is a target node!");
3792 return false;
3793 }
3794
3795 bool TargetLowering::canCreateUndefOrPoisonForTargetNode(
3796 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
3797 bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const {
3798 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3799 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3800 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3801 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3802 "Should use canCreateUndefOrPoison if you don't know whether Op"
3803 " is a target node!");
3804 // Be conservative and return true.
3805 return true;
3806 }
3807
3808 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
3809 const SelectionDAG &DAG,
3810 bool SNaN,
3811 unsigned Depth) const {
3812 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3813 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3814 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3815 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3816 "Should use isKnownNeverNaN if you don't know whether Op"
3817 " is a target node!");
3818 return false;
3819 }
3820
3821 bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
3822 const APInt &DemandedElts,
3823 APInt &UndefElts,
3824 const SelectionDAG &DAG,
3825 unsigned Depth) const {
3826 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
3827 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
3828 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
3829 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
3830 "Should use isSplatValue if you don't know whether Op"
3831 " is a target node!");
3832 return false;
3833 }
3834
3835 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
3836 // work with truncating build vectors and vectors with elements of less than
3837 // 8 bits.
3838 bool TargetLowering::isConstTrueVal(SDValue N) const {
3839 if (!N)
3840 return false;
3841
3842 unsigned EltWidth;
3843 APInt CVal;
3844 if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3845 /*AllowTruncation=*/true)) {
3846 CVal = CN->getAPIntValue();
3847 EltWidth = N.getValueType().getScalarSizeInBits();
3848 } else
3849 return false;
3850
3851 // If this is a truncating splat, truncate the splat value.
3852 // Otherwise, we may fail to match the expected values below.
3853 if (EltWidth < CVal.getBitWidth())
3854 CVal = CVal.trunc(EltWidth);
3855
3856 switch (getBooleanContents(N.getValueType())) {
3857 case UndefinedBooleanContent:
3858 return CVal[0];
3859 case ZeroOrOneBooleanContent:
3860 return CVal.isOne();
3861 case ZeroOrNegativeOneBooleanContent:
3862 return CVal.isAllOnes();
3863 }
3864
3865 llvm_unreachable("Invalid boolean contents");
3866 }
3867
3868 bool TargetLowering::isConstFalseVal(SDValue N) const {
3869 if (!N)
3870 return false;
3871
3872 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
3873 if (!CN) {
3874 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
3875 if (!BV)
3876 return false;
3877
3878 // Only interested in constant splats; we don't care about undef
3879 // elements in identifying boolean constants, and getConstantSplatNode
3880 // returns NULL if all ops are undef.
3881 CN = BV->getConstantSplatNode();
3882 if (!CN)
3883 return false;
3884 }
3885
3886 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
3887 return !CN->getAPIntValue()[0];
3888
3889 return CN->isZero();
3890 }
3891
3892 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
3893 bool SExt) const {
3894 if (VT == MVT::i1)
3895 return N->isOne();
3896
3897 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
3898 switch (Cnt) {
3899 case TargetLowering::ZeroOrOneBooleanContent:
3900 // An extended value of 1 is always true, unless its original type is i1,
3901 // in which case it will be sign extended to -1.
3902 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
3903 case TargetLowering::UndefinedBooleanContent:
3904 case TargetLowering::ZeroOrNegativeOneBooleanContent:
3905 return N->isAllOnes() && SExt;
3906 }
3907 llvm_unreachable("Unexpected enumeration.");
3908 }
3909
3910 /// This helper function of SimplifySetCC tries to optimize the comparison when
3911 /// either operand of the SetCC node is a bitwise-and instruction.
3912 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
3913 ISD::CondCode Cond, const SDLoc &DL,
3914 DAGCombinerInfo &DCI) const {
3915 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
3916 std::swap(N0, N1);
3917
3918 SelectionDAG &DAG = DCI.DAG;
3919 EVT OpVT = N0.getValueType();
3920 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
3921 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
3922 return SDValue();
3923
3924 // (X & Y) != 0 --> zextOrTrunc(X & Y)
3925 // iff everything but LSB is known zero:
3926 if (Cond == ISD::SETNE && isNullConstant(N1) &&
3927 (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
3928 getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
3929 unsigned NumEltBits = OpVT.getScalarSizeInBits();
3930 APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
3931 if (DAG.MaskedValueIsZero(N0, UpperBits))
3932 return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
3933 }
3934
3935 // Try to eliminate a power-of-2 mask constant by converting to a signbit
3936 // test in a narrow type that we can truncate to with no cost. Examples:
3937 // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0
3938 // (i32 X & 32768) != 0 --> (trunc X to i16) < 0
3939 // TODO: This conservatively checks for type legality on the source and
3940 // destination types. That may inhibit optimizations, but it also
3941 // allows setcc->shift transforms that may be more beneficial.
3942 auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
3943 if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() &&
3944 isTypeLegal(OpVT) && N0.hasOneUse()) {
3945 EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(),
3946 AndC->getAPIntValue().getActiveBits());
3947 if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) {
3948 SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT);
3949 SDValue Zero = DAG.getConstant(0, DL, NarrowVT);
3950 return DAG.getSetCC(DL, VT, Trunc, Zero,
3951 Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT);
3952 }
3953 }
3954
3955 // Match these patterns in any of their permutations:
3956 // (X & Y) == Y
3957 // (X & Y) != Y
3958 SDValue X, Y;
3959 if (N0.getOperand(0) == N1) {
3960 X = N0.getOperand(1);
3961 Y = N0.getOperand(0);
3962 } else if (N0.getOperand(1) == N1) {
3963 X = N0.getOperand(0);
3964 Y = N0.getOperand(1);
3965 } else {
3966 return SDValue();
3967 }
3968
3969 // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if
3970 // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as
3971 // it is liable to create an infinite loop.
3972 SDValue Zero = DAG.getConstant(0, DL, OpVT);
3973 if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) &&
3974 DAG.isKnownToBeAPowerOfTwo(Y)) {
3975 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
3976 // Note that where Y is variable and is known to have at most one bit set
3977 // (for example, if it is Z & 1) we cannot do this; the expressions are not
3978 // equivalent when Y == 0.
3979 assert(OpVT.isInteger());
3980 Cond = ISD::getSetCCInverse(Cond, OpVT);
3981 if (DCI.isBeforeLegalizeOps() ||
3982 isCondCodeLegal(Cond, N0.getSimpleValueType()))
3983 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
3984 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
3985 // If the target supports an 'and-not' or 'and-complement' logic operation,
3986 // try to use that to make a comparison operation more efficient.
3987 // But don't do this transform if the mask is a single bit because there are
3988 // more efficient ways to deal with that case (for example, 'bt' on x86 or
3989 // 'rlwinm' on PPC).
3990
3991 // Bail out if the compare operand that we want to turn into a zero is
3992 // already a zero (otherwise, infinite loop).
3993 if (isNullConstant(Y))
3994 return SDValue();
3995
3996 // Transform this into: ~X & Y == 0.
3997 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
3998 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
3999 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
4000 }
4001
4002 return SDValue();
4003 }
4004
4005 /// There are multiple IR patterns that could be checking whether certain
4006 /// truncation of a signed number would be lossy or not. The pattern that is
4007 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
4008 /// We are looking for the following pattern: (KeptBits is a constant)
4009 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
4010 /// KeptBits won't be bitwidth(x); that would be constant-folded to true/false.
4011 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
4012 /// We will unfold it into the natural trunc+sext pattern:
4013 /// ((%x << C) a>> C) dstcond %x
4014 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
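/// For example, with i16 %x and KeptBits == 8:
///   (add %x, 128) u< 256  -->  ((%x << 8) a>> 8) == %x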
4015 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
4016 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
4017 const SDLoc &DL) const {
4018 // We must be comparing with a constant.
4019 ConstantSDNode *C1;
4020 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
4021 return SDValue();
4022
4023 // N0 should be: add %x, (1 << (KeptBits-1))
4024 if (N0->getOpcode() != ISD::ADD)
4025 return SDValue();
4026
4027 // And we must be 'add'ing a constant.
4028 ConstantSDNode *C01;
4029 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
4030 return SDValue();
4031
4032 SDValue X = N0->getOperand(0);
4033 EVT XVT = X.getValueType();
4034
4035 // Validate constants ...
4036
4037 APInt I1 = C1->getAPIntValue();
4038
4039 ISD::CondCode NewCond;
4040 if (Cond == ISD::CondCode::SETULT) {
4041 NewCond = ISD::CondCode::SETEQ;
4042 } else if (Cond == ISD::CondCode::SETULE) {
4043 NewCond = ISD::CondCode::SETEQ;
4044 // But need to 'canonicalize' the constant.
4045 I1 += 1;
4046 } else if (Cond == ISD::CondCode::SETUGT) {
4047 NewCond = ISD::CondCode::SETNE;
4048 // But need to 'canonicalize' the constant.
4049 I1 += 1;
4050 } else if (Cond == ISD::CondCode::SETUGE) {
4051 NewCond = ISD::CondCode::SETNE;
4052 } else
4053 return SDValue();
4054
4055 APInt I01 = C01->getAPIntValue();
4056
4057 auto checkConstants = [&I1, &I01]() -> bool {
4058 // Both of them must be powers of two, and the constant from the setcc must be bigger.
4059 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
4060 };
4061
4062 if (checkConstants()) {
4063 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
4064 } else {
4065 // What if we invert constants? (and the target predicate)
4066 I1.negate();
4067 I01.negate();
4068 assert(XVT.isInteger());
4069 NewCond = getSetCCInverse(NewCond, XVT);
4070 if (!checkConstants())
4071 return SDValue();
4072 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
4073 }
4074
4075 // They are power-of-two, so which bit is set?
4076 const unsigned KeptBits = I1.logBase2();
4077 const unsigned KeptBitsMinusOne = I01.logBase2();
4078
4079 // Magic!
4080 if (KeptBits != (KeptBitsMinusOne + 1))
4081 return SDValue();
4082 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
4083
4084 // We don't want to do this in every single case.
4085 SelectionDAG &DAG = DCI.DAG;
4086 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
4087 XVT, KeptBits))
4088 return SDValue();
4089
4090 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
4091 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
4092
4093 // Unfold into: ((%x << C) a>> C) cond %x
4094 // Where 'cond' will be either 'eq' or 'ne'.
4095 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
4096 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
4097 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
4098 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
4099
4100 return T2;
4101 }
4102
4103 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
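// For example: (X & (0x80 l>> Y)) != 0 --> ((X << Y) & 0x80) != 0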
4104 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
4105 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4106 DAGCombinerInfo &DCI, const SDLoc &DL) const {
4107 assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
4108 "Should be a comparison with 0.");
4109 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4110 "Valid only for [in]equality comparisons.");
4111
4112 unsigned NewShiftOpcode;
4113 SDValue X, C, Y;
4114
4115 SelectionDAG &DAG = DCI.DAG;
4116 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4117
4118 // Look for '(C l>>/<< Y)'.
4119 auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
4120 // The shift should be one-use.
4121 if (!V.hasOneUse())
4122 return false;
4123 unsigned OldShiftOpcode = V.getOpcode();
4124 switch (OldShiftOpcode) {
4125 case ISD::SHL:
4126 NewShiftOpcode = ISD::SRL;
4127 break;
4128 case ISD::SRL:
4129 NewShiftOpcode = ISD::SHL;
4130 break;
4131 default:
4132 return false; // must be a logical shift.
4133 }
4134 // We should be shifting a constant.
4135 // FIXME: best to use isConstantOrConstantVector().
4136 C = V.getOperand(0);
4137 ConstantSDNode *CC =
4138 isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4139 if (!CC)
4140 return false;
4141 Y = V.getOperand(1);
4142
4143 ConstantSDNode *XC =
4144 isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
4145 return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
4146 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
4147 };
4148
4149 // LHS of the comparison should be a one-use 'and'.
4150 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
4151 return SDValue();
4152
4153 X = N0.getOperand(0);
4154 SDValue Mask = N0.getOperand(1);
4155
4156 // 'and' is commutative!
4157 if (!Match(Mask)) {
4158 std::swap(X, Mask);
4159 if (!Match(Mask))
4160 return SDValue();
4161 }
4162
4163 EVT VT = X.getValueType();
4164
4165 // Produce:
4166 // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
4167 SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
4168 SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
4169 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
4170 return T2;
4171 }
4172
4173 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
4174 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
4175 /// handle the commuted versions of these patterns.
4176 SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
4177 ISD::CondCode Cond, const SDLoc &DL,
4178 DAGCombinerInfo &DCI) const {
4179 unsigned BOpcode = N0.getOpcode();
4180 assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
4181 "Unexpected binop");
4182 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
4183
4184 // (X + Y) == X --> Y == 0
4185 // (X - Y) == X --> Y == 0
4186 // (X ^ Y) == X --> Y == 0
4187 SelectionDAG &DAG = DCI.DAG;
4188 EVT OpVT = N0.getValueType();
4189 SDValue X = N0.getOperand(0);
4190 SDValue Y = N0.getOperand(1);
4191 if (X == N1)
4192 return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
4193
4194 if (Y != N1)
4195 return SDValue();
4196
4197 // (X + Y) == Y --> X == 0
4198 // (X ^ Y) == Y --> X == 0
4199 if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
4200 return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
4201
4202 // The shift would not be valid if the operands are boolean (i1).
4203 if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
4204 return SDValue();
4205
4206 // (X - Y) == Y --> X == Y << 1
4207 EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
4208 !DCI.isBeforeLegalize());
4209 SDValue One = DAG.getConstant(1, DL, ShiftVT);
4210 SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
4211 if (!DCI.isCalledByLegalizer())
4212 DCI.AddToWorklist(YShl1.getNode());
4213 return DAG.getSetCC(DL, VT, X, YShl1, Cond);
4214 }
4215
4216 static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
4217 SDValue N0, const APInt &C1,
4218 ISD::CondCode Cond, const SDLoc &dl,
4219 SelectionDAG &DAG) {
4220 // Look through truncs that don't change the value of a ctpop.
4221 // FIXME: Add vector support? Need to be careful with setcc result type below.
4222 SDValue CTPOP = N0;
4223 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
4224 N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
4225 CTPOP = N0.getOperand(0);
4226
4227 if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
4228 return SDValue();
4229
4230 EVT CTVT = CTPOP.getValueType();
4231 SDValue CTOp = CTPOP.getOperand(0);
4232
4233 // Expand a power-of-2-or-zero comparison based on ctpop:
4234 // (ctpop x) u< 2 -> (x & x-1) == 0
4235 // (ctpop x) u> 1 -> (x & x-1) != 0
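// The loop below generalizes these: each (Result & (Result - 1)) step clears
// the lowest set bit, so e.g. (ctpop x) u< 3 expands (cost limit permitting)
// to ((x & (x - 1)) & ((x & (x - 1)) - 1)) == 0.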
4236 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
4237 // Keep the CTPOP if it is a cheap vector op.
4238 if (CTVT.isVector() && TLI.isCtpopFast(CTVT))
4239 return SDValue();
4240
4241 unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
4242 if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
4243 return SDValue();
4244 if (C1 == 0 && (Cond == ISD::SETULT))
4245 return SDValue(); // This is handled elsewhere.
4246
4247 unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);
4248
4249 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4250 SDValue Result = CTOp;
4251 for (unsigned i = 0; i < Passes; i++) {
4252 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
4253 Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
4254 }
4255 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
4256 return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
4257 }
4258
4259 // Expand a power-of-2 comparison based on ctpop
4260 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
4261 // Keep the CTPOP if it is cheap.
4262 if (TLI.isCtpopFast(CTVT))
4263 return SDValue();
4264
4265 SDValue Zero = DAG.getConstant(0, dl, CTVT);
4266 SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
4267 assert(CTVT.isInteger());
4268 SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4269
4270 // It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4271 // check before emitting a potentially unnecessary op.
4272 if (DAG.isKnownNeverZero(CTOp)) {
4273 // (ctpop x) == 1 --> (x & x-1) == 0
4274 // (ctpop x) != 1 --> (x & x-1) != 0
4275 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4276 SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4277 return RHS;
4278 }
4279
4280 // (ctpop x) == 1 --> (x ^ x-1) > x-1
4281 // (ctpop x) != 1 --> (x ^ x-1) <= x-1
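// For nonzero x, x ^ (x-1) is a mask of the bits up to and including the
// lowest set bit, which exceeds x-1 (unsigned) only when x is a power of 2;
// for x == 0 both sides are all-ones, so the u> compare correctly fails.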
4282 SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4283 ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4284 return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
4285 }
4286
4287 return SDValue();
4288 }
4289
4290 static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1,
4291 ISD::CondCode Cond, const SDLoc &dl,
4292 SelectionDAG &DAG) {
4293 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4294 return SDValue();
4295
4296 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4297 if (!C1 || !(C1->isZero() || C1->isAllOnes()))
4298 return SDValue();
4299
4300 auto getRotateSource = [](SDValue X) {
4301 if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR)
4302 return X.getOperand(0);
4303 return SDValue();
4304 };
4305
4306 // Peek through a rotated value compared against 0 or -1:
4307 // (rot X, Y) == 0/-1 --> X == 0/-1
4308 // (rot X, Y) != 0/-1 --> X != 0/-1
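// (A rotate merely permutes the bits, so its result is all-zeros or
// all-ones exactly when its input is.)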
4309 if (SDValue R = getRotateSource(N0))
4310 return DAG.getSetCC(dl, VT, R, N1, Cond);
4311
4312 // Peek through an 'or' of a rotated value compared against 0:
4313 // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0
4314 // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0
4315 //
4316 // TODO: Add the 'and' with -1 sibling.
4317 // TODO: Recurse through a series of 'or' ops to find the rotate.
4318 EVT OpVT = N0.getValueType();
4319 if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) {
4320 if (SDValue R = getRotateSource(N0.getOperand(0))) {
4321 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1));
4322 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4323 }
4324 if (SDValue R = getRotateSource(N0.getOperand(1))) {
4325 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0));
4326 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4327 }
4328 }
4329
4330 return SDValue();
4331 }
4332
4333 static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1,
4334 ISD::CondCode Cond, const SDLoc &dl,
4335 SelectionDAG &DAG) {
4336 // If we are testing for all-bits-clear, we might be able to do that with
4337 // less shifting since bit-order does not matter.
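// Recall (assumed semantics) fshl(A, B, C) == (A << C) | (B u>> (BW - C))
// for C != 0: an equality test against zero only asks whether two fixed
// bit ranges of A and B are all clear.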
4338 if (Cond != ISD::SETEQ && Cond != ISD::SETNE)
4339 return SDValue();
4340
4341 auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true);
4342 if (!C1 || !C1->isZero())
4343 return SDValue();
4344
4345 if (!N0.hasOneUse() ||
4346 (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR))
4347 return SDValue();
4348
4349 unsigned BitWidth = N0.getScalarValueSizeInBits();
4350 auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2));
4351 if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth))
4352 return SDValue();
4353
4354 // Canonicalize fshr as fshl to reduce pattern-matching.
4355 unsigned ShAmt = ShAmtC->getZExtValue();
4356 if (N0.getOpcode() == ISD::FSHR)
4357 ShAmt = BitWidth - ShAmt;
4358
4359 // Match an 'or' with a specific operand 'Other' in either commuted variant.
4360 SDValue X, Y;
4361 auto matchOr = [&X, &Y](SDValue Or, SDValue Other) {
4362 if (Or.getOpcode() != ISD::OR || !Or.hasOneUse())
4363 return false;
4364 if (Or.getOperand(0) == Other) {
4365 X = Or.getOperand(0);
4366 Y = Or.getOperand(1);
4367 return true;
4368 }
4369 if (Or.getOperand(1) == Other) {
4370 X = Or.getOperand(1);
4371 Y = Or.getOperand(0);
4372 return true;
4373 }
4374 return false;
4375 };
4376
4377 EVT OpVT = N0.getValueType();
4378 EVT ShAmtVT = N0.getOperand(2).getValueType();
4379 SDValue F0 = N0.getOperand(0);
4380 SDValue F1 = N0.getOperand(1);
4381 if (matchOr(F0, F1)) {
4382 // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0
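// Both sides are zero exactly when X == 0 and the low BW-C bits of Y are
// zero: the fshl keeps the low BW-C bits of (X | Y) plus the high C bits
// of X, and the replacement tests X in full and Y through the shl.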
4383 SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT);
4384 SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt);
4385 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4386 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4387 }
4388 if (matchOr(F1, F0)) {
4389 // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0
4390 SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT);
4391 SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt);
4392 SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X);
4393 return DAG.getSetCC(dl, VT, NewOr, N1, Cond);
4394 }
4395
4396 return SDValue();
4397 }
4398
4399 /// Try to simplify a setcc built with the specified operands and cc. If it is
4400 /// unable to simplify it, return a null SDValue.
4401 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
4402 ISD::CondCode Cond, bool foldBooleans,
4403 DAGCombinerInfo &DCI,
4404 const SDLoc &dl) const {
4405 SelectionDAG &DAG = DCI.DAG;
4406 const DataLayout &Layout = DAG.getDataLayout();
4407 EVT OpVT = N0.getValueType();
4408 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4409
4410 // Constant fold or commute setcc.
4411 if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
4412 return Fold;
4413
4414 bool N0ConstOrSplat =
4415 isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4416 bool N1ConstOrSplat =
4417 isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
4418
4419 // Canonicalize toward having the constant on the RHS.
4420 // TODO: Handle non-splat vector constants. All undef causes trouble.
4421 // FIXME: We can't yet fold constant scalable vector splats, so avoid an
4422 // infinite loop here when we encounter one.
4423 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
4424 if (N0ConstOrSplat && !N1ConstOrSplat &&
4425 (DCI.isBeforeLegalizeOps() ||
4426 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
4427 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4428
4429 // If we have a subtract with the same 2 non-constant operands as this setcc
4430 // -- but in reverse order -- then try to commute the operands of this setcc
4431 // to match. A matching pair of setcc (cmp) and sub may be combined into 1
4432 // instruction on some targets.
4433 if (!N0ConstOrSplat && !N1ConstOrSplat &&
4434 (DCI.isBeforeLegalizeOps() ||
4435 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
4436 DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
4437 !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
4438 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
4439
4440 if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG))
4441 return V;
4442
4443 if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG))
4444 return V;
4445
4446 if (auto *N1C = isConstOrConstSplat(N1)) {
4447 const APInt &C1 = N1C->getAPIntValue();
4448
4449 // Optimize some CTPOP cases.
4450 if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
4451 return V;
4452
4453 // For equality to 0 of a no-wrap multiply, decompose and test each op:
4454 // X * Y == 0 --> (X == 0) || (Y == 0)
4455 // X * Y != 0 --> (X != 0) && (Y != 0)
4456 // TODO: This bails out if minsize is set, but if the target doesn't have a
4457 // single instruction multiply for this type, it would likely be
4458 // smaller to decompose.
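//
// The no-wrap flag is what makes this sound: e.g. i8 'mul nuw 16, 16' is
// not allowed to wrap to 0, so a product of two nonzero operands stays
// nonzero and the decomposed tests agree with the original compare.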
4459 if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4460 N0.getOpcode() == ISD::MUL && N0.hasOneUse() &&
4461 (N0->getFlags().hasNoUnsignedWrap() ||
4462 N0->getFlags().hasNoSignedWrap()) &&
4463 !Attr.hasFnAttr(Attribute::MinSize)) {
4464 SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4465 SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond);
4466 unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND;
4467 return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero);
4468 }
4469
4470 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
4471 // equality comparison, then we're just comparing whether X itself is
4472 // zero.
4473 if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
4474 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
4475 llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
4476 if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
4477 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4478 ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
4479 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
4480 // (srl (ctlz x), 5) == 0 -> X != 0
4481 // (srl (ctlz x), 5) != 1 -> X != 0
4482 Cond = ISD::SETNE;
4483 } else {
4484 // (srl (ctlz x), 5) != 0 -> X == 0
4485 // (srl (ctlz x), 5) == 1 -> X == 0
4486 Cond = ISD::SETEQ;
4487 }
4488 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
4489 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
4490 Cond);
4491 }
4492 }
4493 }
4494 }
4495
4496 // FIXME: Support vectors.
4497 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
4498 const APInt &C1 = N1C->getAPIntValue();
4499
4500 // (zext x) == C --> x == (trunc C)
4501 // (sext x) == C --> x == (trunc C)
4502 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4503 DCI.isBeforeLegalize() && N0->hasOneUse()) {
4504 unsigned MinBits = N0.getValueSizeInBits();
4505 SDValue PreExt;
4506 bool Signed = false;
4507 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
4508 // ZExt
4509 MinBits = N0->getOperand(0).getValueSizeInBits();
4510 PreExt = N0->getOperand(0);
4511 } else if (N0->getOpcode() == ISD::AND) {
4512 // DAGCombine turns costly ZExts into ANDs
4513 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
4514 if ((C->getAPIntValue()+1).isPowerOf2()) {
4515 MinBits = C->getAPIntValue().countr_one();
4516 PreExt = N0->getOperand(0);
4517 }
4518 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
4519 // SExt
4520 MinBits = N0->getOperand(0).getValueSizeInBits();
4521 PreExt = N0->getOperand(0);
4522 Signed = true;
4523 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
4524 // ZEXTLOAD / SEXTLOAD
4525 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
4526 MinBits = LN0->getMemoryVT().getSizeInBits();
4527 PreExt = N0;
4528 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
4529 Signed = true;
4530 MinBits = LN0->getMemoryVT().getSizeInBits();
4531 PreExt = N0;
4532 }
4533 }
4534
4535 // Figure out how many bits we need to preserve this constant.
4536 unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
4537
4538 // Make sure we're not losing bits from the constant.
4539 if (MinBits > 0 &&
4540 MinBits < C1.getBitWidth() &&
4541 MinBits >= ReqdBits) {
4542 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
4543 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
4544 // Will get folded away.
4545 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
4546 if (MinBits == 1 && C1 == 1)
4547 // Invert the condition.
4548 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
4549 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4550 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
4551 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
4552 }
4553
4554 // If truncating the setcc operands is not desirable, we can still
4555 // simplify the expression in some cases:
4556 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
4557 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
4558 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
4559 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
4560 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
4561 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
4562 SDValue TopSetCC = N0->getOperand(0);
4563 unsigned N0Opc = N0->getOpcode();
4564 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
4565 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
4566 TopSetCC.getOpcode() == ISD::SETCC &&
4567 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
4568 (isConstFalseVal(N1) ||
4569 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
4570
4571 bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
4572 (!N1C->isZero() && Cond == ISD::SETNE);
4573
4574 if (!Inverse)
4575 return TopSetCC;
4576
4577 ISD::CondCode InvCond = ISD::getSetCCInverse(
4578 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
4579 TopSetCC.getOperand(0).getValueType());
4580 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
4581 TopSetCC.getOperand(1),
4582 InvCond);
4583 }
4584 }
4585 }
4586
4587 // If the LHS is '(and load, const)', the RHS is 0, the test is for
4588 // equality or unsigned, and all 1 bits of the const are in the same
4589 // partial word, see if we can shorten the load.
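//
// Illustrative example (little-endian, made-up values): for
// '(and (load i32 p), 0xFF00) == 0' the search below settles on an i8 load
// at p+1, masked with 0xFF and compared against 0, because every set mask
// bit lives in that one byte.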
4590 if (DCI.isBeforeLegalize() &&
4591 !ISD::isSignedIntSetCC(Cond) &&
4592 N0.getOpcode() == ISD::AND && C1 == 0 &&
4593 N0.getNode()->hasOneUse() &&
4594 isa<LoadSDNode>(N0.getOperand(0)) &&
4595 N0.getOperand(0).getNode()->hasOneUse() &&
4596 isa<ConstantSDNode>(N0.getOperand(1))) {
4597 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
4598 APInt bestMask;
4599 unsigned bestWidth = 0, bestOffset = 0;
4600 if (Lod->isSimple() && Lod->isUnindexed()) {
4601 unsigned origWidth = N0.getValueSizeInBits();
4602 unsigned maskWidth = origWidth;
4603 // We can narrow (e.g.) 16-bit extending loads on a 32-bit target to
4604 // 8 bits, but have to be careful...
4605 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
4606 origWidth = Lod->getMemoryVT().getSizeInBits();
4607 const APInt &Mask = N0.getConstantOperandAPInt(1);
4608 for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
4609 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
4610 for (unsigned offset = 0; offset < origWidth / width; offset++) {
4611 if (Mask.isSubsetOf(newMask)) {
4612 if (Layout.isLittleEndian())
4613 bestOffset = (uint64_t)offset * (width/8);
4614 else
4615 bestOffset = (origWidth/width - offset - 1) * (width/8);
4616 bestMask = Mask.lshr(offset * (width/8) * 8);
4617 bestWidth = width;
4618 break;
4619 }
4620 newMask <<= width;
4621 }
4622 }
4623 }
4624 if (bestWidth) {
4625 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
4626 if (newVT.isRound() &&
4627 shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
4628 SDValue Ptr = Lod->getBasePtr();
4629 if (bestOffset != 0)
4630 Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset),
4631 dl);
4632 SDValue NewLoad =
4633 DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
4634 Lod->getPointerInfo().getWithOffset(bestOffset),
4635 Lod->getOriginalAlign());
4636 return DAG.getSetCC(dl, VT,
4637 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
4638 DAG.getConstant(bestMask.trunc(bestWidth),
4639 dl, newVT)),
4640 DAG.getConstant(0LL, dl, newVT), Cond);
4641 }
4642 }
4643 }
4644
4645 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
4646 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
4647 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
4648
4649 // If the comparison constant has bits in the upper part, the
4650 // zero-extended value could never match.
4651 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
4652 C1.getBitWidth() - InSize))) {
4653 switch (Cond) {
4654 case ISD::SETUGT:
4655 case ISD::SETUGE:
4656 case ISD::SETEQ:
4657 return DAG.getConstant(0, dl, VT);
4658 case ISD::SETULT:
4659 case ISD::SETULE:
4660 case ISD::SETNE:
4661 return DAG.getConstant(1, dl, VT);
4662 case ISD::SETGT:
4663 case ISD::SETGE:
4664 // True if the sign bit of C1 is set.
4665 return DAG.getConstant(C1.isNegative(), dl, VT);
4666 case ISD::SETLT:
4667 case ISD::SETLE:
4668 // True if the sign bit of C1 isn't set.
4669 return DAG.getConstant(C1.isNonNegative(), dl, VT);
4670 default:
4671 break;
4672 }
4673 }
4674
4675 // Otherwise, we can perform the comparison with the low bits.
4676 switch (Cond) {
4677 case ISD::SETEQ:
4678 case ISD::SETNE:
4679 case ISD::SETUGT:
4680 case ISD::SETUGE:
4681 case ISD::SETULT:
4682 case ISD::SETULE: {
4683 EVT newVT = N0.getOperand(0).getValueType();
4684 if (DCI.isBeforeLegalizeOps() ||
4685 (isOperationLegal(ISD::SETCC, newVT) &&
4686 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
4687 EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
4688 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
4689
4690 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
4691 NewConst, Cond);
4692 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
4693 }
4694 break;
4695 }
4696 default:
4697 break; // TODO: be more careful with signed comparisons
4698 }
4699 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
4700 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4701 !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
4702 OpVT)) {
4703 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
4704 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
4705 EVT ExtDstTy = N0.getValueType();
4706 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
4707
4708 // If the constant doesn't fit into the number of bits for the source of
4709 // the sign extension, it is impossible for both sides to be equal.
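// E.g. (sext_inreg X, i8) only yields values in [-128, 127], so equality
// against a constant such as 300 (10 significant bits) is a known result.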
4710 if (C1.getSignificantBits() > ExtSrcTyBits)
4711 return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
4712
4713 assert(ExtDstTy == N0.getOperand(0).getValueType() &&
4714 ExtDstTy != ExtSrcTy && "Unexpected types!");
4715 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
4716 SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
4717 DAG.getConstant(Imm, dl, ExtDstTy));
4718 if (!DCI.isCalledByLegalizer())
4719 DCI.AddToWorklist(ZextOp.getNode());
4720 // Otherwise, make this a use of a zext.
4721 return DAG.getSetCC(dl, VT, ZextOp,
4722 DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
4723 } else if ((N1C->isZero() || N1C->isOne()) &&
4724 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4725 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
4726 if (N0.getOpcode() == ISD::SETCC &&
4727 isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
4728 (N0.getValueType() == MVT::i1 ||
4729 getBooleanContents(N0.getOperand(0).getValueType()) ==
4730 ZeroOrOneBooleanContent)) {
4731 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
4732 if (TrueWhenTrue)
4733 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
4734 // Invert the condition.
4735 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
4736 CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
4737 if (DCI.isBeforeLegalizeOps() ||
4738 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
4739 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
4740 }
4741
4742 if ((N0.getOpcode() == ISD::XOR ||
4743 (N0.getOpcode() == ISD::AND &&
4744 N0.getOperand(0).getOpcode() == ISD::XOR &&
4745 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
4746 isOneConstant(N0.getOperand(1))) {
4747 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
4748 // can only do this if the top bits are known zero.
4749 unsigned BitWidth = N0.getValueSizeInBits();
4750 if (DAG.MaskedValueIsZero(N0,
4751 APInt::getHighBitsSet(BitWidth,
4752 BitWidth-1))) {
4753 // Okay, get the un-inverted input value.
4754 SDValue Val;
4755 if (N0.getOpcode() == ISD::XOR) {
4756 Val = N0.getOperand(0);
4757 } else {
4758 assert(N0.getOpcode() == ISD::AND &&
4759 N0.getOperand(0).getOpcode() == ISD::XOR);
4760 // ((X^1)&1)^1 -> X & 1
4761 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
4762 N0.getOperand(0).getOperand(0),
4763 N0.getOperand(1));
4764 }
4765
4766 return DAG.getSetCC(dl, VT, Val, N1,
4767 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4768 }
4769 } else if (N1C->isOne()) {
4770 SDValue Op0 = N0;
4771 if (Op0.getOpcode() == ISD::TRUNCATE)
4772 Op0 = Op0.getOperand(0);
4773
4774 if ((Op0.getOpcode() == ISD::XOR) &&
4775 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
4776 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
4777 SDValue XorLHS = Op0.getOperand(0);
4778 SDValue XorRHS = Op0.getOperand(1);
4779 // Ensure that the input setccs return an i1 type or 0/1 value.
4780 if (Op0.getValueType() == MVT::i1 ||
4781 (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
4782 ZeroOrOneBooleanContent &&
4783 getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
4784 ZeroOrOneBooleanContent)) {
4785 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
4786 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
4787 return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
4788 }
4789 }
4790 if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
4791 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
4792 if (Op0.getValueType().bitsGT(VT))
4793 Op0 = DAG.getNode(ISD::AND, dl, VT,
4794 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
4795 DAG.getConstant(1, dl, VT));
4796 else if (Op0.getValueType().bitsLT(VT))
4797 Op0 = DAG.getNode(ISD::AND, dl, VT,
4798 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
4799 DAG.getConstant(1, dl, VT));
4800
4801 return DAG.getSetCC(dl, VT, Op0,
4802 DAG.getConstant(0, dl, Op0.getValueType()),
4803 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4804 }
4805 if (Op0.getOpcode() == ISD::AssertZext &&
4806 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
4807 return DAG.getSetCC(dl, VT, Op0,
4808 DAG.getConstant(0, dl, Op0.getValueType()),
4809 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
4810 }
4811 }
4812
4813 // Given:
4814 // icmp eq/ne (urem %x, %y), 0
4815 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
4816 // icmp eq/ne %x, 0
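// Reasoning (sketch): such an %x is 0 or a power of two, and a power of two
// is divisible only by powers of two, which %y (having >= 2 set bits)
// cannot be; hence the remainder is 0 exactly when %x is 0.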
4817 if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
4818 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
4819 KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
4820 KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
4821 if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
4822 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
4823 }
4824
4825 // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
4826 // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
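// (ashr X, BW-1) splats the sign bit, giving 0 for non-negative X and -1
// for negative X, so the compare reduces to a sign test of X.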
4827 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
4828 N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
4829 N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
4830 N1C && N1C->isAllOnes()) {
4831 return DAG.getSetCC(dl, VT, N0.getOperand(0),
4832 DAG.getConstant(0, dl, OpVT),
4833 Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
4834 }
4835
4836 if (SDValue V =
4837 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
4838 return V;
4839 }
4840
4841 // These simplifications apply to splat vectors as well.
4842 // TODO: Handle more splat vector cases.
4843 if (auto *N1C = isConstOrConstSplat(N1)) {
4844 const APInt &C1 = N1C->getAPIntValue();
4845
4846 APInt MinVal, MaxVal;
4847 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
4848 if (ISD::isSignedIntSetCC(Cond)) {
4849 MinVal = APInt::getSignedMinValue(OperandBitSize);
4850 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
4851 } else {
4852 MinVal = APInt::getMinValue(OperandBitSize);
4853 MaxVal = APInt::getMaxValue(OperandBitSize);
4854 }
4855
4856 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
4857 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
4858 // X >= MIN --> true
4859 if (C1 == MinVal)
4860 return DAG.getBoolConstant(true, dl, VT, OpVT);
4861
4862 if (!VT.isVector()) { // TODO: Support this for vectors.
4863 // X >= C0 --> X > (C0 - 1)
4864 APInt C = C1 - 1;
4865 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
4866 if ((DCI.isBeforeLegalizeOps() ||
4867 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4868 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4869 isLegalICmpImmediate(C.getSExtValue())))) {
4870 return DAG.getSetCC(dl, VT, N0,
4871 DAG.getConstant(C, dl, N1.getValueType()),
4872 NewCC);
4873 }
4874 }
4875 }
4876
4877 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
4878 // X <= MAX --> true
4879 if (C1 == MaxVal)
4880 return DAG.getBoolConstant(true, dl, VT, OpVT);
4881
4882 // X <= C0 --> X < (C0 + 1)
4883 if (!VT.isVector()) { // TODO: Support this for vectors.
4884 APInt C = C1 + 1;
4885 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
4886 if ((DCI.isBeforeLegalizeOps() ||
4887 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
4888 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
4889 isLegalICmpImmediate(C.getSExtValue())))) {
4890 return DAG.getSetCC(dl, VT, N0,
4891 DAG.getConstant(C, dl, N1.getValueType()),
4892 NewCC);
4893 }
4894 }
4895 }
4896
4897 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
4898 if (C1 == MinVal)
4899 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
4900
4901 // TODO: Support this for vectors after legalize ops.
4902 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4903 // Canonicalize setlt X, Max --> setne X, Max
4904 if (C1 == MaxVal)
4905 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4906
4907 // If we have setult X, 1, turn it into seteq X, 0
4908 if (C1 == MinVal+1)
4909 return DAG.getSetCC(dl, VT, N0,
4910 DAG.getConstant(MinVal, dl, N0.getValueType()),
4911 ISD::SETEQ);
4912 }
4913 }
4914
4915 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
4916 if (C1 == MaxVal)
4917 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
4918
4919 // TODO: Support this for vectors after legalize ops.
4920 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
4921 // Canonicalize setgt X, Min --> setne X, Min
4922 if (C1 == MinVal)
4923 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
4924
4925 // If we have setugt X, Max-1, turn it into seteq X, Max
4926 if (C1 == MaxVal-1)
4927 return DAG.getSetCC(dl, VT, N0,
4928 DAG.getConstant(MaxVal, dl, N0.getValueType()),
4929 ISD::SETEQ);
4930 }
4931 }
4932
4933 if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
4934 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
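// E.g. '(X & (0x8000 u>> Y)) != 0' hoists to '((X << Y) & 0x8000) != 0',
// replacing a variable shift of the constant with a shift of X itself.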
4935 if (C1.isZero())
4936 if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
4937 VT, N0, N1, Cond, DCI, dl))
4938 return CC;
4939
4940 // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
4941 // For example, when high 32-bits of i64 X are known clear:
4942 // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
4943 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
4944 bool CmpZero = N1C->isZero();
4945 bool CmpNegOne = N1C->isAllOnes();
4946 if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
4947 // Match or(lo,shl(hi,bw/2)) pattern.
4948 auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
4949 unsigned EltBits = V.getScalarValueSizeInBits();
4950 if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
4951 return false;
4952 SDValue LHS = V.getOperand(0);
4953 SDValue RHS = V.getOperand(1);
4954 APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
4955 // Unshifted element must have zero upper bits.
4956 if (RHS.getOpcode() == ISD::SHL &&
4957 isa<ConstantSDNode>(RHS.getOperand(1)) &&
4958 RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4959 DAG.MaskedValueIsZero(LHS, HiBits)) {
4960 Lo = LHS;
4961 Hi = RHS.getOperand(0);
4962 return true;
4963 }
4964 if (LHS.getOpcode() == ISD::SHL &&
4965 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4966 LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
4967 DAG.MaskedValueIsZero(RHS, HiBits)) {
4968 Lo = RHS;
4969 Hi = LHS.getOperand(0);
4970 return true;
4971 }
4972 return false;
4973 };
4974
4975 auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
4976 unsigned EltBits = N0.getScalarValueSizeInBits();
4977 unsigned HalfBits = EltBits / 2;
4978 APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
4979 SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
4980 SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
4981 SDValue NewN0 =
4982 DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
4983 SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
4984 return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
4985 };
4986
4987 SDValue Lo, Hi;
4988 if (IsConcat(N0, Lo, Hi))
4989 return MergeConcat(Lo, Hi);
4990
4991 if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
4992 SDValue Lo0, Lo1, Hi0, Hi1;
4993 if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
4994 IsConcat(N0.getOperand(1), Lo1, Hi1)) {
4995 return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
4996 DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
4997 }
4998 }
4999 }
5000 }
5001
5002 // If we have "setcc X, C0", check to see if we can shrink the immediate
5003 // by changing cc.
5004 // TODO: Support this for vectors after legalize ops.
5005 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
5006 // SETUGT X, SINTMAX -> SETLT X, 0
5007 // SETUGE X, SINTMIN -> SETLT X, 0
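// (Both forms just ask whether the sign bit of X is set.)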
5008 if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
5009 (Cond == ISD::SETUGE && C1.isMinSignedValue()))
5010 return DAG.getSetCC(dl, VT, N0,
5011 DAG.getConstant(0, dl, N1.getValueType()),
5012 ISD::SETLT);
5013
5014 // SETULT X, SINTMIN -> SETGT X, -1
5015 // SETULE X, SINTMAX -> SETGT X, -1
5016 if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
5017 (Cond == ISD::SETULE && C1.isMaxSignedValue()))
5018 return DAG.getSetCC(dl, VT, N0,
5019 DAG.getAllOnesConstant(dl, N1.getValueType()),
5020 ISD::SETGT);
5021 }
5022 }
5023
5024 // Back to non-vector simplifications.
5025 // TODO: Can we do these for vector splats?
5026 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
5027 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5028 const APInt &C1 = N1C->getAPIntValue();
5029 EVT ShValTy = N0.getValueType();
5030
5031 // Fold bit comparisons when we can. This will result in an
5032 // incorrect value when boolean false is negative one, unless
5033 // the bitsize is 1 in which case the false value is the same
5034 // in practice regardless of the representation.
5035 if ((VT.getSizeInBits() == 1 ||
5036 getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
5037 (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5038 (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
5039 N0.getOpcode() == ISD::AND) {
5040 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5041 EVT ShiftTy =
5042 getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
5043 if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
5044 // Perform the xform if the AND RHS is a single bit.
5045 unsigned ShCt = AndRHS->getAPIntValue().logBase2();
5046 if (AndRHS->getAPIntValue().isPowerOf2() &&
5047 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5048 return DAG.getNode(ISD::TRUNCATE, dl, VT,
5049 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5050 DAG.getConstant(ShCt, dl, ShiftTy)));
5051 }
5052 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
5053 // (X & 8) == 8 --> (X & 8) >> 3
5054 // Perform the xform if C1 is a single bit.
5055 unsigned ShCt = C1.logBase2();
5056 if (C1.isPowerOf2() &&
5057 !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
5058 return DAG.getNode(ISD::TRUNCATE, dl, VT,
5059 DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5060 DAG.getConstant(ShCt, dl, ShiftTy)));
5061 }
5062 }
5063 }
5064 }
5065
5066 if (C1.getSignificantBits() <= 64 &&
5067 !isLegalICmpImmediate(C1.getSExtValue())) {
5068 EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
5069 // (X & -256) == 256 -> (X >> 8) == 1
5070 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5071 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
5072 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5073 const APInt &AndRHSC = AndRHS->getAPIntValue();
5074 if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
5075 unsigned ShiftBits = AndRHSC.countr_zero();
5076 if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5077 SDValue Shift =
5078 DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
5079 DAG.getConstant(ShiftBits, dl, ShiftTy));
5080 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
5081 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
5082 }
5083 }
5084 }
5085 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
5086 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
5087 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
5088 // X < 0x100000000 -> (X >> 32) < 1
5089 // X >= 0x100000000 -> (X >> 32) >= 1
5090 // X <= 0x0ffffffff -> (X >> 32) < 1
5091 // X > 0x0ffffffff -> (X >> 32) >= 1
5092 unsigned ShiftBits;
5093 APInt NewC = C1;
5094 ISD::CondCode NewCond = Cond;
5095 if (AdjOne) {
5096 ShiftBits = C1.countr_one();
5097 NewC = NewC + 1;
5098 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
5099 } else {
5100 ShiftBits = C1.countr_zero();
5101 }
5102 NewC.lshrInPlace(ShiftBits);
5103 if (ShiftBits && NewC.getSignificantBits() <= 64 &&
5104 isLegalICmpImmediate(NewC.getSExtValue()) &&
5105 !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
5106 SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
5107 DAG.getConstant(ShiftBits, dl, ShiftTy));
5108 SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
5109 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
5110 }
5111 }
5112 }
5113 }
5114
5115 if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
5116 auto *CFP = cast<ConstantFPSDNode>(N1);
5117 assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
5118
5119 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
5120 // constant if knowing that the operand is non-NaN is enough. We prefer to
5121 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
5122 // materialize 0.0.
5123 if (Cond == ISD::SETO || Cond == ISD::SETUO)
5124 return DAG.getSetCC(dl, VT, N0, N0, Cond);
5125
5126 // setcc (fneg x), C -> setcc swap(pred) x, -C
5127 if (N0.getOpcode() == ISD::FNEG) {
5128 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
5129 if (DCI.isBeforeLegalizeOps() ||
5130 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
5131 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
5132 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
5133 }
5134 }
5135
5136 // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf
5137 if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) &&
5138 !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) {
5139 bool IsFabs = N0.getOpcode() == ISD::FABS;
5140 SDValue Op = IsFabs ? N0.getOperand(0) : N0;
5141 if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) {
5142 FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf)
5143 : (IsFabs ? fcInf : fcPosInf);
5144 if (Cond == ISD::SETUEQ)
5145 Flag |= fcNan;
5146 return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op,
5147 DAG.getTargetConstant(Flag, dl, MVT::i32));
5148 }
5149 }
5150
5151 // If the condition is not legal, see if we can find an equivalent one
5152 // which is legal.
5153 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
5154 // If the comparison was an awkward floating-point == or != and one of
5155 // the comparison operands is infinity or negative infinity, convert the
5156 // condition to a less-awkward <= or >=.
5157 if (CFP->getValueAPF().isInfinity()) {
5158 bool IsNegInf = CFP->getValueAPF().isNegative();
5159 ISD::CondCode NewCond = ISD::SETCC_INVALID;
5160 switch (Cond) {
5161 case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
5162 case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
5163 case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
5164 case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
5165 default: break;
5166 }
5167 if (NewCond != ISD::SETCC_INVALID &&
5168 isCondCodeLegal(NewCond, N0.getSimpleValueType()))
5169 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5170 }
5171 }
5172 }
5173
5174 if (N0 == N1) {
5175 // The sext(setcc()) => setcc() optimization relies on the appropriate
5176 // constant being emitted.
5177 assert(!N0.getValueType().isInteger() &&
5178 "Integer types should be handled by FoldSetCC");
5179
5180 bool EqTrue = ISD::isTrueWhenEqual(Cond);
5181 unsigned UOF = ISD::getUnorderedFlavor(Cond);
5182 if (UOF == 2) // FP operators that are undefined on NaNs.
5183 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5184 if (UOF == unsigned(EqTrue))
5185 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
5186 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
5187 // if it is not already.
5188 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
5189 if (NewCond != Cond &&
5190 (DCI.isBeforeLegalizeOps() ||
5191 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
5192 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
5193 }
5194
5195 // ~X > ~Y --> Y > X
5196 // ~X < ~Y --> Y < X
5197 // ~X < C --> X > ~C
5198 // ~X > C --> X < ~C
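//
// These hold because ~X == -1 - X, which reverses both the signed and the
// unsigned orderings; e.g. ~X u< ~Y iff Y u< X.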
5199 if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
5200 N0.getValueType().isInteger()) {
5201 if (isBitwiseNot(N0)) {
5202 if (isBitwiseNot(N1))
5203 return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
5204
5205 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
5206 !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) {
5207 SDValue Not = DAG.getNOT(dl, N1, OpVT);
5208 return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
5209 }
5210 }
5211 }
5212
5213 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
5214 N0.getValueType().isInteger()) {
5215 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
5216 N0.getOpcode() == ISD::XOR) {
5217 // Simplify (X+Y) == (X+Z) --> Y == Z
5218 if (N0.getOpcode() == N1.getOpcode()) {
5219 if (N0.getOperand(0) == N1.getOperand(0))
5220 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
5221 if (N0.getOperand(1) == N1.getOperand(1))
5222 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
5223 if (isCommutativeBinOp(N0.getOpcode())) {
5224 // If X op Y == Y op X, try other combinations.
5225 if (N0.getOperand(0) == N1.getOperand(1))
5226 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
5227 Cond);
5228 if (N0.getOperand(1) == N1.getOperand(0))
5229 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
5230 Cond);
5231 }
5232 }
5233
5234 // If RHS is a legal immediate value for a compare instruction, we need
5235 // to be careful about increasing register pressure needlessly.
5236 bool LegalRHSImm = false;
5237
5238 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
5239 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
5240 // Turn (X+C1) == C2 --> X == C2-C1
5241 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse())
5242 return DAG.getSetCC(
5243 dl, VT, N0.getOperand(0),
5244 DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(),
5245 dl, N0.getValueType()),
5246 Cond);
5247
5248 // Turn (X^C1) == C2 --> X == C1^C2
5249 if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse())
5250 return DAG.getSetCC(
5251 dl, VT, N0.getOperand(0),
5252 DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(),
5253 dl, N0.getValueType()),
5254 Cond);
5255 }
5256
5257 // Turn (C1-X) == C2 --> X == C1-C2
5258 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
5259 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse())
5260 return DAG.getSetCC(
5261 dl, VT, N0.getOperand(1),
5262 DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(),
5263 dl, N0.getValueType()),
5264 Cond);
5265
5266 // Could RHSC fold directly into a compare?
5267 if (RHSC->getValueType(0).getSizeInBits() <= 64)
5268 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
5269 }
5270
5271 // (X+Y) == X --> Y == 0 and similar folds.
5272 // Don't do this if X is an immediate that can fold into a cmp
5273 // instruction and X+Y has other uses. It could be an induction variable
5274 // chain, and the transform would increase register pressure.
5275 if (!LegalRHSImm || N0.hasOneUse())
5276 if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
5277 return V;
5278 }
5279
5280 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
5281 N1.getOpcode() == ISD::XOR)
5282 if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
5283 return V;
5284
5285 if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
5286 return V;
5287 }
5288
5289 // Fold remainder of division by a constant.
5290 if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
5291 N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
5292 // When division is cheap or optimizing for minimum size,
5293 // fall through to DIVREM creation by skipping this fold.
5294 if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
5295 if (N0.getOpcode() == ISD::UREM) {
5296 if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
5297 return Folded;
5298 } else if (N0.getOpcode() == ISD::SREM) {
5299 if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
5300 return Folded;
5301 }
5302 }
5303 }
5304
5305 // Fold away ALL boolean setcc's.
5306 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
5307 SDValue Temp;
5308 switch (Cond) {
5309 default: llvm_unreachable("Unknown integer setcc!");
5310 case ISD::SETEQ: // X == Y -> ~(X^Y)
5311 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5312 N0 = DAG.getNOT(dl, Temp, OpVT);
5313 if (!DCI.isCalledByLegalizer())
5314 DCI.AddToWorklist(Temp.getNode());
5315 break;
5316 case ISD::SETNE: // X != Y --> (X^Y)
5317 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
5318 break;
5319 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
5320 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
5321 Temp = DAG.getNOT(dl, N0, OpVT);
5322 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
5323 if (!DCI.isCalledByLegalizer())
5324 DCI.AddToWorklist(Temp.getNode());
5325 break;
5326 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
5327 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
5328 Temp = DAG.getNOT(dl, N1, OpVT);
5329 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
5330 if (!DCI.isCalledByLegalizer())
5331 DCI.AddToWorklist(Temp.getNode());
5332 break;
5333 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
5334 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
5335 Temp = DAG.getNOT(dl, N0, OpVT);
5336 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
5337 if (!DCI.isCalledByLegalizer())
5338 DCI.AddToWorklist(Temp.getNode());
5339 break;
5340 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
5341 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
5342 Temp = DAG.getNOT(dl, N1, OpVT);
5343 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
5344 break;
5345 }
5346 if (VT.getScalarType() != MVT::i1) {
5347 if (!DCI.isCalledByLegalizer())
5348 DCI.AddToWorklist(N0.getNode());
5349 // FIXME: If running after legalize, we probably can't do this.
5350 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
5351 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
5352 }
5353 return N0;
5354 }
5355
5356 // Could not fold it.
5357 return SDValue();
5358 }
5359
5360 /// Returns true (and the GlobalValue and the offset) if the node is a
5361 /// GlobalAddress + offset.
5362 bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
5363 int64_t &Offset) const {
5364
5365 SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();
5366
5367 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
5368 GA = GASD->getGlobal();
5369 Offset += GASD->getOffset();
5370 return true;
5371 }
5372
5373 if (N->getOpcode() == ISD::ADD) {
5374 SDValue N1 = N->getOperand(0);
5375 SDValue N2 = N->getOperand(1);
5376 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
5377 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
5378 Offset += V->getSExtValue();
5379 return true;
5380 }
5381 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
5382 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
5383 Offset += V->getSExtValue();
5384 return true;
5385 }
5386 }
5387 }
5388
5389 return false;
5390 }
5391
5392 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
5393 DAGCombinerInfo &DCI) const {
5394 // Default implementation: no optimization.
5395 return SDValue();
5396 }
5397
5398 //===----------------------------------------------------------------------===//
5399 // Inline Assembler Implementation Methods
5400 //===----------------------------------------------------------------------===//
5401
5402 TargetLowering::ConstraintType
5403 TargetLowering::getConstraintType(StringRef Constraint) const {
5404 unsigned S = Constraint.size();
5405
5406 if (S == 1) {
5407 switch (Constraint[0]) {
5408 default: break;
5409 case 'r':
5410 return C_RegisterClass;
5411 case 'm': // memory
5412 case 'o': // offsetable
5413 case 'V': // not offsetable
5414 return C_Memory;
5415 case 'p': // Address.
5416 return C_Address;
5417 case 'n': // Simple Integer
5418 case 'E': // Floating Point Constant
5419 case 'F': // Floating Point Constant
5420 return C_Immediate;
5421 case 'i': // Simple Integer or Relocatable Constant
5422 case 's': // Relocatable Constant
5423 case 'X': // Allow ANY value.
5424 case 'I': // Target registers.
5425 case 'J':
5426 case 'K':
5427 case 'L':
5428 case 'M':
5429 case 'N':
5430 case 'O':
5431 case 'P':
5432 case '<':
5433 case '>':
5434 return C_Other;
5435 }
5436 }
5437
5438 if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
5439 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
5440 return C_Memory;
5441 return C_Register;
5442 }
5443 return C_Unknown;
5444 }
5445
5446 /// Try to replace an X constraint, which matches anything, with another that
5447 /// has more specific requirements based on the type of the corresponding
5448 /// operand.
5449 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5450 if (ConstraintVT.isInteger())
5451 return "r";
5452 if (ConstraintVT.isFloatingPoint())
5453 return "f"; // works for many targets
5454 return nullptr;
5455 }
5456
5457 SDValue TargetLowering::LowerAsmOutputForConstraint(
5458 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
5459 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
5460 return SDValue();
5461 }
5462
5463 /// Lower the specified operand into the Ops vector.
5464 /// If it is invalid, don't add anything to Ops.
5465 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
5466 StringRef Constraint,
5467 std::vector<SDValue> &Ops,
5468 SelectionDAG &DAG) const {
5469
5470 if (Constraint.size() > 1)
5471 return;
5472
5473 char ConstraintLetter = Constraint[0];
5474 switch (ConstraintLetter) {
5475 default: break;
5476 case 'X': // Allows any operand
5477 case 'i': // Simple Integer or Relocatable Constant
5478 case 'n': // Simple Integer
5479 case 's': { // Relocatable Constant
5480
5481 ConstantSDNode *C;
5482 uint64_t Offset = 0;
5483
5484 // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
5485 // etc., since getelementptr is variadic. We can't use
5486 // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
5487 // while in this case the GA may be furthest from the root node which is
5488 // likely an ISD::ADD.
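//
// E.g. (illustrative) for (add (GlobalAddress @g, +4), 8) the loop below
// accumulates Offset = 8 and then emits a TargetGlobalAddress of @g with
// offset 4 + 8 = 12.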
5489 while (true) {
5490 if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
5491 // gcc prints these as sign extended. Sign extend value to 64 bits
5492 // now; without this it would get ZExt'd later in
5493 // ScheduleDAGSDNodes::EmitNode, which is very generic.
5494 bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
5495 BooleanContent BCont = getBooleanContents(MVT::i64);
5496 ISD::NodeType ExtOpc =
5497 IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
5498 int64_t ExtVal =
5499 ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
5500 Ops.push_back(
5501 DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
5502 return;
5503 }
5504 if (ConstraintLetter != 'n') {
5505 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5506 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5507 GA->getValueType(0),
5508 Offset + GA->getOffset()));
5509 return;
5510 }
5511 if (const auto *BA = dyn_cast<BlockAddressSDNode>(Op)) {
5512 Ops.push_back(DAG.getTargetBlockAddress(
5513 BA->getBlockAddress(), BA->getValueType(0),
5514 Offset + BA->getOffset(), BA->getTargetFlags()));
5515 return;
5516 }
5517 if (isa<BasicBlockSDNode>(Op)) {
5518 Ops.push_back(Op);
5519 return;
5520 }
5521 }
5522 const unsigned OpCode = Op.getOpcode();
5523 if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
5524 if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
5525 Op = Op.getOperand(1);
5526 // Subtraction is not commutative.
5527 else if (OpCode == ISD::ADD &&
5528 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
5529 Op = Op.getOperand(0);
5530 else
5531 return;
5532 Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
5533 continue;
5534 }
5535 return;
5536 }
5537 break;
5538 }
5539 }
5540 }
5541
5542 void TargetLowering::CollectTargetIntrinsicOperands(
5543 const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
5544 }
5545
5546 std::pair<unsigned, const TargetRegisterClass *>
5547 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
5548 StringRef Constraint,
5549 MVT VT) const {
5550 if (Constraint.empty() || Constraint[0] != '{')
5551 return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
5552 assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
5553
5554 // Remove the braces from around the name.
5555 StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
5556
5557 std::pair<unsigned, const TargetRegisterClass *> R =
5558 std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
5559
5560 // Figure out which register class contains this reg.
5561 for (const TargetRegisterClass *RC : RI->regclasses()) {
5562 // If none of the value types for this register class are valid, we
5563 // can't use it. For example, 64-bit reg classes on 32-bit targets.
5564 if (!isLegalRC(*RI, *RC))
5565 continue;
5566
5567 for (const MCPhysReg &PR : *RC) {
5568 if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
5569 std::pair<unsigned, const TargetRegisterClass *> S =
5570 std::make_pair(PR, RC);
5571
5572 // If this register class has the requested value type, return it,
5573 // otherwise keep searching and return the first class found
5574 // if no other is found which explicitly has the requested type.
5575 if (RI->isTypeLegalForClass(*RC, VT))
5576 return S;
5577 if (!R.second)
5578 R = S;
5579 }
5580 }
5581 }
5582
5583 return R;
5584 }
5585
5586 //===----------------------------------------------------------------------===//
5587 // Constraint Selection.
5588
5589 /// Return true if this is an input operand that is a matching constraint like
5590 /// "4".
5591 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
5592 assert(!ConstraintCode.empty() && "No known constraint!");
5593 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
5594 }
5595
5596 /// If this is an input matching constraint, this method returns the output
5597 /// operand it matches.
5598 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
5599 assert(!ConstraintCode.empty() && "No known constraint!");
5600 return atoi(ConstraintCode.c_str());
5601 }
5602
5603 /// Split up the constraint string from the inline assembly value into the
5604 /// specific constraints and their prefixes, and also tie in the associated
5605 /// operand values.
5606 /// If this returns an empty vector, and if the constraint string itself
5607 /// isn't empty, there was an error parsing.
5608 TargetLowering::AsmOperandInfoVector
5609 TargetLowering::ParseConstraints(const DataLayout &DL,
5610 const TargetRegisterInfo *TRI,
5611 const CallBase &Call) const {
5612 /// Information about all of the constraints.
5613 AsmOperandInfoVector ConstraintOperands;
5614 const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
5615 unsigned maCount = 0; // Largest number of multiple alternative constraints.
5616
5617 // Do a prepass over the constraints, canonicalizing them, and building up the
5618 // ConstraintOperands list.
5619 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
5620 unsigned ResNo = 0; // ResNo - The result number of the next output.
5621 unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number.
5622
5623 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
5624 ConstraintOperands.emplace_back(std::move(CI));
5625 AsmOperandInfo &OpInfo = ConstraintOperands.back();
5626
5627 // Update multiple alternative constraint count.
5628 if (OpInfo.multipleAlternatives.size() > maCount)
5629 maCount = OpInfo.multipleAlternatives.size();
5630
5631 OpInfo.ConstraintVT = MVT::Other;
5632
5633 // Compute the value type for each operand.
5634 switch (OpInfo.Type) {
5635 case InlineAsm::isOutput:
5636 // Indirect outputs just consume an argument.
5637 if (OpInfo.isIndirect) {
5638 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5639 break;
5640 }
5641
5642 // The return value of the call is this value. As such, there is no
5643 // corresponding argument.
5644 assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
5645 if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
5646 OpInfo.ConstraintVT =
5647 getSimpleValueType(DL, STy->getElementType(ResNo));
5648 } else {
5649 assert(ResNo == 0 && "Asm only has one result!");
5650 OpInfo.ConstraintVT =
5651 getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
5652 }
5653 ++ResNo;
5654 break;
5655 case InlineAsm::isInput:
5656 OpInfo.CallOperandVal = Call.getArgOperand(ArgNo);
5657 break;
5658 case InlineAsm::isLabel:
5659 OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo);
5660 ++LabelNo;
5661 continue;
5662 case InlineAsm::isClobber:
5663 // Nothing to do.
5664 break;
5665 }
5666
5667 if (OpInfo.CallOperandVal) {
5668 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
5669 if (OpInfo.isIndirect) {
5670 OpTy = Call.getParamElementType(ArgNo);
5671 assert(OpTy && "Indirect operand must have elementtype attribute");
5672 }
5673
5674 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5675 if (StructType *STy = dyn_cast<StructType>(OpTy))
5676 if (STy->getNumElements() == 1)
5677 OpTy = STy->getElementType(0);
5678
5679 // If OpTy is not a single value, it may be a struct/union that we
5680 // can tile with integers.
5681 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5682 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
5683 switch (BitSize) {
5684 default: break;
5685 case 1:
5686 case 8:
5687 case 16:
5688 case 32:
5689 case 64:
5690 case 128:
5691 OpTy = IntegerType::get(OpTy->getContext(), BitSize);
5692 break;
5693 }
5694 }
5695
5696 EVT VT = getAsmOperandValueType(DL, OpTy, true);
5697 OpInfo.ConstraintVT = VT.isSimple() ? VT.getSimpleVT() : MVT::Other;
5698 ArgNo++;
5699 }
5700 }
5701
5702 // If we have multiple alternative constraints, select the best alternative.
5703 if (!ConstraintOperands.empty()) {
5704 if (maCount) {
5705 unsigned bestMAIndex = 0;
5706 int bestWeight = -1;
5707 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
5708 int weight = -1;
5709 unsigned maIndex;
5710 // Compute the sums of the weights for each alternative, keeping track
5711 // of the best (highest weight) one so far.
5712 for (maIndex = 0; maIndex < maCount; ++maIndex) {
5713 int weightSum = 0;
5714 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5715 cIndex != eIndex; ++cIndex) {
5716 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5717 if (OpInfo.Type == InlineAsm::isClobber)
5718 continue;
5719
5720 // If this is an output operand with a matching input operand,
5721 // look up the matching input. If their types mismatch, e.g. one
5722 // is an integer, the other is floating point, or their sizes are
5723 // different, flag it as maCantMatch.
5724 if (OpInfo.hasMatchingInput()) {
5725 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5726 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5727 if ((OpInfo.ConstraintVT.isInteger() !=
5728 Input.ConstraintVT.isInteger()) ||
5729 (OpInfo.ConstraintVT.getSizeInBits() !=
5730 Input.ConstraintVT.getSizeInBits())) {
5731 weightSum = -1; // Can't match.
5732 break;
5733 }
5734 }
5735 }
5736 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
5737 if (weight == -1) {
5738 weightSum = -1;
5739 break;
5740 }
5741 weightSum += weight;
5742 }
5743 // Update best.
5744 if (weightSum > bestWeight) {
5745 bestWeight = weightSum;
5746 bestMAIndex = maIndex;
5747 }
5748 }
5749
5750       // Now select the chosen alternative in each constraint.
5751 for (AsmOperandInfo &cInfo : ConstraintOperands)
5752 if (cInfo.Type != InlineAsm::isClobber)
5753 cInfo.selectAlternative(bestMAIndex);
5754 }
5755 }
5756
5757 // Check and hook up tied operands, choose constraint code to use.
5758 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
5759 cIndex != eIndex; ++cIndex) {
5760 AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
5761
5762 // If this is an output operand with a matching input operand, look up the
5763 // matching input. If their types mismatch, e.g. one is an integer, the
5764 // other is floating point, or their sizes are different, flag it as an
5765 // error.
5766 if (OpInfo.hasMatchingInput()) {
5767 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5768
5769 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5770 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
5771 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
5772 OpInfo.ConstraintVT);
5773 std::pair<unsigned, const TargetRegisterClass *> InputRC =
5774 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
5775 Input.ConstraintVT);
5776 if ((OpInfo.ConstraintVT.isInteger() !=
5777 Input.ConstraintVT.isInteger()) ||
5778 (MatchRC.second != InputRC.second)) {
5779 report_fatal_error("Unsupported asm: input constraint"
5780 " with a matching output constraint of"
5781 " incompatible type!");
5782 }
5783 }
5784 }
5785 }
5786
5787 return ConstraintOperands;
5788 }
5789
5790 /// Return a number indicating our preference for choosing a type of constraint
5791 /// over another, for the purpose of sorting them. Immediates are almost always
5792 /// preferable (when they can be emitted). A higher return value means a
5793 /// stronger preference for one constraint type relative to another.
5794 /// FIXME: We should prefer registers over memory but doing so may lead to
5795 /// unrecoverable register exhaustion later.
5796 /// https://github.com/llvm/llvm-project/issues/20571
5797 static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) {
5798 switch (CT) {
5799 case TargetLowering::C_Immediate:
5800 case TargetLowering::C_Other:
5801 return 4;
5802 case TargetLowering::C_Memory:
5803 case TargetLowering::C_Address:
5804 return 3;
5805 case TargetLowering::C_RegisterClass:
5806 return 2;
5807 case TargetLowering::C_Register:
5808 return 1;
5809 case TargetLowering::C_Unknown:
5810 return 0;
5811 }
5812 llvm_unreachable("Invalid constraint type");
5813 }
5814
5815 /// Examine constraint type and operand type and determine a weight value.
5816 /// This object must already have been set up with the operand type
5817 /// and the current alternative constraint selected.
5818 TargetLowering::ConstraintWeight
5819 TargetLowering::getMultipleConstraintMatchWeight(
5820 AsmOperandInfo &info, int maIndex) const {
5821 InlineAsm::ConstraintCodeVector *rCodes;
5822 if (maIndex >= (int)info.multipleAlternatives.size())
5823 rCodes = &info.Codes;
5824 else
5825 rCodes = &info.multipleAlternatives[maIndex].Codes;
5826 ConstraintWeight BestWeight = CW_Invalid;
5827
5828 // Loop over the options, keeping track of the most general one.
5829 for (const std::string &rCode : *rCodes) {
5830 ConstraintWeight weight =
5831 getSingleConstraintMatchWeight(info, rCode.c_str());
5832 if (weight > BestWeight)
5833 BestWeight = weight;
5834 }
5835
5836 return BestWeight;
5837 }
5838
5839 /// Examine constraint type and operand type and determine a weight value.
5840 /// This object must already have been set up with the operand type
5841 /// and the current alternative constraint selected.
5842 TargetLowering::ConstraintWeight
5843 TargetLowering::getSingleConstraintMatchWeight(
5844 AsmOperandInfo &info, const char *constraint) const {
5845 ConstraintWeight weight = CW_Invalid;
5846 Value *CallOperandVal = info.CallOperandVal;
5847 // If we don't have a value, we can't do a match,
5848 // but allow it at the lowest weight.
5849 if (!CallOperandVal)
5850 return CW_Default;
5851 // Look at the constraint type.
5852 switch (*constraint) {
5853 case 'i': // immediate integer.
5854 case 'n': // immediate integer with a known value.
5855 if (isa<ConstantInt>(CallOperandVal))
5856 weight = CW_Constant;
5857 break;
5858   case 's': // non-explicit integral immediate.
5859 if (isa<GlobalValue>(CallOperandVal))
5860 weight = CW_Constant;
5861 break;
5862 case 'E': // immediate float if host format.
5863 case 'F': // immediate float.
5864 if (isa<ConstantFP>(CallOperandVal))
5865 weight = CW_Constant;
5866 break;
5867 case '<': // memory operand with autodecrement.
5868 case '>': // memory operand with autoincrement.
5869 case 'm': // memory operand.
5870 case 'o': // offsettable memory operand
5871 case 'V': // non-offsettable memory operand
5872 weight = CW_Memory;
5873 break;
5874 case 'r': // general register.
5875 case 'g': // general register, memory operand or immediate integer.
5876 // note: Clang converts "g" to "imr".
5877 if (CallOperandVal->getType()->isIntegerTy())
5878 weight = CW_Register;
5879 break;
5880 case 'X': // any operand.
5881 default:
5882 weight = CW_Default;
5883 break;
5884 }
5885 return weight;
5886 }
5887
5888 /// If there are multiple different constraints that we could pick for this
5889 /// operand (e.g. "imr") try to pick the 'best' one.
5890 /// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall
5891 /// into seven classes:
5892 /// Register -> one specific register
5893 /// RegisterClass -> a group of regs
5894 /// Memory -> memory
5895 /// Address -> a symbolic memory reference
5896 /// Immediate -> immediate values
5897 /// Other -> magic values (such as "Flag Output Operands")
5898 /// Unknown -> something we don't recognize yet and can't handle
5899 /// Ideally, we would pick the most specific constraint possible: if we have
5900 /// something that fits into a register, we would pick it. The problem here
5901 /// is that if we have something that could either be in a register or in
5902 /// memory that use of the register could cause selection of *other*
5903 /// operands to fail: they might only succeed if we pick memory. Because of
5904 /// this the heuristic we use is:
5905 ///
5906 /// 1) If there is an 'other' constraint, and if the operand is valid for
5907 /// that constraint, use it. This makes us take advantage of 'i'
5908 /// constraints when available.
5909 /// 2) Otherwise, pick the most general constraint present. This prefers
5910 /// 'm' over 'r', for example.
5911 ///
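/// As an illustrative sketch (not normative): for an "imr" constraint whose
/// operand is a ConstantInt, rule 1 lets the 'i' (immediate) code win; for a
/// non-constant operand, the priority sort below orders the codes so that
/// 'm' (memory) is preferred over 'r' (register class), per rule 2.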
5912 TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences(
5913 TargetLowering::AsmOperandInfo &OpInfo) const {
5914 ConstraintGroup Ret;
5915
5916 Ret.reserve(OpInfo.Codes.size());
5917 for (StringRef Code : OpInfo.Codes) {
5918 TargetLowering::ConstraintType CType = getConstraintType(Code);
5919
5920 // Indirect 'other' or 'immediate' constraints are not allowed.
5921 if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
5922 CType == TargetLowering::C_Register ||
5923 CType == TargetLowering::C_RegisterClass))
5924 continue;
5925
5926 // Things with matching constraints can only be registers, per gcc
5927 // documentation. This mainly affects "g" constraints.
5928 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
5929 continue;
5930
5931 Ret.emplace_back(Code, CType);
5932 }
5933
5934 std::stable_sort(
5935 Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) {
5936 return getConstraintPiority(a.second) > getConstraintPiority(b.second);
5937 });
5938
5939 return Ret;
5940 }
5941
5942 /// If we have an immediate, see if we can lower it. Return true if we can,
5943 /// false otherwise.
5944 static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
5945 SDValue Op, SelectionDAG *DAG,
5946 const TargetLowering &TLI) {
5947
5948 assert((P.second == TargetLowering::C_Other ||
5949 P.second == TargetLowering::C_Immediate) &&
5950 "need immediate or other");
5951
5952 if (!Op.getNode())
5953 return false;
5954
5955 std::vector<SDValue> ResultOps;
5956 TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
5957 return !ResultOps.empty();
5958 }
5959
5960 /// Determines the constraint code and constraint type to use for the specific
5961 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
5962 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
5963 SDValue Op,
5964 SelectionDAG *DAG) const {
5965 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
5966
5967 // Single-letter constraints ('r') are very common.
5968 if (OpInfo.Codes.size() == 1) {
5969 OpInfo.ConstraintCode = OpInfo.Codes[0];
5970 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
5971 } else {
5972 ConstraintGroup G = getConstraintPreferences(OpInfo);
5973 if (G.empty())
5974 return;
5975
5976 unsigned BestIdx = 0;
5977 for (const unsigned E = G.size();
5978 BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
5979 G[BestIdx].second == TargetLowering::C_Immediate);
5980 ++BestIdx) {
5981 if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
5982 break;
5983 // If we're out of constraints, just pick the first one.
5984 if (BestIdx + 1 == E) {
5985 BestIdx = 0;
5986 break;
5987 }
5988 }
5989
5990 OpInfo.ConstraintCode = G[BestIdx].first;
5991 OpInfo.ConstraintType = G[BestIdx].second;
5992 }
5993
5994 // 'X' matches anything.
5995 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
5996 // Constants are handled elsewhere. For Functions, the type here is the
5997 // type of the result, which is not what we want to look at; leave them
5998 // alone.
5999 Value *v = OpInfo.CallOperandVal;
6000 if (isa<ConstantInt>(v) || isa<Function>(v)) {
6001 return;
6002 }
6003
6004 if (isa<BasicBlock>(v) || isa<BlockAddress>(v)) {
6005 OpInfo.ConstraintCode = "i";
6006 return;
6007 }
6008
6009 // Otherwise, try to resolve it to something we know about by looking at
6010 // the actual operand type.
6011 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
6012 OpInfo.ConstraintCode = Repl;
6013 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
6014 }
6015 }
6016 }
6017
6018 /// Given an exact SDIV by a constant, create a multiplication
6019 /// with the multiplicative inverse of the constant.
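/// For example (a sketch, assuming i32): an exact 'sdiv X, 6' first shifts X
/// right by countr_zero(6) == 1 with an exact SRA, dividing out the power of
/// two, and then multiplies by inv(3) mod 2^32 == 0xAAAAAAAB, which works
/// because 3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32).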
6020 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
6021 const SDLoc &dl, SelectionDAG &DAG,
6022 SmallVectorImpl<SDNode *> &Created) {
6023 SDValue Op0 = N->getOperand(0);
6024 SDValue Op1 = N->getOperand(1);
6025 EVT VT = N->getValueType(0);
6026 EVT SVT = VT.getScalarType();
6027 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
6028 EVT ShSVT = ShVT.getScalarType();
6029
6030 bool UseSRA = false;
6031 SmallVector<SDValue, 16> Shifts, Factors;
6032
6033 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6034 if (C->isZero())
6035 return false;
6036 APInt Divisor = C->getAPIntValue();
6037 unsigned Shift = Divisor.countr_zero();
6038 if (Shift) {
6039 Divisor.ashrInPlace(Shift);
6040 UseSRA = true;
6041 }
6042 // Calculate the multiplicative inverse, using Newton's method.
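    // Each step computes Factor <- Factor * (2 - Divisor * Factor); for an
    // odd divisor the number of correct low bits doubles per iteration, so
    // this converges in O(log2(BitWidth)) steps.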
6043 APInt t;
6044 APInt Factor = Divisor;
6045 while ((t = Divisor * Factor) != 1)
6046 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
6047 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
6048 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
6049 return true;
6050 };
6051
6052 // Collect all magic values from the build vector.
6053 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
6054 return SDValue();
6055
6056 SDValue Shift, Factor;
6057 if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
6058 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6059 Factor = DAG.getBuildVector(VT, dl, Factors);
6060 } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
6061 assert(Shifts.size() == 1 && Factors.size() == 1 &&
6062 "Expected matchUnaryPredicate to return one element for scalable "
6063 "vectors");
6064 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6065 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6066 } else {
6067 assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
6068 Shift = Shifts[0];
6069 Factor = Factors[0];
6070 }
6071
6072 SDValue Res = Op0;
6073
6074   // If the divisor is even, shift the dividend right upfront so the
  // remaining divisor has its LSB set to one.
6075 if (UseSRA) {
6076 // TODO: For UDIV use SRL instead of SRA.
6077 SDNodeFlags Flags;
6078 Flags.setExact(true);
6079 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
6080 Created.push_back(Res.getNode());
6081 }
6082
6083 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
6084 }
6085
6086 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
6087 SelectionDAG &DAG,
6088 SmallVectorImpl<SDNode *> &Created) const {
6089 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6090 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6091 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6092 return SDValue(N, 0); // Lower SDIV as SDIV
6093 return SDValue();
6094 }
6095
6096 SDValue
6097 TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
6098 SelectionDAG &DAG,
6099 SmallVectorImpl<SDNode *> &Created) const {
6100 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
6101 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6102 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
6103 return SDValue(N, 0); // Lower SREM as SREM
6104 return SDValue();
6105 }
6106
6107 /// Build sdiv by power-of-2 with conditional move instructions
6108 /// Ref: "Hacker's Delight" by Henry Warren, Section 10-1.
6109 /// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
6110 /// bgez x, label
6111 /// add x, x, 2**k-1
6112 /// label:
6113 /// sra res, x, k
6114 /// neg res, res (when the divisor is negative)
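/// As a concrete sketch (assuming i32), 'sdiv x, 8' becomes, in DAG form:
///   cmp = setlt x, 0
///   add = add x, 7            ; 2**3 - 1
///   sel = select cmp, add, x
///   res = sra sel, 3
/// and for 'sdiv x, -8' a final 'res = sub 0, res' is appended.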
6115 SDValue TargetLowering::buildSDIVPow2WithCMov(
6116 SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
6117 SmallVectorImpl<SDNode *> &Created) const {
6118 unsigned Lg2 = Divisor.countr_zero();
6119 EVT VT = N->getValueType(0);
6120
6121 SDLoc DL(N);
6122 SDValue N0 = N->getOperand(0);
6123 SDValue Zero = DAG.getConstant(0, DL, VT);
6124 APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
6125 SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
6126
6127 // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
6128 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6129 SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
6130 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
6131 SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
6132
6133 Created.push_back(Cmp.getNode());
6134 Created.push_back(Add.getNode());
6135 Created.push_back(CMov.getNode());
6136
6137 // Divide by pow2.
6138 SDValue SRA =
6139 DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
6140
6141 // If we're dividing by a positive value, we're done. Otherwise, we must
6142 // negate the result.
6143 if (Divisor.isNonNegative())
6144 return SRA;
6145
6146 Created.push_back(SRA.getNode());
6147 return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
6148 }
6149
6150 /// Given an ISD::SDIV node expressing a divide by constant,
6151 /// return a DAG expression to select that will generate the same value by
6152 /// multiplying by a magic number.
6153 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
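/// As an illustrative sketch using the classical i32 values from "Hacker's
/// Delight" (Table 10-1): for 'sdiv N, 7' the magic constant is 0x92492493
/// with a post-shift of 2; since d > 0 and the magic value is negative, the
/// numerator is added back, giving roughly:
///   Q = mulhs N, 0x92492493
///   Q = add Q, N
///   Q = sra Q, 2
///   Q = add Q, (srl Q, 31)    ; add one if the quotient is negative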
6154 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
6155 bool IsAfterLegalization,
6156 SmallVectorImpl<SDNode *> &Created) const {
6157 SDLoc dl(N);
6158 EVT VT = N->getValueType(0);
6159 EVT SVT = VT.getScalarType();
6160 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6161 EVT ShSVT = ShVT.getScalarType();
6162 unsigned EltBits = VT.getScalarSizeInBits();
6163 EVT MulVT;
6164
6165 // Check to see if we can do this.
6166 // FIXME: We should be more aggressive here.
6167 if (!isTypeLegal(VT)) {
6168 // Limit this to simple scalars for now.
6169 if (VT.isVector() || !VT.isSimple())
6170 return SDValue();
6171
6172 // If this type will be promoted to a large enough type with a legal
6173 // multiply operation, we can go ahead and do this transform.
6174 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6175 return SDValue();
6176
6177 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6178 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6179 !isOperationLegal(ISD::MUL, MulVT))
6180 return SDValue();
6181 }
6182
6183 // If the sdiv has an 'exact' bit we can use a simpler lowering.
6184 if (N->getFlags().hasExact())
6185 return BuildExactSDIV(*this, N, dl, DAG, Created);
6186
6187 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
6188
6189 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
6190 if (C->isZero())
6191 return false;
6192
6193 const APInt &Divisor = C->getAPIntValue();
6194 SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
6195 int NumeratorFactor = 0;
6196 int ShiftMask = -1;
6197
6198 if (Divisor.isOne() || Divisor.isAllOnes()) {
6199 // If d is +1/-1, we just multiply the numerator by +1/-1.
6200 NumeratorFactor = Divisor.getSExtValue();
6201 magics.Magic = 0;
6202 magics.ShiftAmount = 0;
6203 ShiftMask = 0;
6204 } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
6205 // If d > 0 and m < 0, add the numerator.
6206 NumeratorFactor = 1;
6207 } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
6208 // If d < 0 and m > 0, subtract the numerator.
6209 NumeratorFactor = -1;
6210 }
6211
6212 MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
6213 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
6214 Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
6215 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
6216 return true;
6217 };
6218
6219 SDValue N0 = N->getOperand(0);
6220 SDValue N1 = N->getOperand(1);
6221
6222 // Collect the shifts / magic values from each element.
6223 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
6224 return SDValue();
6225
6226 SDValue MagicFactor, Factor, Shift, ShiftMask;
6227 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6228 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6229 Factor = DAG.getBuildVector(VT, dl, Factors);
6230 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
6231 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
6232 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6233 assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
6234 Shifts.size() == 1 && ShiftMasks.size() == 1 &&
6235 "Expected matchUnaryPredicate to return one element for scalable "
6236 "vectors");
6237 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6238 Factor = DAG.getSplatVector(VT, dl, Factors[0]);
6239 Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
6240 ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
6241 } else {
6242 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6243 MagicFactor = MagicFactors[0];
6244 Factor = Factors[0];
6245 Shift = Shifts[0];
6246 ShiftMask = ShiftMasks[0];
6247 }
6248
6249 // Multiply the numerator (operand 0) by the magic value.
6250 // FIXME: We should support doing a MUL in a wider type.
6251 auto GetMULHS = [&](SDValue X, SDValue Y) {
6252 // If the type isn't legal, use a wider mul of the type calculated
6253 // earlier.
6254 if (!isTypeLegal(VT)) {
6255 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
6256 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
6257 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6258 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6259 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6260 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6261 }
6262
6263 if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
6264 return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
6265 if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
6266 SDValue LoHi =
6267 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6268 return SDValue(LoHi.getNode(), 1);
6269 }
6270     // If a type twice as wide is legal, widen and use a mul plus a shift.
6271 unsigned Size = VT.getScalarSizeInBits();
6272 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6273 if (VT.isVector())
6274 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6275 VT.getVectorElementCount());
6276 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6277 X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
6278 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
6279 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6280 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6281 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6282 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6283 }
6284 return SDValue();
6285 };
6286
6287 SDValue Q = GetMULHS(N0, MagicFactor);
6288 if (!Q)
6289 return SDValue();
6290
6291 Created.push_back(Q.getNode());
6292
6293 // (Optionally) Add/subtract the numerator using Factor.
6294 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
6295 Created.push_back(Factor.getNode());
6296 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
6297 Created.push_back(Q.getNode());
6298
6299 // Shift right algebraic by shift value.
6300 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
6301 Created.push_back(Q.getNode());
6302
6303 // Extract the sign bit, mask it and add it to the quotient.
6304 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
6305 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
6306 Created.push_back(T.getNode());
6307 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
6308 Created.push_back(T.getNode());
6309 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
6310 }
6311
6312 /// Given an ISD::UDIV node expressing a divide by constant,
6313 /// return a DAG expression to select that will generate the same value by
6314 /// multiplying by a magic number.
6315 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
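/// As an illustrative sketch using the classical i32 values from "Hacker's
/// Delight": for 'udiv N, 7' the magic constant is 0x24924925 and the "add"
/// fixup (the NPQ path below) is required, giving roughly:
///   Q   = mulhu N, 0x24924925
///   NPQ = srl (sub N, Q), 1
///   Q   = srl (add NPQ, Q), 2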
6316 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
6317 bool IsAfterLegalization,
6318 SmallVectorImpl<SDNode *> &Created) const {
6319 SDLoc dl(N);
6320 EVT VT = N->getValueType(0);
6321 EVT SVT = VT.getScalarType();
6322 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
6323 EVT ShSVT = ShVT.getScalarType();
6324 unsigned EltBits = VT.getScalarSizeInBits();
6325 EVT MulVT;
6326
6327 // Check to see if we can do this.
6328 // FIXME: We should be more aggressive here.
6329 if (!isTypeLegal(VT)) {
6330 // Limit this to simple scalars for now.
6331 if (VT.isVector() || !VT.isSimple())
6332 return SDValue();
6333
6334 // If this type will be promoted to a large enough type with a legal
6335 // multiply operation, we can go ahead and do this transform.
6336 if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
6337 return SDValue();
6338
6339 MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
6340 if (MulVT.getSizeInBits() < (2 * EltBits) ||
6341 !isOperationLegal(ISD::MUL, MulVT))
6342 return SDValue();
6343 }
6344
6345 SDValue N0 = N->getOperand(0);
6346 SDValue N1 = N->getOperand(1);
6347
6348 // Try to use leading zeros of the dividend to reduce the multiplier and
6349 // avoid expensive fixups.
6350 // TODO: Support vectors.
6351 unsigned LeadingZeros = 0;
6352 if (!VT.isVector() && isa<ConstantSDNode>(N1)) {
6353 assert(!isOneConstant(N1) && "Unexpected divisor");
6354 LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
6355     // UnsignedDivisionByConstantInfo doesn't work correctly if the leading
6356     // zeros in the dividend exceed the leading zeros of the divisor.
6357 LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
6358 }
6359
6360 bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
6361 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
6362
6363 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
6364 if (C->isZero())
6365 return false;
6366     const APInt &Divisor = C->getAPIntValue();
6367
6368 SDValue PreShift, MagicFactor, NPQFactor, PostShift;
6369
6370     // The magic algorithm doesn't work for division by 1. We need to emit a
6371     // select at the end.
6372 if (Divisor.isOne()) {
6373 PreShift = PostShift = DAG.getUNDEF(ShSVT);
6374 MagicFactor = NPQFactor = DAG.getUNDEF(SVT);
6375 } else {
6376 UnsignedDivisionByConstantInfo magics =
6377 UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros);
6378
6379 MagicFactor = DAG.getConstant(magics.Magic, dl, SVT);
6380
6381 assert(magics.PreShift < Divisor.getBitWidth() &&
6382 "We shouldn't generate an undefined shift!");
6383 assert(magics.PostShift < Divisor.getBitWidth() &&
6384 "We shouldn't generate an undefined shift!");
6385 assert((!magics.IsAdd || magics.PreShift == 0) &&
6386 "Unexpected pre-shift");
6387 PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT);
6388 PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT);
6389 NPQFactor = DAG.getConstant(
6390 magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1)
6391 : APInt::getZero(EltBits),
6392 dl, SVT);
6393 UseNPQ |= magics.IsAdd;
6394 UsePreShift |= magics.PreShift != 0;
6395 UsePostShift |= magics.PostShift != 0;
6396 }
6397
6398 PreShifts.push_back(PreShift);
6399 MagicFactors.push_back(MagicFactor);
6400 NPQFactors.push_back(NPQFactor);
6401 PostShifts.push_back(PostShift);
6402 return true;
6403 };
6404
6405 // Collect the shifts/magic values from each element.
6406 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
6407 return SDValue();
6408
6409 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
6410 if (N1.getOpcode() == ISD::BUILD_VECTOR) {
6411 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
6412 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
6413 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
6414 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
6415 } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
6416 assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
6417 NPQFactors.size() == 1 && PostShifts.size() == 1 &&
6418 "Expected matchUnaryPredicate to return one for scalable vectors");
6419 PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
6420 MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
6421 NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
6422 PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
6423 } else {
6424 assert(isa<ConstantSDNode>(N1) && "Expected a constant");
6425 PreShift = PreShifts[0];
6426 MagicFactor = MagicFactors[0];
6427 PostShift = PostShifts[0];
6428 }
6429
6430 SDValue Q = N0;
6431 if (UsePreShift) {
6432 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
6433 Created.push_back(Q.getNode());
6434 }
6435
6436 // FIXME: We should support doing a MUL in a wider type.
6437 auto GetMULHU = [&](SDValue X, SDValue Y) {
6438 // If the type isn't legal, use a wider mul of the type calculated
6439 // earlier.
6440 if (!isTypeLegal(VT)) {
6441 X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
6442 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
6443 Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
6444 Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
6445 DAG.getShiftAmountConstant(EltBits, MulVT, dl));
6446 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6447 }
6448
6449 if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
6450 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
6451 if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
6452 SDValue LoHi =
6453 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
6454 return SDValue(LoHi.getNode(), 1);
6455 }
6456     // If a type twice as wide is legal, widen and use a mul plus a shift.
6457 unsigned Size = VT.getScalarSizeInBits();
6458 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
6459 if (VT.isVector())
6460 WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
6461 VT.getVectorElementCount());
6462 if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
6463 X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
6464 Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
6465 Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
6466 Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
6467 DAG.getShiftAmountConstant(EltBits, WideVT, dl));
6468 return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
6469 }
6470 return SDValue(); // No mulhu or equivalent
6471 };
6472
6473 // Multiply the numerator (operand 0) by the magic value.
6474 Q = GetMULHU(Q, MagicFactor);
6475 if (!Q)
6476 return SDValue();
6477
6478 Created.push_back(Q.getNode());
6479
6480 if (UseNPQ) {
6481 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
6482 Created.push_back(NPQ.getNode());
6483
6484 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
6485 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
6486 if (VT.isVector())
6487 NPQ = GetMULHU(NPQ, NPQFactor);
6488 else
6489 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
6490
6491 Created.push_back(NPQ.getNode());
6492
6493 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
6494 Created.push_back(Q.getNode());
6495 }
6496
6497 if (UsePostShift) {
6498 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
6499 Created.push_back(Q.getNode());
6500 }
6501
6502 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
6503
6504 SDValue One = DAG.getConstant(1, dl, VT);
6505 SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
6506 return DAG.getSelect(dl, VT, IsOne, N0, Q);
6507 }
6508
6509 /// If all values in Values that *don't* match the predicate are the same 'splat'
6510 /// value, then replace all values with that splat value.
6511 /// Else, if AlternativeReplacement was provided, then replace all values that
6512 /// do match predicate with AlternativeReplacement value.
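/// For example (a sketch): given Values = [C5, X, C5] with a predicate that
/// matches only X, the splat value C5 is found and the result is
/// [C5, C5, C5]. If some other non-matching value were also present, no
/// splat exists, and AlternativeReplacement (when provided) is used for the
/// matching lanes instead.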
6513 static void
6514 turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
6515 std::function<bool(SDValue)> Predicate,
6516 SDValue AlternativeReplacement = SDValue()) {
6517 SDValue Replacement;
6518 // Is there a value for which the Predicate does *NOT* match? What is it?
6519 auto SplatValue = llvm::find_if_not(Values, Predicate);
6520 if (SplatValue != Values.end()) {
6521 // Does Values consist only of SplatValue's and values matching Predicate?
6522 if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
6523 return Value == *SplatValue || Predicate(Value);
6524 })) // Then we shall replace values matching predicate with SplatValue.
6525 Replacement = *SplatValue;
6526 }
6527 if (!Replacement) {
6528 // Oops, we did not find the "baseline" splat value.
6529 if (!AlternativeReplacement)
6530 return; // Nothing to do.
6531 // Let's replace with provided value then.
6532 Replacement = AlternativeReplacement;
6533 }
6534 std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
6535 }
6536
6537 /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
6538 /// where the divisor is constant and the comparison target is zero,
6539 /// return a DAG expression that will generate the same comparison result
6540 /// using only multiplications, additions and shifts/rotations.
6541 /// Ref: "Hacker's Delight" 10-17.
6542 SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
6543 SDValue CompTargetNode,
6544 ISD::CondCode Cond,
6545 DAGCombinerInfo &DCI,
6546 const SDLoc &DL) const {
6547 SmallVector<SDNode *, 5> Built;
6548 if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6549 DCI, DL, Built)) {
6550 for (SDNode *N : Built)
6551 DCI.AddToWorklist(N);
6552 return Folded;
6553 }
6554
6555 return SDValue();
6556 }
6557
6558 SDValue
6559 TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
6560 SDValue CompTargetNode, ISD::CondCode Cond,
6561 DAGCombinerInfo &DCI, const SDLoc &DL,
6562 SmallVectorImpl<SDNode *> &Created) const {
6563 // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
6564 // - D must be constant, with D = D0 * 2^K where D0 is odd
6565 // - P is the multiplicative inverse of D0 modulo 2^W
6566 // - Q = floor(((2^W) - 1) / D)
6567 // where W is the width of the common type of N and D.
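  // A worked i32 example (a sketch, not target-specific): for
  // (seteq (urem N, 5), 0) we get D0 = 5, K = 0,
  // P = inv(5) mod 2^32 = 0xCCCCCCCD (since 5 * 0xCCCCCCCD == 1 (mod 2^32)),
  // and Q = floor((2^32 - 1) / 5) = 0x33333333, so the fold produces
  // (setule (mul N, 0xCCCCCCCD), 0x33333333), with no rotate since D is odd.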
6568 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6569 "Only applicable for (in)equality comparisons.");
6570
6571 SelectionDAG &DAG = DCI.DAG;
6572
6573 EVT VT = REMNode.getValueType();
6574 EVT SVT = VT.getScalarType();
6575 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6576 EVT ShSVT = ShVT.getScalarType();
6577
6578 // If MUL is unavailable, we cannot proceed in any case.
6579 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6580 return SDValue();
6581
6582 bool ComparingWithAllZeros = true;
6583 bool AllComparisonsWithNonZerosAreTautological = true;
6584 bool HadTautologicalLanes = false;
6585 bool AllLanesAreTautological = true;
6586 bool HadEvenDivisor = false;
6587 bool AllDivisorsArePowerOfTwo = true;
6588 bool HadTautologicalInvertedLanes = false;
6589 SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;
6590
6591 auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
6592 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6593 if (CDiv->isZero())
6594 return false;
6595
6596 const APInt &D = CDiv->getAPIntValue();
6597 const APInt &Cmp = CCmp->getAPIntValue();
6598
6599 ComparingWithAllZeros &= Cmp.isZero();
6600
6601     // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6602     // if C2 is not less than C1, the comparison is always false.
6603     // But we will only be able to produce the comparison that will give the
6604     // opposite tautological answer. So this lane would need to be fixed up.
6605 bool TautologicalInvertedLane = D.ule(Cmp);
6606 HadTautologicalInvertedLanes |= TautologicalInvertedLane;
6607
6608 // If all lanes are tautological (either all divisors are ones, or divisor
6609 // is not greater than the constant we are comparing with),
6610 // we will prefer to avoid the fold.
6611 bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
6612 HadTautologicalLanes |= TautologicalLane;
6613 AllLanesAreTautological &= TautologicalLane;
6614
6615     // If we are comparing with non-zero, we'll need to subtract said
6616     // comparison value from the LHS. But there is no point in doing that if
6617     // every lane where we are comparing with non-zero is tautological.
6618 if (!Cmp.isZero())
6619 AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
6620
6621 // Decompose D into D0 * 2^K
6622 unsigned K = D.countr_zero();
6623 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6624 APInt D0 = D.lshr(K);
6625
6626 // D is even if it has trailing zeros.
6627 HadEvenDivisor |= (K != 0);
6628 // D is a power-of-two if D0 is one.
6629 // If all divisors are power-of-two, we will prefer to avoid the fold.
6630 AllDivisorsArePowerOfTwo &= D0.isOne();
6631
6632 // P = inv(D0, 2^W)
6633 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6634 unsigned W = D.getBitWidth();
6635 APInt P = D0.zext(W + 1)
6636 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
6637 .trunc(W);
6638 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6639 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6640
6641 // Q = floor((2^W - 1) u/ D)
6642 // R = ((2^W - 1) u% D)
6643 APInt Q, R;
6644 APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
6645
6646 // If we are comparing with zero, then that comparison constant is okay,
6647 // else it may need to be one less than that.
6648 if (Cmp.ugt(R))
6649 Q -= 1;
6650
6651 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6652 "We are expecting that K is always less than all-ones for ShSVT");
6653
6654 // If the lane is tautological the result can be constant-folded.
6655 if (TautologicalLane) {
6656       // Set the P and K amounts to bogus values so we can try to splat them.
6657 P = 0;
6658 K = -1;
6659 // And ensure that comparison constant is tautological,
6660 // it will always compare true/false.
6661 Q = -1;
6662 }
6663
6664 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6665 KAmts.push_back(
6666 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6667 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6668 return true;
6669 };
6670
6671 SDValue N = REMNode.getOperand(0);
6672 SDValue D = REMNode.getOperand(1);
6673
6674 // Collect the values from each element.
6675 if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
6676 return SDValue();
6677
6678 // If all lanes are tautological, the result can be constant-folded.
6679 if (AllLanesAreTautological)
6680 return SDValue();
6681
6682   // If this is a urem by a power-of-two, avoid the fold since it is
6683   // best implemented as a bit test.
6684 if (AllDivisorsArePowerOfTwo)
6685 return SDValue();
6686
6687 SDValue PVal, KVal, QVal;
6688 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6689 if (HadTautologicalLanes) {
6690 // Try to turn PAmts into a splat, since we don't care about the values
6691       // that are currently '0'. If we can't, just keep the '0's.
6692 turnVectorIntoSplatVector(PAmts, isNullConstant);
6693 // Try to turn KAmts into a splat, since we don't care about the values
6694       // that are currently '-1'. If we can't, change them to '0's.
6695 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6696 DAG.getConstant(0, DL, ShSVT));
6697 }
6698
6699 PVal = DAG.getBuildVector(VT, DL, PAmts);
6700 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6701 QVal = DAG.getBuildVector(VT, DL, QAmts);
6702 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6703 assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
6704 "Expected matchBinaryPredicate to return one element for "
6705 "SPLAT_VECTORs");
6706 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6707 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6708 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6709 } else {
6710 PVal = PAmts[0];
6711 KVal = KAmts[0];
6712 QVal = QAmts[0];
6713 }
6714
6715 if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
6716 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
6717 return SDValue(); // FIXME: Could/should use `ISD::ADD`?
6718 assert(CompTargetNode.getValueType() == N.getValueType() &&
6719 "Expecting that the types on LHS and RHS of comparisons match.");
6720 N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
6721 }
6722
6723 // (mul N, P)
6724 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6725 Created.push_back(Op0.getNode());
6726
6727 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6728 // divisors as a performance improvement, since rotating by 0 is a no-op.
6729 if (HadEvenDivisor) {
6730 // We need ROTR to do this.
6731 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6732 return SDValue();
6733 // UREM: (rotr (mul N, P), K)
6734 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
6735 Created.push_back(Op0.getNode());
6736 }
6737
6738 // UREM: (setule/setugt (rotr (mul N, P), K), Q)
6739 SDValue NewCC =
6740 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
6741 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
6742 if (!HadTautologicalInvertedLanes)
6743 return NewCC;
6744
6745 // If any lanes previously compared always-false, the NewCC will give
6746 // always-true result for them, so we need to fixup those lanes.
6747 // Or the other way around for inequality predicate.
6748 assert(VT.isVector() && "Can/should only get here for vectors.");
6749 Created.push_back(NewCC.getNode());
6750
6751   // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
6752   // if C2 is not less than C1, the comparison is always false.
6753   // But we have produced the comparison that will give the
6754   // opposite tautological answer. So these lanes would need to be fixed up.
6755 SDValue TautologicalInvertedChannels =
6756 DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
6757 Created.push_back(TautologicalInvertedChannels.getNode());
6758
6759 // NOTE: we avoid letting illegal types through even if we're before legalize
6760 // ops – legalization has a hard time producing good code for this.
6761 if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
6762 // If we have a vector select, let's replace the comparison results in the
6763 // affected lanes with the correct tautological result.
6764 SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
6765 DL, SETCCVT, SETCCVT);
6766 return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
6767 Replacement, NewCC);
6768 }
6769
6770 // Else, we can just invert the comparison result in the appropriate lanes.
6771 //
6772 // NOTE: see the note above VSELECT above.
6773 if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
6774 return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
6775 TautologicalInvertedChannels);
6776
6777 return SDValue(); // Don't know how to lower.
6778 }
6779
6780 /// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
6781 /// where the divisor is constant and the comparison target is zero,
6782 /// return a DAG expression that will generate the same comparison result
6783 /// using only multiplications, additions and shifts/rotations.
6784 /// Ref: "Hacker's Delight" 10-17.
6785 SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
6786 SDValue CompTargetNode,
6787 ISD::CondCode Cond,
6788 DAGCombinerInfo &DCI,
6789 const SDLoc &DL) const {
6790 SmallVector<SDNode *, 7> Built;
6791 if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
6792 DCI, DL, Built)) {
6793 assert(Built.size() <= 7 && "Max size prediction failed.");
6794 for (SDNode *N : Built)
6795 DCI.AddToWorklist(N);
6796 return Folded;
6797 }
6798
6799 return SDValue();
6800 }
6801
6802 SDValue
6803 TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
6804 SDValue CompTargetNode, ISD::CondCode Cond,
6805 DAGCombinerInfo &DCI, const SDLoc &DL,
6806 SmallVectorImpl<SDNode *> &Created) const {
6807 // Fold:
6808 // (seteq/ne (srem N, D), 0)
6809 // To:
6810 // (setule/ugt (rotr (add (mul N, P), A), K), Q)
6811 //
6812 // - D must be constant, with D = D0 * 2^K where D0 is odd
6813 // - P is the multiplicative inverse of D0 modulo 2^W
6814 // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^k)))
6815 // - Q = floor((2 * A) / (2^K))
6816 // where W is the width of the common type of N and D.
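  // A worked i32 example (a sketch): for (seteq (srem N, 6), 0) we get
  // D0 = 3, K = 1, P = inv(3) mod 2^32 = 0xAAAAAAAB,
  // A = floor((2^31 - 1) / 3) & ~1 = 0x2AAAAAAA, and Q = (2 * A) >> 1 =
  // 0x2AAAAAAA, so the fold produces
  // (setule (rotr (add (mul N, 0xAAAAAAAB), 0x2AAAAAAA), 1), 0x2AAAAAAA).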
6817 assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
6818 "Only applicable for (in)equality comparisons.");
6819
6820 SelectionDAG &DAG = DCI.DAG;
6821
6822 EVT VT = REMNode.getValueType();
6823 EVT SVT = VT.getScalarType();
6824 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
6825 EVT ShSVT = ShVT.getScalarType();
6826
6827   // If we are after ops legalization, and MUL is unavailable, we cannot
6828   // proceed.
6829 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
6830 return SDValue();
6831
6832 // TODO: Could support comparing with non-zero too.
6833 ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
6834 if (!CompTarget || !CompTarget->isZero())
6835 return SDValue();
6836
6837 bool HadIntMinDivisor = false;
6838 bool HadOneDivisor = false;
6839 bool AllDivisorsAreOnes = true;
6840 bool HadEvenDivisor = false;
6841 bool NeedToApplyOffset = false;
6842 bool AllDivisorsArePowerOfTwo = true;
6843 SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
6844
6845 auto BuildSREMPattern = [&](ConstantSDNode *C) {
6846 // Division by 0 is UB. Leave it to be constant-folded elsewhere.
6847 if (C->isZero())
6848 return false;
6849
6850 // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
6851
6852 // WARNING: this fold is only valid for positive divisors!
6853 APInt D = C->getAPIntValue();
6854 if (D.isNegative())
6855 D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
6856
6857 HadIntMinDivisor |= D.isMinSignedValue();
6858
6859 // If all divisors are ones, we will prefer to avoid the fold.
6860 HadOneDivisor |= D.isOne();
6861 AllDivisorsAreOnes &= D.isOne();
6862
6863 // Decompose D into D0 * 2^K
6864 unsigned K = D.countr_zero();
6865 assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
6866 APInt D0 = D.lshr(K);
6867
6868 if (!D.isMinSignedValue()) {
6869 // D is even if it has trailing zeros; unless it's INT_MIN, in which case
6870 // we don't care about this lane in this fold, we'll special-handle it.
6871 HadEvenDivisor |= (K != 0);
6872 }
6873
6874 // D is a power-of-two if D0 is one. This includes INT_MIN.
6875 // If all divisors are power-of-two, we will prefer to avoid the fold.
6876 AllDivisorsArePowerOfTwo &= D0.isOne();
6877
6878 // P = inv(D0, 2^W)
6879 // 2^W requires W + 1 bits, so we have to extend and then truncate.
6880 unsigned W = D.getBitWidth();
6881 APInt P = D0.zext(W + 1)
6882 .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
6883 .trunc(W);
6884 assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
6885 assert((D0 * P).isOne() && "Multiplicative inverse basic check failed.");
6886
6887 // A = floor((2^(W - 1) - 1) / D0) & -2^K
6888 APInt A = APInt::getSignedMaxValue(W).udiv(D0);
6889 A.clearLowBits(K);
6890
6891 if (!D.isMinSignedValue()) {
6892       // If the divisor is INT_MIN, then we don't care about this lane in this
6893       // fold; we'll special-handle it.
6894 NeedToApplyOffset |= A != 0;
6895 }
6896
6897 // Q = floor((2 * A) / (2^K))
6898 APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
6899
6900 assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
6901 "We are expecting that A is always less than all-ones for SVT");
6902 assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
6903 "We are expecting that K is always less than all-ones for ShSVT");
6904
6905     // If the divisor is 1 the result can be constant-folded. Likewise, we
6906     // don't care about INT_MIN lanes; those can be set to undef if appropriate.
6907 if (D.isOne()) {
6908       // Set P, A and K to bogus values so we can try to splat them.
6909 P = 0;
6910 A = -1;
6911 K = -1;
6912
6913 // x ?% 1 == 0 <--> true <--> x u<= -1
6914 Q = -1;
6915 }
6916
6917 PAmts.push_back(DAG.getConstant(P, DL, SVT));
6918 AAmts.push_back(DAG.getConstant(A, DL, SVT));
6919 KAmts.push_back(
6920 DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
6921 QAmts.push_back(DAG.getConstant(Q, DL, SVT));
6922 return true;
6923 };
6924
6925 SDValue N = REMNode.getOperand(0);
6926 SDValue D = REMNode.getOperand(1);
6927
6928 // Collect the values from each element.
6929 if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
6930 return SDValue();
6931
6932   // If this is a srem by one, avoid the fold since it can be constant-folded.
6933 if (AllDivisorsAreOnes)
6934 return SDValue();
6935
6936   // If this is a srem by a power-of-two (including INT_MIN), avoid the fold
6937   // since it is best implemented as a bit test.
6938 if (AllDivisorsArePowerOfTwo)
6939 return SDValue();
6940
6941 SDValue PVal, AVal, KVal, QVal;
6942 if (D.getOpcode() == ISD::BUILD_VECTOR) {
6943 if (HadOneDivisor) {
6944 // Try to turn PAmts into a splat, since we don't care about the values
6945       // that are currently '0'. If we can't, just keep the '0's.
6946 turnVectorIntoSplatVector(PAmts, isNullConstant);
6947 // Try to turn AAmts into a splat, since we don't care about the
6948       // values that are currently '-1'. If we can't, change them to '0's.
6949 turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
6950 DAG.getConstant(0, DL, SVT));
6951 // Try to turn KAmts into a splat, since we don't care about the values
6952       // that are currently '-1'. If we can't, change them to '0's.
6953 turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
6954 DAG.getConstant(0, DL, ShSVT));
6955 }
6956
6957 PVal = DAG.getBuildVector(VT, DL, PAmts);
6958 AVal = DAG.getBuildVector(VT, DL, AAmts);
6959 KVal = DAG.getBuildVector(ShVT, DL, KAmts);
6960 QVal = DAG.getBuildVector(VT, DL, QAmts);
6961 } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
6962 assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
6963 QAmts.size() == 1 &&
6964 "Expected matchUnaryPredicate to return one element for scalable "
6965 "vectors");
6966 PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
6967 AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
6968 KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
6969 QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
6970 } else {
6971 assert(isa<ConstantSDNode>(D) && "Expected a constant");
6972 PVal = PAmts[0];
6973 AVal = AAmts[0];
6974 KVal = KAmts[0];
6975 QVal = QAmts[0];
6976 }
6977
6978 // (mul N, P)
6979 SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
6980 Created.push_back(Op0.getNode());
6981
6982 if (NeedToApplyOffset) {
6983 // We need ADD to do this.
6984 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
6985 return SDValue();
6986
6987 // (add (mul N, P), A)
6988 Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
6989 Created.push_back(Op0.getNode());
6990 }
6991
6992 // Rotate right only if any divisor was even. We avoid rotates for all-odd
6993 // divisors as a performance improvement, since rotating by 0 is a no-op.
6994 if (HadEvenDivisor) {
6995 // We need ROTR to do this.
6996 if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
6997 return SDValue();
6998 // SREM: (rotr (add (mul N, P), A), K)
6999 Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
7000 Created.push_back(Op0.getNode());
7001 }
7002
7003 // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
7004 SDValue Fold =
7005 DAG.getSetCC(DL, SETCCVT, Op0, QVal,
7006 ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
7007
7008 // If we didn't have lanes with INT_MIN divisor, then we're done.
7009 if (!HadIntMinDivisor)
7010 return Fold;
7011
7012   // That fold is only valid for positive divisors, which effectively means
7013   // it is invalid for INT_MIN divisors. So if we have such a lane,
7014 // we must fix-up results for said lanes.
7015 assert(VT.isVector() && "Can/should only get here for vectors.");
7016
7017 // NOTE: we avoid letting illegal types through even if we're before legalize
7018 // ops – legalization has a hard time producing good code for the code that
7019 // follows.
7020 if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
7021 !isOperationLegalOrCustom(ISD::AND, VT) ||
7022 !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
7023 !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
7024 return SDValue();
7025
7026 Created.push_back(Fold.getNode());
7027
7028 SDValue IntMin = DAG.getConstant(
7029 APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
7030 SDValue IntMax = DAG.getConstant(
7031 APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
7032 SDValue Zero =
7033 DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
7034
7035 // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
7036 SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
7037 Created.push_back(DivisorIsIntMin.getNode());
7038
7039 // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
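  // This holds because the only W-bit values that are signed multiples of
  // INT_MIN are 0 and INT_MIN itself, and those are exactly the values whose
  // low W-1 bits (i.e. N & INT_MAX) are all zero.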
7040 SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
7041 Created.push_back(Masked.getNode());
7042 SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
7043 Created.push_back(MaskedIsZero.getNode());
7044
7045 // To produce final result we need to blend 2 vectors: 'SetCC' and
7046 // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
7047 // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
7048 // constant-folded, select can get lowered to a shuffle with constant mask.
7049 SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
7050 MaskedIsZero, Fold);
7051
7052 return Blended;
7053 }
7054
7055 bool TargetLowering::
7056 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
7057 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
7058 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
7059 "be a constant integer");
7060 return true;
7061 }
7062
7063 return false;
7064 }
7065
7066 SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
7067 const DenormalMode &Mode) const {
7068 SDLoc DL(Op);
7069 EVT VT = Op.getValueType();
7070 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7071 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
7072
7073 // This is specifically a check for the handling of denormal inputs, not the
7074 // result.
7075 if (Mode.Input == DenormalMode::PreserveSign ||
7076 Mode.Input == DenormalMode::PositiveZero) {
7077 // Test = X == 0.0
7078 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
7079 }
7080
7081   // Test with denormal inputs to avoid a wrong estimate.
7082 //
7083 // Test = fabs(X) < SmallestNormal
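  // For IEEE f32, for example, SmallestNormal is 0x1.0p-126; inputs with a
  // smaller magnitude are denormal (or zero) and make this test true.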
7084 const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
7085 APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
7086 SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
7087 SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
7088 return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
7089 }
7090
7091 SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
7092 bool LegalOps, bool OptForSize,
7093 NegatibleCost &Cost,
7094 unsigned Depth) const {
7095 // fneg is removable even if it has multiple uses.
7096 if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
7097 Cost = NegatibleCost::Cheaper;
7098 return Op.getOperand(0);
7099 }
7100
7101 // Don't recurse exponentially.
7102 if (Depth > SelectionDAG::MaxRecursionDepth)
7103 return SDValue();
7104
7105 // Pre-increment recursion depth for use in recursive calls.
7106 ++Depth;
7107 const SDNodeFlags Flags = Op->getFlags();
7108 const TargetOptions &Options = DAG.getTarget().Options;
7109 EVT VT = Op.getValueType();
7110 unsigned Opcode = Op.getOpcode();
7111
7112 // Don't allow anything with multiple uses unless we know it is free.
7113 if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
7114 bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
7115 isFPExtFree(VT, Op.getOperand(0).getValueType());
7116 if (!IsFreeExtend)
7117 return SDValue();
7118 }
7119
7120 auto RemoveDeadNode = [&](SDValue N) {
7121 if (N && N.getNode()->use_empty())
7122 DAG.RemoveDeadNode(N.getNode());
7123 };
7124
7125 SDLoc DL(Op);
7126
7127 // Because getNegatedExpression can delete nodes we need a handle to keep
7128 // temporary nodes alive in case the recursion manages to create an identical
7129 // node.
7130 std::list<HandleSDNode> Handles;
7131
7132 switch (Opcode) {
7133 case ISD::ConstantFP: {
7134 // Don't invert constant FP values after legalization unless the target says
7135 // the negated constant is legal.
7136 bool IsOpLegal =
7137 isOperationLegal(ISD::ConstantFP, VT) ||
7138 isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
7139 OptForSize);
7140
7141 if (LegalOps && !IsOpLegal)
7142 break;
7143
7144 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
7145 V.changeSign();
7146 SDValue CFP = DAG.getConstantFP(V, DL, VT);
7147
7148     // If the negated floating-point constant is already in use, it is free to
7149     // negate it even if it has multiple uses.
7150 if (!Op.hasOneUse() && CFP.use_empty())
7151 break;
7152 Cost = NegatibleCost::Neutral;
7153 return CFP;
7154 }
7155 case ISD::BUILD_VECTOR: {
7156 // Only permit BUILD_VECTOR of constants.
7157 if (llvm::any_of(Op->op_values(), [&](SDValue N) {
7158 return !N.isUndef() && !isa<ConstantFPSDNode>(N);
7159 }))
7160 break;
7161
7162 bool IsOpLegal =
7163 (isOperationLegal(ISD::ConstantFP, VT) &&
7164 isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
7165 llvm::all_of(Op->op_values(), [&](SDValue N) {
7166 return N.isUndef() ||
7167 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
7168 OptForSize);
7169 });
7170
7171 if (LegalOps && !IsOpLegal)
7172 break;
7173
7174 SmallVector<SDValue, 4> Ops;
7175 for (SDValue C : Op->op_values()) {
7176 if (C.isUndef()) {
7177 Ops.push_back(C);
7178 continue;
7179 }
7180 APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
7181 V.changeSign();
7182 Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
7183 }
7184 Cost = NegatibleCost::Neutral;
7185 return DAG.getBuildVector(VT, DL, Ops);
7186 }
7187 case ISD::FADD: {
7188 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7189 break;
7190
7191 // After operation legalization, it might not be legal to create new FSUBs.
7192 if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
7193 break;
7194 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7195
7196 // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
7197 NegatibleCost CostX = NegatibleCost::Expensive;
7198 SDValue NegX =
7199 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7200 // Prevent this node from being deleted by the next call.
7201 if (NegX)
7202 Handles.emplace_back(NegX);
7203
7204 // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
7205 NegatibleCost CostY = NegatibleCost::Expensive;
7206 SDValue NegY =
7207 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7208
7209 // We're done with the handles.
7210 Handles.clear();
7211
7212     // Negate X if its cost is less than or equal to the cost of negating Y.
7213 if (NegX && (CostX <= CostY)) {
7214 Cost = CostX;
7215 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
7216 if (NegY != N)
7217 RemoveDeadNode(NegY);
7218 return N;
7219 }
7220
7221     // Otherwise, negate Y if it is not expensive.
7222 if (NegY) {
7223 Cost = CostY;
7224 SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
7225 if (NegX != N)
7226 RemoveDeadNode(NegX);
7227 return N;
7228 }
7229 break;
7230 }
7231 case ISD::FSUB: {
7232 // We can't turn -(A-B) into B-A when we honor signed zeros.
7233 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7234 break;
7235
7236 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7237 // fold (fneg (fsub 0, Y)) -> Y
7238 if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
7239 if (C->isZero()) {
7240 Cost = NegatibleCost::Cheaper;
7241 return Y;
7242 }
7243
7244 // fold (fneg (fsub X, Y)) -> (fsub Y, X)
7245 Cost = NegatibleCost::Neutral;
7246 return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
7247 }
7248 case ISD::FMUL:
7249 case ISD::FDIV: {
7250 SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
7251
7252 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
7253 NegatibleCost CostX = NegatibleCost::Expensive;
7254 SDValue NegX =
7255 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7256 // Prevent this node from being deleted by the next call.
7257 if (NegX)
7258 Handles.emplace_back(NegX);
7259
7260 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
7261 NegatibleCost CostY = NegatibleCost::Expensive;
7262 SDValue NegY =
7263 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7264
7265 // We're done with the handles.
7266 Handles.clear();
7267
7268     // Negate X if its cost is less than or equal to the cost of negating Y.
7269 if (NegX && (CostX <= CostY)) {
7270 Cost = CostX;
7271 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
7272 if (NegY != N)
7273 RemoveDeadNode(NegY);
7274 return N;
7275 }
7276
7277 // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
7278 if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
7279 if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
7280 break;
7281
7282     // Otherwise, negate Y if it is not expensive.
7283 if (NegY) {
7284 Cost = CostY;
7285 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
7286 if (NegX != N)
7287 RemoveDeadNode(NegX);
7288 return N;
7289 }
7290 break;
7291 }
7292 case ISD::FMA:
7293 case ISD::FMAD: {
7294 if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
7295 break;
7296
7297 SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
7298 NegatibleCost CostZ = NegatibleCost::Expensive;
7299 SDValue NegZ =
7300 getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
7301     // Give up if we fail to negate Z.
7302 if (!NegZ)
7303 break;
7304
7305 // Prevent this node from being deleted by the next two calls.
7306 Handles.emplace_back(NegZ);
7307
7308 // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
7309 NegatibleCost CostX = NegatibleCost::Expensive;
7310 SDValue NegX =
7311 getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
7312 // Prevent this node from being deleted by the next call.
7313 if (NegX)
7314 Handles.emplace_back(NegX);
7315
7316 // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
7317 NegatibleCost CostY = NegatibleCost::Expensive;
7318 SDValue NegY =
7319 getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);
7320
7321 // We're done with the handles.
7322 Handles.clear();
7323
7324     // Negate X if its cost is less than or equal to the cost of negating Y.
7325 if (NegX && (CostX <= CostY)) {
7326 Cost = std::min(CostX, CostZ);
7327 SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
7328 if (NegY != N)
7329 RemoveDeadNode(NegY);
7330 return N;
7331 }
7332
7333     // Otherwise, negate Y if it is not expensive.
7334 if (NegY) {
7335 Cost = std::min(CostY, CostZ);
7336 SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
7337 if (NegX != N)
7338 RemoveDeadNode(NegX);
7339 return N;
7340 }
7341 break;
7342 }
7343
7344 case ISD::FP_EXTEND:
7345 case ISD::FSIN:
7346 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7347 OptForSize, Cost, Depth))
7348 return DAG.getNode(Opcode, DL, VT, NegV);
7349 break;
7350 case ISD::FP_ROUND:
7351 if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
7352 OptForSize, Cost, Depth))
7353 return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
7354 break;
7355 case ISD::SELECT:
7356 case ISD::VSELECT: {
7357 // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS))
7358 // iff at least one cost is cheaper and the other is neutral/cheaper
7359 SDValue LHS = Op.getOperand(1);
7360 NegatibleCost CostLHS = NegatibleCost::Expensive;
7361 SDValue NegLHS =
7362 getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth);
7363 if (!NegLHS || CostLHS > NegatibleCost::Neutral) {
7364 RemoveDeadNode(NegLHS);
7365 break;
7366 }
7367
7368 // Prevent this node from being deleted by the next call.
7369 Handles.emplace_back(NegLHS);
7370
7371 SDValue RHS = Op.getOperand(2);
7372 NegatibleCost CostRHS = NegatibleCost::Expensive;
7373 SDValue NegRHS =
7374 getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth);
7375
7376 // We're done with the handles.
7377 Handles.clear();
7378
7379 if (!NegRHS || CostRHS > NegatibleCost::Neutral ||
7380 (CostLHS != NegatibleCost::Cheaper &&
7381 CostRHS != NegatibleCost::Cheaper)) {
7382 RemoveDeadNode(NegLHS);
7383 RemoveDeadNode(NegRHS);
7384 break;
7385 }
7386
7387 Cost = std::min(CostLHS, CostRHS);
7388 return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS);
7389 }
7390 }
7391
7392 return SDValue();
7393 }
7394
7395 //===----------------------------------------------------------------------===//
7396 // Legalization Utilities
7397 //===----------------------------------------------------------------------===//
7398
7399 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
7400 SDValue LHS, SDValue RHS,
7401 SmallVectorImpl<SDValue> &Result,
7402 EVT HiLoVT, SelectionDAG &DAG,
7403 MulExpansionKind Kind, SDValue LL,
7404 SDValue LH, SDValue RL, SDValue RH) const {
7405 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
7406 Opcode == ISD::SMUL_LOHI);
7407
7408 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
7409 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
7410 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
7411 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
7412 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7413 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
7414 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
7415 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
7416
7417 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
7418 return false;
7419
7420 unsigned OuterBitSize = VT.getScalarSizeInBits();
7421 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
7422
7423 // LL, LH, RL, and RH must be either all NULL or all set to a value.
7424 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
7425 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
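  // For exposition (added, not in the original): with B = 2^InnerBitSize,
  // LHS = LH*B + LL and RHS = RH*B + RL, so the product decomposes
  // schoolbook-style as
  //   LHS*RHS = LL*RL + (LL*RH + LH*RL)*B + LH*RH*B^2,
  // which is what the partial products below compute.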
7426
7427 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
7428 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
7429 bool Signed) -> bool {
7430 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
7431 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
7432 Hi = SDValue(Lo.getNode(), 1);
7433 return true;
7434 }
7435 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
7436 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
7437 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
7438 return true;
7439 }
7440 return false;
7441 };
7442
7443 SDValue Lo, Hi;
7444
7445 if (!LL.getNode() && !RL.getNode() &&
7446 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7447 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
7448 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
7449 }
7450
7451 if (!LL.getNode())
7452 return false;
7453
7454 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
7455 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
7456 DAG.MaskedValueIsZero(RHS, HighMask)) {
7457 // The inputs are both zero-extended.
7458 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
7459 Result.push_back(Lo);
7460 Result.push_back(Hi);
7461 if (Opcode != ISD::MUL) {
7462 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7463 Result.push_back(Zero);
7464 Result.push_back(Zero);
7465 }
7466 return true;
7467 }
7468 }
7469
7470 if (!VT.isVector() && Opcode == ISD::MUL &&
7471 DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize &&
7472 DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) {
7473 // The input values are both sign-extended.
7474 // TODO non-MUL case?
7475 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
7476 Result.push_back(Lo);
7477 Result.push_back(Hi);
7478 return true;
7479 }
7480 }
7481
7482 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
7483 SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl);
7484
7485 if (!LH.getNode() && !RH.getNode() &&
7486 isOperationLegalOrCustom(ISD::SRL, VT) &&
7487 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
7488 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
7489 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
7490 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
7491 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
7492 }
7493
7494 if (!LH.getNode())
7495 return false;
7496
7497 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
7498 return false;
7499
7500 Result.push_back(Lo);
7501
7502 if (Opcode == ISD::MUL) {
7503 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
7504 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
7505 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
7506 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
7507 Result.push_back(Hi);
7508 return true;
7509 }
7510
7511 // Compute the full width result.
7512 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
7513 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
7514 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7515 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
7516 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
7517 };
7518
7519 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
7520 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
7521 return false;
7522
7523 // This is effectively the add part of a multiply-add of half-sized operands,
7524 // so it cannot overflow.
7525 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7526
7527 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
7528 return false;
7529
7530 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
7531 EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
7532
7533 bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
7534 isOperationLegalOrCustom(ISD::ADDE, VT));
7535 if (UseGlue)
7536 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
7537 Merge(Lo, Hi));
7538 else
7539 Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
7540 Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
7541
7542 SDValue Carry = Next.getValue(1);
7543 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7544 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7545
7546 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
7547 return false;
7548
7549 if (UseGlue)
7550 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
7551 Carry);
7552 else
7553 Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
7554 Zero, Carry);
7555
7556 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
7557
7558 if (Opcode == ISD::SMUL_LOHI) {
7559 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7560 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
7561 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
7562
7563 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
7564 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
7565 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
7566 }
7567
7568 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7569 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
7570 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
7571 return true;
7572 }
7573
7574 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
7575 SelectionDAG &DAG, MulExpansionKind Kind,
7576 SDValue LL, SDValue LH, SDValue RL,
7577 SDValue RH) const {
7578 SmallVector<SDValue, 2> Result;
7579 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
7580 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
7581 DAG, Kind, LL, LH, RL, RH);
7582 if (Ok) {
7583 assert(Result.size() == 2);
7584 Lo = Result[0];
7585 Hi = Result[1];
7586 }
7587 return Ok;
7588 }
7589
7590 // Optimize unsigned division or remainder by constants for types twice as large
7591 // as a legal VT.
7592 //
7593 // If (1 << (BitWidth / 2)) % Constant == 1, then the remainder
7594 // can be computed as:
7596 // Sum += __builtin_uadd_overflow(Lo, High, &Sum);
7597 // Remainder = Sum % Constant
7598 // This is based on "Remainder by Summing Digits" from Hacker's Delight.
7599 //
7600 // For division, we can compute the remainder using the algorithm described
7601 // above, subtract it from the dividend to get an exact multiple of Constant.
7602 // Then multiply that exact multiple by the multiplicative inverse modulo
7603 // (1 << BitWidth) to get the quotient.
7604
7605 // If Constant is even, we can shift right the dividend and the divisor by the
7606 // number of trailing zeros in Constant before applying the remainder algorithm.
7607 // If we're after the quotient, we can subtract this value from the shifted
7608 // dividend and multiply by the multiplicative inverse of the shifted divisor.
7609 // If we want the remainder, we shift the value left by the number of trailing
7610 // zeros and add the bits that were shifted out of the dividend.
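// Worked example (added for exposition): for BitWidth=32, HBitWidth=16 and
// Constant=3, (1 << 16) % 3 == 1, so N % 3 == (Lo + Hi + carry) % 3. E.g.
// N = 0x12345678: Lo = 0x5678, Hi = 0x1234, Sum = 0x68AC, and
// 0x68AC % 3 == 0 == 0x12345678 % 3. For an even constant such as 6, shift
// right by the one trailing zero first and proceed with Constant=3 as above.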
7611 bool TargetLowering::expandDIVREMByConstant(SDNode *N,
7612 SmallVectorImpl<SDValue> &Result,
7613 EVT HiLoVT, SelectionDAG &DAG,
7614 SDValue LL, SDValue LH) const {
7615 unsigned Opcode = N->getOpcode();
7616 EVT VT = N->getValueType(0);
7617
7618 // TODO: Support signed division/remainder.
7619 if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM)
7620 return false;
7621 assert(
7622 (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) &&
7623 "Unexpected opcode");
7624
7625 auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
7626 if (!CN)
7627 return false;
7628
7629 APInt Divisor = CN->getAPIntValue();
7630 unsigned BitWidth = Divisor.getBitWidth();
7631 unsigned HBitWidth = BitWidth / 2;
7632 assert(VT.getScalarSizeInBits() == BitWidth &&
7633 HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs");
7634
7635   // Divisor needs to be less than (1 << HBitWidth).
7636 APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth);
7637 if (Divisor.uge(HalfMaxPlus1))
7638 return false;
7639
7640   // We depend on the UREM-by-constant optimization in DAGCombiner, which
7641   // requires a high multiply.
7642 if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) &&
7643 !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT))
7644 return false;
7645
7646 // Don't expand if optimizing for size.
7647 if (DAG.shouldOptForSize())
7648 return false;
7649
7650 // Early out for 0 or 1 divisors.
7651 if (Divisor.ule(1))
7652 return false;
7653
7654 // If the divisor is even, shift it until it becomes odd.
7655 unsigned TrailingZeros = 0;
7656 if (!Divisor[0]) {
7657 TrailingZeros = Divisor.countr_zero();
7658 Divisor.lshrInPlace(TrailingZeros);
7659 }
7660
7661 SDLoc dl(N);
7662 SDValue Sum;
7663 SDValue PartialRem;
7664
7665 // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and
7666 // then add in the carry.
7667 // TODO: If we can't split it in half, we might be able to split into 3 or
7668 // more pieces using a smaller bit width.
7669 if (HalfMaxPlus1.urem(Divisor).isOne()) {
7670 assert(!LL == !LH && "Expected both input halves or no input halves!");
7671 if (!LL)
7672 std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
7673
7674 // Shift the input by the number of TrailingZeros in the divisor. The
7675 // shifted out bits will be added to the remainder later.
7676 if (TrailingZeros) {
7677 // Save the shifted off bits if we need the remainder.
7678 if (Opcode != ISD::UDIV) {
7679 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7680 PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL,
7681 DAG.getConstant(Mask, dl, HiLoVT));
7682 }
7683
7684 LL = DAG.getNode(
7685 ISD::OR, dl, HiLoVT,
7686 DAG.getNode(ISD::SRL, dl, HiLoVT, LL,
7687 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)),
7688 DAG.getNode(ISD::SHL, dl, HiLoVT, LH,
7689 DAG.getShiftAmountConstant(HBitWidth - TrailingZeros,
7690 HiLoVT, dl)));
7691 LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH,
7692 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7693 }
7694
7695 // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
7696 EVT SetCCType =
7697 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
7698 if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
7699 SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
7700 Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
7701 Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
7702 DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
7703 } else {
7704 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
7705 SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT);
7706 // If the boolean for the target is 0 or 1, we can add the setcc result
7707 // directly.
7708 if (getBooleanContents(HiLoVT) ==
7709 TargetLoweringBase::ZeroOrOneBooleanContent)
7710 Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT);
7711 else
7712 Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT),
7713 DAG.getConstant(0, dl, HiLoVT));
7714 Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry);
7715 }
7716 }
7717
7718 // If we didn't find a sum, we can't do the expansion.
7719 if (!Sum)
7720 return false;
7721
7722   // Perform a HiLoVT urem on the Sum using the truncated divisor.
7723 SDValue RemL =
7724 DAG.getNode(ISD::UREM, dl, HiLoVT, Sum,
7725 DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT));
7726 SDValue RemH = DAG.getConstant(0, dl, HiLoVT);
7727
7728 if (Opcode != ISD::UREM) {
7729 // Subtract the remainder from the shifted dividend.
7730 SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH);
7731 SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH);
7732
7733 Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem);
7734
7735 // Multiply by the multiplicative inverse of the divisor modulo
7736 // (1 << BitWidth).
7737 APInt Mod = APInt::getSignedMinValue(BitWidth + 1);
7738 APInt MulFactor = Divisor.zext(BitWidth + 1);
7739 MulFactor = MulFactor.multiplicativeInverse(Mod);
7740 MulFactor = MulFactor.trunc(BitWidth);
7741
7742 SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend,
7743 DAG.getConstant(MulFactor, dl, VT));
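    // Exposition-only example (not in the original): for Divisor = 3 and
    // BitWidth = 32, the inverse is 0xAAAAAAAB, since
    // 3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32); an exact multiple M of 3
    // then satisfies (M * 0xAAAAAAAB) mod 2^32 == M / 3.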
7744
7745 // Split the quotient into low and high parts.
7746 SDValue QuotL, QuotH;
7747 std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
7748 Result.push_back(QuotL);
7749 Result.push_back(QuotH);
7750 }
7751
7752 if (Opcode != ISD::UDIV) {
7753 // If we shifted the input, shift the remainder left and add the bits we
7754 // shifted off the input.
7755 if (TrailingZeros) {
7756 APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros);
7757 RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL,
7758 DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
7759 RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem);
7760 }
7761 Result.push_back(RemL);
7762 Result.push_back(DAG.getConstant(0, dl, HiLoVT));
7763 }
7764
7765 return true;
7766 }
7767
7768 // Check that (every element of) Z is undef or not an exact multiple of BW.
7769 static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) {
7770 return ISD::matchUnaryPredicate(
7771 Z,
7772 [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; },
7773 true);
7774 }
7775
7776 static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) {
7777 EVT VT = Node->getValueType(0);
7778 SDValue ShX, ShY;
7779 SDValue ShAmt, InvShAmt;
7780 SDValue X = Node->getOperand(0);
7781 SDValue Y = Node->getOperand(1);
7782 SDValue Z = Node->getOperand(2);
7783 SDValue Mask = Node->getOperand(3);
7784 SDValue VL = Node->getOperand(4);
7785
7786 unsigned BW = VT.getScalarSizeInBits();
7787 bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL;
7788 SDLoc DL(SDValue(Node, 0));
7789
7790 EVT ShVT = Z.getValueType();
7791 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7792 // fshl: X << C | Y >> (BW - C)
7793 // fshr: X << (BW - C) | Y >> C
7794 // where C = Z % BW is not zero
7795 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7796 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7797 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL);
7798 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask,
7799 VL);
7800 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt, Mask,
7801 VL);
7802 } else {
7803 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7804 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7805 SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT);
7806 if (isPowerOf2_32(BW)) {
7807 // Z % BW -> Z & (BW - 1)
7808 ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL);
7809 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7810 SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z,
7811 DAG.getAllOnesConstant(DL, ShVT), Mask, VL);
7812 InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL);
7813 } else {
7814 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7815 ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL);
7816 InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL);
7817 }
7818
7819 SDValue One = DAG.getConstant(1, DL, ShVT);
7820 if (IsFSHL) {
7821 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL);
7822 SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL);
7823 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL);
7824 } else {
7825 SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL);
7826 ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL);
7827 ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL);
7828 }
7829 }
7830 return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL);
7831 }
7832
7833 SDValue TargetLowering::expandFunnelShift(SDNode *Node,
7834 SelectionDAG &DAG) const {
7835 if (Node->isVPOpcode())
7836 return expandVPFunnelShift(Node, DAG);
7837
7838 EVT VT = Node->getValueType(0);
7839
7840 if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7841 !isOperationLegalOrCustom(ISD::SRL, VT) ||
7842 !isOperationLegalOrCustom(ISD::SUB, VT) ||
7843 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
7844 return SDValue();
7845
7846 SDValue X = Node->getOperand(0);
7847 SDValue Y = Node->getOperand(1);
7848 SDValue Z = Node->getOperand(2);
7849
7850 unsigned BW = VT.getScalarSizeInBits();
7851 bool IsFSHL = Node->getOpcode() == ISD::FSHL;
7852 SDLoc DL(SDValue(Node, 0));
7853
7854 EVT ShVT = Z.getValueType();
7855
7856 // If a funnel shift in the other direction is more supported, use it.
7857 unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
7858 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7859 isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
7860 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7861 // fshl X, Y, Z -> fshr X, Y, -Z
7862 // fshr X, Y, Z -> fshl X, Y, -Z
7863 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7864 Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
7865 } else {
7866 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7867 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7868 SDValue One = DAG.getConstant(1, DL, ShVT);
7869 if (IsFSHL) {
7870 Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7871 X = DAG.getNode(ISD::SRL, DL, VT, X, One);
7872 } else {
7873 X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
7874 Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
7875 }
7876 Z = DAG.getNOT(DL, Z, ShVT);
7877 }
7878 return DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
7879 }
7880
7881 SDValue ShX, ShY;
7882 SDValue ShAmt, InvShAmt;
7883 if (isNonZeroModBitWidthOrUndef(Z, BW)) {
7884 // fshl: X << C | Y >> (BW - C)
7885 // fshr: X << (BW - C) | Y >> C
7886 // where C = Z % BW is not zero
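    // Exposition-only example on i8: fshl(X=0xAB, Y=0xCD, C=4) is
    // (0xAB << 4) | (0xCD >> 4) = 0xB0 | 0x0C = 0xBC.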
7887 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7888 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7889 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
7890 ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
7891 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
7892 } else {
7893 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7894 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7895 SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
7896 if (isPowerOf2_32(BW)) {
7897 // Z % BW -> Z & (BW - 1)
7898 ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
7899 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7900 InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
7901 } else {
7902 SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
7903 ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
7904 InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
7905 }
7906
7907 SDValue One = DAG.getConstant(1, DL, ShVT);
7908 if (IsFSHL) {
7909 ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
7910 SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
7911 ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
7912 } else {
7913 SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
7914 ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
7915 ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
7916 }
7917 }
7918 return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
7919 }
7920
7921 // TODO: Merge with expandFunnelShift.
7922 SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
7923 SelectionDAG &DAG) const {
7924 EVT VT = Node->getValueType(0);
7925 unsigned EltSizeInBits = VT.getScalarSizeInBits();
7926 bool IsLeft = Node->getOpcode() == ISD::ROTL;
7927 SDValue Op0 = Node->getOperand(0);
7928 SDValue Op1 = Node->getOperand(1);
7929 SDLoc DL(SDValue(Node, 0));
7930
7931 EVT ShVT = Op1.getValueType();
7932 SDValue Zero = DAG.getConstant(0, DL, ShVT);
7933
7934 // If a rotate in the other direction is more supported, use it.
7935 unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
7936 if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
7937 isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
7938 SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7939 return DAG.getNode(RevRot, DL, VT, Op0, Sub);
7940 }
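  // Exposition-only example: on i8, rotl(x, 3) == rotr(x, (8 - 3) & 7)
  // == rotr(x, -3 & 7) == rotr(x, 5), which is what the SUB above computes
  // (the power-of-two width check makes the negation wrap correctly).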
7941
7942 if (!AllowVectorOps && VT.isVector() &&
7943 (!isOperationLegalOrCustom(ISD::SHL, VT) ||
7944 !isOperationLegalOrCustom(ISD::SRL, VT) ||
7945 !isOperationLegalOrCustom(ISD::SUB, VT) ||
7946 !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
7947 !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
7948 return SDValue();
7949
7950 unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
7951 unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
7952 SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
7953 SDValue ShVal;
7954 SDValue HsVal;
7955 if (isPowerOf2_32(EltSizeInBits)) {
7956 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
7957 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
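    // Exposition-only example on i8: rotl(0xAB, 4) is
    // (0xAB << (4 & 7)) | (0xAB >> (-4 & 7)) = 0xB0 | 0x0A = 0xBA.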
7958 SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
7959 SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
7960 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
7961 SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
7962 HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
7963 } else {
7964 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
7965 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
7966 SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
7967 SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
7968 ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
7969 SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
7970 SDValue One = DAG.getConstant(1, DL, ShVT);
7971 HsVal =
7972 DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
7973 }
7974 return DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
7975 }
7976
7977 void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
7978 SelectionDAG &DAG) const {
7979 assert(Node->getNumOperands() == 3 && "Not a double-shift!");
7980 EVT VT = Node->getValueType(0);
7981 unsigned VTBits = VT.getScalarSizeInBits();
7982 assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");
7983
7984 bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
7985 bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
7986 SDValue ShOpLo = Node->getOperand(0);
7987 SDValue ShOpHi = Node->getOperand(1);
7988 SDValue ShAmt = Node->getOperand(2);
7989 EVT ShAmtVT = ShAmt.getValueType();
7990 EVT ShAmtCCVT =
7991 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
7992 SDLoc dl(Node);
7993
7994   // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
7995   // ISD::SRA/L nodes don't. Insert an AND to be safe; it's usually optimized
7996   // away during isel.
7997 SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
7998 DAG.getConstant(VTBits - 1, dl, ShAmtVT));
7999 SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
8000 DAG.getConstant(VTBits - 1, dl, ShAmtVT))
8001 : DAG.getConstant(0, dl, VT);
8002
8003 SDValue Tmp2, Tmp3;
8004 if (IsSHL) {
8005 Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
8006 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
8007 } else {
8008 Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
8009 Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
8010 }
8011
8012   // If the shift amount is larger than or equal to the width of a part we don't
8013 // use the result from the FSHL/FSHR. Insert a test and select the appropriate
8014 // values for large shift amounts.
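  // Exposition-only example: for 32-bit parts and ShAmt = 40, the test below
  // sees (40 & 32) != 0, so SHL_PARTS yields Hi = Lo << (40 & 31) = Lo << 8
  // and Lo = 0, bypassing the FSHL result entirely.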
8015 SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
8016 DAG.getConstant(VTBits, dl, ShAmtVT));
8017 SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
8018 DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);
8019
8020 if (IsSHL) {
8021 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8022 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8023 } else {
8024 Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
8025 Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
8026 }
8027 }
8028
8029 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
8030 SelectionDAG &DAG) const {
8031 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8032 SDValue Src = Node->getOperand(OpNo);
8033 EVT SrcVT = Src.getValueType();
8034 EVT DstVT = Node->getValueType(0);
8035 SDLoc dl(SDValue(Node, 0));
8036
8037 // FIXME: Only f32 to i64 conversions are supported.
8038 if (SrcVT != MVT::f32 || DstVT != MVT::i64)
8039 return false;
8040
8041 if (Node->isStrictFPOpcode())
8042     // When a NaN is converted to an integer, a trap is allowed. We can't
8043 // use this expansion here because it would eliminate that trap. Other
8044 // traps are also allowed and cannot be eliminated. See
8045 // IEEE 754-2008 sec 5.8.
8046 return false;
8047
8048 // Expand f32 -> i64 conversion
8049 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8050 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
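  // Exposition-only trace for Src = 1.0f: Bits = 0x3F800000, so
  // Exponent = 127 - 127 = 0 and R = mantissa | 0x00800000 = 0x00800000;
  // since Exponent <= 23, R >> (23 - Exponent) == 1, Sign == 0, Result == 1.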
8051 unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
8052 EVT IntVT = SrcVT.changeTypeToInteger();
8053 EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
8054
8055 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
8056 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
8057 SDValue Bias = DAG.getConstant(127, dl, IntVT);
8058 SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
8059 SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
8060 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
8061
8062 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);
8063
8064 SDValue ExponentBits = DAG.getNode(
8065 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
8066 DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
8067 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
8068
8069 SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
8070 DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
8071 DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
8072 Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);
8073
8074 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
8075 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
8076 DAG.getConstant(0x00800000, dl, IntVT));
8077
8078 R = DAG.getZExtOrTrunc(R, dl, DstVT);
8079
8080 R = DAG.getSelectCC(
8081 dl, Exponent, ExponentLoBit,
8082 DAG.getNode(ISD::SHL, dl, DstVT, R,
8083 DAG.getZExtOrTrunc(
8084 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
8085 dl, IntShVT)),
8086 DAG.getNode(ISD::SRL, dl, DstVT, R,
8087 DAG.getZExtOrTrunc(
8088 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
8089 dl, IntShVT)),
8090 ISD::SETGT);
8091
8092 SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
8093 DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);
8094
8095 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
8096 DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
8097 return true;
8098 }
8099
8100 bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
8101 SDValue &Chain,
8102 SelectionDAG &DAG) const {
8103 SDLoc dl(SDValue(Node, 0));
8104 unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
8105 SDValue Src = Node->getOperand(OpNo);
8106
8107 EVT SrcVT = Src.getValueType();
8108 EVT DstVT = Node->getValueType(0);
8109 EVT SetCCVT =
8110 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8111 EVT DstSetCCVT =
8112 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8113
8114 // Only expand vector types if we have the appropriate vector bit operations.
8115 unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
8116 ISD::FP_TO_SINT;
8117 if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
8118 !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
8119 return false;
8120
8121   // If the maximum float value is smaller than the signed integer range,
8122 // the destination signmask can't be represented by the float, so we can
8123 // just use FP_TO_SINT directly.
8124 const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
8125 APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
8126 APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
8127 if (APFloat::opOverflow &
8128 APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
8129 if (Node->isStrictFPOpcode()) {
8130 Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8131 { Node->getOperand(0), Src });
8132 Chain = Result.getValue(1);
8133 } else
8134 Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8135 return true;
8136 }
8137
8138   // Don't expand it if there isn't a cheap fsub instruction.
8139 if (!isOperationLegalOrCustom(
8140 Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
8141 return false;
8142
8143 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8144 SDValue Sel;
8145
8146 if (Node->isStrictFPOpcode()) {
8147 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8148 Node->getOperand(0), /*IsSignaling*/ true);
8149 Chain = Sel.getValue(1);
8150 } else {
8151 Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
8152 }
8153
8154 bool Strict = Node->isStrictFPOpcode() ||
8155 shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
8156
8157 if (Strict) {
8158     // Expand based on the maximum range of FP_TO_SINT: if the value exceeds the
8159     // signmask, offset it first (the result of which is then fully representable).
8160 // Sel = Src < 0x8000000000000000
8161 // FltOfs = select Sel, 0, 0x8000000000000000
8162 // IntOfs = select Sel, 0, 0x8000000000000000
8163 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8164
8165 // TODO: Should any fast-math-flags be set for the FSUB?
8166 SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
8167 DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8168 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8169 SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
8170 DAG.getConstant(0, dl, DstVT),
8171 DAG.getConstant(SignMask, dl, DstVT));
8172 SDValue SInt;
8173 if (Node->isStrictFPOpcode()) {
8174 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
8175 { Chain, Src, FltOfs });
8176 SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
8177 { Val.getValue(1), Val });
8178 Chain = SInt.getValue(1);
8179 } else {
8180 SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
8181 SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
8182 }
8183 Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8184 } else {
8185 // Expand based on maximum range of FP_TO_SINT:
8186 // True = fp_to_sint(Src)
8187 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
8188 // Result = select (Src < 0x8000000000000000), True, False
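    // Exposition-only example for f64 -> u64: Src = 2^63 + 2048 fails the Sel
    // test, so Result = fp_to_sint(Src - 2^63) ^ 0x8000000000000000
    //                 = 0x800 ^ 0x8000000000000000 = 0x8000000000000800.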
8189
8190 SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
8191 // TODO: Should any fast-math-flags be set for the FSUB?
8192 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
8193 DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
8194 False = DAG.getNode(ISD::XOR, dl, DstVT, False,
8195 DAG.getConstant(SignMask, dl, DstVT));
8196 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8197 Result = DAG.getSelect(dl, DstVT, Sel, True, False);
8198 }
8199 return true;
8200 }
8201
8202 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
8203 SDValue &Chain,
8204 SelectionDAG &DAG) const {
8205   // This transform is not correct for converting 0 when the rounding mode is
8206   // set to round toward negative infinity, which will produce -0.0. So disable
8207   // it under strictfp.
8208 if (Node->isStrictFPOpcode())
8209 return false;
8210
8211 SDValue Src = Node->getOperand(0);
8212 EVT SrcVT = Src.getValueType();
8213 EVT DstVT = Node->getValueType(0);
8214
8215 if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
8216 return false;
8217
8218 // Only expand vector types if we have the appropriate vector bit operations.
8219 if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
8220 !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
8221 !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
8222 !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
8223 !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
8224 return false;
8225
8226 SDLoc dl(SDValue(Node, 0));
8227 EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());
8228
8229 // Implementation of unsigned i64 to f64 following the algorithm in
8230 // __floatundidf in compiler_rt. This implementation performs rounding
8231 // correctly in all rounding modes with the exception of converting 0
8232 // when rounding toward negative infinity. In that case the fsub will produce
8233   // -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
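  // Exposition of the constants below (added for clarity): as f64 bit
  // patterns, 0x4330000000000000 is 2^52, so OR-ing the low 32 bits of Src
  // into its mantissa yields exactly 2^52 + Lo; 0x4530000000000000 is 2^84,
  // yielding 2^84 + Hi * 2^32. Subtracting (2^84 + 2^52) and adding
  // (2^52 + Lo) leaves Hi * 2^32 + Lo, with a single rounding in the fadd.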
8234 SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
8235 SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
8236 llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
8237 SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
8238 SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
8239 SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
8240
8241 SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
8242 SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
8243 SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
8244 SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
8245 SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
8246 SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
8247 SDValue HiSub =
8248 DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
8249 Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
8250 return true;
8251 }
8252
8253 SDValue
8254 TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
8255 SelectionDAG &DAG) const {
8256 unsigned Opcode = Node->getOpcode();
8257 assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM ||
8258 Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
8259 "Wrong opcode");
8260
8261 if (Node->getFlags().hasNoNaNs()) {
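    // With no NaNs, fminnum(a, b) reduces to (a < b) ? a : b and fmaxnum to
    // (a > b) ? a : b, which is the select built below.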
8262 ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
8263 SDValue Op1 = Node->getOperand(0);
8264 SDValue Op2 = Node->getOperand(1);
8265 SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred);
8266 // Copy FMF flags, but always set the no-signed-zeros flag
8267 // as this is implied by the FMINNUM/FMAXNUM semantics.
8268 SDNodeFlags Flags = Node->getFlags();
8269 Flags.setNoSignedZeros(true);
8270 SelCC->setFlags(Flags);
8271 return SelCC;
8272 }
8273
8274 return SDValue();
8275 }
8276
8277 SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
8278 SelectionDAG &DAG) const {
8279 SDLoc dl(Node);
8280 unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
8281 ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8282 EVT VT = Node->getValueType(0);
8283
8284 if (VT.isScalableVector())
8285 report_fatal_error(
8286 "Expanding fminnum/fmaxnum for scalable vectors is undefined.");
8287
8288 if (isOperationLegalOrCustom(NewOp, VT)) {
8289 SDValue Quiet0 = Node->getOperand(0);
8290 SDValue Quiet1 = Node->getOperand(1);
8291
8292 if (!Node->getFlags().hasNoNaNs()) {
8293 // Insert canonicalizes if it's possible we need to quiet to get correct
8294 // sNaN behavior.
8295 if (!DAG.isKnownNeverSNaN(Quiet0)) {
8296 Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
8297 Node->getFlags());
8298 }
8299 if (!DAG.isKnownNeverSNaN(Quiet1)) {
8300 Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
8301 Node->getFlags());
8302 }
8303 }
8304
8305 return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
8306 }
8307
8308 // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
8309 // instead if there are no NaNs and there can't be an incompatible zero
8310 // compare: at least one operand isn't +/-0, or there are no signed-zeros.
8311 if ((Node->getFlags().hasNoNaNs() ||
8312 (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
8313 DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
8314 (Node->getFlags().hasNoSignedZeros() ||
8315 DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
8316 DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
8317 unsigned IEEE2018Op =
8318 Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
8319 if (isOperationLegalOrCustom(IEEE2018Op, VT))
8320 return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
8321 Node->getOperand(1), Node->getFlags());
8322 }
8323
8324 if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
8325 return SelCC;
8326
8327 return SDValue();
8328 }
8329
8330 /// Returns a true value if this FPClassTest can be performed with an ordered
8331 /// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
8332 /// std::nullopt if it cannot be performed as a compare with 0.
8333 static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
8334 const fltSemantics &Semantics,
8335 const MachineFunction &MF) {
8336 FPClassTest OrderedMask = Test & ~fcNan;
8337 FPClassTest NanTest = Test & fcNan;
8338 bool IsOrdered = NanTest == fcNone;
8339 bool IsUnordered = NanTest == fcNan;
8340
8341 // Skip cases that are testing for only a qnan or snan.
8342 if (!IsOrdered && !IsUnordered)
8343 return std::nullopt;
8344
8345 if (OrderedMask == fcZero &&
8346 MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
8347 return IsOrdered;
8348 if (OrderedMask == (fcZero | fcSubnormal) &&
8349 MF.getDenormalMode(Semantics).inputsAreZero())
8350 return IsOrdered;
8351 return std::nullopt;
8352 }
8353
8354 SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
8355 FPClassTest Test, SDNodeFlags Flags,
8356 const SDLoc &DL,
8357 SelectionDAG &DAG) const {
8358 EVT OperandVT = Op.getValueType();
8359 assert(OperandVT.isFloatingPoint());
8360
8361   // Degenerate cases.
8362 if (Test == fcNone)
8363 return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
8364 if ((Test & fcAllFlags) == fcAllFlags)
8365 return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
8366
8367   // PPC double-double is a pair of doubles, of which the higher part
8368   // determines the value class.
8369 if (OperandVT == MVT::ppcf128) {
8370 Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op,
8371 DAG.getConstant(1, DL, MVT::i32));
8372 OperandVT = MVT::f64;
8373 }
8374
8375   // Some checks may be represented as the inversion of a simpler check, e.g.
8376 // "inf|normal|subnormal|zero" => !"nan".
8377 bool IsInverted = false;
8378 if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
8379 IsInverted = true;
8380 Test = InvertedCheck;
8381 }
8382
8383 // Floating-point type properties.
8384 EVT ScalarFloatVT = OperandVT.getScalarType();
8385 const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
8386 const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
8387 bool IsF80 = (ScalarFloatVT == MVT::f80);
8388
8389 // Some checks can be implemented using float comparisons, if floating point
8390 // exceptions are ignored.
8391 if (Flags.hasNoFPExcept() &&
8392 isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
8393 ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
8394 ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
8395
8396 if (std::optional<bool> IsCmp0 =
8397 isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
8398 IsCmp0 && (isCondCodeLegalOrCustom(
8399 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
8400 OperandVT.getScalarType().getSimpleVT()))) {
8401
8402 // If denormals could be implicitly treated as 0, this is not equivalent
8403 // to a compare with 0 since it will also be true for denormals.
8404 return DAG.getSetCC(DL, ResultVT, Op,
8405 DAG.getConstantFP(0.0, DL, OperandVT),
8406 *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
8407 }
8408
8409 if (Test == fcNan &&
8410 isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
8411 OperandVT.getScalarType().getSimpleVT())) {
8412 return DAG.getSetCC(DL, ResultVT, Op, Op,
8413 IsInverted ? ISD::SETO : ISD::SETUO);
8414 }
8415
8416 if (Test == fcInf &&
8417 isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
8418 OperandVT.getScalarType().getSimpleVT()) &&
8419 isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
8420 // isinf(x) --> fabs(x) == inf
8421 SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
8422 SDValue Inf =
8423 DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
8424 return DAG.getSetCC(DL, ResultVT, Abs, Inf,
8425 IsInverted ? ISD::SETUNE : ISD::SETOEQ);
8426 }
8427 }
8428
8429 // In the general case use integer operations.
8430 unsigned BitSize = OperandVT.getScalarSizeInBits();
8431 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize);
8432 if (OperandVT.isVector())
8433 IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT,
8434 OperandVT.getVectorElementCount());
8435 SDValue OpAsInt = DAG.getBitcast(IntVT, Op);
8436
8437 // Various masks.
8438 APInt SignBit = APInt::getSignMask(BitSize);
8439 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8440 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8441 const unsigned ExplicitIntBitInF80 = 63;
8442 APInt ExpMask = Inf;
8443 if (IsF80)
8444 ExpMask.clearBit(ExplicitIntBitInF80);
8445 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8446 APInt QNaNBitMask =
8447 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
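  // Exposition-only values for f32: SignBit = 0x80000000, ValueMask =
  // 0x7FFFFFFF, Inf = ExpMask = 0x7F800000 (f32 has no explicit integer bit),
  // AllOneMantissa = 0x007FFFFF, QNaNBitMask = 0x00400000.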
8448 APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
8449
8450 SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
8451 SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
8452 SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
8453 SDValue ZeroV = DAG.getConstant(0, DL, IntVT);
8454 SDValue InfV = DAG.getConstant(Inf, DL, IntVT);
8455 SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT);
8456
8457 SDValue Res;
8458 const auto appendResult = [&](SDValue PartialRes) {
8459 if (PartialRes) {
8460 if (Res)
8461 Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes);
8462 else
8463 Res = PartialRes;
8464 }
8465 };
8466
8467 SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set.
8468 const auto getIntBitIsSet = [&]() -> SDValue {
8469 if (!IntBitIsSetV) {
8470 APInt IntBitMask(BitSize, 0);
8471 IntBitMask.setBit(ExplicitIntBitInF80);
8472 SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT);
8473 SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV);
8474 IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE);
8475 }
8476 return IntBitIsSetV;
8477 };
8478
8479 // Split the value into sign bit and absolute value.
8480 SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV);
8481 SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt,
8482                                DAG.getConstant(0, DL, IntVT), ISD::SETLT);
8483
8484 // Tests that involve more than one class should be processed first.
8485 SDValue PartialRes;
8486
8487 if (IsF80)
8488     ; // Detect finite f80 numbers by checking the individual classes, because
8489       // they have different settings of the explicit integer bit.
8490 else if ((Test & fcFinite) == fcFinite) {
8491 // finite(V) ==> abs(V) < exp_mask
8492 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8493 Test &= ~fcFinite;
8494 } else if ((Test & fcFinite) == fcPosFinite) {
8495 // finite(V) && V > 0 ==> V < exp_mask
8496 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT);
8497 Test &= ~fcPosFinite;
8498 } else if ((Test & fcFinite) == fcNegFinite) {
8499 // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1
8500 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT);
8501 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8502 Test &= ~fcNegFinite;
8503 }
8504 appendResult(PartialRes);
8505
8506 if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
8507 // fcZero | fcSubnormal => test all exponent bits are 0
8508 // TODO: Handle sign bit specific cases
8509 if (PartialCheck == (fcZero | fcSubnormal)) {
8510 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
8511 SDValue ExpIsZero =
8512 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8513 appendResult(ExpIsZero);
8514 Test &= ~PartialCheck & fcAllFlags;
8515 }
8516 }
8517
8518 // Check for individual classes.
8519
8520 if (unsigned PartialCheck = Test & fcZero) {
8521 if (PartialCheck == fcPosZero)
8522 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ);
8523 else if (PartialCheck == fcZero)
8524 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ);
8525 else // ISD::fcNegZero
8526 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ);
8527 appendResult(PartialRes);
8528 }
8529
8530 if (unsigned PartialCheck = Test & fcSubnormal) {
8531 // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
8532 // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
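    // Exposition-only example for f32: subnormals are 0x00000001..0x007FFFFF,
    // so unsigned(V - 1) < 0x007FFFFF holds exactly for them (V == 0 wraps to
    // 0xFFFFFFFF and correctly fails the test).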
8533 SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
8534 SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
8535 SDValue VMinusOneV =
8536 DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
8537 PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
8538 if (PartialCheck == fcNegSubnormal)
8539 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8540 appendResult(PartialRes);
8541 }
8542
8543 if (unsigned PartialCheck = Test & fcInf) {
8544 if (PartialCheck == fcPosInf)
8545 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
8546 else if (PartialCheck == fcInf)
8547 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ);
8548 else { // fcNegInf
8549 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8550 SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT);
8551 PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ);
8552 }
8553 appendResult(PartialRes);
8554 }
8555
8556 if (unsigned PartialCheck = Test & fcNan) {
8557 APInt InfWithQnanBit = Inf | QNaNBitMask;
8558 SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT);
8559 if (PartialCheck == fcNan) {
8560 // isnan(V) ==> abs(V) > int(inf)
8561 PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8562 if (IsF80) {
8563 // Recognize unsupported values as NaNs for compatibility with glibc.
8564 // For such values, (exp(V) == 0) == int_bit.
8565 SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV);
8566 SDValue ExpIsZero =
8567 DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
8568 SDValue IsPseudo =
8569 DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ);
8570 PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo);
8571 }
8572 } else if (PartialCheck == fcQNan) {
8573 // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit)
8574 PartialRes =
8575 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE);
8576 } else { // fcSNan
8577 // issignaling(V) ==> abs(V) > unsigned(Inf) &&
8578 // abs(V) < (unsigned(Inf) | quiet_bit)
8579 SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT);
8580 SDValue IsNotQnan =
8581 DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT);
8582 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan);
8583 }
8584 appendResult(PartialRes);
8585 }
8586
8587 if (unsigned PartialCheck = Test & fcNormal) {
8588 // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
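// e.g. for f32, ExpLSB is 0x00800000 and ExpLimit is 0x7f000000; the one
// unsigned comparison accepts biased exponents 1..254, rejecting both
// zeros/subnormals (exponent 0) and infinities/NaNs (exponent 255).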
8589 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8590 SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT);
8591 SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV);
8592 APInt ExpLimit = ExpMask - ExpLSB;
8593 SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT);
8594 PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT);
8595 if (PartialCheck == fcNegNormal)
8596 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
8597 else if (PartialCheck == fcPosNormal) {
8598 SDValue PosSignV =
8599 DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask);
8600 PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV);
8601 }
8602 if (IsF80)
8603 PartialRes =
8604 DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet());
8605 appendResult(PartialRes);
8606 }
8607
8608 if (!Res)
8609 return DAG.getConstant(IsInverted, DL, ResultVT);
8610 if (IsInverted)
8611 Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask);
8612 return Res;
8613 }
8614
8615 // Only expand vector types if we have the appropriate vector bit operations.
8616 static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
8617 assert(VT.isVector() && "Expected vector type");
8618 unsigned Len = VT.getScalarSizeInBits();
8619 return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
8620 TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
8621 TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
8622 (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
8623 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
8624 }
8625
8626 SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8627 SDLoc dl(Node);
8628 EVT VT = Node->getValueType(0);
8629 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8630 SDValue Op = Node->getOperand(0);
8631 unsigned Len = VT.getScalarSizeInBits();
8632 assert(VT.isInteger() && "CTPOP not implemented for this type.");
8633
8634 // TODO: Add support for irregular type lengths.
8635 if (!(Len <= 128 && Len % 8 == 0))
8636 return SDValue();
8637
8638 // Only expand vector types if we have the appropriate vector bit operations.
8639 if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
8640 return SDValue();
8641
8642 // This is the "best" algorithm from
8643 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
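// e.g. for the 8-bit value 0b11011010 (popcount 5): the first step leaves
// per-2-bit counts 0b10'01'01'01, the second forms per-nibble sums
// 0b0011'0010, and the third folds them into 0b0000'0101 = 5.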
8644 SDValue Mask55 =
8645 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8646 SDValue Mask33 =
8647 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8648 SDValue Mask0F =
8649 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8650
8651 // v = v - ((v >> 1) & 0x55555555...)
8652 Op = DAG.getNode(ISD::SUB, dl, VT, Op,
8653 DAG.getNode(ISD::AND, dl, VT,
8654 DAG.getNode(ISD::SRL, dl, VT, Op,
8655 DAG.getConstant(1, dl, ShVT)),
8656 Mask55));
8657 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8658 Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
8659 DAG.getNode(ISD::AND, dl, VT,
8660 DAG.getNode(ISD::SRL, dl, VT, Op,
8661 DAG.getConstant(2, dl, ShVT)),
8662 Mask33));
8663 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8664 Op = DAG.getNode(ISD::AND, dl, VT,
8665 DAG.getNode(ISD::ADD, dl, VT, Op,
8666 DAG.getNode(ISD::SRL, dl, VT, Op,
8667 DAG.getConstant(4, dl, ShVT))),
8668 Mask0F);
8669
8670 if (Len <= 8)
8671 return Op;
8672
8673 // Avoid the multiply if we only have 2 bytes to add.
8674 // TODO: Only doing this for scalars because vectors weren't as obviously
8675 // improved.
8676 if (Len == 16 && !VT.isVector()) {
8677 // v = (v + (v >> 8)) & 0x00FF;
8678 return DAG.getNode(ISD::AND, dl, VT,
8679 DAG.getNode(ISD::ADD, dl, VT, Op,
8680 DAG.getNode(ISD::SRL, dl, VT, Op,
8681 DAG.getConstant(8, dl, ShVT))),
8682 DAG.getConstant(0xFF, dl, VT));
8683 }
8684
8685 // v = (v * 0x01010101...) >> (Len - 8)
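// Each byte of v now holds its own popcount; the multiply sums every byte
// into the most significant byte (the total is at most 128, so no byte
// overflows), and the shift extracts that byte.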
8686 SDValue Mask01 =
8687 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8688 return DAG.getNode(ISD::SRL, dl, VT,
8689 DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
8690 DAG.getConstant(Len - 8, dl, ShVT));
8691 }
8692
8693 SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
8694 SDLoc dl(Node);
8695 EVT VT = Node->getValueType(0);
8696 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8697 SDValue Op = Node->getOperand(0);
8698 SDValue Mask = Node->getOperand(1);
8699 SDValue VL = Node->getOperand(2);
8700 unsigned Len = VT.getScalarSizeInBits();
8701 assert(VT.isInteger() && "VP_CTPOP not implemented for this type.");
8702
8703 // TODO: Add support for irregular type lengths.
8704 if (!(Len <= 128 && Len % 8 == 0))
8705 return SDValue();
8706
8707 // This is the same algorithm as in expandCTPOP, from
8708 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
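// Unlike expandCTPOP, each node here threads the mask and explicit vector
// length through as trailing operands, so lanes disabled by Mask or beyond
// VL are left unspecified, per the vector-predication semantics.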
8709 SDValue Mask55 =
8710 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
8711 SDValue Mask33 =
8712 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
8713 SDValue Mask0F =
8714 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
8715
8716 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
8717
8718 // v = v - ((v >> 1) & 0x55555555...)
8719 Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT,
8720 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8721 DAG.getConstant(1, dl, ShVT), Mask, VL),
8722 Mask55, Mask, VL);
8723 Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL);
8724
8725 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
8726 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL);
8727 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT,
8728 DAG.getNode(ISD::VP_LSHR, dl, VT, Op,
8729 DAG.getConstant(2, dl, ShVT), Mask, VL),
8730 Mask33, Mask, VL);
8731 Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL);
8732
8733 // v = (v + (v >> 4)) & 0x0F0F0F0F...
8734 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT),
8735 Mask, VL);
8736 Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL);
8737 Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL);
8738
8739 if (Len <= 8)
8740 return Op;
8741
8742 // v = (v * 0x01010101...) >> (Len - 8)
8743 SDValue Mask01 =
8744 DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
8745 return DAG.getNode(ISD::VP_LSHR, dl, VT,
8746 DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
8747 DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
8748 }
8749
8750 SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8751 SDLoc dl(Node);
8752 EVT VT = Node->getValueType(0);
8753 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8754 SDValue Op = Node->getOperand(0);
8755 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8756
8757 // If the non-ZERO_UNDEF version is supported we can use that instead.
8758 if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
8759 isOperationLegalOrCustom(ISD::CTLZ, VT))
8760 return DAG.getNode(ISD::CTLZ, dl, VT, Op);
8761
8762 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8763 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
8764 EVT SetCCVT =
8765 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8766 SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
8767 SDValue Zero = DAG.getConstant(0, dl, VT);
8768 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
8769 return DAG.getSelect(dl, VT, SrcIsZero,
8770 DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
8771 }
8772
8773 // Only expand vector types if we have the appropriate vector bit operations.
8774 // This includes the operations needed to expand CTPOP if it isn't supported.
8775 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8776 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
8777 !canExpandVectorCTPOP(*this, VT)) ||
8778 !isOperationLegalOrCustom(ISD::SRL, VT) ||
8779 !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
8780 return SDValue();
8781
8782 // for now, we do this:
8783 // x = x | (x >> 1);
8784 // x = x | (x >> 2);
8785 // ...
8786 // x = x | (x >>16);
8787 // x = x | (x >>32); // for 64-bit input
8788 // return popcount(~x);
8789 //
8790 // Ref: "Hacker's Delight" by Henry Warren
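// e.g. for i16 x = 0x00F0 the shifts smear the leading one to give 0x00FF,
// so ~x = 0xFF00 and popcount(~x) = 8, the number of leading zeros.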
8791 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8792 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8793 Op = DAG.getNode(ISD::OR, dl, VT, Op,
8794 DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
8795 }
8796 Op = DAG.getNOT(dl, Op, VT);
8797 return DAG.getNode(ISD::CTPOP, dl, VT, Op);
8798 }
8799
8800 SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const {
8801 SDLoc dl(Node);
8802 EVT VT = Node->getValueType(0);
8803 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8804 SDValue Op = Node->getOperand(0);
8805 SDValue Mask = Node->getOperand(1);
8806 SDValue VL = Node->getOperand(2);
8807 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8808
8809 // do this:
8810 // x = x | (x >> 1);
8811 // x = x | (x >> 2);
8812 // ...
8813 // x = x | (x >>16);
8814 // x = x | (x >>32); // for 64-bit input
8815 // return popcount(~x);
8816 for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) {
8817 SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
8818 Op = DAG.getNode(ISD::VP_OR, dl, VT, Op,
8819 DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask,
8820 VL);
8821 }
8822 Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask,
8823 VL);
8824 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL);
8825 }
8826
8827 SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG,
8828 const SDLoc &DL, EVT VT, SDValue Op,
8829 unsigned BitWidth) const {
8830 if (BitWidth != 32 && BitWidth != 64)
8831 return SDValue();
8832 APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U)
8833 : APInt(64, 0x0218A392CD3D5DBFULL);
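// (Op & -Op) isolates the lowest set bit, a power of two 1 << k, so the
// multiply shifts DeBruijn left by k. Every Log2(BitWidth)-bit window of a
// de Bruijn sequence is unique, so the top bits extracted by the SRL form a
// perfect index into the table built below, mapping back to k.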
8834 const DataLayout &TD = DAG.getDataLayout();
8835 MachinePointerInfo PtrInfo =
8836 MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
8837 unsigned ShiftAmt = BitWidth - Log2_32(BitWidth);
8838 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
8839 SDValue Lookup = DAG.getNode(
8840 ISD::SRL, DL, VT,
8841 DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg),
8842 DAG.getConstant(DeBruijn, DL, VT)),
8843 DAG.getConstant(ShiftAmt, DL, VT));
8844 Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD));
8845
8846 SmallVector<uint8_t> Table(BitWidth, 0);
8847 for (unsigned i = 0; i < BitWidth; i++) {
8848 APInt Shl = DeBruijn.shl(i);
8849 APInt Lshr = Shl.lshr(ShiftAmt);
8850 Table[Lshr.getZExtValue()] = i;
8851 }
8852
8853 // Create a ConstantDataArray in the constant pool.
8854 auto *CA = ConstantDataArray::get(*DAG.getContext(), Table);
8855 SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD),
8856 TD.getPrefTypeAlign(CA->getType()));
8857 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(),
8858 DAG.getMemBasePlusOffset(CPIdx, Lookup, DL),
8859 PtrInfo, MVT::i8);
8860 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF)
8861 return ExtLoad;
8862
8863 EVT SetCCVT =
8864 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8865 SDValue Zero = DAG.getConstant(0, DL, VT);
8866 SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ);
8867 return DAG.getSelect(DL, VT, SrcIsZero,
8868 DAG.getConstant(BitWidth, DL, VT), ExtLoad);
8869 }
8870
8871 SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8872 SDLoc dl(Node);
8873 EVT VT = Node->getValueType(0);
8874 SDValue Op = Node->getOperand(0);
8875 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
8876
8877 // If the non-ZERO_UNDEF version is supported we can use that instead.
8878 if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
8879 isOperationLegalOrCustom(ISD::CTTZ, VT))
8880 return DAG.getNode(ISD::CTTZ, dl, VT, Op);
8881
8882 // If the ZERO_UNDEF version is supported use that and handle the zero case.
8883 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
8884 EVT SetCCVT =
8885 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8886 SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
8887 SDValue Zero = DAG.getConstant(0, dl, VT);
8888 SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
8889 return DAG.getSelect(dl, VT, SrcIsZero,
8890 DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
8891 }
8892
8893 // Only expand vector types if we have the appropriate vector bit operations.
8894 // This includes the operations needed to expand CTPOP if it isn't supported.
8895 if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
8896 (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
8897 !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
8898 !canExpandVectorCTPOP(*this, VT)) ||
8899 !isOperationLegalOrCustom(ISD::SUB, VT) ||
8900 !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
8901 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
8902 return SDValue();
8903
8904 // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
8905 if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
8906 !isOperationLegal(ISD::CTLZ, VT))
8907 if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
8908 return V;
8909
8910 // for now, we use: { return popcount(~x & (x - 1)); }
8911 // unless the target has ctlz but not ctpop, in which case we use:
8912 // { return bitwidth - nlz(~x & (x - 1)); }
8913 // Ref: "Hacker's Delight" by Henry Warren
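// e.g. for x = 0b0110'1000: x - 1 = 0b0110'0111 and ~x = 0b1001'0111, so
// ~x & (x - 1) = 0b0000'0111, whose popcount, 3, is the trailing zero count.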
8914 SDValue Tmp = DAG.getNode(
8915 ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
8916 DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));
8917
8918 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
8919 if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
8920 return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
8921 DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
8922 }
8923
8924 return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
8925 }
8926
8927 SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const {
8928 SDValue Op = Node->getOperand(0);
8929 SDValue Mask = Node->getOperand(1);
8930 SDValue VL = Node->getOperand(2);
8931 SDLoc dl(Node);
8932 EVT VT = Node->getValueType(0);
8933
8934 // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1))
8935 SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op,
8936 DAG.getConstant(-1, dl, VT), Mask, VL);
8937 SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op,
8938 DAG.getConstant(1, dl, VT), Mask, VL);
8939 SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL);
8940 return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL);
8941 }
8942
8943 SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
8944 bool IsNegative) const {
8945 SDLoc dl(N);
8946 EVT VT = N->getValueType(0);
8947 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
8948 SDValue Op = N->getOperand(0);
8949
8950 // abs(x) -> smax(x,sub(0,x))
8951 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
8952 isOperationLegal(ISD::SMAX, VT)) {
8953 SDValue Zero = DAG.getConstant(0, dl, VT);
8954 return DAG.getNode(ISD::SMAX, dl, VT, Op,
8955 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8956 }
8957
8958 // abs(x) -> umin(x,sub(0,x))
8959 if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
8960 isOperationLegal(ISD::UMIN, VT)) {
8961 SDValue Zero = DAG.getConstant(0, dl, VT);
8962 Op = DAG.getFreeze(Op);
8963 return DAG.getNode(ISD::UMIN, dl, VT, Op,
8964 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8965 }
8966
8967 // 0 - abs(x) -> smin(x, sub(0,x))
8968 if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
8969 isOperationLegal(ISD::SMIN, VT)) {
8970 Op = DAG.getFreeze(Op);
8971 SDValue Zero = DAG.getConstant(0, dl, VT);
8972 return DAG.getNode(ISD::SMIN, dl, VT, Op,
8973 DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
8974 }
8975
8976 // Only expand vector types if we have the appropriate vector operations.
8977 if (VT.isVector() &&
8978 (!isOperationLegalOrCustom(ISD::SRA, VT) ||
8979 (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
8980 (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
8981 !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
8982 return SDValue();
8983
8984 Op = DAG.getFreeze(Op);
8985 SDValue Shift =
8986 DAG.getNode(ISD::SRA, dl, VT, Op,
8987 DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
8988 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
8989
8990 // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
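// e.g. for i8 x = -5: Shift = x >> 7 = -1 and Xor = x ^ -1 = 4, so the
// result is 4 - (-1) = 5; for non-negative x, Shift = 0 and Xor = x.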
8991 if (!IsNegative)
8992 return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
8993
8994 // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
8995 return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
8996 }
8997
8998 SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
8999 SDLoc dl(N);
9000 EVT VT = N->getValueType(0);
9001 SDValue LHS = DAG.getFreeze(N->getOperand(0));
9002 SDValue RHS = DAG.getFreeze(N->getOperand(1));
9003 bool IsSigned = N->getOpcode() == ISD::ABDS;
9004
9005 // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
9006 // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
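// e.g. abds(i8 3, i8 -7) = smax(3, -7) - smin(3, -7) = 3 - (-7) = 10.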
9007 unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
9008 unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
9009 if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
9010 SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
9011 SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
9012 return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
9013 }
9014
9015 // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
9016 if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
9017 return DAG.getNode(ISD::OR, dl, VT,
9018 DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
9019 DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
9020
9021 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9022 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9023 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9024 ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
9025 SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
9026 return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9027 DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9028 }
9029
9030 SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
9031 SDLoc dl(N);
9032 EVT VT = N->getValueType(0);
9033 SDValue Op = N->getOperand(0);
9034
9035 if (!VT.isSimple())
9036 return SDValue();
9037
9038 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9039 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9040 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9041 default:
9042 return SDValue();
9043 case MVT::i16:
9044 // Use a rotate by 8. This can be further expanded if necessary.
9045 return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9046 case MVT::i32:
9047 Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9048 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op,
9049 DAG.getConstant(0xFF00, dl, VT));
9050 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT));
9051 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9052 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
9053 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9054 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9055 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9056 return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9057 case MVT::i64:
9058 Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9059 Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op,
9060 DAG.getConstant(255ULL<<8, dl, VT));
9061 Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT));
9062 Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op,
9063 DAG.getConstant(255ULL<<16, dl, VT));
9064 Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT));
9065 Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op,
9066 DAG.getConstant(255ULL<<24, dl, VT));
9067 Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT));
9068 Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
9069 Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
9070 DAG.getConstant(255ULL<<24, dl, VT));
9071 Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
9072 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
9073 DAG.getConstant(255ULL<<16, dl, VT));
9074 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
9075 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
9076 DAG.getConstant(255ULL<<8, dl, VT));
9077 Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
9078 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
9079 Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
9080 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
9081 Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
9082 Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
9083 Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
9084 return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
9085 }
9086 }
9087
9088 SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const {
9089 SDLoc dl(N);
9090 EVT VT = N->getValueType(0);
9091 SDValue Op = N->getOperand(0);
9092 SDValue Mask = N->getOperand(1);
9093 SDValue EVL = N->getOperand(2);
9094
9095 if (!VT.isSimple())
9096 return SDValue();
9097
9098 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9099 SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
9100 switch (VT.getSimpleVT().getScalarType().SimpleTy) {
9101 default:
9102 return SDValue();
9103 case MVT::i16:
9104 Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9105 Mask, EVL);
9106 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9107 Mask, EVL);
9108 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL);
9109 case MVT::i32:
9110 Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9111 Mask, EVL);
9112 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT),
9113 Mask, EVL);
9114 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT),
9115 Mask, EVL);
9116 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9117 Mask, EVL);
9118 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9119 DAG.getConstant(0xFF00, dl, VT), Mask, EVL);
9120 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9121 Mask, EVL);
9122 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9123 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9124 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9125 case MVT::i64:
9126 Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9127 Mask, EVL);
9128 Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9129 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9130 Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT),
9131 Mask, EVL);
9132 Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9133 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9134 Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT),
9135 Mask, EVL);
9136 Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op,
9137 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9138 Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT),
9139 Mask, EVL);
9140 Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT),
9141 Mask, EVL);
9142 Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4,
9143 DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL);
9144 Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT),
9145 Mask, EVL);
9146 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3,
9147 DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL);
9148 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT),
9149 Mask, EVL);
9150 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9151 DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL);
9152 Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT),
9153 Mask, EVL);
9154 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL);
9155 Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL);
9156 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL);
9157 Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL);
9158 Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL);
9159 Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL);
9160 return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL);
9161 }
9162 }
9163
9164 SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9165 SDLoc dl(N);
9166 EVT VT = N->getValueType(0);
9167 SDValue Op = N->getOperand(0);
9168 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9169 unsigned Sz = VT.getScalarSizeInBits();
9170
9171 SDValue Tmp, Tmp2, Tmp3;
9172
9173 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9174 // and finally the i1 pairs.
9175 // TODO: We can easily support i4/i2 legal types if any target ever does.
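// e.g. for i8 0b00010011 (no BSWAP needed for a single byte): the nibble
// swap gives 0b00110001, the pair swap 0b11000100, and the bit swap
// 0b11001000, the reversed value.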
9176 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9177 // Create the masks - repeating the pattern every byte.
9178 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9179 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9180 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9181
9182 // BSWAP if the type is wider than a single byte.
9183 Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
9184
9185 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9186 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
9187 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
9188 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
9189 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
9190 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9191
9192 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9193 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
9194 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
9195 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
9196 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
9197 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9198
9199 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9200 Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
9201 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
9202 Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
9203 Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
9204 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9205 return Tmp;
9206 }
9207
9208 Tmp = DAG.getConstant(0, dl, VT);
9209 for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
9210 if (I < J)
9211 Tmp2 =
9212 DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
9213 else
9214 Tmp2 =
9215 DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
9216
9217 APInt Shift = APInt::getOneBitSet(Sz, J);
9218 Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
9219 Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
9220 }
9221
9222 return Tmp;
9223 }
9224
9225 SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
9226 assert(N->getOpcode() == ISD::VP_BITREVERSE);
9227
9228 SDLoc dl(N);
9229 EVT VT = N->getValueType(0);
9230 SDValue Op = N->getOperand(0);
9231 SDValue Mask = N->getOperand(1);
9232 SDValue EVL = N->getOperand(2);
9233 EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
9234 unsigned Sz = VT.getScalarSizeInBits();
9235
9236 SDValue Tmp, Tmp2, Tmp3;
9237
9238 // If we can, perform BSWAP first and then the mask+swap the i4, then i2
9239 // and finally the i1 pairs.
9240 // TODO: We can easily support i4/i2 legal types if any target ever does.
9241 if (Sz >= 8 && isPowerOf2_32(Sz)) {
9242 // Create the masks - repeating the pattern every byte.
9243 APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
9244 APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
9245 APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
9246
9247 // BSWAP if the type is wider than a single byte.
9248 Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op);
9249
9250 // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
9251 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT),
9252 Mask, EVL);
9253 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9254 DAG.getConstant(Mask4, dl, VT), Mask, EVL);
9255 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT),
9256 Mask, EVL);
9257 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT),
9258 Mask, EVL);
9259 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9260
9261 // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
9262 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT),
9263 Mask, EVL);
9264 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9265 DAG.getConstant(Mask2, dl, VT), Mask, EVL);
9266 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT),
9267 Mask, EVL);
9268 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT),
9269 Mask, EVL);
9270 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9271
9272 // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
9273 Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT),
9274 Mask, EVL);
9275 Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2,
9276 DAG.getConstant(Mask1, dl, VT), Mask, EVL);
9277 Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT),
9278 Mask, EVL);
9279 Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT),
9280 Mask, EVL);
9281 Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL);
9282 return Tmp;
9283 }
9284 return SDValue();
9285 }
9286
9287 std::pair<SDValue, SDValue>
9288 TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
9289 SelectionDAG &DAG) const {
9290 SDLoc SL(LD);
9291 SDValue Chain = LD->getChain();
9292 SDValue BasePTR = LD->getBasePtr();
9293 EVT SrcVT = LD->getMemoryVT();
9294 EVT DstVT = LD->getValueType(0);
9295 ISD::LoadExtType ExtType = LD->getExtensionType();
9296
9297 if (SrcVT.isScalableVector())
9298 report_fatal_error("Cannot scalarize scalable vector loads");
9299
9300 unsigned NumElem = SrcVT.getVectorNumElements();
9301
9302 EVT SrcEltVT = SrcVT.getScalarType();
9303 EVT DstEltVT = DstVT.getScalarType();
9304
9305 // A vector must always be stored in memory as-is, i.e. without any padding
9306 // between the elements, since various code depends on it, e.g. in the
9307 // handling of a bitcast of a vector type to int, which may be done with a
9308 // vector store followed by an integer load. A vector that does not have
9309 // elements that are byte-sized must therefore be stored as an integer
9310 // built out of the extracted vector elements.
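// For example, a v4i1 vector occupies four bits and is loaded below as one
// integer; each element is then recovered by shifting the loaded value and
// masking with SrcEltBitMask.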
9311 if (!SrcEltVT.isByteSized()) {
9312 unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
9313 EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);
9314
9315 unsigned NumSrcBits = SrcVT.getSizeInBits();
9316 EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);
9317
9318 unsigned SrcEltBits = SrcEltVT.getSizeInBits();
9319 SDValue SrcEltBitMask = DAG.getConstant(
9320 APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);
9321
9322 // Load the whole vector and avoid masking off the top bits as it makes
9323 // the codegen worse.
9324 SDValue Load =
9325 DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
9326 LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
9327 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9328
9329 SmallVector<SDValue, 8> Vals;
9330 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9331 unsigned ShiftIntoIdx =
9332 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9333 SDValue ShiftAmount =
9334 DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
9335 LoadVT, SL, /*LegalTypes=*/false);
9336 SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
9337 SDValue Elt =
9338 DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
9339 SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);
9340
9341 if (ExtType != ISD::NON_EXTLOAD) {
9342 unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
9343 Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
9344 }
9345
9346 Vals.push_back(Scalar);
9347 }
9348
9349 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9350 return std::make_pair(Value, Load.getValue(1));
9351 }
9352
9353 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
9354 assert(SrcEltVT.isByteSized());
9355
9356 SmallVector<SDValue, 8> Vals;
9357 SmallVector<SDValue, 8> LoadChains;
9358
9359 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9360 SDValue ScalarLoad =
9361 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
9362 LD->getPointerInfo().getWithOffset(Idx * Stride),
9363 SrcEltVT, LD->getOriginalAlign(),
9364 LD->getMemOperand()->getFlags(), LD->getAAInfo());
9365
9366 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
9367
9368 Vals.push_back(ScalarLoad.getValue(0));
9369 LoadChains.push_back(ScalarLoad.getValue(1));
9370 }
9371
9372 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
9373 SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
9374
9375 return std::make_pair(Value, NewChain);
9376 }
9377
9378 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
9379 SelectionDAG &DAG) const {
9380 SDLoc SL(ST);
9381
9382 SDValue Chain = ST->getChain();
9383 SDValue BasePtr = ST->getBasePtr();
9384 SDValue Value = ST->getValue();
9385 EVT StVT = ST->getMemoryVT();
9386
9387 if (StVT.isScalableVector())
9388 report_fatal_error("Cannot scalarize scalable vector stores");
9389
9390 // The type of the data we want to save
9391 EVT RegVT = Value.getValueType();
9392 EVT RegSclVT = RegVT.getScalarType();
9393
9394 // The type of data as saved in memory.
9395 EVT MemSclVT = StVT.getScalarType();
9396
9397 unsigned NumElem = StVT.getVectorNumElements();
9398
9399 // A vector must always be stored in memory as-is, i.e. without any padding
9400 // between the elements, since various code depends on it, e.g. in the
9401 // handling of a bitcast of a vector type to int, which may be done with a
9402 // vector store followed by an integer load. A vector that does not have
9403 // elements that are byte-sized must therefore be stored as an integer
9404 // built out of the extracted vector elements.
9405 if (!MemSclVT.isByteSized()) {
9406 unsigned NumBits = StVT.getSizeInBits();
9407 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
9408
9409 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
9410
9411 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9412 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9413 DAG.getVectorIdxConstant(Idx, SL));
9414 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
9415 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
9416 unsigned ShiftIntoIdx =
9417 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
9418 SDValue ShiftAmount =
9419 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
9420 SDValue ShiftedElt =
9421 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
9422 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
9423 }
9424
9425 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
9426 ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9427 ST->getAAInfo());
9428 }
9429
9430 // Store Stride in bytes
9431 unsigned Stride = MemSclVT.getSizeInBits() / 8;
9432 assert(Stride && "Zero stride!");
9433 // Extract each of the elements from the original vector and save them into
9434 // memory individually.
9435 SmallVector<SDValue, 8> Stores;
9436 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
9437 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
9438 DAG.getVectorIdxConstant(Idx, SL));
9439
9440 SDValue Ptr =
9441 DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
9442
9443 // This scalar TruncStore may be illegal, but we legalize it later.
9444 SDValue Store = DAG.getTruncStore(
9445 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
9446 MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
9447 ST->getAAInfo());
9448
9449 Stores.push_back(Store);
9450 }
9451
9452 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
9453 }
9454
9455 std::pair<SDValue, SDValue>
9456 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
9457 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
9458 "unaligned indexed loads not implemented!");
9459 SDValue Chain = LD->getChain();
9460 SDValue Ptr = LD->getBasePtr();
9461 EVT VT = LD->getValueType(0);
9462 EVT LoadedVT = LD->getMemoryVT();
9463 SDLoc dl(LD);
9464 auto &MF = DAG.getMachineFunction();
9465
9466 if (VT.isFloatingPoint() || VT.isVector()) {
9467 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
9468 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
9469 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
9470 LoadedVT.isVector()) {
9471 // Scalarize the load and let the individual components be handled.
9472 return scalarizeVectorLoad(LD, DAG);
9473 }
9474
9475 // Expand to a (misaligned) integer load of the same size,
9476 // then bitconvert to floating point or vector.
9477 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
9478 LD->getMemOperand());
9479 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
9480 if (LoadedVT != VT)
9481 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
9482 ISD::ANY_EXTEND, dl, VT, Result);
9483
9484 return std::make_pair(Result, newLoad.getValue(1));
9485 }
9486
9487 // Copy the value to an (aligned) stack slot using (unaligned) integer
9488 // loads and stores, then do a (aligned) load from the stack slot.
9489 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
9490 unsigned LoadedBytes = LoadedVT.getStoreSize();
9491 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9492 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
9493
9494 // Make sure the stack slot is also aligned for the register type.
9495 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
9496 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
9497 SmallVector<SDValue, 8> Stores;
9498 SDValue StackPtr = StackBase;
9499 unsigned Offset = 0;
9500
9501 EVT PtrVT = Ptr.getValueType();
9502 EVT StackPtrVT = StackPtr.getValueType();
9503
9504 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9505 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9506
9507 // Do all but one of the copies using the full register width.
9508 for (unsigned i = 1; i < NumRegs; i++) {
9509 // Load one integer register's worth from the original location.
9510 SDValue Load = DAG.getLoad(
9511 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
9512 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9513 LD->getAAInfo());
9514 // Follow the load with a store to the stack slot. Remember the store.
9515 Stores.push_back(DAG.getStore(
9516 Load.getValue(1), dl, Load, StackPtr,
9517 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
9518 // Increment the pointers.
9519 Offset += RegBytes;
9520
9521 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9522 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9523 }
9524
9525 // The last copy may be partial. Do an extending load.
9526 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
9527 8 * (LoadedBytes - Offset));
9528 SDValue Load =
9529 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
9530 LD->getPointerInfo().getWithOffset(Offset), MemVT,
9531 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
9532 LD->getAAInfo());
9533 // Follow the load with a store to the stack slot. Remember the store.
9534 // On big-endian machines this requires a truncating store to ensure
9535 // that the bits end up in the right place.
9536 Stores.push_back(DAG.getTruncStore(
9537 Load.getValue(1), dl, Load, StackPtr,
9538 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
9539
9540 // The order of the stores doesn't matter - say it with a TokenFactor.
9541 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9542
9543 // Finally, perform the original load only redirected to the stack slot.
9544 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
9545 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
9546 LoadedVT);
9547
9548 // Callers expect a MERGE_VALUES node.
9549 return std::make_pair(Load, TF);
9550 }
9551
9552 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
9553 "Unaligned load of unsupported type.");
9554
9555 // Compute the new VT that is half the size of the old one. This is an
9556 // integer MVT.
9557 unsigned NumBits = LoadedVT.getSizeInBits();
9558 EVT NewLoadedVT;
9559 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
9560 NumBits >>= 1;
9561
9562 Align Alignment = LD->getOriginalAlign();
9563 unsigned IncrementSize = NumBits / 8;
9564 ISD::LoadExtType HiExtType = LD->getExtensionType();
9565
9566 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
9567 if (HiExtType == ISD::NON_EXTLOAD)
9568 HiExtType = ISD::ZEXTLOAD;
9569
9570 // Load the value in two parts
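// e.g. on a little-endian target an unaligned i32 load becomes a zext i16
// load of the low half at Ptr and an i16 load of the high half at Ptr + 2;
// the result is reassembled below as (Hi << 16) | Lo.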
9571 SDValue Lo, Hi;
9572 if (DAG.getDataLayout().isLittleEndian()) {
9573 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9574 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9575 LD->getAAInfo());
9576
9577 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9578 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
9579 LD->getPointerInfo().getWithOffset(IncrementSize),
9580 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9581 LD->getAAInfo());
9582 } else {
9583 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
9584 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9585 LD->getAAInfo());
9586
9587 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9588 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
9589 LD->getPointerInfo().getWithOffset(IncrementSize),
9590 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
9591 LD->getAAInfo());
9592 }
9593
9594 // Aggregate the two parts.
9595 SDValue ShiftAmount =
9596 DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
9597 DAG.getDataLayout()));
9598 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
9599 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
9600
9601 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
9602 Hi.getValue(1));
9603
9604 return std::make_pair(Result, TF);
9605 }
9606
9607 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
9608 SelectionDAG &DAG) const {
9609 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
9610 "unaligned indexed stores not implemented!");
9611 SDValue Chain = ST->getChain();
9612 SDValue Ptr = ST->getBasePtr();
9613 SDValue Val = ST->getValue();
9614 EVT VT = Val.getValueType();
9615 Align Alignment = ST->getOriginalAlign();
9616 auto &MF = DAG.getMachineFunction();
9617 EVT StoreMemVT = ST->getMemoryVT();
9618
9619 SDLoc dl(ST);
9620 if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
9621 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
9622 if (isTypeLegal(intVT)) {
9623 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
9624 StoreMemVT.isVector()) {
9625 // Scalarize the store and let the individual components be handled.
9626 SDValue Result = scalarizeVectorStore(ST, DAG);
9627 return Result;
9628 }
9629 // Expand to a bitconvert of the value to the integer type of the
9630 // same size, then a (misaligned) int store.
9631 // FIXME: Does not handle truncating floating point stores!
9632 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
9633 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
9634 Alignment, ST->getMemOperand()->getFlags());
9635 return Result;
9636 }
9637 // Do an (aligned) store to a stack slot, then copy from the stack slot
9638 // to the final destination using (unaligned) integer loads and stores.
9639 MVT RegVT = getRegisterType(
9640 *DAG.getContext(),
9641 EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
9642 EVT PtrVT = Ptr.getValueType();
9643 unsigned StoredBytes = StoreMemVT.getStoreSize();
9644 unsigned RegBytes = RegVT.getSizeInBits() / 8;
9645 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
9646
9647 // Make sure the stack slot is also aligned for the register type.
9648 SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
9649 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
9650
9651 // Perform the original store, only redirected to the stack slot.
9652 SDValue Store = DAG.getTruncStore(
9653 Chain, dl, Val, StackPtr,
9654 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
9655
9656 EVT StackPtrVT = StackPtr.getValueType();
9657
9658 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
9659 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
9660 SmallVector<SDValue, 8> Stores;
9661 unsigned Offset = 0;
9662
9663 // Do all but one of the copies using the full register width.
9664 for (unsigned i = 1; i < NumRegs; i++) {
9665 // Load one integer register's worth from the stack slot.
9666 SDValue Load = DAG.getLoad(
9667 RegVT, dl, Store, StackPtr,
9668 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
9669 // Store it to the final location. Remember the store.
9670 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
9671 ST->getPointerInfo().getWithOffset(Offset),
9672 ST->getOriginalAlign(),
9673 ST->getMemOperand()->getFlags()));
9674 // Increment the pointers.
9675 Offset += RegBytes;
9676 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
9677 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
9678 }
9679
9680 // The last store may be partial. Do a truncating store. On big-endian
9681 // machines this requires an extending load from the stack slot to ensure
9682 // that the bits are in the right place.
9683 EVT LoadMemVT =
9684 EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
9685
9686 // Load from the stack slot.
9687 SDValue Load = DAG.getExtLoad(
9688 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
9689 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
9690
9691 Stores.push_back(
9692 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
9693 ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
9694 ST->getOriginalAlign(),
9695 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
9696 // The order of the stores doesn't matter - say it with a TokenFactor.
9697 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
9698 return Result;
9699 }
9700
9701 assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
9702 "Unaligned store of unknown type.");
9703 // Get the half-size VT
9704 EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
9705 unsigned NumBits = NewStoredVT.getFixedSizeInBits();
9706 unsigned IncrementSize = NumBits / 8;
9707
9708 // Divide the stored value in two parts.
9709 SDValue ShiftAmount = DAG.getConstant(
9710 NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
9711 SDValue Lo = Val;
9712 // If Val is a constant, replace the upper bits with 0. The SRL will constant
9713 // fold and not use the upper bits. A smaller constant may be easier to
9714 // materialize.
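// e.g. storing the i32 constant 0x12345678 as two i16 halves: Hi becomes
// 0x1234 via the SRL below, and masking Lo down to 0x5678 avoids
// materializing the full 32-bit constant for the low half.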
9715 if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
9716 Lo = DAG.getNode(
9717 ISD::AND, dl, VT, Lo,
9718 DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
9719 VT));
9720 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
9721
9722 // Store the two parts
9723 SDValue Store1, Store2;
9724 Store1 = DAG.getTruncStore(Chain, dl,
9725 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
9726 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
9727 ST->getMemOperand()->getFlags());
9728
9729 Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
9730 Store2 = DAG.getTruncStore(
9731 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
9732 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
9733 ST->getMemOperand()->getFlags(), ST->getAAInfo());
9734
9735 SDValue Result =
9736 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
9737 return Result;
9738 }
9739
9740 SDValue
9741 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
9742 const SDLoc &DL, EVT DataVT,
9743 SelectionDAG &DAG,
9744 bool IsCompressedMemory) const {
9745 SDValue Increment;
9746 EVT AddrVT = Addr.getValueType();
9747 EVT MaskVT = Mask.getValueType();
9748 assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
9749 "Incompatible types of Data and Mask");
9750 if (IsCompressedMemory) {
9751 if (DataVT.isScalableVector())
9752 report_fatal_error(
9753 "Cannot currently handle compressed memory with scalable vectors");
9754 // Increment the pointer according to the number of '1's in the mask.
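// e.g. a compressed store of i32 elements with mask 0b00001011 writes
// popcount = 3 active elements contiguously, so the address advances by
// 3 * 4 = 12 bytes.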
9755 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
9756 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
9757 if (MaskIntVT.getSizeInBits() < 32) {
9758 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
9759 MaskIntVT = MVT::i32;
9760 }
9761
9762 // Count '1's with POPCNT.
9763 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
9764 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
9765 // Scale is an element size in bytes.
9766 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
9767 AddrVT);
9768 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
9769 } else if (DataVT.isScalableVector()) {
9770 Increment = DAG.getVScale(DL, AddrVT,
9771 APInt(AddrVT.getFixedSizeInBits(),
9772 DataVT.getStoreSize().getKnownMinValue()));
9773 } else
9774 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
9775
9776 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
9777 }
9778
9779 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
9780 EVT VecVT, const SDLoc &dl,
9781 ElementCount SubEC) {
9782 assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
9783 "Cannot index a scalable vector within a fixed-width vector");
9784
9785 unsigned NElts = VecVT.getVectorMinNumElements();
9786 unsigned NumSubElts = SubEC.getKnownMinValue();
9787 EVT IdxVT = Idx.getValueType();
9788
9789 if (VecVT.isScalableVector() && !SubEC.isScalable()) {
9790 // If this is a constant index and we know that the index plus the
9791 // number of elements in the subvector minus one is less than the minimum
9792 // number of elements, then it's safe to return Idx.
9793 if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
9794 if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
9795 return Idx;
9796 SDValue VS =
9797 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
9798 unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
9799 SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
9800 DAG.getConstant(NumSubElts, dl, IdxVT));
9801 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
9802 }
9803 if (isPowerOf2_32(NElts) && NumSubElts == 1) {
9804 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
9805 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
9806 DAG.getConstant(Imm, dl, IdxVT));
9807 }
9808 unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
9809 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
9810 DAG.getConstant(MaxIndex, dl, IdxVT));
9811 }
9812
9813 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
9814 SDValue VecPtr, EVT VecVT,
9815 SDValue Index) const {
9816 return getVectorSubVecPointer(
9817 DAG, VecPtr, VecVT,
9818 EVT::getVectorVT(*DAG.getContext(), VecVT.getVectorElementType(), 1),
9819 Index);
9820 }
9821
9822 SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
9823 SDValue VecPtr, EVT VecVT,
9824 EVT SubVecVT,
9825 SDValue Index) const {
9826 SDLoc dl(Index);
9827 // Make sure the index type is big enough to compute in.
9828 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
9829
9830 EVT EltVT = VecVT.getVectorElementType();
9831
9832 // Calculate the element offset and add it to the pointer.
9833 unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
9834 assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
9835 "Converting bits to bytes lost precision");
9836 assert(SubVecVT.getVectorElementType() == EltVT &&
9837 "Sub-vector must be a vector with matching element type");
9838 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
9839 SubVecVT.getVectorElementCount());
9840
9841 EVT IdxVT = Index.getValueType();
9842 if (SubVecVT.isScalableVector())
9843 Index =
9844 DAG.getNode(ISD::MUL, dl, IdxVT, Index,
9845 DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
9846
9847 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
9848 DAG.getConstant(EltSize, dl, IdxVT));
9849 return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
9850 }
9851
9852 //===----------------------------------------------------------------------===//
9853 // Implementation of Emulated TLS Model
9854 //===----------------------------------------------------------------------===//
9855
9856 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
9857 SelectionDAG &DAG) const {
9858 // Access to the address of TLS variable xyz is lowered to a function call:
9859 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
9860 EVT PtrVT = getPointerTy(DAG.getDataLayout());
9861 PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
9862 SDLoc dl(GA);
9863
9864 ArgListTy Args;
9865 ArgListEntry Entry;
9866 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
9867 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
9868 StringRef EmuTlsVarName(NameString);
9869 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
9870 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
9871 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
9872 Entry.Ty = VoidPtrType;
9873 Args.push_back(Entry);
9874
9875 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
9876
9877 TargetLowering::CallLoweringInfo CLI(DAG);
9878 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
9879 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
9880 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9881
9882 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
9883 // At least for X86 targets; maybe good for other targets too?
9884 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9885 MFI.setAdjustsStack(true); // Is this only for X86 target?
9886 MFI.setHasCalls(true);
9887
9888 assert((GA->getOffset() == 0) &&
9889 "Emulated TLS must have zero offset in GlobalAddressSDNode");
9890 return CallResult.first;
9891 }
9892
9893 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
9894 SelectionDAG &DAG) const {
9895 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
9896 if (!isCtlzFast())
9897 return SDValue();
9898 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
9899 SDLoc dl(Op);
9900 if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
9901 EVT VT = Op.getOperand(0).getValueType();
9902 SDValue Zext = Op.getOperand(0);
9903 if (VT.bitsLT(MVT::i32)) {
9904 VT = MVT::i32;
9905 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
9906 }
9907 unsigned Log2b = Log2_32(VT.getSizeInBits());
9908 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
9909 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
9910 DAG.getConstant(Log2b, dl, MVT::i32));
9911 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
9912 }
9913 return SDValue();
9914 }
9915
9916 SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
9917 SDValue Op0 = Node->getOperand(0);
9918 SDValue Op1 = Node->getOperand(1);
9919 EVT VT = Op0.getValueType();
9920 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
9921 unsigned Opcode = Node->getOpcode();
9922 SDLoc DL(Node);
9923
9924 // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
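// For example, with i8 and all-bits-set booleans: umax(0, 1) becomes
// 0 - cmpeq(0, 0) = 0 - (-1) = 1, while for any x != 0 the compare yields 0
// and x is returned unchanged.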
9925 if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
9926 getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
9927 Op0 = DAG.getFreeze(Op0);
9928 SDValue Zero = DAG.getConstant(0, DL, VT);
9929 return DAG.getNode(ISD::SUB, DL, VT, Op0,
9930 DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
9931 }
9932
9933 // umin(x,y) -> sub(x,usubsat(x,y))
9934 // TODO: Missing freeze(Op0)?
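// For example, umin(5, 3) -> 5 - usubsat(5, 3) = 5 - 2 = 3, and
// umin(3, 5) -> 3 - usubsat(3, 5) = 3 - 0 = 3.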
9935 if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
9936 isOperationLegal(ISD::USUBSAT, VT)) {
9937 return DAG.getNode(ISD::SUB, DL, VT, Op0,
9938 DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
9939 }
9940
9941 // umax(x,y) -> add(x,usubsat(y,x))
9942 // TODO: Missing freeze(Op0)?
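// For example, umax(5, 3) -> 5 + usubsat(3, 5) = 5 + 0 = 5, and
// umax(3, 5) -> 3 + usubsat(5, 3) = 3 + 2 = 5.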
9943 if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
9944 isOperationLegal(ISD::USUBSAT, VT)) {
9945 return DAG.getNode(ISD::ADD, DL, VT, Op0,
9946 DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
9947 }
9948
9949 // FIXME: Should really try to split the vector in case it's legal on a
9950 // subvector.
9951 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9952 return DAG.UnrollVectorOp(Node);
9953
9954 // Attempt to find an existing SETCC node that we can reuse.
9955 // TODO: Do we need a generic doesSETCCNodeExist?
9956 // TODO: Missing freeze(Op0)/freeze(Op1)?
9957 auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
9958 ISD::CondCode PrefCommuteCC,
9959 ISD::CondCode AltCommuteCC) {
9960 SDVTList BoolVTList = DAG.getVTList(BoolVT);
9961 for (ISD::CondCode CC : {PrefCC, AltCC}) {
9962 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
9963 {Op0, Op1, DAG.getCondCode(CC)})) {
9964 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
9965 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
9966 }
9967 }
9968 for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
9969 if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
9970 {Op0, Op1, DAG.getCondCode(CC)})) {
9971 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
9972 return DAG.getSelect(DL, VT, Cond, Op1, Op0);
9973 }
9974 }
9975 SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
9976 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
9977 };
9978
9979 // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
9980 // -> Y = (A < B) ? B : A
9981 // -> Y = (A >= B) ? A : B
9982 // -> Y = (A <= B) ? B : A
9983 switch (Opcode) {
9984 case ISD::SMAX:
9985 return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
9986 case ISD::SMIN:
9987 return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
9988 case ISD::UMAX:
9989 return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
9990 case ISD::UMIN:
9991 return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
9992 }
9993
9994 llvm_unreachable("How did we get here?");
9995 }
9996
9997 SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
9998 unsigned Opcode = Node->getOpcode();
9999 SDValue LHS = Node->getOperand(0);
10000 SDValue RHS = Node->getOperand(1);
10001 EVT VT = LHS.getValueType();
10002 SDLoc dl(Node);
10003
10004 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10005 assert(VT.isInteger() && "Expected operands to be integers");
10006
10007 // usub.sat(a, b) -> umax(a, b) - b
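// For example, with i8: usub.sat(5, 7) -> umax(5, 7) - 7 = 7 - 7 = 0.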
10008 if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
10009 SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
10010 return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
10011 }
10012
10013 // uadd.sat(a, b) -> umin(a, ~b) + b
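// For example, with i8: uadd.sat(200, 100) -> umin(200, ~100) + 100
// = umin(200, 155) + 100 = 255, the unsigned maximum.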
10014 if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
10015 SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
10016 SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
10017 return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
10018 }
10019
10020 unsigned OverflowOp;
10021 switch (Opcode) {
10022 case ISD::SADDSAT:
10023 OverflowOp = ISD::SADDO;
10024 break;
10025 case ISD::UADDSAT:
10026 OverflowOp = ISD::UADDO;
10027 break;
10028 case ISD::SSUBSAT:
10029 OverflowOp = ISD::SSUBO;
10030 break;
10031 case ISD::USUBSAT:
10032 OverflowOp = ISD::USUBO;
10033 break;
10034 default:
10035 llvm_unreachable("Expected method to receive signed or unsigned saturation "
10036 "addition or subtraction node.");
10037 }
10038
10039 // FIXME: Should really try to split the vector in case it's legal on a
10040 // subvector.
10041 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10042 return DAG.UnrollVectorOp(Node);
10043
10044 unsigned BitWidth = LHS.getScalarValueSizeInBits();
10045 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10046 SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10047 SDValue SumDiff = Result.getValue(0);
10048 SDValue Overflow = Result.getValue(1);
10049 SDValue Zero = DAG.getConstant(0, dl, VT);
10050 SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
10051
10052 if (Opcode == ISD::UADDSAT) {
10053 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10054 // (LHS + RHS) | OverflowMask
10055 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10056 return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
10057 }
10058 // Overflow ? 0xffff.... : (LHS + RHS)
10059 return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
10060 }
10061
10062 if (Opcode == ISD::USUBSAT) {
10063 if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
10064 // (LHS - RHS) & ~OverflowMask
10065 SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
10066 SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
10067 return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
10068 }
10069 // Overflow ? 0 : (LHS - RHS)
10070 return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
10071 }
10072
10073 if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
10074 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10075 APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
10076
10077 KnownBits KnownLHS = DAG.computeKnownBits(LHS);
10078 KnownBits KnownRHS = DAG.computeKnownBits(RHS);
10079
10080 // If either of the operand signs are known, then they are guaranteed to
10081 // only saturate in one direction. If non-negative they will saturate
10082 // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
10083 //
10084 // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
10085 // sign of 'y' has to be flipped.
10086
10087 bool LHSIsNonNegative = KnownLHS.isNonNegative();
10088 bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
10089 : KnownRHS.isNegative();
10090 if (LHSIsNonNegative || RHSIsNonNegative) {
10091 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10092 return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
10093 }
10094
10095 bool LHSIsNegative = KnownLHS.isNegative();
10096 bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
10097 : KnownRHS.isNonNegative();
10098 if (LHSIsNegative || RHSIsNegative) {
10099 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10100 return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
10101 }
10102 }
10103
10104 // Overflow ? (SumDiff >> (BW - 1)) ^ MinVal : SumDiff
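// The arithmetic shift yields 0 for a non-negative SumDiff and -1 otherwise,
// so the XOR produces SatMax or SatMin respectively. For example, with i8:
// saddsat(100, 100) overflows with SumDiff = -56, and
// (-56 >> 7) ^ 0x80 = 0xFF ^ 0x80 = 0x7F = 127.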
10105 APInt MinVal = APInt::getSignedMinValue(BitWidth);
10106 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10107 SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
10108 DAG.getConstant(BitWidth - 1, dl, VT));
10109 Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
10110 return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
10111 }
10112
10113 SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
10114 unsigned Opcode = Node->getOpcode();
10115 bool IsSigned = Opcode == ISD::SSHLSAT;
10116 SDValue LHS = Node->getOperand(0);
10117 SDValue RHS = Node->getOperand(1);
10118 EVT VT = LHS.getValueType();
10119 SDLoc dl(Node);
10120
10121 assert((Node->getOpcode() == ISD::SSHLSAT ||
10122 Node->getOpcode() == ISD::USHLSAT) &&
10123 "Expected a SHLSAT opcode");
10124 assert(VT == RHS.getValueType() && "Expected operands to be the same type");
10125 assert(VT.isInteger() && "Expected operands to be integers");
10126
10127 if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
10128 return DAG.UnrollVectorOp(Node);
10129
10130 // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.
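// For example, with i8: ushlsat(0x40, 2) computes 0x40 << 2 = 0x00; shifting
// back gives 0x00 != 0x40, so the result saturates to 0xFF.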
10131
10132 unsigned BW = VT.getScalarSizeInBits();
10133 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10134 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
10135 SDValue Orig =
10136 DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);
10137
10138 SDValue SatVal;
10139 if (IsSigned) {
10140 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
10141 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
10142 SDValue Cond =
10143 DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT);
10144 SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax);
10145 } else {
10146 SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
10147 }
10148 SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE);
10149 return DAG.getSelect(dl, VT, Cond, SatVal, Result);
10150 }
10151
10152 SDValue
10153 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
10154 assert((Node->getOpcode() == ISD::SMULFIX ||
10155 Node->getOpcode() == ISD::UMULFIX ||
10156 Node->getOpcode() == ISD::SMULFIXSAT ||
10157 Node->getOpcode() == ISD::UMULFIXSAT) &&
10158 "Expected a fixed point multiplication opcode");
10159
10160 SDLoc dl(Node);
10161 SDValue LHS = Node->getOperand(0);
10162 SDValue RHS = Node->getOperand(1);
10163 EVT VT = LHS.getValueType();
10164 unsigned Scale = Node->getConstantOperandVal(2);
10165 bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
10166 Node->getOpcode() == ISD::UMULFIXSAT);
10167 bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
10168 Node->getOpcode() == ISD::SMULFIXSAT);
10169 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10170 unsigned VTSize = VT.getScalarSizeInBits();
10171
10172 if (!Scale) {
10173 // [us]mul.fix(a, b, 0) -> mul(a, b)
10174 if (!Saturating) {
10175 if (isOperationLegalOrCustom(ISD::MUL, VT))
10176 return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10177 } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
10178 SDValue Result =
10179 DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10180 SDValue Product = Result.getValue(0);
10181 SDValue Overflow = Result.getValue(1);
10182 SDValue Zero = DAG.getConstant(0, dl, VT);
10183
10184 APInt MinVal = APInt::getSignedMinValue(VTSize);
10185 APInt MaxVal = APInt::getSignedMaxValue(VTSize);
10186 SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
10187 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10188 // Xor the inputs, if resulting sign bit is 0 the product will be
10189 // positive, else negative.
10190 SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
10191 SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT);
10192 Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax);
10193 return DAG.getSelect(dl, VT, Overflow, Result, Product);
10194 } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
10195 SDValue Result =
10196 DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
10197 SDValue Product = Result.getValue(0);
10198 SDValue Overflow = Result.getValue(1);
10199
10200 APInt MaxVal = APInt::getMaxValue(VTSize);
10201 SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
10202 return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
10203 }
10204 }
10205
10206 assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
10207 "Expected scale to be less than the number of bits if signed or at "
10208 "most the number of bits if unsigned.");
10209 assert(LHS.getValueType() == RHS.getValueType() &&
10210 "Expected both operands to be the same type");
10211
10212 // Get the upper and lower bits of the result.
10213 SDValue Lo, Hi;
10214 unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10215 unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
10216 if (isOperationLegalOrCustom(LoHiOp, VT)) {
10217 SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
10218 Lo = Result.getValue(0);
10219 Hi = Result.getValue(1);
10220 } else if (isOperationLegalOrCustom(HiOp, VT)) {
10221 Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10222 Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
10223 } else if (VT.isVector()) {
10224 return SDValue();
10225 } else {
10226 report_fatal_error("Unable to expand fixed point multiplication.");
10227 }
10228
10229 if (Scale == VTSize)
10230 // Result is just the top half since we'd be shifting by the width of the
10231 // operand. Overflow is impossible, so this works for both UMULFIX and
10232 // UMULFIXSAT.
10233 return Hi;
10234
10235 // The result will need to be shifted right by the scale since both operands
10236 // are scaled. The result is given to us in 2 halves, so we only want part of
10237 // both in the result.
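// For example, an i8 umul.fix with Scale = 4 (Q4.4) computing 1.5 * 2.5 is
// 0x18 * 0x28 = 0x03C0; fshr(Hi = 0x03, Lo = 0xC0, 4) = 0x3C, i.e. 3.75.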
10238 EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
10239 SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
10240 DAG.getConstant(Scale, dl, ShiftTy));
10241 if (!Saturating)
10242 return Result;
10243
10244 if (!Signed) {
10245 // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
10246 // widened multiplication) aren't all zeroes.
10247
10248 // Saturate to max if ((Hi >> Scale) != 0),
10249 // which is the same as if (Hi > ((1 << Scale) - 1))
10250 APInt MaxVal = APInt::getMaxValue(VTSize);
10251 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
10252 dl, VT);
10253 Result = DAG.getSelectCC(dl, Hi, LowMask,
10254 DAG.getConstant(MaxVal, dl, VT), Result,
10255 ISD::SETUGT);
10256
10257 return Result;
10258 }
10259
10260 // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
10261 // widened multiplication) aren't all ones or all zeroes.
10262
10263 SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
10264 SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
10265
10266 if (Scale == 0) {
10267 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
10268 DAG.getConstant(VTSize - 1, dl, ShiftTy));
10269 SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
10270 // Saturate to SatMin if the wide product is negative, and to SatMax if the
10271 // wide product is positive ...
10272 SDValue Zero = DAG.getConstant(0, dl, VT);
10273 SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
10274 ISD::SETLT);
10275 // ... but only if we overflowed.
10276 return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
10277 }
10278
10279 // We handled Scale == 0 above, so all the bits to examine are in Hi.
10280
10281 // Saturate to max if ((Hi >> (Scale - 1)) > 0),
10282 // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
10283 SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
10284 dl, VT);
10285 Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
10286 // Saturate to min if ((Hi >> (Scale - 1)) < -1),
10287 // which is the same as if (Hi < (-1 << (Scale - 1)))
10288 SDValue HighMask =
10289 DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
10290 dl, VT);
10291 Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
10292 return Result;
10293 }
10294
10295 SDValue
10296 TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
10297 SDValue LHS, SDValue RHS,
10298 unsigned Scale, SelectionDAG &DAG) const {
10299 assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
10300 Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
10301 "Expected a fixed point division opcode");
10302
10303 EVT VT = LHS.getValueType();
10304 bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
10305 bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
10306 EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10307
10308 // If there is enough room in the type to upscale the LHS or downscale the
10309 // RHS before the division, we can perform it in this type without having to
10310 // resize. For signed operations, the LHS headroom is the number of
10311 // redundant sign bits, and for unsigned ones it is the number of zeroes.
10312 // The headroom for the RHS is the number of trailing zeroes.
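// For example, an i8 udiv.fix with Scale = 3 where LHS = 0b00010100 (three
// leading zeroes) and RHS = 0b00001000 (three trailing zeroes) can absorb
// the whole scale by shifting LHS up by 3, leaving RHSShift = 0.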
10313 unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
10314 : DAG.computeKnownBits(LHS).countMinLeadingZeros();
10315 unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
10316
10317 // For signed saturating operations, we need to be able to detect true integer
10318 // division overflow; that is, when you have MIN / -EPS. However, this
10319 // is undefined behavior and if we emit divisions that could take such
10320 // values it may cause undesired behavior (arithmetic exceptions on x86, for
10321 // example).
10322 // Avoid this by requiring an extra bit so that we never get this case.
10323 // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
10324 // signed saturating division, we need to emit a whopping 32-bit division.
10325 if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
10326 return SDValue();
10327
10328 unsigned LHSShift = std::min(LHSLead, Scale);
10329 unsigned RHSShift = Scale - LHSShift;
10330
10331 // At this point, we know that if we shift the LHS up by LHSShift and the
10332 // RHS down by RHSShift, we can emit a regular division with a final scaling
10333 // factor of Scale.
10334
10335 EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
10336 if (LHSShift)
10337 LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
10338 DAG.getConstant(LHSShift, dl, ShiftTy));
10339 if (RHSShift)
10340 RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
10341 DAG.getConstant(RHSShift, dl, ShiftTy));
10342
10343 SDValue Quot;
10344 if (Signed) {
10345 // For signed operations, if the resulting quotient is negative and the
10346 // remainder is nonzero, subtract 1 from the quotient to round towards
10347 // negative infinity.
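// For example, -7 / 2 yields Quot = -3 and Rem = -1; the remainder is
// nonzero and the quotient is negative, so the result is rounded down to -4.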
10348 SDValue Rem;
10349 // FIXME: Ideally we would always produce an SDIVREM here, but if the
10350 // type isn't legal, SDIVREM cannot be expanded. There is no reason why
10351 // we couldn't just form a libcall, but the type legalizer doesn't do it.
10352 if (isTypeLegal(VT) &&
10353 isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
10354 Quot = DAG.getNode(ISD::SDIVREM, dl,
10355 DAG.getVTList(VT, VT),
10356 LHS, RHS);
10357 Rem = Quot.getValue(1);
10358 Quot = Quot.getValue(0);
10359 } else {
10360 Quot = DAG.getNode(ISD::SDIV, dl, VT,
10361 LHS, RHS);
10362 Rem = DAG.getNode(ISD::SREM, dl, VT,
10363 LHS, RHS);
10364 }
10365 SDValue Zero = DAG.getConstant(0, dl, VT);
10366 SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
10367 SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
10368 SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
10369 SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
10370 SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
10371 DAG.getConstant(1, dl, VT));
10372 Quot = DAG.getSelect(dl, VT,
10373 DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
10374 Sub1, Quot);
10375 } else
10376 Quot = DAG.getNode(ISD::UDIV, dl, VT,
10377 LHS, RHS);
10378
10379 return Quot;
10380 }
10381
10382 void TargetLowering::expandUADDSUBO(
10383 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10384 SDLoc dl(Node);
10385 SDValue LHS = Node->getOperand(0);
10386 SDValue RHS = Node->getOperand(1);
10387 bool IsAdd = Node->getOpcode() == ISD::UADDO;
10388
10389 // If UADDO_CARRY/SUBO_CARRY is legal, use that instead.
10390 unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
10391 if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
10392 SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
10393 SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
10394 { LHS, RHS, CarryIn });
10395 Result = SDValue(NodeCarry.getNode(), 0);
10396 Overflow = SDValue(NodeCarry.getNode(), 1);
10397 return;
10398 }
10399
10400 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10401 LHS.getValueType(), LHS, RHS);
10402
10403 EVT ResultType = Node->getValueType(1);
10404 EVT SetCCType = getSetCCResultType(
10405 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10406 SDValue SetCC;
10407 if (IsAdd && isOneConstant(RHS)) {
10408 // Special case: uaddo X, 1 overflows if X+1 is 0. This potentially reduces
10409 // the live range of X. We assume comparing with 0 is cheap.
10410 // The general case (X + C) < C is not necessarily beneficial. Although we
10411 // reduce the live range of X, we may introduce the materialization of
10412 // constant C.
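// For example, with i8: uaddo(255, 1) produces 0, and Result == 0 signals
// the overflow.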
10413 SetCC =
10414 DAG.getSetCC(dl, SetCCType, Result,
10415 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
10416 } else if (IsAdd && isAllOnesConstant(RHS)) {
10417 // Special case: uaddo X, -1 overflows if X != 0.
10418 SetCC =
10419 DAG.getSetCC(dl, SetCCType, LHS,
10420 DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
10421 } else {
10422 ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
10423 SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
10424 }
10425 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10426 }
10427
10428 void TargetLowering::expandSADDSUBO(
10429 SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
10430 SDLoc dl(Node);
10431 SDValue LHS = Node->getOperand(0);
10432 SDValue RHS = Node->getOperand(1);
10433 bool IsAdd = Node->getOpcode() == ISD::SADDO;
10434
10435 Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
10436 LHS.getValueType(), LHS, RHS);
10437
10438 EVT ResultType = Node->getValueType(1);
10439 EVT OType = getSetCCResultType(
10440 DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
10441
10442 // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
10443 unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
10444 if (isOperationLegal(OpcSat, LHS.getValueType())) {
10445 SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
10446 SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
10447 Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
10448 return;
10449 }
10450
10451 SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
10452
10453 // For an addition, the result should be less than one of the operands (LHS)
10454 // if and only if the other operand (RHS) is negative, otherwise there will
10455 // be overflow.
10456 // For a subtraction, the result should be less than one of the operands
10457 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
10458 // otherwise there will be overflow.
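// For example, with i8: saddo(100, 50) gives Result = -106; Result < LHS
// holds while RHS < 0 does not, so the XOR below reports overflow.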
10459 SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
10460 SDValue ConditionRHS =
10461 DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
10462
10463 Overflow = DAG.getBoolExtOrTrunc(
10464 DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
10465 ResultType, ResultType);
10466 }
10467
10468 bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
10469 SDValue &Overflow, SelectionDAG &DAG) const {
10470 SDLoc dl(Node);
10471 EVT VT = Node->getValueType(0);
10472 EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
10473 SDValue LHS = Node->getOperand(0);
10474 SDValue RHS = Node->getOperand(1);
10475 bool isSigned = Node->getOpcode() == ISD::SMULO;
10476
10477 // For power-of-two multiplications we can use a simpler shift expansion.
10478 if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
10479 const APInt &C = RHSC->getAPIntValue();
10480 // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
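// For example, with i8: umulo(x, 8) becomes { x << 3, (x << 3) >> 3 != x }.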
10481 if (C.isPowerOf2()) {
10482 // smulo(x, signed_min) is the same as umulo(x, signed_min).
10483 bool UseArithShift = isSigned && !C.isMinSignedValue();
10484 EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
10485 SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
10486 Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
10487 Overflow = DAG.getSetCC(dl, SetCCVT,
10488 DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
10489 dl, VT, Result, ShiftAmt),
10490 LHS, ISD::SETNE);
10491 return true;
10492 }
10493 }
10494
10495 EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
10496 if (VT.isVector())
10497 WideVT =
10498 EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
10499
10500 SDValue BottomHalf;
10501 SDValue TopHalf;
10502 static const unsigned Ops[2][3] =
10503 { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
10504 { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
10505 if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
10506 BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
10507 TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
10508 } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
10509 BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
10510 RHS);
10511 TopHalf = BottomHalf.getValue(1);
10512 } else if (isTypeLegal(WideVT)) {
10513 LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
10514 RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
10515 SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
10516 BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
10517 SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
10518 getShiftAmountTy(WideVT, DAG.getDataLayout()));
10519 TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
10520 DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
10521 } else {
10522 if (VT.isVector())
10523 return false;
10524
10525 // We can fall back to a libcall with an illegal type for the MUL if we
10526 // have a libcall big enough.
10527 // Also, we can fall back to a division in some cases, but that's a big
10528 // performance hit in the general case.
10529 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
10530 if (WideVT == MVT::i16)
10531 LC = RTLIB::MUL_I16;
10532 else if (WideVT == MVT::i32)
10533 LC = RTLIB::MUL_I32;
10534 else if (WideVT == MVT::i64)
10535 LC = RTLIB::MUL_I64;
10536 else if (WideVT == MVT::i128)
10537 LC = RTLIB::MUL_I128;
10538 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
10539
10540 SDValue HiLHS;
10541 SDValue HiRHS;
10542 if (isSigned) {
10543 // The high part is obtained by SRA'ing all but one of the bits of the
10544 // low part.
10545 unsigned LoSize = VT.getFixedSizeInBits();
10546 HiLHS =
10547 DAG.getNode(ISD::SRA, dl, VT, LHS,
10548 DAG.getConstant(LoSize - 1, dl,
10549 getPointerTy(DAG.getDataLayout())));
10550 HiRHS =
10551 DAG.getNode(ISD::SRA, dl, VT, RHS,
10552 DAG.getConstant(LoSize - 1, dl,
10553 getPointerTy(DAG.getDataLayout())));
10554 } else {
10555 HiLHS = DAG.getConstant(0, dl, VT);
10556 HiRHS = DAG.getConstant(0, dl, VT);
10557 }
10558
10559 // Here we're passing the 2 arguments explicitly as 4 arguments that are
10560 // pre-lowered to the correct types. This all depends upon WideVT not
10561 // being a legal type for the architecture and thus having to be split
10562 // into two arguments.
10563 SDValue Ret;
10564 TargetLowering::MakeLibCallOptions CallOptions;
10565 CallOptions.setSExt(isSigned);
10566 CallOptions.setIsPostTypeLegalization(true);
10567 if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
10568 // Halves of WideVT are packed into registers in different order
10569 // depending on platform endianness. This is usually handled by
10570 // the C calling convention, but we can't defer to it in
10571 // the legalizer.
10572 SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
10573 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10574 } else {
10575 SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
10576 Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
10577 }
10578 assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
10579 "Ret value is a collection of constituent nodes holding result.");
10580 if (DAG.getDataLayout().isLittleEndian()) {
10581 // Same as above.
10582 BottomHalf = Ret.getOperand(0);
10583 TopHalf = Ret.getOperand(1);
10584 } else {
10585 BottomHalf = Ret.getOperand(1);
10586 TopHalf = Ret.getOperand(0);
10587 }
10588 }
10589
10590 Result = BottomHalf;
10591 if (isSigned) {
10592 SDValue ShiftAmt = DAG.getConstant(
10593 VT.getScalarSizeInBits() - 1, dl,
10594 getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
10595 SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
10596 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
10597 } else {
10598 Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
10599 DAG.getConstant(0, dl, VT), ISD::SETNE);
10600 }
10601
10602 // Truncate the result if SetCC returns a larger type than needed.
10603 EVT RType = Node->getValueType(1);
10604 if (RType.bitsLT(Overflow.getValueType()))
10605 Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
10606
10607 assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
10608 "Unexpected result type for S/UMULO legalization");
10609 return true;
10610 }
10611
10612 SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
10613 SDLoc dl(Node);
10614 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10615 SDValue Op = Node->getOperand(0);
10616 EVT VT = Op.getValueType();
10617
10618 if (VT.isScalableVector())
10619 report_fatal_error(
10620 "Expanding reductions for scalable vectors is undefined.");
10621
10622 // Try to use a shuffle reduction for power of two vectors.
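// For example, a VECREDUCE_ADD of v8i32 is split into two v4i32 halves that
// are added together, and the halving repeats (while ADD remains legal or
// custom on the narrower vector type) until a single element remains.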
10623 if (VT.isPow2VectorType()) {
10624 while (VT.getVectorNumElements() > 1) {
10625 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
10626 if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
10627 break;
10628
10629 SDValue Lo, Hi;
10630 std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
10631 Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
10632 VT = HalfVT;
10633 }
10634 }
10635
10636 EVT EltVT = VT.getVectorElementType();
10637 unsigned NumElts = VT.getVectorNumElements();
10638
10639 SmallVector<SDValue, 8> Ops;
10640 DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
10641
10642 SDValue Res = Ops[0];
10643 for (unsigned i = 1; i < NumElts; i++)
10644 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
10645
10646 // Result type may be wider than element type.
10647 if (EltVT != Node->getValueType(0))
10648 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
10649 return Res;
10650 }
10651
10652 SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const {
10653 SDLoc dl(Node);
10654 SDValue AccOp = Node->getOperand(0);
10655 SDValue VecOp = Node->getOperand(1);
10656 SDNodeFlags Flags = Node->getFlags();
10657
10658 EVT VT = VecOp.getValueType();
10659 EVT EltVT = VT.getVectorElementType();
10660
10661 if (VT.isScalableVector())
10662 report_fatal_error(
10663 "Expanding reductions for scalable vectors is undefined.");
10664
10665 unsigned NumElts = VT.getVectorNumElements();
10666
10667 SmallVector<SDValue, 8> Ops;
10668 DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);
10669
10670 unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
10671
10672 SDValue Res = AccOp;
10673 for (unsigned i = 0; i < NumElts; i++)
10674 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);
10675
10676 return Res;
10677 }
10678
10679 bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
10680 SelectionDAG &DAG) const {
10681 EVT VT = Node->getValueType(0);
10682 SDLoc dl(Node);
10683 bool isSigned = Node->getOpcode() == ISD::SREM;
10684 unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
10685 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
10686 SDValue Dividend = Node->getOperand(0);
10687 SDValue Divisor = Node->getOperand(1);
10688 if (isOperationLegalOrCustom(DivRemOpc, VT)) {
10689 SDVTList VTs = DAG.getVTList(VT, VT);
10690 Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
10691 return true;
10692 }
10693 if (isOperationLegalOrCustom(DivOpc, VT)) {
10694 // X % Y -> X-X/Y*Y
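// For example, 7 % 3 -> 7 - (7 / 3) * 3 = 7 - 6 = 1.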
10695 SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
10696 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
10697 Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
10698 return true;
10699 }
10700 return false;
10701 }
10702
10703 SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
10704 SelectionDAG &DAG) const {
10705 bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
10706 SDLoc dl(SDValue(Node, 0));
10707 SDValue Src = Node->getOperand(0);
10708
10709 // DstVT is the result type, while SatVT is the size to which we saturate
10710 EVT SrcVT = Src.getValueType();
10711 EVT DstVT = Node->getValueType(0);
10712
10713 EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
10714 unsigned SatWidth = SatVT.getScalarSizeInBits();
10715 unsigned DstWidth = DstVT.getScalarSizeInBits();
10716 assert(SatWidth <= DstWidth &&
10717 "Expected saturation width smaller than result width");
10718
10719 // Determine minimum and maximum integer values and their corresponding
10720 // floating-point values.
10721 APInt MinInt, MaxInt;
10722 if (IsSigned) {
10723 MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth);
10724 MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth);
10725 } else {
10726 MinInt = APInt::getMinValue(SatWidth).zext(DstWidth);
10727 MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
10728 }
10729
10730 // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
10731 // libcall emission cannot handle this. Large result types will fail.
10732 if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
10733 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
10734 SrcVT = Src.getValueType();
10735 }
10736
10737 APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10738 APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));
10739
10740 APFloat::opStatus MinStatus =
10741 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
10742 APFloat::opStatus MaxStatus =
10743 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
10744 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
10745 !(MaxStatus & APFloat::opStatus::opInexact);
10746
10747 SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
10748 SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);
10749
10750 // If the integer bounds are exactly representable as floats and min/max are
10751 // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
10752 // of comparisons and selects.
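// For example, for fptosi.sat.i8.f32 the bounds -128.0 and 127.0 are exactly
// representable, so Src is clamped with fmaxnum/fminnum and converted;
// fmaxnum also maps NaN to -128.0, which the final NaN select replaces with 0.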
10753 bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
10754 isOperationLegal(ISD::FMAXNUM, SrcVT);
10755 if (AreExactFloatBounds && MinMaxLegal) {
10756 SDValue Clamped = Src;
10757
10758 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
10759 Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
10760 // Clamp by MaxFloat from above. NaN cannot occur.
10761 Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
10762 // Convert clamped value to integer.
10763 SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
10764 dl, DstVT, Clamped);
10765
10766 // In the unsigned case we're done, because we mapped NaN to MinFloat,
10767 // which will cast to zero.
10768 if (!IsSigned)
10769 return FpToInt;
10770
10771 // Otherwise, select 0 if Src is NaN.
10772 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10773 EVT SetCCVT =
10774 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10775 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10776 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
10777 }
10778
10779 SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
10780 SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);
10781
10782 // Result of direct conversion. The assumption here is that the operation is
10783 // non-trapping and it's fine to apply it to an out-of-range value if we
10784 // select it away later.
10785 SDValue FpToInt =
10786 DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);
10787
10788 SDValue Select = FpToInt;
10789
10790 EVT SetCCVT =
10791 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
10792
10793 // If Src ULT MinFloat, select MinInt. In particular, this also selects
10794 // MinInt if Src is NaN.
10795 SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
10796 Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
10797 // If Src OGT MaxFloat, select MaxInt.
10798 SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
10799 Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
10800
10801 // In the unsigned case we are done, because we mapped NaN to MinInt, which
10802 // is already zero.
10803 if (!IsSigned)
10804 return Select;
10805
10806 // Otherwise, select 0 if Src is NaN.
10807 SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
10808 SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
10809 return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
10810 }
10811
10812 SDValue TargetLowering::expandVectorSplice(SDNode *Node,
10813 SelectionDAG &DAG) const {
10814 assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
10815 assert(Node->getValueType(0).isScalableVector() &&
10816 "Fixed length vector types expected to use SHUFFLE_VECTOR!");
10817
10818 EVT VT = Node->getValueType(0);
10819 SDValue V1 = Node->getOperand(0);
10820 SDValue V2 = Node->getOperand(1);
10821 int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
10822 SDLoc DL(Node);
10823
10824 // Expand through memory as follows:
10825 // Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
10826 // Store V1, Ptr
10827 // Store V2, Ptr + sizeof(V1)
10828 // If (Imm < 0)
10829 // TrailingElts = -Imm
10830 // Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
10831 // else
10832 // Ptr = Ptr + (Imm * sizeof(VT.Elt))
10833 // Res = Load Ptr
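// For example, splicing two <vscale x 4 x i32> vectors with Imm = -2 loads
// the result starting two elements (8 bytes) before the end of V1's slot.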
10834
10835 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
10836
10837 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
10838 VT.getVectorElementCount() * 2);
10839 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
10840 EVT PtrVT = StackPtr.getValueType();
10841 auto &MF = DAG.getMachineFunction();
10842 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
10843 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
10844
10845 // Store the lo part of CONCAT_VECTORS(V1, V2)
10846 SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
10847 // Store the hi part of CONCAT_VECTORS(V1, V2)
10848 SDValue OffsetToV2 = DAG.getVScale(
10849 DL, PtrVT,
10850 APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue()));
10851 SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
10852 SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);
10853
10854 if (Imm >= 0) {
10855 // Load back the required element. getVectorElementPointer takes care of
10856 // clamping the index if it's out-of-bounds.
10857 StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
10858 // Load the spliced result
10859 return DAG.getLoad(VT, DL, StoreV2, StackPtr,
10860 MachinePointerInfo::getUnknownStack(MF));
10861 }
10862
10863 uint64_t TrailingElts = -Imm;
10864
10865 // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
10866 TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
10867 SDValue TrailingBytes =
10868 DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);
10869
10870 if (TrailingElts > VT.getVectorMinNumElements()) {
10871 SDValue VLBytes =
10872 DAG.getVScale(DL, PtrVT,
10873 APInt(PtrVT.getFixedSizeInBits(),
10874 VT.getStoreSize().getKnownMinValue()));
10875 TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
10876 }
10877
10878 // Calculate the start address of the spliced result.
10879 StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
10880
10881 // Load the spliced result
10882 return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
10883 MachinePointerInfo::getUnknownStack(MF));
10884 }
10885
10886 bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
10887 SDValue &LHS, SDValue &RHS,
10888 SDValue &CC, SDValue Mask,
10889 SDValue EVL, bool &NeedInvert,
10890 const SDLoc &dl, SDValue &Chain,
10891 bool IsSignaling) const {
10892 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
10893 MVT OpVT = LHS.getSimpleValueType();
10894 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
10895 NeedInvert = false;
10896 assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset");
10897 bool IsNonVP = !EVL;
10898 switch (TLI.getCondCodeAction(CCCode, OpVT)) {
10899 default:
10900 llvm_unreachable("Unknown condition code action!");
10901 case TargetLowering::Legal:
10902 // Nothing to do.
10903 break;
10904 case TargetLowering::Expand: {
10905 ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
10906 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
10907 std::swap(LHS, RHS);
10908 CC = DAG.getCondCode(InvCC);
10909 return true;
10910 }
10911 // Swapping operands didn't work. Try inverting the condition.
10912 bool NeedSwap = false;
10913 InvCC = getSetCCInverse(CCCode, OpVT);
10914 if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
10915 // If inverting the condition is not enough, try swapping operands
10916 // on top of it.
10917 InvCC = ISD::getSetCCSwappedOperands(InvCC);
10918 NeedSwap = true;
10919 }
10920 if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
10921 CC = DAG.getCondCode(InvCC);
10922 NeedInvert = true;
10923 if (NeedSwap)
10924 std::swap(LHS, RHS);
10925 return true;
10926 }
10927
10928 ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
10929 unsigned Opc = 0;
10930 switch (CCCode) {
10931 default:
10932 llvm_unreachable("Don't know how to expand this condition!");
10933 case ISD::SETUO:
10934 if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
10935 CC1 = ISD::SETUNE;
10936 CC2 = ISD::SETUNE;
10937 Opc = ISD::OR;
10938 break;
10939 }
10940 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
10941 "If SETUE is expanded, SETOEQ or SETUNE must be legal!");
10942 NeedInvert = true;
10943 [[fallthrough]];
10944 case ISD::SETO:
10945 assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
10946 "If SETO is expanded, SETOEQ must be legal!");
10947 CC1 = ISD::SETOEQ;
10948 CC2 = ISD::SETOEQ;
10949 Opc = ISD::AND;
10950 break;
10951 case ISD::SETONE:
10952 case ISD::SETUEQ:
10953 // If the SETUO or SETO CC isn't legal, we might be able to use
10954 // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
10955 // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
10956 // the operands.
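// For example, SETONE(x, y) expands to (x SETOGT y) OR (x SETOLT y), and
// SETUEQ is the inverse of that result.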
10957 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
10958 if (!TLI.isCondCodeLegal(CC2, OpVT) &&
10959 (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
10960 TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
10961 CC1 = ISD::SETOGT;
10962 CC2 = ISD::SETOLT;
10963 Opc = ISD::OR;
10964 NeedInvert = ((unsigned)CCCode & 0x8U);
10965 break;
10966 }
10967 [[fallthrough]];
10968 case ISD::SETOEQ:
10969 case ISD::SETOGT:
10970 case ISD::SETOGE:
10971 case ISD::SETOLT:
10972 case ISD::SETOLE:
10973 case ISD::SETUNE:
10974 case ISD::SETUGT:
10975 case ISD::SETUGE:
10976 case ISD::SETULT:
10977 case ISD::SETULE:
10978 // If we are floating point, assign and break, otherwise fall through.
10979 if (!OpVT.isInteger()) {
10980 // We can use the 4th bit to tell if we are the unordered
10981 // or ordered version of the opcode.
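// For example, SETUGT expands to SETGT OR SETUO, while SETOLT expands to
// SETLT AND SETO.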
10982 CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
10983 Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
10984 CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
10985 break;
10986 }
10987 // Fall through if we are an unsigned integer.
10988 [[fallthrough]];
10989 case ISD::SETLE:
10990 case ISD::SETGT:
10991 case ISD::SETGE:
10992 case ISD::SETLT:
10993 case ISD::SETNE:
10994 case ISD::SETEQ:
10995 // If all combinations of inverting the condition and swapping operands
10996 // didn't work then we have no means to expand the condition.
10997 llvm_unreachable("Don't know how to expand this condition!");
10998 }
10999
11000 SDValue SetCC1, SetCC2;
11001 if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
11002 // If we aren't the ordered or unordered operation,
11003 // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
11004 if (IsNonVP) {
11005 SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
11006 SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
11007 } else {
11008 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL);
11009 SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL);
11010 }
11011 } else {
11012 // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
11013 if (IsNonVP) {
11014 SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
11015 SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
11016 } else {
11017 SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL);
11018 SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL);
11019 }
11020 }
11021 if (Chain)
11022 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
11023 SetCC2.getValue(1));
11024 if (IsNonVP)
11025 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
11026 else {
11027 // Transform the binary opcode to the VP equivalent.
11028 assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode");
11029 Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND;
11030 LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL);
11031 }
11032 RHS = SDValue();
11033 CC = SDValue();
11034 return true;
11035 }
11036 }
11037 return false;
11038 }
11039