//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines DAG nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ByteProvider.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <variant>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
              "Controls whether a DAG combine is performed for a node");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

static cl::opt<bool> EnableVectorFCopySignExtendRound(
    "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
    cl::desc(
        "Enable merging extends and rounds into FCOPYSIGN on vector types"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level = BeforeLegalizeTypes;
    CodeGenOptLevel OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. Since we do not add duplicate nodes
    /// to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in the dependence check. If we have
    /// seen the bail-out for the same pair many times over a limit, we won't
    /// consider the StoreNode with the same RootNode as a store merging
    /// candidate again.
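    /// The limit is controlled by the -combiner-store-merge-dependence-limit
    /// command-line option declared above (10 by default).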
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the worklist because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
    }

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist, making sure its instance is at the back (next to
    /// be processed).
    void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (IsCandidateForPruning)
        ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Looks up the chain to find a unique (unaliased) store feeding the passed
    // load. If no such store is found, returns a nullptr.
    // Note: This will look past a CALLSEQ_START if the load is chained to it,
    //       so that it can find stack stores for byval params.
    StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                SDValue RHS, SDValue True, SDValue False,
                                ISD::CondCode CC);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitUADDO_CARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                 SDNode *N);
    SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                 SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitUSUBO_CARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitAVG(SDNode *N);
    SDValue visitABD(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitSHLSAT(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitVP_FADD(SDNode *N);
    SDValue visitVP_FSUB(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    template <class MatchContextClass> SDValue visitFMA(SDNode *N);
    SDValue visitFMAD(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitXRINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFREXP(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMinMax(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
    SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);

    bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitVPGATHER(SDNode *N);
    SDValue visitVPSCATTER(SDNode *N);
    SDValue visitVP_STRIDED_LOAD(SDNode *N);
    SDValue visitVP_STRIDED_STORE(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitFP_TO_BF16(SDNode *N);
    SDValue visitBF16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);
    SDValue visitGET_FPENV_MEM(SDNode *N);
    SDValue visitSET_FPENV_MEM(SDNode *N);

    template <class MatchContextClass>
    SDValue visitFADDForFMACombine(SDNode *N);
    template <class MatchContextClass>
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL,
                                                    SDNode *N,
                                                    SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1, SDNodeFlags Flags);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);
    SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
                                 EVT VT, SDValue N0, SDValue N1,
                                 SDNodeFlags Flags = SDNodeFlags());

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue foldABSToABD(SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering &TLI);

    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
    SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildSREMPow2(SDNode *N);
    SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
                          bool KnownNeverZero = false,
                          bool InexpensiveOnly = false,
                          std::optional<EVT> OutVT = std::nullopt);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue reduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::BUILD_VECTOR:
        if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
            ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
          return StoreSource::Constant;
        return StoreSource::Unknown;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
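    /// For example, with c1 = 10 and c2 = 3, the fold rewrites
    /// (mul (add x, 10), 3) as (add (mul x, 3), 30).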
    bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
                                     SDValue ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// Helper function for mergeConsecutiveStores which checks if all the store
    /// nodes have the same underlying object. We can still reuse the first
    /// store's pointer info if all the stores are from the same object.
    bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use); if they are not met, an empty SDValue is returned.
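    /// For example, (trunc:i8 (and X:i32, 255)) becomes
    /// (and (trunc:i8 X), 255:i8).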
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the DAG combiner on all nodes in the worklist.
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

class EmptyMatchContext {
  SelectionDAG &DAG;
  const TargetLowering &TLI;

public:
  EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
      : DAG(DAG), TLI(TLI) {}

  bool match(SDValue OpN, unsigned Opcode) const {
    return Opcode == OpN->getOpcode();
  }

  // Same as SelectionDAG::getNode().
  template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
    return DAG.getNode(std::forward<ArgT>(Args)...);
  }

  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
                                bool LegalOnly = false) const {
    return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
  }
};

class VPMatchContext {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  SDValue RootMaskOp;
  SDValue RootVectorLenOp;

public:
  VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
      : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
    assert(Root->isVPOpcode());
    if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
      RootMaskOp = Root->getOperand(*RootMaskPos);

    if (auto RootVLenPos =
            ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
      RootVectorLenOp = Root->getOperand(*RootVLenPos);
  }

  /// Returns whether \p OpVal is a node that is functionally compatible with
  /// the NodeType \p Opc.
  bool match(SDValue OpVal, unsigned Opc) const {
    if (!OpVal->isVPOpcode())
      return OpVal->getOpcode() == Opc;

    auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
                                           !OpVal->getFlags().hasNoFPExcept());
    if (BaseOpc != Opc)
      return false;

    // Make sure the mask of OpVal is an all-true mask or the same as Root's.
    unsigned VPOpcode = OpVal->getOpcode();
    if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
      SDValue MaskOp = OpVal.getOperand(*MaskPos);
      if (RootMaskOp != MaskOp &&
          !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
        return false;
    }

    // Make sure the EVL of OpVal is the same as Root's.
    if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
      if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
        return false;
    return true;
  }

  // Specialize based on number of operands.
  // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
  // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
  // DAG.getNode(Opcode, DL, VT); }
  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
    return DAG.getNode(VPOpcode, DL, VT,
                       {Operand, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDValue N3) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, N3, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
                  SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
    return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
                       Flags);
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
    return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
                       Flags);
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDValue N3, SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
  }

  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
                                bool LegalOnly = false) const {
    unsigned VPOp = ISD::getVPForBaseOpcode(Op);
    return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
  }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper
// function zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
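// For example, given an 8-bit LHS, a 16-bit RHS, and Offset = 1, both values
// are zero-extended to 17 bits.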
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zext(Bits);
  RHS = RHS.zext(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
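// For example, on a target where 1 and 0 are the boolean true and false
// values, (select_cc lhs, rhs, 1, 0, cc) is equivalent to
// (setcc lhs, rhs, cc).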
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}

// Determines if it is a constant integer or a splat/build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
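// For example, (build_vector 0, 1, undef, 3) qualifies, but a build vector
// whose constant operands are wider than its element type does not, as that
// would be an implicit truncation.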
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Determine if this is an indexed load with an opaque target constant index.
static bool canSplitIdx(LoadSDNode *LD) {
  return MaySplitLoadIndex &&
         (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
          !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
}

bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                             const SDLoc &DL,
                                                             SDNode *N,
                                                             SDValue N0,
                                                             SDValue N1) {
  // Currently this only tries to ensure we don't undo the GEP splits done by
  // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
  // we check if the following transformation would be problematic:
  // (load/store (add, (add, x, offset1), offset2)) ->
  // (load/store (add, x, offset1+offset2)).

  // (load/store (add, (add, x, y), offset2)) ->
  // (load/store (add, (add, x, offset2), y)).

  if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
    return false;

  auto *C2 = dyn_cast<ConstantSDNode>(N1);
  if (!C2)
    return false;

  const APInt &C2APIntVal = C2->getAPIntValue();
  if (C2APIntVal.getSignificantBits() > 64)
    return false;

  if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
    if (N0.hasOneUse())
      return false;

    const APInt &C1APIntVal = C1->getAPIntValue();
    const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
    if (CombinedValueIntVal.getSignificantBits() > 64)
      return false;
    const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();

    for (SDNode *Node : N->uses()) {
      if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
        // Is x[offset2] already not a legal addressing mode? If so then
        // reassociating the constants breaks nothing (we test offset2 because
        // that's the one we hope to fold into the load or store).
        TargetLoweringBase::AddrMode AM;
        AM.HasBaseReg = true;
        AM.BaseOffs = C2APIntVal.getSExtValue();
        EVT VT = LoadStore->getMemoryVT();
        unsigned AS = LoadStore->getAddressSpace();
        Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          continue;

        // Would x[offset1+offset2] still be a legal addressing mode?
        AM.BaseOffs = CombinedValue;
        if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
          return true;
      }
    }
  } else {
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
      if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
        return false;

    for (SDNode *Node : N->uses()) {
      auto *LoadStore = dyn_cast<MemSDNode>(Node);
      if (!LoadStore)
        return false;

      // Is x[offset2] a legal addressing mode? If so then reassociating the
      // constants would break the address pattern.
1267       TargetLoweringBase::AddrMode AM;
1268       AM.HasBaseReg = true;
1269       AM.BaseOffs = C2APIntVal.getSExtValue();
1270       EVT VT = LoadStore->getMemoryVT();
1271       unsigned AS = LoadStore->getAddressSpace();
1272       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1273       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1274         return false;
1275     }
1276     return true;
1277   }
1278 
1279   return false;
1280 }
1281 
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1) when \p N0 has the same opcode as \p Opc.
1284 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1285                                                SDValue N0, SDValue N1,
1286                                                SDNodeFlags Flags) {
1287   EVT VT = N0.getValueType();
1288 
1289   if (N0.getOpcode() != Opc)
1290     return SDValue();
1291 
1292   SDValue N00 = N0.getOperand(0);
1293   SDValue N01 = N0.getOperand(1);
1294 
1295   if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1296     if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1297       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
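      // e.g. (add (add x, 3), 5) --> (add x, 8)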
1298       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1299         return DAG.getNode(Opc, DL, VT, N00, OpNode);
1300       return SDValue();
1301     }
1302     if (TLI.isReassocProfitable(DAG, N0, N1)) {
1303       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1304       //              iff (op x, c1) has one use
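      // e.g. (add (add x, 8), y) --> (add (add x, y), 8)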
1305       SDNodeFlags NewFlags;
1306       if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1307           Flags.hasNoUnsignedWrap())
1308         NewFlags.setNoUnsignedWrap(true);
1309       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1310       return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1311     }
1312   }
1313 
1314   // Check for repeated operand logic simplifications.
1315   if (Opc == ISD::AND || Opc == ISD::OR) {
1316     // (N00 & N01) & N00 --> N00 & N01
1317     // (N00 & N01) & N01 --> N00 & N01
1318     // (N00 | N01) | N00 --> N00 | N01
1319     // (N00 | N01) | N01 --> N00 | N01
1320     if (N1 == N00 || N1 == N01)
1321       return N0;
1322   }
1323   if (Opc == ISD::XOR) {
1324     // (N00 ^ N01) ^ N00 --> N01
1325     if (N1 == N00)
1326       return N01;
1327     // (N00 ^ N01) ^ N01 --> N00
1328     if (N1 == N01)
1329       return N00;
1330   }
1331 
1332   if (TLI.isReassocProfitable(DAG, N0, N1)) {
1333     if (N1 != N01) {
      // Reassociate if (op N00, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
        // If Op (Op N00, N1), N01 already exists, stop reassociating here to
        // avoid an infinite loop.
1338         if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1339           return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1340       }
1341     }
1342 
1343     if (N1 != N00) {
      // Reassociate if (op N01, N1) already exists.
      if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
        // If Op (Op N01, N1), N00 already exists, stop reassociating here to
        // avoid an infinite loop.
1348         if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1349           return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1350       }
1351     }
1352 
    // Reassociate the operands from (OR/AND (OR/AND (N00, N01)), N1) to
    // (OR/AND (OR/AND (N00, N1)), N01) when N00 and N1 are comparisons with
    // the same predicate, or to (OR/AND (OR/AND (N1, N01)), N00) when N01 and
    // N1 are comparisons with the same predicate. This enables optimizations
    // such as:
1358     // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
1359     // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1360     if (Opc == ISD::AND || Opc == ISD::OR) {
1361       if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1362           N01->getOpcode() == ISD::SETCC) {
1363         ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1364         ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1365         ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1366         if (CC1 == CC00 && CC1 != CC01) {
1367           SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1368           return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1369         }
1370         if (CC1 == CC01 && CC1 != CC00) {
1371           SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1372           return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1373         }
1374       }
1375     }
1376   }
1377 
1378   return SDValue();
1379 }
1380 
1381 // Try to reassociate commutative binops.
1382 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1383                                     SDValue N1, SDNodeFlags Flags) {
1384   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1385 
1386   // Floating-point reassociation is not allowed without loose FP math.
1387   if (N0.getValueType().isFloatingPoint() ||
1388       N1.getValueType().isFloatingPoint())
1389     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1390       return SDValue();
1391 
1392   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1393     return Combined;
1394   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1395     return Combined;
1396   return SDValue();
1397 }
1398 
1399 // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1400 // Note that we only expect Flags to be passed from FP operations. For integer
1401 // operations they need to be dropped.
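// e.g. with RedOpc = ISD::VECREDUCE_ADD and Opc = ISD::ADD:
//   add (vecreduce_add X), (vecreduce_add Y) --> vecreduce_add (add X, Y)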
1402 SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1403                                           const SDLoc &DL, EVT VT, SDValue N0,
1404                                           SDValue N1, SDNodeFlags Flags) {
1405   if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1406       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1407       N0->hasOneUse() && N1->hasOneUse() &&
1408       TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
1409       TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1410     SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1411     return DAG.getNode(RedOpc, DL, VT,
1412                        DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1413                                    N0.getOperand(0), N1.getOperand(0)));
1414   }
1415   return SDValue();
1416 }
1417 
1418 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1419                                bool AddTo) {
1420   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1421   ++NodesCombined;
1422   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1423              To[0].dump(&DAG);
1424              dbgs() << " and " << NumTo - 1 << " other values\n");
1425   for (unsigned i = 0, e = NumTo; i != e; ++i)
1426     assert((!To[i].getNode() ||
1427             N->getValueType(i) == To[i].getValueType()) &&
1428            "Cannot combine value to value of different type!");
1429 
1430   WorklistRemover DeadNodes(*this);
1431   DAG.ReplaceAllUsesWith(N, To);
1432   if (AddTo) {
1433     // Push the new nodes and any users onto the worklist
1434     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1435       if (To[i].getNode())
1436         AddToWorklistWithUsers(To[i].getNode());
1437     }
1438   }
1439 
1440   // Finally, if the node is now dead, remove it from the graph.  The node
1441   // may not be dead if the replacement process recursively simplified to
1442   // something else needing this node.
1443   if (N->use_empty())
1444     deleteAndRecombine(N);
1445   return SDValue(N, 0);
1446 }
1447 
1448 void DAGCombiner::
1449 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1450   // Replace the old value with the new one.
1451   ++NodesCombined;
1452   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1453              dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1454 
1455   // Replace all uses.
1456   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1457 
1458   // Push the new node and any (possibly new) users onto the worklist.
1459   AddToWorklistWithUsers(TLO.New.getNode());
1460 
1461   // Finally, if the node is now dead, remove it from the graph.
1462   recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1463 }
1464 
1465 /// Check the specified integer node value to see if it can be simplified or if
1466 /// things it uses can be simplified by bit propagation. If so, return true.
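/// For example, if only bit 0 of (and X, 0xFF) is demanded, the mask already
/// covers that bit, so the 'and' can be replaced by X.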
1467 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1468                                        const APInt &DemandedElts,
1469                                        bool AssumeSingleUse) {
1470   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1471   KnownBits Known;
1472   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1473                                 AssumeSingleUse))
1474     return false;
1475 
1476   // Revisit the node.
1477   AddToWorklist(Op.getNode());
1478 
1479   CommitTargetLoweringOpt(TLO);
1480   return true;
1481 }
1482 
1483 /// Check the specified vector node value to see if it can be simplified or
1484 /// if things it uses can be simplified as it only uses some of the elements.
1485 /// If so, return true.
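/// For example, an INSERT_VECTOR_ELT whose inserted lane is never demanded
/// can be replaced by its source vector operand.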
1486 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1487                                              const APInt &DemandedElts,
1488                                              bool AssumeSingleUse) {
1489   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1490   APInt KnownUndef, KnownZero;
1491   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1492                                       TLO, 0, AssumeSingleUse))
1493     return false;
1494 
1495   // Revisit the node.
1496   AddToWorklist(Op.getNode());
1497 
1498   CommitTargetLoweringOpt(TLO);
1499   return true;
1500 }
1501 
1502 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1503   SDLoc DL(Load);
1504   EVT VT = Load->getValueType(0);
1505   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1506 
1507   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1508              Trunc.dump(&DAG); dbgs() << '\n');
1509 
1510   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1511   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1512 
1513   AddToWorklist(Trunc.getNode());
1514   recursivelyDeleteUnusedNodes(Load);
1515 }
1516 
1517 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1518   Replace = false;
1519   SDLoc DL(Op);
1520   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1521     LoadSDNode *LD = cast<LoadSDNode>(Op);
1522     EVT MemVT = LD->getMemoryVT();
1523     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1524                                                       : LD->getExtensionType();
1525     Replace = true;
1526     return DAG.getExtLoad(ExtType, DL, PVT,
1527                           LD->getChain(), LD->getBasePtr(),
1528                           MemVT, LD->getMemOperand());
1529   }
1530 
1531   unsigned Opc = Op.getOpcode();
1532   switch (Opc) {
1533   default: break;
1534   case ISD::AssertSext:
1535     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1536       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1537     break;
1538   case ISD::AssertZext:
1539     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1540       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1541     break;
1542   case ISD::Constant: {
1543     unsigned ExtOpc =
1544       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1545     return DAG.getNode(ExtOpc, DL, PVT, Op);
1546   }
1547   }
1548 
1549   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1550     return SDValue();
1551   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1552 }
1553 
1554 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1555   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1556     return SDValue();
1557   EVT OldVT = Op.getValueType();
1558   SDLoc DL(Op);
1559   bool Replace = false;
1560   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1561   if (!NewOp.getNode())
1562     return SDValue();
1563   AddToWorklist(NewOp.getNode());
1564 
1565   if (Replace)
1566     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1567   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1568                      DAG.getValueType(OldVT));
1569 }
1570 
1571 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1572   EVT OldVT = Op.getValueType();
1573   SDLoc DL(Op);
1574   bool Replace = false;
1575   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1576   if (!NewOp.getNode())
1577     return SDValue();
1578   AddToWorklist(NewOp.getNode());
1579 
1580   if (Replace)
1581     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1582   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1583 }
1584 
1585 /// Promote the specified integer binary operation if the target indicates it is
1586 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1587 /// i32 since i16 instructions are longer.
1588 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1589   if (!LegalOperations)
1590     return SDValue();
1591 
1592   EVT VT = Op.getValueType();
1593   if (VT.isVector() || !VT.isInteger())
1594     return SDValue();
1595 
1596   // If operation type is 'undesirable', e.g. i16 on x86, consider
1597   // promoting it.
1598   unsigned Opc = Op.getOpcode();
1599   if (TLI.isTypeDesirableForOp(Opc, VT))
1600     return SDValue();
1601 
1602   EVT PVT = VT;
1603   // Consult target whether it is a good idea to promote this operation and
1604   // what's the right type to promote it to.
1605   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1606     assert(PVT != VT && "Don't know what type to promote to!");
1607 
1608     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1609 
1610     bool Replace0 = false;
1611     SDValue N0 = Op.getOperand(0);
1612     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1613 
1614     bool Replace1 = false;
1615     SDValue N1 = Op.getOperand(1);
1616     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1617     SDLoc DL(Op);
1618 
1619     SDValue RV =
1620         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1621 
1622     // We are always replacing N0/N1's use in N and only need additional
1623     // replacements if there are additional uses.
1624     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1625     //       (SDValue) here because the node may reference multiple values
1626     //       (for example, the chain value of a load node).
1627     Replace0 &= !N0->hasOneUse();
1628     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1629 
1630     // Combine Op here so it is preserved past replacements.
1631     CombineTo(Op.getNode(), RV);
1632 
1633     // If operands have a use ordering, make sure we deal with
1634     // predecessor first.
1635     if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1636       std::swap(N0, N1);
1637       std::swap(NN0, NN1);
1638     }
1639 
1640     if (Replace0) {
1641       AddToWorklist(NN0.getNode());
1642       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1643     }
1644     if (Replace1) {
1645       AddToWorklist(NN1.getNode());
1646       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1647     }
1648     return Op;
1649   }
1650   return SDValue();
1651 }
1652 
1653 /// Promote the specified integer shift operation if the target indicates it is
1654 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1655 /// i32 since i16 instructions are longer.
1656 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1657   if (!LegalOperations)
1658     return SDValue();
1659 
1660   EVT VT = Op.getValueType();
1661   if (VT.isVector() || !VT.isInteger())
1662     return SDValue();
1663 
1664   // If operation type is 'undesirable', e.g. i16 on x86, consider
1665   // promoting it.
1666   unsigned Opc = Op.getOpcode();
1667   if (TLI.isTypeDesirableForOp(Opc, VT))
1668     return SDValue();
1669 
1670   EVT PVT = VT;
1671   // Consult target whether it is a good idea to promote this operation and
1672   // what's the right type to promote it to.
1673   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1674     assert(PVT != VT && "Don't know what type to promote to!");
1675 
1676     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1677 
1678     bool Replace = false;
1679     SDValue N0 = Op.getOperand(0);
1680     if (Opc == ISD::SRA)
1681       N0 = SExtPromoteOperand(N0, PVT);
1682     else if (Opc == ISD::SRL)
1683       N0 = ZExtPromoteOperand(N0, PVT);
1684     else
1685       N0 = PromoteOperand(N0, PVT, Replace);
1686 
1687     if (!N0.getNode())
1688       return SDValue();
1689 
1690     SDLoc DL(Op);
1691     SDValue N1 = Op.getOperand(1);
1692     SDValue RV =
1693         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1694 
1695     if (Replace)
1696       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1697 
1698     // Deal with Op being deleted.
1699     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1700       return RV;
1701   }
1702   return SDValue();
1703 }
1704 
1705 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1706   if (!LegalOperations)
1707     return SDValue();
1708 
1709   EVT VT = Op.getValueType();
1710   if (VT.isVector() || !VT.isInteger())
1711     return SDValue();
1712 
1713   // If operation type is 'undesirable', e.g. i16 on x86, consider
1714   // promoting it.
1715   unsigned Opc = Op.getOpcode();
1716   if (TLI.isTypeDesirableForOp(Opc, VT))
1717     return SDValue();
1718 
1719   EVT PVT = VT;
1720   // Consult target whether it is a good idea to promote this operation and
1721   // what's the right type to promote it to.
1722   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1723     assert(PVT != VT && "Don't know what type to promote to!");
1724     // fold (aext (aext x)) -> (aext x)
1725     // fold (aext (zext x)) -> (zext x)
1726     // fold (aext (sext x)) -> (sext x)
1727     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1728     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1729   }
1730   return SDValue();
1731 }
1732 
1733 bool DAGCombiner::PromoteLoad(SDValue Op) {
1734   if (!LegalOperations)
1735     return false;
1736 
1737   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1738     return false;
1739 
1740   EVT VT = Op.getValueType();
1741   if (VT.isVector() || !VT.isInteger())
1742     return false;
1743 
1744   // If operation type is 'undesirable', e.g. i16 on x86, consider
1745   // promoting it.
1746   unsigned Opc = Op.getOpcode();
1747   if (TLI.isTypeDesirableForOp(Opc, VT))
1748     return false;
1749 
1750   EVT PVT = VT;
1751   // Consult target whether it is a good idea to promote this operation and
1752   // what's the right type to promote it to.
1753   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1754     assert(PVT != VT && "Don't know what type to promote to!");
1755 
1756     SDLoc DL(Op);
1757     SDNode *N = Op.getNode();
1758     LoadSDNode *LD = cast<LoadSDNode>(N);
1759     EVT MemVT = LD->getMemoryVT();
1760     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1761                                                       : LD->getExtensionType();
1762     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1763                                    LD->getChain(), LD->getBasePtr(),
1764                                    MemVT, LD->getMemOperand());
1765     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1766 
1767     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1768                Result.dump(&DAG); dbgs() << '\n');
1769 
1770     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1771     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1772 
1773     AddToWorklist(Result.getNode());
1774     recursivelyDeleteUnusedNodes(N);
1775     return true;
1776   }
1777 
1778   return false;
1779 }
1780 
1781 /// Recursively delete a node which has no uses and any operands for
1782 /// which it is the only use.
1783 ///
1784 /// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1787 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1788   if (!N->use_empty())
1789     return false;
1790 
1791   SmallSetVector<SDNode *, 16> Nodes;
1792   Nodes.insert(N);
1793   do {
1794     N = Nodes.pop_back_val();
1795     if (!N)
1796       continue;
1797 
1798     if (N->use_empty()) {
1799       for (const SDValue &ChildN : N->op_values())
1800         Nodes.insert(ChildN.getNode());
1801 
1802       removeFromWorklist(N);
1803       DAG.DeleteNode(N);
1804     } else {
1805       AddToWorklist(N);
1806     }
1807   } while (!Nodes.empty());
1808   return true;
1809 }
1810 
1811 //===----------------------------------------------------------------------===//
1812 //  Main DAG Combiner implementation
1813 //===----------------------------------------------------------------------===//
1814 
1815 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
1817   Level = AtLevel;
1818   LegalDAG = Level >= AfterLegalizeDAG;
1819   LegalOperations = Level >= AfterLegalizeVectorOps;
1820   LegalTypes = Level >= AfterLegalizeTypes;
1821 
1822   WorklistInserter AddNodes(*this);
1823 
1824   // Add all the dag nodes to the worklist.
1825   //
  // Note: Not all nodes are added to the PruningList here. The only nodes
  // which can be deleted are those which have no uses, and all other nodes
  // which would otherwise be added to the worklist by the first call to
  // getNextWorklistEntry are already present in it.
1830   for (SDNode &Node : DAG.allnodes())
1831     AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1832 
1833   // Create a dummy node (which is not added to allnodes), that adds a reference
1834   // to the root node, preventing it from being deleted, and tracking any
1835   // changes of the root.
1836   HandleSDNode Dummy(DAG.getRoot());
1837 
1838   // While we have a valid worklist entry node, try to combine it.
1839   while (SDNode *N = getNextWorklistEntry()) {
1840     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1841     // N is deleted from the DAG, since they too may now be dead or may have a
1842     // reduced number of uses, allowing other xforms.
1843     if (recursivelyDeleteUnusedNodes(N))
1844       continue;
1845 
1846     WorklistRemover DeadNodes(*this);
1847 
1848     // If this combine is running after legalizing the DAG, re-legalize any
1849     // nodes pulled off the worklist.
1850     if (LegalDAG) {
1851       SmallSetVector<SDNode *, 16> UpdatedNodes;
1852       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1853 
1854       for (SDNode *LN : UpdatedNodes)
1855         AddToWorklistWithUsers(LN);
1856 
1857       if (!NIsValid)
1858         continue;
1859     }
1860 
1861     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1862 
1863     // Add any operands of the new node which have not yet been combined to the
1864     // worklist as well. Because the worklist uniques things already, this
1865     // won't repeatedly process the same operand.
1866     for (const SDValue &ChildN : N->op_values())
1867       if (!CombinedNodes.count(ChildN.getNode()))
1868         AddToWorklist(ChildN.getNode());
1869 
1870     CombinedNodes.insert(N);
1871     SDValue RV = combine(N);
1872 
1873     if (!RV.getNode())
1874       continue;
1875 
1876     ++NodesCombined;
1877 
1878     // If we get back the same node we passed in, rather than a new node or
1879     // zero, we know that the node must have defined multiple values and
1880     // CombineTo was used.  Since CombineTo takes care of the worklist
1881     // mechanics for us, we have no work to do in this case.
1882     if (RV.getNode() == N)
1883       continue;
1884 
1885     assert(N->getOpcode() != ISD::DELETED_NODE &&
1886            RV.getOpcode() != ISD::DELETED_NODE &&
1887            "Node was deleted but visit returned new node!");
1888 
1889     LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1890 
1891     if (N->getNumValues() == RV->getNumValues())
1892       DAG.ReplaceAllUsesWith(N, RV.getNode());
1893     else {
1894       assert(N->getValueType(0) == RV.getValueType() &&
1895              N->getNumValues() == 1 && "Type mismatch");
1896       DAG.ReplaceAllUsesWith(N, &RV);
1897     }
1898 
1899     // Push the new node and any users onto the worklist.  Omit this if the
1900     // new node is the EntryToken (e.g. if a store managed to get optimized
1901     // out), because re-visiting the EntryToken and its users will not uncover
1902     // any additional opportunities, but there may be a large number of such
1903     // users, potentially causing compile time explosion.
1904     if (RV.getOpcode() != ISD::EntryToken)
1905       AddToWorklistWithUsers(RV.getNode());
1906 
1907     // Finally, if the node is now dead, remove it from the graph.  The node
1908     // may not be dead if the replacement process recursively simplified to
1909     // something else needing this node. This will also take care of adding any
1910     // operands which have lost a user to the worklist.
1911     recursivelyDeleteUnusedNodes(N);
1912   }
1913 
  // If the root changed (e.g. it was a dead load), update the root.
1915   DAG.setRoot(Dummy.getValue());
1916   DAG.RemoveDeadNodes();
1917 }
1918 
1919 SDValue DAGCombiner::visit(SDNode *N) {
1920   // clang-format off
1921   switch (N->getOpcode()) {
1922   default: break;
1923   case ISD::TokenFactor:        return visitTokenFactor(N);
1924   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1925   case ISD::ADD:                return visitADD(N);
1926   case ISD::SUB:                return visitSUB(N);
1927   case ISD::SADDSAT:
1928   case ISD::UADDSAT:            return visitADDSAT(N);
1929   case ISD::SSUBSAT:
1930   case ISD::USUBSAT:            return visitSUBSAT(N);
1931   case ISD::ADDC:               return visitADDC(N);
1932   case ISD::SADDO:
1933   case ISD::UADDO:              return visitADDO(N);
1934   case ISD::SUBC:               return visitSUBC(N);
1935   case ISD::SSUBO:
1936   case ISD::USUBO:              return visitSUBO(N);
1937   case ISD::ADDE:               return visitADDE(N);
1938   case ISD::UADDO_CARRY:        return visitUADDO_CARRY(N);
1939   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1940   case ISD::SUBE:               return visitSUBE(N);
1941   case ISD::USUBO_CARRY:        return visitUSUBO_CARRY(N);
1942   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1943   case ISD::SMULFIX:
1944   case ISD::SMULFIXSAT:
1945   case ISD::UMULFIX:
1946   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1947   case ISD::MUL:                return visitMUL(N);
1948   case ISD::SDIV:               return visitSDIV(N);
1949   case ISD::UDIV:               return visitUDIV(N);
1950   case ISD::SREM:
1951   case ISD::UREM:               return visitREM(N);
1952   case ISD::MULHU:              return visitMULHU(N);
1953   case ISD::MULHS:              return visitMULHS(N);
1954   case ISD::AVGFLOORS:
1955   case ISD::AVGFLOORU:
1956   case ISD::AVGCEILS:
1957   case ISD::AVGCEILU:           return visitAVG(N);
1958   case ISD::ABDS:
1959   case ISD::ABDU:               return visitABD(N);
1960   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1961   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1962   case ISD::SMULO:
1963   case ISD::UMULO:              return visitMULO(N);
1964   case ISD::SMIN:
1965   case ISD::SMAX:
1966   case ISD::UMIN:
1967   case ISD::UMAX:               return visitIMINMAX(N);
1968   case ISD::AND:                return visitAND(N);
1969   case ISD::OR:                 return visitOR(N);
1970   case ISD::XOR:                return visitXOR(N);
1971   case ISD::SHL:                return visitSHL(N);
1972   case ISD::SRA:                return visitSRA(N);
1973   case ISD::SRL:                return visitSRL(N);
1974   case ISD::ROTR:
1975   case ISD::ROTL:               return visitRotate(N);
1976   case ISD::FSHL:
1977   case ISD::FSHR:               return visitFunnelShift(N);
1978   case ISD::SSHLSAT:
1979   case ISD::USHLSAT:            return visitSHLSAT(N);
1980   case ISD::ABS:                return visitABS(N);
1981   case ISD::BSWAP:              return visitBSWAP(N);
1982   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1983   case ISD::CTLZ:               return visitCTLZ(N);
1984   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1985   case ISD::CTTZ:               return visitCTTZ(N);
1986   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1987   case ISD::CTPOP:              return visitCTPOP(N);
1988   case ISD::SELECT:             return visitSELECT(N);
1989   case ISD::VSELECT:            return visitVSELECT(N);
1990   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1991   case ISD::SETCC:              return visitSETCC(N);
1992   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1993   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1994   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1995   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1996   case ISD::AssertSext:
1997   case ISD::AssertZext:         return visitAssertExt(N);
1998   case ISD::AssertAlign:        return visitAssertAlign(N);
1999   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
2000   case ISD::SIGN_EXTEND_VECTOR_INREG:
2001   case ISD::ZERO_EXTEND_VECTOR_INREG:
2002   case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
2003   case ISD::TRUNCATE:           return visitTRUNCATE(N);
2004   case ISD::BITCAST:            return visitBITCAST(N);
2005   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
2006   case ISD::FADD:               return visitFADD(N);
2007   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
2008   case ISD::FSUB:               return visitFSUB(N);
2009   case ISD::FMUL:               return visitFMUL(N);
2010   case ISD::FMA:                return visitFMA<EmptyMatchContext>(N);
2011   case ISD::FMAD:               return visitFMAD(N);
2012   case ISD::FDIV:               return visitFDIV(N);
2013   case ISD::FREM:               return visitFREM(N);
2014   case ISD::FSQRT:              return visitFSQRT(N);
2015   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
2016   case ISD::FPOW:               return visitFPOW(N);
2017   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
2018   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
2019   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
2020   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
2021   case ISD::LRINT:
2022   case ISD::LLRINT:             return visitXRINT(N);
2023   case ISD::FP_ROUND:           return visitFP_ROUND(N);
2024   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
2025   case ISD::FNEG:               return visitFNEG(N);
2026   case ISD::FABS:               return visitFABS(N);
2027   case ISD::FFLOOR:             return visitFFLOOR(N);
2028   case ISD::FMINNUM:
2029   case ISD::FMAXNUM:
2030   case ISD::FMINIMUM:
2031   case ISD::FMAXIMUM:           return visitFMinMax(N);
2032   case ISD::FCEIL:              return visitFCEIL(N);
2033   case ISD::FTRUNC:             return visitFTRUNC(N);
2034   case ISD::FFREXP:             return visitFFREXP(N);
2035   case ISD::BRCOND:             return visitBRCOND(N);
2036   case ISD::BR_CC:              return visitBR_CC(N);
2037   case ISD::LOAD:               return visitLOAD(N);
2038   case ISD::STORE:              return visitSTORE(N);
2039   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
2040   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2041   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
2042   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
2043   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
2044   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
2045   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
2046   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
2047   case ISD::MGATHER:            return visitMGATHER(N);
2048   case ISD::MLOAD:              return visitMLOAD(N);
2049   case ISD::MSCATTER:           return visitMSCATTER(N);
2050   case ISD::MSTORE:             return visitMSTORE(N);
2051   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
2052   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
2053   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
2054   case ISD::FP_TO_BF16:         return visitFP_TO_BF16(N);
2055   case ISD::BF16_TO_FP:         return visitBF16_TO_FP(N);
2056   case ISD::FREEZE:             return visitFREEZE(N);
2057   case ISD::GET_FPENV_MEM:      return visitGET_FPENV_MEM(N);
2058   case ISD::SET_FPENV_MEM:      return visitSET_FPENV_MEM(N);
2059   case ISD::VECREDUCE_FADD:
2060   case ISD::VECREDUCE_FMUL:
2061   case ISD::VECREDUCE_ADD:
2062   case ISD::VECREDUCE_MUL:
2063   case ISD::VECREDUCE_AND:
2064   case ISD::VECREDUCE_OR:
2065   case ISD::VECREDUCE_XOR:
2066   case ISD::VECREDUCE_SMAX:
2067   case ISD::VECREDUCE_SMIN:
2068   case ISD::VECREDUCE_UMAX:
2069   case ISD::VECREDUCE_UMIN:
2070   case ISD::VECREDUCE_FMAX:
2071   case ISD::VECREDUCE_FMIN:
2072   case ISD::VECREDUCE_FMAXIMUM:
2073   case ISD::VECREDUCE_FMINIMUM:     return visitVECREDUCE(N);
2074 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2075 #include "llvm/IR/VPIntrinsics.def"
2076     return visitVPOp(N);
2077   }
2078   // clang-format on
2079   return SDValue();
2080 }
2081 
2082 SDValue DAGCombiner::combine(SDNode *N) {
2083   if (!DebugCounter::shouldExecute(DAGCombineCounter))
2084     return SDValue();
2085 
2086   SDValue RV;
2087   if (!DisableGenericCombines)
2088     RV = visit(N);
2089 
2090   // If nothing happened, try a target-specific DAG combine.
2091   if (!RV.getNode()) {
2092     assert(N->getOpcode() != ISD::DELETED_NODE &&
2093            "Node was deleted but visit returned NULL!");
2094 
2095     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2096         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2097 
2098       // Expose the DAG combiner to the target combiner impls.
2099       TargetLowering::DAGCombinerInfo
2100         DagCombineInfo(DAG, Level, false, this);
2101 
2102       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2103     }
2104   }
2105 
2106   // If nothing happened still, try promoting the operation.
2107   if (!RV.getNode()) {
2108     switch (N->getOpcode()) {
2109     default: break;
2110     case ISD::ADD:
2111     case ISD::SUB:
2112     case ISD::MUL:
2113     case ISD::AND:
2114     case ISD::OR:
2115     case ISD::XOR:
2116       RV = PromoteIntBinOp(SDValue(N, 0));
2117       break;
2118     case ISD::SHL:
2119     case ISD::SRA:
2120     case ISD::SRL:
2121       RV = PromoteIntShiftOp(SDValue(N, 0));
2122       break;
2123     case ISD::SIGN_EXTEND:
2124     case ISD::ZERO_EXTEND:
2125     case ISD::ANY_EXTEND:
2126       RV = PromoteExtend(SDValue(N, 0));
2127       break;
2128     case ISD::LOAD:
2129       if (PromoteLoad(SDValue(N, 0)))
2130         RV = SDValue(N, 0);
2131       break;
2132     }
2133   }
2134 
2135   // If N is a commutative binary node, try to eliminate it if the commuted
2136   // version is already present in the DAG.
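  // e.g. if N is (add X, Y) and (add Y, X) already exists, reuse that node.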
2137   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2138     SDValue N0 = N->getOperand(0);
2139     SDValue N1 = N->getOperand(1);
2140 
2141     // Constant operands are canonicalized to RHS.
2142     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2143       SDValue Ops[] = {N1, N0};
2144       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2145                                             N->getFlags());
2146       if (CSENode)
2147         return SDValue(CSENode, 0);
2148     }
2149   }
2150 
2151   return RV;
2152 }
2153 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
2156 static SDValue getInputChainForNode(SDNode *N) {
2157   if (unsigned NumOps = N->getNumOperands()) {
2158     if (N->getOperand(0).getValueType() == MVT::Other)
2159       return N->getOperand(0);
2160     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2161       return N->getOperand(NumOps-1);
2162     for (unsigned i = 1; i < NumOps-1; ++i)
2163       if (N->getOperand(i).getValueType() == MVT::Other)
2164         return N->getOperand(i);
2165   }
2166   return SDValue();
2167 }
2168 
2169 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2170   // If N has two operands, where one has an input chain equal to the other,
2171   // the 'other' chain is redundant.
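  // For example, TokenFactor (chain-out of load L, C) reduces to L's
  // chain-out when C is L's input chain.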
2172   if (N->getNumOperands() == 2) {
2173     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2174       return N->getOperand(0);
2175     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2176       return N->getOperand(1);
2177   }
2178 
2179   // Don't simplify token factors if optnone.
2180   if (OptLevel == CodeGenOptLevel::None)
2181     return SDValue();
2182 
2183   // Don't simplify the token factor if the node itself has too many operands.
2184   if (N->getNumOperands() > TokenFactorInlineLimit)
2185     return SDValue();
2186 
2187   // If the sole user is a token factor, we should make sure we have a
2188   // chance to merge them together. This prevents TF chains from inhibiting
2189   // optimizations.
2190   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2191     AddToWorklist(*(N->use_begin()));
2192 
2193   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
2194   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
2195   SmallPtrSet<SDNode*, 16> SeenOps;
2196   bool Changed = false;             // If we should replace this token factor.
2197 
2198   // Start out with this token factor.
2199   TFs.push_back(N);
2200 
  // Iterate through token factors. The TFs list grows when new token factors
  // are encountered.
2203   for (unsigned i = 0; i < TFs.size(); ++i) {
2204     // Limit number of nodes to inline, to avoid quadratic compile times.
2205     // We have to add the outstanding Token Factors to Ops, otherwise we might
2206     // drop Ops from the resulting Token Factors.
2207     if (Ops.size() > TokenFactorInlineLimit) {
2208       for (unsigned j = i; j < TFs.size(); j++)
2209         Ops.emplace_back(TFs[j], 0);
2210       // Drop unprocessed Token Factors from TFs, so we do not add them to the
2211       // combiner worklist later.
2212       TFs.resize(i);
2213       break;
2214     }
2215 
2216     SDNode *TF = TFs[i];
2217     // Check each of the operands.
2218     for (const SDValue &Op : TF->op_values()) {
2219       switch (Op.getOpcode()) {
2220       case ISD::EntryToken:
2221         // Entry tokens don't need to be added to the list. They are
2222         // redundant.
2223         Changed = true;
2224         break;
2225 
2226       case ISD::TokenFactor:
2227         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2228           // Queue up for processing.
2229           TFs.push_back(Op.getNode());
2230           Changed = true;
2231           break;
2232         }
2233         [[fallthrough]];
2234 
2235       default:
2236         // Only add if it isn't already in the list.
2237         if (SeenOps.insert(Op.getNode()).second)
2238           Ops.push_back(Op);
2239         else
2240           Changed = true;
2241         break;
2242       }
2243     }
2244   }
2245 
2246   // Re-visit inlined Token Factors, to clean them up in case they have been
2247   // removed. Skip the first Token Factor, as this is the current node.
2248   for (unsigned i = 1, e = TFs.size(); i < e; i++)
2249     AddToWorklist(TFs[i]);
2250 
  // Remove nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another
  // operand. In general we must climb to the EntryNode, but we can exit early
  // if we find that all remaining work is associated with just one operand,
  // as no further pruning is possible.
2256 
2257   // List of nodes to search through and original Ops from which they originate.
2258   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2259   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2260   SmallPtrSet<SDNode *, 16> SeenChains;
2261   bool DidPruneOps = false;
2262 
2263   unsigned NumLeftToConsider = 0;
2264   for (const SDValue &Op : Ops) {
2265     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2266     OpWorkCount.push_back(1);
2267   }
2268 
2269   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as from the current OpNumber.
2272     if (SeenOps.contains(Op)) {
2273       Changed = true;
2274       DidPruneOps = true;
2275       unsigned OrigOpNumber = 0;
2276       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2277         OrigOpNumber++;
2278       assert((OrigOpNumber != Ops.size()) &&
2279              "expected to find TokenFactor Operand");
2280       // Re-mark worklist from OrigOpNumber to OpNumber
2281       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2282         if (Worklist[i].second == OrigOpNumber) {
2283           Worklist[i].second = OpNumber;
2284         }
2285       }
2286       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2287       OpWorkCount[OrigOpNumber] = 0;
2288       NumLeftToConsider--;
2289     }
2290     // Add if it's a new chain
2291     if (SeenChains.insert(Op).second) {
2292       OpWorkCount[OpNumber]++;
2293       Worklist.push_back(std::make_pair(Op, OpNumber));
2294     }
2295   };
2296 
2297   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
2299     if (NumLeftToConsider <= 1)
2300       break;
2301     auto CurNode = Worklist[i].first;
2302     auto CurOpNumber = Worklist[i].second;
2303     assert((OpWorkCount[CurOpNumber] > 0) &&
2304            "Node should not appear in worklist");
2305     switch (CurNode->getOpcode()) {
2306     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate
      // without hitting another operand's search. Prevent us from marking
      // this operand as considered.
2311       NumLeftToConsider++;
2312       break;
2313     case ISD::TokenFactor:
2314       for (const SDValue &Op : CurNode->op_values())
2315         AddToWorklist(i, Op.getNode(), CurOpNumber);
2316       break;
2317     case ISD::LIFETIME_START:
2318     case ISD::LIFETIME_END:
2319     case ISD::CopyFromReg:
2320     case ISD::CopyToReg:
2321       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2322       break;
2323     default:
2324       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2325         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2326       break;
2327     }
2328     OpWorkCount[CurOpNumber]--;
2329     if (OpWorkCount[CurOpNumber] == 0)
2330       NumLeftToConsider--;
2331   }
2332 
2333   // If we've changed things around then replace token factor.
2334   if (Changed) {
2335     SDValue Result;
2336     if (Ops.empty()) {
2337       // The entry token is the only possible outcome.
2338       Result = DAG.getEntryNode();
2339     } else {
2340       if (DidPruneOps) {
2341         SmallVector<SDValue, 8> PrunedOps;
2342         //
2343         for (const SDValue &Op : Ops) {
2344           if (SeenChains.count(Op.getNode()) == 0)
2345             PrunedOps.push_back(Op);
2346         }
2347         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2348       } else {
2349         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2350       }
2351     }
2352     return Result;
2353   }
2354   return SDValue();
2355 }
2356 
2357 /// MERGE_VALUES can always be eliminated.
2358 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2359   WorklistRemover DeadNodes(*this);
2360   // Replacing results may cause a different MERGE_VALUES to suddenly
2361   // be CSE'd with N, and carry its uses with it. Iterate until no
2362   // uses remain, to ensure that the node can be safely deleted.
2363   // First add the users of this node to the work list so that they
2364   // can be tried again once they have new operands.
2365   AddUsersToWorklist(N);
2366   do {
2367     // Do as a single replacement to avoid rewalking use lists.
2368     SmallVector<SDValue, 8> Ops;
2369     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2370       Ops.push_back(N->getOperand(i));
2371     DAG.ReplaceAllUsesWith(N, Ops.data());
2372   } while (!N->use_empty());
2373   deleteAndRecombine(N);
2374   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2375 }
2376 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
2379 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2380   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2381   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2382 }
2383 
// isTruncateOf - If N is a truncate of some other value, return true and
// record the value being truncated in Op and which of Op's bits are zero/one
// in Known.
2386 // This function computes KnownBits to avoid a duplicated call to
2387 // computeKnownBits in the caller.
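// A (setne X, 0) where all bits of X above bit 0 are known zero is likewise
// treated as a truncate of X to i1.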
2388 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2389                          KnownBits &Known) {
2390   if (N->getOpcode() == ISD::TRUNCATE) {
2391     Op = N->getOperand(0);
2392     Known = DAG.computeKnownBits(Op);
2393     return true;
2394   }
2395 
2396   if (N.getOpcode() != ISD::SETCC ||
2397       N.getValueType().getScalarType() != MVT::i1 ||
2398       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2399     return false;
2400 
2401   SDValue Op0 = N->getOperand(0);
2402   SDValue Op1 = N->getOperand(1);
2403   assert(Op0.getValueType() == Op1.getValueType());
2404 
2405   if (isNullOrNullSplat(Op0))
2406     Op = Op1;
2407   else if (isNullOrNullSplat(Op1))
2408     Op = Op0;
2409   else
2410     return false;
2411 
2412   Known = DAG.computeKnownBits(Op);
2413 
2414   return (Known.Zero | 1).isAllOnes();
2415 }
2416 
2417 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2418 /// and that N may be folded in the load / store addressing mode.
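/// For example, (add X, 8) used as a load's base pointer may fold into a
/// [reg + imm] form when the target supports that addressing mode.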
2419 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2420                                     const TargetLowering &TLI) {
2421   EVT VT;
2422   unsigned AS;
2423 
2424   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2425     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2426       return false;
2427     VT = LD->getMemoryVT();
2428     AS = LD->getAddressSpace();
2429   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2430     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2431       return false;
2432     VT = ST->getMemoryVT();
2433     AS = ST->getAddressSpace();
2434   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2435     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2436       return false;
2437     VT = LD->getMemoryVT();
2438     AS = LD->getAddressSpace();
2439   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2440     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2441       return false;
2442     VT = ST->getMemoryVT();
2443     AS = ST->getAddressSpace();
2444   } else {
2445     return false;
2446   }
2447 
2448   TargetLowering::AddrMode AM;
2449   if (N->getOpcode() == ISD::ADD) {
2450     AM.HasBaseReg = true;
2451     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2452     if (Offset)
2453       // [reg +/- imm]
2454       AM.BaseOffs = Offset->getSExtValue();
2455     else
2456       // [reg +/- reg]
2457       AM.Scale = 1;
2458   } else if (N->getOpcode() == ISD::SUB) {
2459     AM.HasBaseReg = true;
2460     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2461     if (Offset)
2462       // [reg +/- imm]
2463       AM.BaseOffs = -Offset->getSExtValue();
2464     else
2465       // [reg +/- reg]
2466       AM.Scale = 1;
2467   } else {
2468     return false;
2469   }
2470 
2471   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2472                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2473 }
2474 
2475 /// This inverts a canonicalization in IR that replaces a variable select arm
2476 /// with an identity constant. Codegen improves if we re-use the variable
2477 /// operand rather than load a constant. This can also be converted into a
2478 /// masked vector operation if the target supports it.
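/// e.g. add X, (vselect Cond, 0, V) --> vselect Cond, X, (add X, V)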
2479 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2480                                               bool ShouldCommuteOperands) {
2481   // Match a select as operand 1. The identity constant that we are looking for
2482   // is only valid as operand 1 of a non-commutative binop.
2483   SDValue N0 = N->getOperand(0);
2484   SDValue N1 = N->getOperand(1);
2485   if (ShouldCommuteOperands)
2486     std::swap(N0, N1);
2487 
2488   // TODO: Should this apply to scalar select too?
2489   if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2490     return SDValue();
2491 
  // We can't hoist all instructions because of immediate UB (they are not
  // speculatable), e.g. div/rem by zero.
2494   if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2495     return SDValue();
2496 
2497   unsigned Opcode = N->getOpcode();
2498   EVT VT = N->getValueType(0);
2499   SDValue Cond = N1.getOperand(0);
2500   SDValue TVal = N1.getOperand(1);
2501   SDValue FVal = N1.getOperand(2);
2502 
2503   // This transform increases uses of N0, so freeze it to be safe.
2504   // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2505   unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2506   if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2507     SDValue F0 = DAG.getFreeze(N0);
2508     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509     return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510   }
2511   // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512   if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2513     SDValue F0 = DAG.getFreeze(N0);
2514     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2515     return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2516   }
2517 
2518   return SDValue();
2519 }
2520 
2521 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2522   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2523          "Unexpected binary operator");
2524 
2525   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2526   auto BinOpcode = BO->getOpcode();
2527   EVT VT = BO->getValueType(0);
2528   if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2529     if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2530       return Sel;
2531 
2532     if (TLI.isCommutativeBinOp(BO->getOpcode()))
2533       if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2534         return Sel;
2535   }
2536 
2537   // Don't do this unless the old select is going away. We want to eliminate the
2538   // binary operator, not replace a binop with a select.
2539   // TODO: Handle ISD::SELECT_CC.
2540   unsigned SelOpNo = 0;
2541   SDValue Sel = BO->getOperand(0);
2542   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2543     SelOpNo = 1;
2544     Sel = BO->getOperand(1);
2545 
2546     // Peek through trunc to shift amount type.
2547     if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2548          BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2549       // This is valid when the truncated bits of x are already zero.
2550       SDValue Op;
2551       KnownBits Known;
2552       if (isTruncateOf(DAG, Sel, Op, Known) &&
2553           Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
2554         Sel = Op;
2555     }
2556   }
2557 
2558   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2559     return SDValue();
2560 
2561   SDValue CT = Sel.getOperand(1);
2562   if (!isConstantOrConstantVector(CT, true) &&
2563       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2564     return SDValue();
2565 
2566   SDValue CF = Sel.getOperand(2);
2567   if (!isConstantOrConstantVector(CF, true) &&
2568       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2569     return SDValue();
2570 
2571   // Bail out if any constants are opaque because we can't constant fold those.
2572   // The exception is "and" and "or" with either 0 or -1 in which case we can
2573   // propagate non constant operands into select. I.e.:
2574   // and (select Cond, 0, -1), X --> select Cond, 0, X
2575   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2576   bool CanFoldNonConst =
2577       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2578       ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2579        (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2580 
2581   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2582   if (!CanFoldNonConst &&
2583       !isConstantOrConstantVector(CBO, true) &&
2584       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2585     return SDValue();
2586 
2587   SDLoc DL(Sel);
2588   SDValue NewCT, NewCF;
2589 
2590   if (CanFoldNonConst) {
2591     // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592     if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593         (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594       NewCT = CT;
2595     else
2596       NewCT = CBO;
2597 
2598     if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599         (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600       NewCF = CF;
2601     else
2602       NewCF = CBO;
2603   } else {
    // We have a select-of-constants followed by a binary operator with a
    // constant. Eliminate the binop by pulling the constant math into the
    // select. For example:
    //   add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2608     NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609                     : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610     if (!NewCT)
2611       return SDValue();
2612 
2613     NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614                     : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615     if (!NewCF)
2616       return SDValue();
2617   }
2618 
2619   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2620   SelectOp->setFlags(BO->getFlags());
2621   return SelectOp;
2622 }
2623 
2624 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2625   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2626          "Expecting add or sub");
2627 
2628   // Match a constant operand and a zext operand for the math instruction:
2629   // add Z, C
2630   // sub C, Z
2631   bool IsAdd = N->getOpcode() == ISD::ADD;
2632   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2633   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2634   auto *CN = dyn_cast<ConstantSDNode>(C);
2635   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2636     return SDValue();
2637 
2638   // Match the zext operand as a setcc of a boolean.
2639   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2640       Z.getOperand(0).getValueType() != MVT::i1)
2641     return SDValue();
2642 
2643   // Match the compare as: setcc (X & 1), 0, eq.
2644   SDValue SetCC = Z.getOperand(0);
2645   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2646   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2647       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2648       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2649     return SDValue();
2650 
2651   // We are adding/subtracting a constant and an inverted low bit. Turn that
2652   // into a subtract/add of the low bit with incremented/decremented constant:
2653   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2654   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
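  // For example, with C = 10 in the add form:
  //   X even: zext(seteq) == 1, so the add gives 11 == 11 - (X & 1).
  //   X odd:  zext(seteq) == 0, so the add gives 10 == 11 - (X & 1).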
2655   EVT VT = C.getValueType();
2656   SDLoc DL(N);
2657   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2658   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2659                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2660   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2661 }
2662 
/// Try to fold an add/sub with a constant operand, where the other operand is
/// a 'not' value whose sign bit has been shifted down to bit 0, into a shift
/// and an add with an adjusted constant.
2665 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2666   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2667          "Expecting add or sub");
2668 
2669   // We need a constant operand for the add/sub, and the other operand is a
2670   // logical shift right: add (srl), C or sub C, (srl).
2671   bool IsAdd = N->getOpcode() == ISD::ADD;
2672   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2673   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2674   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2675       ShiftOp.getOpcode() != ISD::SRL)
2676     return SDValue();
2677 
2678   // The shift must be of a 'not' value.
2679   SDValue Not = ShiftOp.getOperand(0);
2680   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2681     return SDValue();
2682 
2683   // The shift must be moving the sign bit to the least-significant-bit.
2684   EVT VT = ShiftOp.getValueType();
2685   SDValue ShAmt = ShiftOp.getOperand(1);
2686   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2687   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2688     return SDValue();
2689 
2690   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2691   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2692   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
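  // Sanity check of the add form for i8 (shift amount 7):
  //   X = 0x80: srl (not X), 7 == 0, so the LHS is C; sra X, 7 == -1, so the
  //             RHS is (C + 1) - 1 == C.
  //   X = 0x00: srl (not X), 7 == 1, so the LHS is C + 1; sra X, 7 == 0, so
  //             the RHS is (C + 1) + 0 == C + 1.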
2693   SDLoc DL(N);
2694   if (SDValue NewC = DAG.FoldConstantArithmetic(
2695           IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2696           {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2697     SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2698                                    Not.getOperand(0), ShAmt);
2699     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2700   }
2701 
2702   return SDValue();
2703 }
2704 
static bool areBitwiseNotOfEachOther(SDValue Op0, SDValue Op1) {
2707   return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
2708          (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
2709 }
2710 
/// Try to fold a node that behaves like an ADD. Note that N isn't necessarily
/// an ISD::ADD here; it could, for example, be an ISD::OR if we know that
/// there are no common bits set in the operands.
2714 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2715   SDValue N0 = N->getOperand(0);
2716   SDValue N1 = N->getOperand(1);
2717   EVT VT = N0.getValueType();
2718   SDLoc DL(N);
2719 
2720   // fold (add x, undef) -> undef
2721   if (N0.isUndef())
2722     return N0;
2723   if (N1.isUndef())
2724     return N1;
2725 
2726   // fold (add c1, c2) -> c1+c2
2727   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2728     return C;
2729 
2730   // canonicalize constant to RHS
2731   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2732       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2733     return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2734 
  if (areBitwiseNotOfEachOther(N0, N1))
    return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL,
                           VT);
2738 
2739   // fold vector ops
2740   if (VT.isVector()) {
2741     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2742       return FoldedVOp;
2743 
2744     // fold (add x, 0) -> x, vector edition
2745     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2746       return N0;
2747   }
2748 
2749   // fold (add x, 0) -> x
2750   if (isNullConstant(N1))
2751     return N0;
2752 
2753   if (N0.getOpcode() == ISD::SUB) {
2754     SDValue N00 = N0.getOperand(0);
2755     SDValue N01 = N0.getOperand(1);
2756 
2757     // fold ((A-c1)+c2) -> (A+(c2-c1))
2758     if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
2759       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2760 
2761     // fold ((c1-A)+c2) -> (c1+c2)-A
2762     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
2763       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2764   }
2765 
2766   // add (sext i1 X), 1 -> zext (not i1 X)
2767   // We don't transform this pattern:
2768   //   add (zext i1 X), -1 -> sext (not i1 X)
2769   // because most (?) targets generate better code for the zext form.
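  // Sanity check on the i1 inputs: X = true gives sext == -1 and
  // -1 + 1 == 0 == zext (not true); X = false gives sext == 0 and
  // 0 + 1 == 1 == zext (not false).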
2770   if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2771       isOneOrOneSplat(N1)) {
2772     SDValue X = N0.getOperand(0);
2773     if ((!LegalOperations ||
2774          (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2775           TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2776         X.getScalarValueSizeInBits() == 1) {
2777       SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2778       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2779     }
2780   }
2781 
2782   // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2783   // iff (or x, c0) is equivalent to (add x, c0).
2784   // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2785   // iff (xor x, c0) is equivalent to (add x, c0).
2786   if (DAG.isADDLike(N0)) {
2787     SDValue N01 = N0.getOperand(1);
2788     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2789       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2790   }
2791 
2792   if (SDValue NewSel = foldBinOpIntoSelect(N))
2793     return NewSel;
2794 
2795   // reassociate add
2796   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2797     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2798       return RADD;
2799 
    // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
    // equivalent to (add x, c).
    // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
    // equivalent to (add x, c).
2804     // Do this optimization only when adding c does not introduce instructions
2805     // for adding carries.
2806     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2807       if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2808           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
        // If N0's type will not be split during legalization, or the constant
        // is the sign mask, adding the constant does not introduce a carry.
2811         auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2812         bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2813                           TyActn == TargetLoweringBase::TypePromoteInteger ||
2814                           isMinSignedConstant(N0.getOperand(1));
2815         if (NoAddCarry)
2816           return DAG.getNode(
2817               ISD::ADD, DL, VT,
2818               DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2819               N0.getOperand(1));
2820       }
2821       return SDValue();
2822     };
2823     if (SDValue Add = ReassociateAddOr(N0, N1))
2824       return Add;
2825     if (SDValue Add = ReassociateAddOr(N1, N0))
2826       return Add;
2827 
2828     // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2829     if (SDValue SD =
2830             reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2831       return SD;
2832   }
2833   // fold ((0-A) + B) -> B-A
2834   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2835     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2836 
2837   // fold (A + (0-B)) -> A-B
2838   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2839     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2840 
2841   // fold (A+(B-A)) -> B
2842   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2843     return N1.getOperand(0);
2844 
2845   // fold ((B-A)+A) -> B
2846   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2847     return N0.getOperand(0);
2848 
2849   // fold ((A-B)+(C-A)) -> (C-B)
2850   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2851       N0.getOperand(0) == N1.getOperand(1))
2852     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2853                        N0.getOperand(1));
2854 
2855   // fold ((A-B)+(B-C)) -> (A-C)
2856   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2857       N0.getOperand(1) == N1.getOperand(0))
2858     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2859                        N1.getOperand(1));
2860 
2861   // fold (A+(B-(A+C))) to (B-C)
2862   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2863       N0 == N1.getOperand(1).getOperand(0))
2864     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2865                        N1.getOperand(1).getOperand(1));
2866 
2867   // fold (A+(B-(C+A))) to (B-C)
2868   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2869       N0 == N1.getOperand(1).getOperand(1))
2870     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2871                        N1.getOperand(1).getOperand(0));
2872 
2873   // fold (A+((B-A)+or-C)) to (B+or-C)
2874   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2875       N1.getOperand(0).getOpcode() == ISD::SUB &&
2876       N0 == N1.getOperand(0).getOperand(1))
2877     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2878                        N1.getOperand(1));
2879 
2880   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2881   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2882       N0->hasOneUse() && N1->hasOneUse()) {
2883     SDValue N00 = N0.getOperand(0);
2884     SDValue N01 = N0.getOperand(1);
2885     SDValue N10 = N1.getOperand(0);
2886     SDValue N11 = N1.getOperand(1);
2887 
2888     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2889       return DAG.getNode(ISD::SUB, DL, VT,
2890                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2891                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2892   }
2893 
2894   // fold (add (umax X, C), -C) --> (usubsat X, C)
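  // This holds in both cases of the unsigned max:
  //   X >= C: (umax X, C) + -C == X - C == (usubsat X, C)
  //   X <  C: (umax X, C) + -C == C - C == 0 == (usubsat X, C)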
2895   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2896     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2897       return (!Max && !Op) ||
2898              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2899     };
2900     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2901                                   /*AllowUndefs*/ true))
2902       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2903                          N0.getOperand(1));
2904   }
2905 
2906   if (SimplifyDemandedBits(SDValue(N, 0)))
2907     return SDValue(N, 0);
2908 
2909   if (isOneOrOneSplat(N1)) {
2910     // fold (add (xor a, -1), 1) -> (sub 0, a)
2911     if (isBitwiseNot(N0))
2912       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2913                          N0.getOperand(0));
2914 
2915     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2916     if (N0.getOpcode() == ISD::ADD) {
2917       SDValue A, Xor;
2918 
2919       if (isBitwiseNot(N0.getOperand(0))) {
2920         A = N0.getOperand(1);
2921         Xor = N0.getOperand(0);
2922       } else if (isBitwiseNot(N0.getOperand(1))) {
2923         A = N0.getOperand(0);
2924         Xor = N0.getOperand(1);
2925       }
2926 
2927       if (Xor)
2928         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2929     }
2930 
2931     // Look for:
2932     //   add (add x, y), 1
2933     // And if the target does not like this form then turn into:
2934     //   sub y, (xor x, -1)
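    // In two's complement, (xor x, -1) == -x - 1, so
    // sub y, (xor x, -1) == y + x + 1 == add (add x, y), 1.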
2935     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2936         N0.hasOneUse() &&
2937         // Limit this to after legalization if the add has wrap flags
2938         (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2939                                        !N->getFlags().hasNoSignedWrap()))) {
2940       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2941                                 DAG.getAllOnesConstant(DL, VT));
2942       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2943     }
2944   }
2945 
2946   // (x - y) + -1  ->  add (xor y, -1), x
2947   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2948       isAllOnesOrAllOnesSplat(N1)) {
2949     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2950     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2951   }
2952 
2953   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2954     return Combined;
2955 
2956   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2957     return Combined;
2958 
2959   return SDValue();
2960 }
2961 
2962 SDValue DAGCombiner::visitADD(SDNode *N) {
2963   SDValue N0 = N->getOperand(0);
2964   SDValue N1 = N->getOperand(1);
2965   EVT VT = N0.getValueType();
2966   SDLoc DL(N);
2967 
2968   if (SDValue Combined = visitADDLike(N))
2969     return Combined;
2970 
2971   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2972     return V;
2973 
2974   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2975     return V;
2976 
2977   // fold (a+b) -> (a|b) iff a and b share no bits.
2978   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2979       DAG.haveNoCommonBitsSet(N0, N1))
2980     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2981 
2982   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2983   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2984     const APInt &C0 = N0->getConstantOperandAPInt(0);
2985     const APInt &C1 = N1->getConstantOperandAPInt(0);
2986     return DAG.getVScale(DL, VT, C0 + C1);
2987   }
2988 
2989   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2990   if (N0.getOpcode() == ISD::ADD &&
2991       N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2992       N1.getOpcode() == ISD::VSCALE) {
2993     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2994     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2995     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2996     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2997   }
2998 
  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1+c2)
3000   if (N0.getOpcode() == ISD::STEP_VECTOR &&
3001       N1.getOpcode() == ISD::STEP_VECTOR) {
3002     const APInt &C0 = N0->getConstantOperandAPInt(0);
3003     const APInt &C1 = N1->getConstantOperandAPInt(0);
3004     APInt NewStep = C0 + C1;
3005     return DAG.getStepVector(DL, VT, NewStep);
3006   }
3007 
3008   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3009   if (N0.getOpcode() == ISD::ADD &&
3010       N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3011       N1.getOpcode() == ISD::STEP_VECTOR) {
3012     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3013     const APInt &SV1 = N1->getConstantOperandAPInt(0);
3014     APInt NewStep = SV0 + SV1;
3015     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3016     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3017   }
3018 
3019   return SDValue();
3020 }
3021 
3022 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3023   unsigned Opcode = N->getOpcode();
3024   SDValue N0 = N->getOperand(0);
3025   SDValue N1 = N->getOperand(1);
3026   EVT VT = N0.getValueType();
3027   bool IsSigned = Opcode == ISD::SADDSAT;
3028   SDLoc DL(N);
3029 
3030   // fold (add_sat x, undef) -> -1
3031   if (N0.isUndef() || N1.isUndef())
3032     return DAG.getAllOnesConstant(DL, VT);
3033 
3034   // fold (add_sat c1, c2) -> c3
3035   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3036     return C;
3037 
3038   // canonicalize constant to RHS
3039   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3040       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3041     return DAG.getNode(Opcode, DL, VT, N1, N0);
3042 
3043   // fold vector ops
3044   if (VT.isVector()) {
3045     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3046       return FoldedVOp;
3047 
3048     // fold (add_sat x, 0) -> x, vector edition
3049     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3050       return N0;
3051   }
3052 
3053   // fold (add_sat x, 0) -> x
3054   if (isNullConstant(N1))
3055     return N0;
3056 
3057   // If it cannot overflow, transform into an add.
3058   if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3059     return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3060 
3061   return SDValue();
3062 }
3063 
3064 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3065                           bool ForceCarryReconstruction = false) {
3066   bool Masked = false;
3067 
3068   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3069   while (true) {
3070     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3071       V = V.getOperand(0);
3072       continue;
3073     }
3074 
3075     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3076       if (ForceCarryReconstruction)
3077         return V;
3078 
3079       Masked = true;
3080       V = V.getOperand(0);
3081       continue;
3082     }
3083 
3084     if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3085       return V;
3086 
3087     break;
3088   }
3089 
3090   // If this is not a carry, return.
3091   if (V.getResNo() != 1)
3092     return SDValue();
3093 
3094   if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3095       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3096     return SDValue();
3097 
3098   EVT VT = V->getValueType(0);
3099   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3100     return SDValue();
3101 
  // If the result is masked, then we can return it no matter what kind of bool
  // the target uses. If it isn't, then we need to make sure the bool is
  // represented as 0 or 1 and not some other value.
3105   if (Masked ||
3106       TLI.getBooleanContents(V.getValueType()) ==
3107           TargetLoweringBase::ZeroOrOneBooleanContent)
3108     return V;
3109 
3110   return SDValue();
3111 }
3112 
3113 /// Given the operands of an add/sub operation, see if the 2nd operand is a
3114 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3115 /// the opcode and bypass the mask operation.
3116 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3117                                  SelectionDAG &DAG, const SDLoc &DL) {
3118   if (N1.getOpcode() == ISD::ZERO_EXTEND)
3119     N1 = N1.getOperand(0);
3120 
3121   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3122     return SDValue();
3123 
3124   EVT VT = N0.getValueType();
3125   SDValue N10 = N1.getOperand(0);
3126   if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3127     N10 = N10.getOperand(0);
3128 
3129   if (N10.getValueType() != VT)
3130     return SDValue();
3131 
3132   if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3133     return SDValue();
3134 
3135   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3136   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
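  // This holds because X is known to be all sign bits, i.e. 0 or -1, and for
  // exactly those two values (and X, 1) == -X.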
3137   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3138 }
3139 
3140 /// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
3143   EVT VT = N0.getValueType();
3144   SDLoc DL(LocReference);
3145 
3146   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3147   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
3148       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
3149     return DAG.getNode(ISD::SUB, DL, VT, N0,
3150                        DAG.getNode(ISD::SHL, DL, VT,
3151                                    N1.getOperand(0).getOperand(1),
3152                                    N1.getOperand(1)));
3153 
3154   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3155     return V;
3156 
3157   // Look for:
3158   //   add (add x, 1), y
3159   // And if the target does not like this form then turn into:
3160   //   sub y, (xor x, -1)
3161   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3162       N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3163       // Limit this to after legalization if the add has wrap flags
3164       (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3165                                      !N0->getFlags().hasNoSignedWrap()))) {
3166     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
3167                               DAG.getAllOnesConstant(DL, VT));
3168     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3169   }
3170 
3171   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3172     // Hoist one-use subtraction by non-opaque constant:
3173     //   (x - C) + y  ->  (x + y) - C
3174     // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3175     if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3176       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3177       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3178     }
3179     // Hoist one-use subtraction from non-opaque constant:
3180     //   (C - x) + y  ->  (y - x) + C
3181     if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3182       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3183       return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3184     }
3185   }
3186 
3187   // add (mul x, C), x -> mul x, C+1
3188   if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3189       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3190       N0.hasOneUse()) {
3191     SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3192                                DAG.getConstant(1, DL, VT));
3193     return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3194   }
3195 
3196   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3197   // rather than 'add 0/-1' (the zext should get folded).
3198   // add (sext i1 Y), X --> sub X, (zext i1 Y)
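  // Both forms agree on the i1 payload: Y = true gives X + (-1) == X - 1 ==
  // X - (zext true); Y = false gives X + 0 == X - 0.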
3199   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3200       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3201       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3202     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3203     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3204   }
3205 
3206   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
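  // (sextinreg Y i1) is 0 when the low bit of Y is clear and -1 when it is
  // set, so adding it to X is the same as subtracting (and Y, 1).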
3207   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3208     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3209     if (TN->getVT() == MVT::i1) {
3210       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3211                                  DAG.getConstant(1, DL, VT));
3212       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
3213     }
3214   }
3215 
3216   // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3217   if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3218       N1.getResNo() == 0)
3219     return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3220                        N0, N1.getOperand(0), N1.getOperand(2));
3221 
3222   // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3223   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3224     if (SDValue Carry = getAsCarry(TLI, N1))
3225       return DAG.getNode(ISD::UADDO_CARRY, DL,
3226                          DAG.getVTList(VT, Carry.getValueType()), N0,
3227                          DAG.getConstant(0, DL, VT), Carry);
3228 
3229   return SDValue();
3230 }
3231 
3232 SDValue DAGCombiner::visitADDC(SDNode *N) {
3233   SDValue N0 = N->getOperand(0);
3234   SDValue N1 = N->getOperand(1);
3235   EVT VT = N0.getValueType();
3236   SDLoc DL(N);
3237 
3238   // If the flag result is dead, turn this into an ADD.
3239   if (!N->hasAnyUseOfValue(1))
3240     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3241                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3242 
3243   // canonicalize constant to RHS.
3244   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3245   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3246   if (N0C && !N1C)
3247     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3248 
3249   // fold (addc x, 0) -> x + no carry out
3250   if (isNullConstant(N1))
3251     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3252                                         DL, MVT::Glue));
3253 
3254   // If it cannot overflow, transform into an add.
3255   if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3256     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3257                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3258 
3259   return SDValue();
3260 }
3261 
3262 /**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
3264  * then the flip also occurs if computing the inverse is the same cost.
3265  * This function returns an empty SDValue in case it cannot flip the boolean
3266  * without increasing the cost of the computation. If you want to flip a boolean
3267  * no matter what, use DAG.getLogicalNOT.
3268  */
3269 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3270                                   const TargetLowering &TLI,
3271                                   bool Force) {
3272   if (Force && isa<ConstantSDNode>(V))
3273     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3274 
3275   if (V.getOpcode() != ISD::XOR)
3276     return SDValue();
3277 
3278   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3279   if (!Const)
3280     return SDValue();
3281 
3282   EVT VT = V.getValueType();
3283 
3284   bool IsFlip = false;
  switch (TLI.getBooleanContents(VT)) {
3286     case TargetLowering::ZeroOrOneBooleanContent:
3287       IsFlip = Const->isOne();
3288       break;
3289     case TargetLowering::ZeroOrNegativeOneBooleanContent:
3290       IsFlip = Const->isAllOnes();
3291       break;
3292     case TargetLowering::UndefinedBooleanContent:
3293       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3294       break;
3295   }
3296 
3297   if (IsFlip)
3298     return V.getOperand(0);
3299   if (Force)
3300     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3301   return SDValue();
3302 }
3303 
3304 SDValue DAGCombiner::visitADDO(SDNode *N) {
3305   SDValue N0 = N->getOperand(0);
3306   SDValue N1 = N->getOperand(1);
3307   EVT VT = N0.getValueType();
3308   bool IsSigned = (ISD::SADDO == N->getOpcode());
3309 
3310   EVT CarryVT = N->getValueType(1);
3311   SDLoc DL(N);
3312 
3313   // If the flag result is dead, turn this into an ADD.
3314   if (!N->hasAnyUseOfValue(1))
3315     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3316                      DAG.getUNDEF(CarryVT));
3317 
3318   // canonicalize constant to RHS.
3319   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3320       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3321     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3322 
3323   // fold (addo x, 0) -> x + no carry out
3324   if (isNullOrNullSplat(N1))
3325     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3326 
3327   // If it cannot overflow, transform into an add.
3328   if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3329     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3330                      DAG.getConstant(0, DL, CarryVT));
3331 
3332   if (IsSigned) {
3333     // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3334     if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3335       return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3336                          DAG.getConstant(0, DL, VT), N0.getOperand(0));
3337   } else {
3338     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3339     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3340       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3341                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3342       return CombineTo(
3343           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3344     }
3345 
3346     if (SDValue Combined = visitUADDOLike(N0, N1, N))
3347       return Combined;
3348 
3349     if (SDValue Combined = visitUADDOLike(N1, N0, N))
3350       return Combined;
3351   }
3352 
3353   return SDValue();
3354 }
3355 
3356 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3357   EVT VT = N0.getValueType();
3358   if (VT.isVector())
3359     return SDValue();
3360 
3361   // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3362   // If Y + 1 cannot overflow.
3363   if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3364     SDValue Y = N1.getOperand(0);
3365     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3366     if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3367       return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3368                          N1.getOperand(2));
3369   }
3370 
3371   // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3372   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3373     if (SDValue Carry = getAsCarry(TLI, N1))
3374       return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3375                          DAG.getConstant(0, SDLoc(N), VT), Carry);
3376 
3377   return SDValue();
3378 }
3379 
3380 SDValue DAGCombiner::visitADDE(SDNode *N) {
3381   SDValue N0 = N->getOperand(0);
3382   SDValue N1 = N->getOperand(1);
3383   SDValue CarryIn = N->getOperand(2);
3384 
3385   // canonicalize constant to RHS
3386   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3387   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3388   if (N0C && !N1C)
3389     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3390                        N1, N0, CarryIn);
3391 
3392   // fold (adde x, y, false) -> (addc x, y)
3393   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3394     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3395 
3396   return SDValue();
3397 }
3398 
3399 SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3400   SDValue N0 = N->getOperand(0);
3401   SDValue N1 = N->getOperand(1);
3402   SDValue CarryIn = N->getOperand(2);
3403   SDLoc DL(N);
3404 
3405   // canonicalize constant to RHS
3406   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408   if (N0C && !N1C)
3409     return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3410 
3411   // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3412   if (isNullConstant(CarryIn)) {
3413     if (!LegalOperations ||
3414         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3416   }
3417 
3418   // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3419   if (isNullConstant(N0) && isNullConstant(N1)) {
3420     EVT VT = N0.getValueType();
3421     EVT CarryVT = CarryIn.getValueType();
3422     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3423     AddToWorklist(CarryExt.getNode());
3424     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3425                                     DAG.getConstant(1, DL, VT)),
3426                      DAG.getConstant(0, DL, CarryVT));
3427   }
3428 
3429   if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3430     return Combined;
3431 
3432   if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3433     return Combined;
3434 
3435   // We want to avoid useless duplication.
  // TODO: This is done automatically for binary operations. As UADDO_CARRY is
  // not a binary operation, it is not really possible to leverage this
  // existing mechanism for it. However, if more operations require the same
  // deduplication logic, then it may be worth generalizing.
3440   SDValue Ops[] = {N1, N0, CarryIn};
3441   SDNode *CSENode =
3442       DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3443   if (CSENode)
3444     return SDValue(CSENode, 0);
3445 
3446   return SDValue();
3447 }
3448 
3449 /**
 * If we are facing some sort of diamond carry propagation pattern, try to
3451  * break it up to generate something like:
3452  *   (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
3453  *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
3456  *
3457  * Patterns typically look something like
3458  *                (uaddo A, B)
3459  *                /          \
3460  *             Carry         Sum
3461  *               |             \
3462  *               | (uaddo_carry *, 0, Z)
3463  *               |       /
3464  *                \   Carry
3465  *                 |   /
3466  * (uaddo_carry X, *, *)
3467  *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
3469  * produce a combine with a single path for carry propagation.
3470  */
3471 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3472                                          SelectionDAG &DAG, SDValue X,
3473                                          SDValue Carry0, SDValue Carry1,
3474                                          SDNode *N) {
3475   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3476     return SDValue();
3477   if (Carry1.getOpcode() != ISD::UADDO)
3478     return SDValue();
3479 
3480   SDValue Z;
3481 
3482   /**
3483    * First look for a suitable Z. It will present itself in the form of
3484    * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3485    */
3486   if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3487       isNullConstant(Carry0.getOperand(1))) {
3488     Z = Carry0.getOperand(2);
3489   } else if (Carry0.getOpcode() == ISD::UADDO &&
3490              isOneConstant(Carry0.getOperand(1))) {
3491     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3492     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3493   } else {
3494     // We couldn't find a suitable Z.
3495     return SDValue();
  }

  auto cancelDiamond = [&](SDValue A, SDValue B) {
3500     SDLoc DL(N);
3501     SDValue NewY =
3502         DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3503     Combiner.AddToWorklist(NewY.getNode());
3504     return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3505                        DAG.getConstant(0, DL, X.getValueType()),
3506                        NewY.getValue(1));
3507   };
3508 
3509   /**
3510    *         (uaddo A, B)
3511    *              |
3512    *             Sum
3513    *              |
3514    * (uaddo_carry *, 0, Z)
3515    */
3516   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3517     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3518   }
3519 
3520   /**
3521    * (uaddo_carry A, 0, Z)
3522    *         |
3523    *        Sum
3524    *         |
3525    *  (uaddo *, B)
3526    */
3527   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3528     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3529   }
3530 
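  /**
   * (uaddo_carry B, 0, Z)
   *         |
   *        Sum
   *         |
   *  (uaddo A, *)
   */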
3531   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3532     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3533   }
3534 
3535   return SDValue();
3536 }
3537 
// If we are facing some sort of diamond carry/borrow in/out pattern, try to
// match patterns like:
3540 //
3541 //          (uaddo A, B)            CarryIn
3542 //            |  \                     |
3543 //            |   \                    |
3544 //    PartialSum   PartialCarryOutX   /
3545 //            |        |             /
3546 //            |    ____|____________/
3547 //            |   /    |
3548 //     (uaddo *, *)    \________
3549 //       |  \                   \
3550 //       |   \                   |
3551 //       |    PartialCarryOutY   |
3552 //       |        \              |
3553 //       |         \            /
3554 //   AddCarrySum    |    ______/
3555 //                  |   /
3556 //   CarryOut = (or *, *)
3557 //
3558 // And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3559 //
3560 //    {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3561 //
3562 // Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3563 // with a single path for carry/borrow out propagation.
3564 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3565                                    SDValue N0, SDValue N1, SDNode *N) {
3566   SDValue Carry0 = getAsCarry(TLI, N0);
3567   if (!Carry0)
3568     return SDValue();
3569   SDValue Carry1 = getAsCarry(TLI, N1);
3570   if (!Carry1)
3571     return SDValue();
3572 
3573   unsigned Opcode = Carry0.getOpcode();
3574   if (Opcode != Carry1.getOpcode())
3575     return SDValue();
3576   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3577     return SDValue();
3578 
3579   // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3580   // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3581   if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3582     std::swap(Carry0, Carry1);
3583 
3584   // Check if nodes are connected in expected way.
3585   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3586       Carry1.getOperand(1) != Carry0.getValue(0))
3587     return SDValue();
3588 
  // The carry-in value must be on the right-hand side for subtraction.
3590   unsigned CarryInOperandNum =
3591       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3592   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3593     return SDValue();
3594   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3595 
3596   unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3597   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3598     return SDValue();
3599 
3600   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3601   CarryIn = getAsCarry(TLI, CarryIn, true);
3602   if (!CarryIn)
3603     return SDValue();
3604 
3605   SDLoc DL(N);
3606   SDValue Merged =
3607       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3608                   Carry0.getOperand(1), CarryIn);
3609 
  // Because we have proven that the result of the UADDO/USUBO of A and B
  // feeds into the UADDO/USUBO that consumes the carry/borrow in, we know
  // that if the first UADDO/USUBO overflows, the second one cannot. For
  // example, consider 8-bit numbers where 0xFF is the maximum value.
3615   //
3616   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3617   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3618   //
  // This is important because it means that OR and XOR can be used to merge
  // carry flags, and that AND can return a constant zero.
3621   //
3622   // TODO: match other operations that can merge flags (ADD, etc)
3623   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3624   if (N->getOpcode() == ISD::AND)
3625     return DAG.getConstant(0, DL, MVT::i1);
3626   return Merged.getValue(1);
3627 }
3628 
3629 SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3630                                           SDValue CarryIn, SDNode *N) {
3631   // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3632   // carry.
3633   if (isBitwiseNot(N0))
3634     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3635       SDLoc DL(N);
3636       SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3637                                 N0.getOperand(0), NotC);
3638       return CombineTo(
3639           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3640     }
3641 
3642   // Iff the flag result is dead:
3643   // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3644   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3645   // or the dependency between the instructions.
3646   if ((N0.getOpcode() == ISD::ADD ||
3647        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3648         N0.getValue(1) != CarryIn)) &&
3649       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3650     return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3651                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3652 
  /**
   * When one of the uaddo_carry arguments is itself a carry, we may be facing
   * a diamond carry propagation. In that case, we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
3658   if (auto Y = getAsCarry(TLI, N1)) {
3659     // Because both are carries, Y and Z can be swapped.
3660     if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3661       return R;
3662     if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3663       return R;
3664   }
3665 
3666   return SDValue();
3667 }
3668 
3669 SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3670                                           SDValue CarryIn, SDNode *N) {
3671   // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3672   if (isBitwiseNot(N0)) {
3673     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3674       return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3675                          N0.getOperand(0), NotC);
3676   }
3677 
3678   return SDValue();
3679 }
3680 
3681 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3682   SDValue N0 = N->getOperand(0);
3683   SDValue N1 = N->getOperand(1);
3684   SDValue CarryIn = N->getOperand(2);
3685   SDLoc DL(N);
3686 
3687   // canonicalize constant to RHS
3688   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3689   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3690   if (N0C && !N1C)
3691     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3692 
3693   // fold (saddo_carry x, y, false) -> (saddo x, y)
3694   if (isNullConstant(CarryIn)) {
3695     if (!LegalOperations ||
3696         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3697       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3698   }
3699 
3700   if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3701     return Combined;
3702 
3703   if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3704     return Combined;
3705 
3706   return SDValue();
3707 }
3708 
3709 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3710 // clamp/truncation if necessary.
3711 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3712                                    SDValue RHS, SelectionDAG &DAG,
3713                                    const SDLoc &DL) {
3714   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3715          "Illegal truncation");
3716 
3717   if (DstVT == SrcVT)
3718     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3719 
3720   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3721   // clamping RHS.
3722   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3723                                           DstVT.getScalarSizeInBits());
3724   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3725     return SDValue();
3726 
3727   SDValue SatLimit =
3728       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3729                                            DstVT.getScalarSizeInBits()),
3730                       DL, SrcVT);
3731   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3732   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3733   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3734   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3735 }
3736 
3737 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3738 // usubsat(a,b), optionally as a truncated type.
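// Both forms compute the unsigned saturating difference:
//   umax(a,b) - b == (a >= b ? a - b : 0) == usubsat(a,b)
//   a - umin(a,b) == (a >  b ? a - b : 0) == usubsat(a,b)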
3739 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3740   if (N->getOpcode() != ISD::SUB ||
3741       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3742     return SDValue();
3743 
3744   EVT SubVT = N->getValueType(0);
3745   SDValue Op0 = N->getOperand(0);
3746   SDValue Op1 = N->getOperand(1);
3747 
  // Try to find umax(a,b) - b or a - umin(a,b) patterns that
  // may be converted to usubsat(a,b).
3750   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3751     SDValue MaxLHS = Op0.getOperand(0);
3752     SDValue MaxRHS = Op0.getOperand(1);
3753     if (MaxLHS == Op1)
3754       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3755     if (MaxRHS == Op1)
3756       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3757   }
3758 
3759   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3760     SDValue MinLHS = Op1.getOperand(0);
3761     SDValue MinRHS = Op1.getOperand(1);
3762     if (MinLHS == Op0)
3763       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3764     if (MinRHS == Op0)
3765       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3766   }
3767 
3768   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3769   if (Op1.getOpcode() == ISD::TRUNCATE &&
3770       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3771       Op1.getOperand(0).hasOneUse()) {
3772     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3773     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3774     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3775       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3776                                  DAG, SDLoc(N));
3777     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3778       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3779                                  DAG, SDLoc(N));
3780   }
3781 
3782   return SDValue();
3783 }
3784 
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
3787 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3788                              SelectionDAG &DAG, bool LegalOperations) {
3789   if (!VT.isVector())
3790     return DAG.getConstant(0, DL, VT);
3791   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3792     return DAG.getConstant(0, DL, VT);
3793   return SDValue();
3794 }
3795 
3796 SDValue DAGCombiner::visitSUB(SDNode *N) {
3797   SDValue N0 = N->getOperand(0);
3798   SDValue N1 = N->getOperand(1);
3799   EVT VT = N0.getValueType();
3800   SDLoc DL(N);
3801 
3802   auto PeekThroughFreeze = [](SDValue N) {
3803     if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3804       return N->getOperand(0);
3805     return N;
3806   };
3807 
3808   // fold (sub x, x) -> 0
3809   // FIXME: Refactor this and xor and other similar operations together.
3810   if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3811     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3812 
3813   // fold (sub c1, c2) -> c3
3814   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3815     return C;
3816 
3817   // fold vector ops
3818   if (VT.isVector()) {
3819     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3820       return FoldedVOp;
3821 
3822     // fold (sub x, 0) -> x, vector edition
3823     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3824       return N0;
3825   }
3826 
3827   if (SDValue NewSel = foldBinOpIntoSelect(N))
3828     return NewSel;
3829 
3830   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3831 
3832   // fold (sub x, c) -> (add x, -c)
3833   if (N1C) {
3834     return DAG.getNode(ISD::ADD, DL, VT, N0,
3835                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3836   }
3837 
3838   if (isNullOrNullSplat(N0)) {
3839     unsigned BitWidth = VT.getScalarSizeInBits();
3840     // Right-shifting everything out but the sign bit followed by negation is
3841     // the same as flipping arithmetic/logical shift type without the negation:
3842     // -(X >>u 31) -> (X >>s 31)
3843     // -(X >>s 31) -> (X >>u 31)
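    // E.g. for i32: if the sign bit of X is set, (X >>u 31) == 1 and its
    // negation is -1 == (X >>s 31); if it is clear, both sides are 0. The
    // second form follows by the same argument with the shifts swapped.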
3844     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3845       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3846       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3847         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3848         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3849           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3850       }
3851     }
3852 
3853     // 0 - X --> 0 if the sub is NUW.
3854     if (N->getFlags().hasNoUnsignedWrap())
3855       return N0;
3856 
3857     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3858       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3859       // N1 must be 0 because negating the minimum signed value is undefined.
3860       if (N->getFlags().hasNoSignedWrap())
3861         return N0;
3862 
3863       // 0 - X --> X if X is 0 or the minimum signed value.
3864       return N1;
3865     }
3866 
3867     // Convert 0 - abs(x).
3868     if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3869         !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3870       if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3871         return Result;
3872 
    // Fold neg(splat(neg(x))) -> splat(x)
3874     if (VT.isVector()) {
3875       SDValue N1S = DAG.getSplatValue(N1, true);
3876       if (N1S && N1S.getOpcode() == ISD::SUB &&
3877           isNullConstant(N1S.getOperand(0)))
3878         return DAG.getSplat(VT, DL, N1S.getOperand(1));
3879     }
3880   }
3881 
3882   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
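  // (In two's complement, ~x == -1 - x, so the values are identical.)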
3883   if (isAllOnesOrAllOnesSplat(N0))
3884     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3885 
3886   // fold (A - (0-B)) -> A+B
3887   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3888     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3889 
3890   // fold A-(A-B) -> B
3891   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3892     return N1.getOperand(1);
3893 
3894   // fold (A+B)-A -> B
3895   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3896     return N0.getOperand(1);
3897 
3898   // fold (A+B)-B -> A
3899   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3900     return N0.getOperand(0);
3901 
3902   // fold (A+C1)-C2 -> A+(C1-C2)
3903   if (N0.getOpcode() == ISD::ADD) {
3904     SDValue N01 = N0.getOperand(1);
3905     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3906       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3907   }
3908 
3909   // fold C2-(A+C1) -> (C2-C1)-A
3910   if (N1.getOpcode() == ISD::ADD) {
3911     SDValue N11 = N1.getOperand(1);
3912     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3913       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3914   }
3915 
3916   // fold (A-C1)-C2 -> A-(C1+C2)
3917   if (N0.getOpcode() == ISD::SUB) {
3918     SDValue N01 = N0.getOperand(1);
3919     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3920       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3921   }
3922 
3923   // fold (c1-A)-c2 -> (c1-c2)-A
3924   if (N0.getOpcode() == ISD::SUB) {
3925     SDValue N00 = N0.getOperand(0);
3926     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3927       return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3928   }
3929 
3930   // fold ((A+(B+or-C))-B) -> A+or-C
3931   if (N0.getOpcode() == ISD::ADD &&
3932       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3933        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3934       N0.getOperand(1).getOperand(0) == N1)
3935     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3936                        N0.getOperand(1).getOperand(1));
3937 
3938   // fold ((A+(C+B))-B) -> A+C
3939   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3940       N0.getOperand(1).getOperand(1) == N1)
3941     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3942                        N0.getOperand(1).getOperand(0));
3943 
3944   // fold ((A-(B-C))-C) -> A-B
3945   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3946       N0.getOperand(1).getOperand(1) == N1)
3947     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3948                        N0.getOperand(1).getOperand(0));
3949 
3950   // fold (A-(B-C)) -> A+(C-B)
3951   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3952     return DAG.getNode(ISD::ADD, DL, VT, N0,
3953                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3954                                    N1.getOperand(0)));
3955 
3956   // A - (A & B)  ->  A & (~B)
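  // The set bits of (A & B) are a subset of the set bits of A, so the
  // subtraction never borrows; it simply clears B's bits out of A.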
3957   if (N1.getOpcode() == ISD::AND) {
3958     SDValue A = N1.getOperand(0);
3959     SDValue B = N1.getOperand(1);
3960     if (A != N0)
3961       std::swap(A, B);
3962     if (A == N0 &&
3963         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3964       SDValue InvB =
3965           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3966       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3967     }
3968   }
3969 
3970   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3971   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3972     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3973         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3974       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3975                                 N1.getOperand(0).getOperand(1),
3976                                 N1.getOperand(1));
3977       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3978     }
3979     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3980         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3981       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3982                                 N1.getOperand(0),
3983                                 N1.getOperand(1).getOperand(1));
3984       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3985     }
3986   }
3987 
3988   // If either operand of a sub is undef, the result is undef
3989   if (N0.isUndef())
3990     return N0;
3991   if (N1.isUndef())
3992     return N1;
3993 
3994   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3995     return V;
3996 
3997   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3998     return V;
3999 
4000   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
4001     return V;
4002 
4003   if (SDValue V = foldSubToUSubSat(VT, N))
4004     return V;
4005 
4006   // (x - y) - 1  ->  add (xor y, -1), x
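  // (Since ~y == -y - 1, x + ~y computes exactly x - y - 1.)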
4007   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
4008     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4009                               DAG.getAllOnesConstant(DL, VT));
4010     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
4011   }
4012 
4013   // Look for:
4014   //   sub y, (xor x, -1)
4015   // And if the target does not like this form then turn into:
4016   //   add (add x, y), 1
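  // (Since ~x == -x - 1, y - ~x is the same as x + y + 1.)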
4017   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4018     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4019     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4020   }
4021 
4022   // Hoist one-use addition by non-opaque constant:
4023   //   (x + C) - y  ->  (x - y) + C
4024   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4025       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4026     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4027     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4028   }
4029   // y - (x + C)  ->  (y - x) - C
4030   if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4031       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4032     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4033     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4034   }
4035   // (x - C) - y  ->  (x - y) - C
4036   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4037   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4038       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4039     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4040     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4041   }
4042   // (C - x) - y  ->  C - (x + y)
4043   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4044       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4045     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4046     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4047   }
4048 
4049   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4050   // rather than 'sub 0/1' (the sext should get folded).
4051   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4052   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4053       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4054       TLI.getBooleanContents(VT) ==
4055           TargetLowering::ZeroOrNegativeOneBooleanContent) {
4056     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4057     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4058   }
4059 
4060   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
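  // (Y is 0 when X is non-negative and -1 otherwise, so (xor X, Y) - Y yields
  // either X or ~X + 1 == -X, i.e. the two's complement absolute value.)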
4061   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
4062     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
4063       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
4064       SDValue S0 = N1.getOperand(0);
4065       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
4066         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
4067           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
4068             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
4069     }
4070   }
4071 
4072   // If the relocation model supports it, consider symbol offsets.
4073   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4074     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4075       // fold (sub Sym+c1, Sym+c2) -> c1-c2
4076       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4077         if (GA->getGlobal() == GB->getGlobal())
4078           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4079                                  DL, VT);
4080     }
4081 
4082   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
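  // (The sextinreg result is 0 or -1 depending on bit 0 of Y, so subtracting
  // it is the same as adding 0 or 1, which is exactly (and Y 1).)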
4083   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4084     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4085     if (TN->getVT() == MVT::i1) {
4086       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4087                                  DAG.getConstant(1, DL, VT));
4088       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4089     }
4090   }
4091 
4092   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4093   if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4094     const APInt &IntVal = N1.getConstantOperandAPInt(0);
4095     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4096   }
4097 
4098   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4099   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4100     APInt NewStep = -N1.getConstantOperandAPInt(0);
4101     return DAG.getNode(ISD::ADD, DL, VT, N0,
4102                        DAG.getStepVector(DL, VT, NewStep));
4103   }
4104 
4105   // Prefer an add for more folding potential and possibly better codegen:
4106   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
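  // (Shifting the sign bit down logically gives 0 or 1, and arithmetically
  // gives 0 or -1; subtracting the former equals adding the latter.)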
4107   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4108     SDValue ShAmt = N1.getOperand(1);
4109     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4110     if (ShAmtC &&
4111         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
4112       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4113       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4114     }
4115   }
4116 
4117   // As with the previous fold, prefer add for more folding potential.
4118   // Subtracting SMIN/0 is the same as adding SMIN/0:
4119   // N0 - (X << BW-1) --> N0 + (X << BW-1)
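  // (X << BW-1 is either 0 or SMIN, and SMIN is its own two's complement
  // negation, so the sub and the add produce the same value.)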
4120   if (N1.getOpcode() == ISD::SHL) {
4121     ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4122     if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
4123       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4124   }
4125 
4126   // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4127   if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4128       N0.getResNo() == 0 && N0.hasOneUse())
4129     return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4130                        N0.getOperand(0), N1, N0.getOperand(2));
4131 
4132   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4133     // (sub Carry, X)  ->  (uaddo_carry (sub 0, X), 0, Carry)
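    // (Carry - X == (0 - X) + 0 + Carry, which matches uaddo_carry's addend
    // and carry-in operands.)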
4134     if (SDValue Carry = getAsCarry(TLI, N0)) {
4135       SDValue X = N1;
4136       SDValue Zero = DAG.getConstant(0, DL, VT);
4137       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4138       return DAG.getNode(ISD::UADDO_CARRY, DL,
4139                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4140                          Carry);
4141     }
4142   }
4143 
4144   // If there's no chance of borrowing from adjacent bits, then sub is xor:
4145   // sub C0, X --> xor X, C0
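  // E.g. if C0 = 0b1100 and known bits show X is either 0b0100 or 0, each bit
  // of X subtracts without borrowing: 12 - 4 == 12 ^ 4 == 8.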
4146   if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4147     if (!C0->isOpaque()) {
4148       const APInt &C0Val = C0->getAPIntValue();
4149       const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4150       if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4151         return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4152     }
4153   }
4154 
4155   // max(a,b) - min(a,b) --> abd(a,b)
4156   auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
4157     if (N0.getOpcode() != Max || N1.getOpcode() != Min)
4158       return SDValue();
4159     if ((N0.getOperand(0) != N1.getOperand(0) ||
4160          N0.getOperand(1) != N1.getOperand(1)) &&
4161         (N0.getOperand(0) != N1.getOperand(1) ||
4162          N0.getOperand(1) != N1.getOperand(0)))
4163       return SDValue();
4164     if (!hasOperation(Abd, VT))
4165       return SDValue();
4166     return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
4167   };
4168   if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
4169     return R;
4170   if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
4171     return R;
4172 
4173   return SDValue();
4174 }
4175 
4176 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4177   unsigned Opcode = N->getOpcode();
4178   SDValue N0 = N->getOperand(0);
4179   SDValue N1 = N->getOperand(1);
4180   EVT VT = N0.getValueType();
4181   bool IsSigned = Opcode == ISD::SSUBSAT;
4182   SDLoc DL(N);
4183 
4184   // fold (sub_sat x, undef) -> 0
4185   if (N0.isUndef() || N1.isUndef())
4186     return DAG.getConstant(0, DL, VT);
4187 
4188   // fold (sub_sat x, x) -> 0
4189   if (N0 == N1)
4190     return DAG.getConstant(0, DL, VT);
4191 
4192   // fold (sub_sat c1, c2) -> c3
4193   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4194     return C;
4195 
4196   // fold vector ops
4197   if (VT.isVector()) {
4198     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4199       return FoldedVOp;
4200 
4201     // fold (sub_sat x, 0) -> x, vector edition
4202     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4203       return N0;
4204   }
4205 
4206   // fold (sub_sat x, 0) -> x
4207   if (isNullConstant(N1))
4208     return N0;
4209 
  // If it cannot overflow, transform into a sub.
4211   if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4212     return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4213 
4214   return SDValue();
4215 }
4216 
4217 SDValue DAGCombiner::visitSUBC(SDNode *N) {
4218   SDValue N0 = N->getOperand(0);
4219   SDValue N1 = N->getOperand(1);
4220   EVT VT = N0.getValueType();
4221   SDLoc DL(N);
4222 
4223   // If the flag result is dead, turn this into an SUB.
4224   if (!N->hasAnyUseOfValue(1))
4225     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4226                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4227 
4228   // fold (subc x, x) -> 0 + no borrow
4229   if (N0 == N1)
4230     return CombineTo(N, DAG.getConstant(0, DL, VT),
4231                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4232 
4233   // fold (subc x, 0) -> x + no borrow
4234   if (isNullConstant(N1))
4235     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4236 
4237   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4238   if (isAllOnesConstant(N0))
4239     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4240                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4241 
4242   return SDValue();
4243 }
4244 
4245 SDValue DAGCombiner::visitSUBO(SDNode *N) {
4246   SDValue N0 = N->getOperand(0);
4247   SDValue N1 = N->getOperand(1);
4248   EVT VT = N0.getValueType();
4249   bool IsSigned = (ISD::SSUBO == N->getOpcode());
4250 
4251   EVT CarryVT = N->getValueType(1);
4252   SDLoc DL(N);
4253 
4254   // If the flag result is dead, turn this into an SUB.
4255   if (!N->hasAnyUseOfValue(1))
4256     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4257                      DAG.getUNDEF(CarryVT));
4258 
4259   // fold (subo x, x) -> 0 + no borrow
4260   if (N0 == N1)
4261     return CombineTo(N, DAG.getConstant(0, DL, VT),
4262                      DAG.getConstant(0, DL, CarryVT));
4263 
4264   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4265 
  // fold (ssubo x, c) -> (saddo x, -c)
4267   if (IsSigned && N1C && !N1C->isMinSignedValue()) {
4268     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4269                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4270   }
4271 
4272   // fold (subo x, 0) -> x + no borrow
4273   if (isNullOrNullSplat(N1))
4274     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4275 
  // If it cannot overflow, transform into a sub.
4277   if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4278     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4279                      DAG.getConstant(0, DL, CarryVT));
4280 
4281   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4282   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4283     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4284                      DAG.getConstant(0, DL, CarryVT));
4285 
4286   return SDValue();
4287 }
4288 
4289 SDValue DAGCombiner::visitSUBE(SDNode *N) {
4290   SDValue N0 = N->getOperand(0);
4291   SDValue N1 = N->getOperand(1);
4292   SDValue CarryIn = N->getOperand(2);
4293 
4294   // fold (sube x, y, false) -> (subc x, y)
4295   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4296     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4297 
4298   return SDValue();
4299 }
4300 
4301 SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4302   SDValue N0 = N->getOperand(0);
4303   SDValue N1 = N->getOperand(1);
4304   SDValue CarryIn = N->getOperand(2);
4305 
4306   // fold (usubo_carry x, y, false) -> (usubo x, y)
4307   if (isNullConstant(CarryIn)) {
4308     if (!LegalOperations ||
4309         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4310       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4311   }
4312 
4313   return SDValue();
4314 }
4315 
4316 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4317   SDValue N0 = N->getOperand(0);
4318   SDValue N1 = N->getOperand(1);
4319   SDValue CarryIn = N->getOperand(2);
4320 
4321   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4322   if (isNullConstant(CarryIn)) {
4323     if (!LegalOperations ||
4324         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4325       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4326   }
4327 
4328   return SDValue();
4329 }
4330 
4331 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4332 // UMULFIXSAT here.
4333 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4334   SDValue N0 = N->getOperand(0);
4335   SDValue N1 = N->getOperand(1);
4336   SDValue Scale = N->getOperand(2);
4337   EVT VT = N0.getValueType();
4338 
4339   // fold (mulfix x, undef, scale) -> 0
4340   if (N0.isUndef() || N1.isUndef())
4341     return DAG.getConstant(0, SDLoc(N), VT);
4342 
4343   // Canonicalize constant to RHS (vector doesn't have to splat)
4344   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4345      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4346     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4347 
4348   // fold (mulfix x, 0, scale) -> 0
4349   if (isNullConstant(N1))
4350     return DAG.getConstant(0, SDLoc(N), VT);
4351 
4352   return SDValue();
4353 }
4354 
4355 SDValue DAGCombiner::visitMUL(SDNode *N) {
4356   SDValue N0 = N->getOperand(0);
4357   SDValue N1 = N->getOperand(1);
4358   EVT VT = N0.getValueType();
4359   SDLoc DL(N);
4360 
4361   // fold (mul x, undef) -> 0
4362   if (N0.isUndef() || N1.isUndef())
4363     return DAG.getConstant(0, DL, VT);
4364 
4365   // fold (mul c1, c2) -> c1*c2
4366   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4367     return C;
4368 
4369   // canonicalize constant to RHS (vector doesn't have to splat)
4370   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4371       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4372     return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4373 
4374   bool N1IsConst = false;
4375   bool N1IsOpaqueConst = false;
4376   APInt ConstValue1;
4377 
4378   // fold vector ops
4379   if (VT.isVector()) {
4380     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4381       return FoldedVOp;
4382 
4383     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4384     assert((!N1IsConst ||
4385             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4386            "Splat APInt should be element width");
4387   } else {
4388     N1IsConst = isa<ConstantSDNode>(N1);
4389     if (N1IsConst) {
4390       ConstValue1 = N1->getAsAPIntVal();
4391       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4392     }
4393   }
4394 
4395   // fold (mul x, 0) -> 0
4396   if (N1IsConst && ConstValue1.isZero())
4397     return N1;
4398 
4399   // fold (mul x, 1) -> x
4400   if (N1IsConst && ConstValue1.isOne())
4401     return N0;
4402 
4403   if (SDValue NewSel = foldBinOpIntoSelect(N))
4404     return NewSel;
4405 
4406   // fold (mul x, -1) -> 0-x
4407   if (N1IsConst && ConstValue1.isAllOnes())
4408     return DAG.getNegative(N0, DL, VT);
4409 
4410   // fold (mul x, (1 << c)) -> x << c
4411   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4412       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4413     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4414       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4415       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4416       return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4417     }
4418   }
4419 
4420   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4421   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4422     unsigned Log2Val = (-ConstValue1).logBase2();
4423     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4424 
4425     // FIXME: If the input is something that is easily negated (e.g. a
4426     // single-use add), we should put the negate there.
4427     return DAG.getNode(ISD::SUB, DL, VT,
4428                        DAG.getConstant(0, DL, VT),
4429                        DAG.getNode(ISD::SHL, DL, VT, N0,
4430                             DAG.getConstant(Log2Val, DL, ShiftVT)));
4431   }
4432 
  // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
  // hi result is in use, in case we hit this mid-legalization.
4435   for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4436     if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4437       SDVTList LoHiVT = DAG.getVTList(VT, VT);
4438       // TODO: Can we match commutable operands with getNodeIfExists?
4439       if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4440         if (LoHi->hasAnyUseOfValue(1))
4441           return SDValue(LoHi, 0);
4442       if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4443         if (LoHi->hasAnyUseOfValue(1))
4444           return SDValue(LoHi, 0);
4445     }
4446   }
4447 
4448   // Try to transform:
4449   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4450   // mul x, (2^N + 1) --> add (shl x, N), x
4451   // mul x, (2^N - 1) --> sub (shl x, N), x
4452   // Examples: x * 33 --> (x << 5) + x
4453   //           x * 15 --> (x << 4) - x
4454   //           x * -33 --> -((x << 5) + x)
4455   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4456   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4457   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4458   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4459   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4460   //           x * 0xf800 --> (x << 16) - (x << 11)
4461   //           x * -0x8800 --> -((x << 15) + (x << 11))
4462   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4463   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4464     // TODO: We could handle more general decomposition of any constant by
4465     //       having the target set a limit on number of ops and making a
4466     //       callback to determine that sequence (similar to sqrt expansion).
4467     unsigned MathOp = ISD::DELETED_NODE;
4468     APInt MulC = ConstValue1.abs();
4469     // The constant `2` should be treated as (2^0 + 1).
4470     unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4471     MulC.lshrInPlace(TZeros);
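    // E.g. ConstValue1 = 20 = 0b10100: TZeros = 2 and MulC becomes 5 = 2^2+1,
    // so x * 20 is rebuilt below as (x << 4) + (x << 2).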
4472     if ((MulC - 1).isPowerOf2())
4473       MathOp = ISD::ADD;
4474     else if ((MulC + 1).isPowerOf2())
4475       MathOp = ISD::SUB;
4476 
4477     if (MathOp != ISD::DELETED_NODE) {
4478       unsigned ShAmt =
4479           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4480       ShAmt += TZeros;
4481       assert(ShAmt < VT.getScalarSizeInBits() &&
4482              "multiply-by-constant generated out of bounds shift");
4483       SDValue Shl =
4484           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4485       SDValue R =
4486           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4487                                DAG.getNode(ISD::SHL, DL, VT, N0,
4488                                            DAG.getConstant(TZeros, DL, VT)))
4489                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4490       if (ConstValue1.isNegative())
4491         R = DAG.getNegative(R, DL, VT);
4492       return R;
4493     }
4494   }
4495 
4496   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4497   if (N0.getOpcode() == ISD::SHL) {
4498     SDValue N01 = N0.getOperand(1);
4499     if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4500       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4501   }
4502 
4503   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4504   // use.
4505   {
4506     SDValue Sh, Y;
4507 
4508     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
4509     if (N0.getOpcode() == ISD::SHL &&
4510         isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4511       Sh = N0; Y = N1;
4512     } else if (N1.getOpcode() == ISD::SHL &&
4513                isConstantOrConstantVector(N1.getOperand(1)) &&
4514                N1->hasOneUse()) {
4515       Sh = N1; Y = N0;
4516     }
4517 
4518     if (Sh.getNode()) {
4519       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4520       return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4521     }
4522   }
4523 
4524   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4525   if (N0.getOpcode() == ISD::ADD &&
4526       DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4527       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4528       isMulAddWithConstProfitable(N, N0, N1))
4529     return DAG.getNode(
4530         ISD::ADD, DL, VT,
4531         DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4532         DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4533 
4534   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4535   ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4536   if (N0.getOpcode() == ISD::VSCALE && NC1) {
4537     const APInt &C0 = N0.getConstantOperandAPInt(0);
4538     const APInt &C1 = NC1->getAPIntValue();
4539     return DAG.getVScale(DL, VT, C0 * C1);
4540   }
4541 
4542   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4543   APInt MulVal;
4544   if (N0.getOpcode() == ISD::STEP_VECTOR &&
4545       ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4546     const APInt &C0 = N0.getConstantOperandAPInt(0);
4547     APInt NewStep = C0 * MulVal;
4548     return DAG.getStepVector(DL, VT, NewStep);
4549   }
4550 
  // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x elementwise
  // -> and(x, mask)
  // We can replace vectors with '0' and '1' factors with a clearing mask.
4555   if (VT.isFixedLengthVector()) {
4556     unsigned NumElts = VT.getVectorNumElements();
4557     SmallBitVector ClearMask;
4558     ClearMask.reserve(NumElts);
4559     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4560       if (!V || V->isZero()) {
4561         ClearMask.push_back(true);
4562         return true;
4563       }
4564       ClearMask.push_back(false);
4565       return V->isOne();
4566     };
4567     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4568         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4569       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4570       EVT LegalSVT = N1.getOperand(0).getValueType();
4571       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4572       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4573       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4574       for (unsigned I = 0; I != NumElts; ++I)
4575         if (ClearMask[I])
4576           Mask[I] = Zero;
4577       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4578     }
4579   }
4580 
4581   // reassociate mul
4582   if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4583     return RMUL;
4584 
4585   // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4586   if (SDValue SD =
4587           reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4588     return SD;
4589 
4590   // Simplify the operands using demanded-bits information.
4591   if (SimplifyDemandedBits(SDValue(N, 0)))
4592     return SDValue(N, 0);
4593 
4594   return SDValue();
4595 }
4596 
/// Return true if a divmod libcall is available.
4598 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4599                                      const TargetLowering &TLI) {
4600   RTLIB::Libcall LC;
4601   EVT NodeType = Node->getValueType(0);
4602   if (!NodeType.isSimple())
4603     return false;
4604   switch (NodeType.getSimpleVT().SimpleTy) {
4605   default: return false; // No libcall for vector types.
4606   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4607   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4608   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4609   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4610   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4611   }
4612 
4613   return TLI.getLibcallName(LC) != nullptr;
4614 }
4615 
4616 /// Issue divrem if both quotient and remainder are needed.
4617 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4618   if (Node->use_empty())
4619     return SDValue(); // This is a dead node, leave it alone.
4620 
4621   unsigned Opcode = Node->getOpcode();
4622   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4623   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4624 
  // DivMod lib calls can still work on non-legal types; lib calls do not
  // require the type to be legal.
4626   EVT VT = Node->getValueType(0);
4627   if (VT.isVector() || !VT.isInteger())
4628     return SDValue();
4629 
4630   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4631     return SDValue();
4632 
4633   // If DIVREM is going to get expanded into a libcall,
4634   // but there is no libcall available, then don't combine.
4635   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4636       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4637     return SDValue();
4638 
4639   // If div is legal, it's better to do the normal expansion
4640   unsigned OtherOpcode = 0;
4641   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4642     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4643     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4644       return SDValue();
4645   } else {
4646     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4647     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4648       return SDValue();
4649   }
4650 
4651   SDValue Op0 = Node->getOperand(0);
4652   SDValue Op1 = Node->getOperand(1);
4653   SDValue combined;
4654   for (SDNode *User : Op0->uses()) {
4655     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4656         User->use_empty())
4657       continue;
4658     // Convert the other matching node(s), too;
4659     // otherwise, the DIVREM may get target-legalized into something
4660     // target-specific that we won't be able to recognize.
4661     unsigned UserOpc = User->getOpcode();
4662     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4663         User->getOperand(0) == Op0 &&
4664         User->getOperand(1) == Op1) {
4665       if (!combined) {
4666         if (UserOpc == OtherOpcode) {
4667           SDVTList VTs = DAG.getVTList(VT, VT);
4668           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4669         } else if (UserOpc == DivRemOpc) {
4670           combined = SDValue(User, 0);
4671         } else {
4672           assert(UserOpc == Opcode);
4673           continue;
4674         }
4675       }
4676       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4677         CombineTo(User, combined);
4678       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4679         CombineTo(User, combined.getValue(1));
4680     }
4681   }
4682   return combined;
4683 }
4684 
4685 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4686   SDValue N0 = N->getOperand(0);
4687   SDValue N1 = N->getOperand(1);
4688   EVT VT = N->getValueType(0);
4689   SDLoc DL(N);
4690 
4691   unsigned Opc = N->getOpcode();
4692   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4693   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4694 
4695   // X / undef -> undef
4696   // X % undef -> undef
4697   // X / 0 -> undef
4698   // X % 0 -> undef
4699   // NOTE: This includes vectors where any divisor element is zero/undef.
4700   if (DAG.isUndef(Opc, {N0, N1}))
4701     return DAG.getUNDEF(VT);
4702 
4703   // undef / X -> 0
4704   // undef % X -> 0
4705   if (N0.isUndef())
4706     return DAG.getConstant(0, DL, VT);
4707 
4708   // 0 / X -> 0
4709   // 0 % X -> 0
4710   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4711   if (N0C && N0C->isZero())
4712     return N0;
4713 
4714   // X / X -> 1
4715   // X % X -> 0
4716   if (N0 == N1)
4717     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4718 
4719   // X / 1 -> X
4720   // X % 1 -> 0
4721   // If this is a boolean op (single-bit element type), we can't have
4722   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4723   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4724   // it's a 1.
4725   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4726     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4727 
4728   return SDValue();
4729 }
4730 
4731 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4732   SDValue N0 = N->getOperand(0);
4733   SDValue N1 = N->getOperand(1);
4734   EVT VT = N->getValueType(0);
4735   EVT CCVT = getSetCCResultType(VT);
4736   SDLoc DL(N);
4737 
4738   // fold (sdiv c1, c2) -> c1/c2
4739   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4740     return C;
4741 
4742   // fold vector ops
4743   if (VT.isVector())
4744     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4745       return FoldedVOp;
4746 
4747   // fold (sdiv X, -1) -> 0-X
4748   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4749   if (N1C && N1C->isAllOnes())
4750     return DAG.getNegative(N0, DL, VT);
4751 
4752   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
4753   if (N1C && N1C->isMinSignedValue())
4754     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4755                          DAG.getConstant(1, DL, VT),
4756                          DAG.getConstant(0, DL, VT));
4757 
4758   if (SDValue V = simplifyDivRem(N, DAG))
4759     return V;
4760 
4761   if (SDValue NewSel = foldBinOpIntoSelect(N))
4762     return NewSel;
4763 
4764   // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> (X&15) >> 2
4766   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4767     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4768 
4769   if (SDValue V = visitSDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4772     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4773                                               { N0, N1 })) {
4774       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4775       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4776       AddToWorklist(Mul.getNode());
4777       AddToWorklist(Sub.getNode());
4778       CombineTo(RemNode, Sub);
4779     }
4780     return V;
4781   }
4782 
4783   // sdiv, srem -> sdivrem
4784   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4785   // true.  Otherwise, we break the simplification logic in visitREM().
4786   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4787   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4788     if (SDValue DivRem = useDivRem(N))
4789         return DivRem;
4790 
4791   return SDValue();
4792 }
4793 
4794 static bool isDivisorPowerOfTwo(SDValue Divisor) {
  // Helper for determining whether a value is a power-of-2 constant scalar or
  // a vector of such elements; negated powers of 2 are also matched.
4797   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4798     if (C->isZero() || C->isOpaque())
4799       return false;
4800     if (C->getAPIntValue().isPowerOf2())
4801       return true;
4802     if (C->getAPIntValue().isNegatedPowerOf2())
4803       return true;
4804     return false;
4805   };
4806 
4807   return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4808 }
4809 
4810 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4811   SDLoc DL(N);
4812   EVT VT = N->getValueType(0);
4813   EVT CCVT = getSetCCResultType(VT);
4814   unsigned BitWidth = VT.getScalarSizeInBits();
4815 
4816   // fold (sdiv X, pow2) -> simple ops after legalize
4817   // FIXME: We check for the exact bit here because the generic lowering gives
4818   // better results in that case. The target-specific lowering should learn how
4819   // to handle exact sdivs efficiently.
4820   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
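    // Illustrative shape of the generic expansion below for i32 sdiv X, 8:
    //   Bias = srl (sra X, 31), 32 - 3   ; 0 for X >= 0, 7 for X < 0
    //   Quot = sra (add X, Bias), 3
    // with fixups afterwards for divisors of 1/-1 and negative divisors.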
4821     // Target-specific implementation of sdiv x, pow2.
4822     if (SDValue Res = BuildSDIVPow2(N))
4823       return Res;
4824 
4825     // Create constants that are functions of the shift amount value.
4826     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4827     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4828     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4829     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4830     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4831     if (!isConstantOrConstantVector(Inexact))
4832       return SDValue();
4833 
4834     // Splat the sign bit into the register
4835     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4836                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4837     AddToWorklist(Sign.getNode());
4838 
    // Add (N0 < 0) ? abs(N1) - 1 : 0 so the shift rounds toward zero.
4840     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4841     AddToWorklist(Srl.getNode());
4842     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4843     AddToWorklist(Add.getNode());
4844     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4845     AddToWorklist(Sra.getNode());
4846 
    // Special case: (sdiv X, 1) -> X
    // Special case: (sdiv X, -1) -> 0-X
4849     SDValue One = DAG.getConstant(1, DL, VT);
4850     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4851     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4852     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4853     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4854     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4855 
4856     // If dividing by a positive value, we're done. Otherwise, the result must
4857     // be negated.
4858     SDValue Zero = DAG.getConstant(0, DL, VT);
4859     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4860 
4861     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4862     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4863     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4864     return Res;
4865   }
4866 
4867   // If integer divide is expensive and we satisfy the requirements, emit an
4868   // alternate sequence.  Targets may check function attributes for size/speed
4869   // trade-offs.
4870   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4871   if (isConstantOrConstantVector(N1) &&
4872       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4873     if (SDValue Op = BuildSDIV(N))
4874       return Op;
4875 
4876   return SDValue();
4877 }
4878 
4879 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4880   SDValue N0 = N->getOperand(0);
4881   SDValue N1 = N->getOperand(1);
4882   EVT VT = N->getValueType(0);
4883   EVT CCVT = getSetCCResultType(VT);
4884   SDLoc DL(N);
4885 
4886   // fold (udiv c1, c2) -> c1/c2
4887   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4888     return C;
4889 
4890   // fold vector ops
4891   if (VT.isVector())
4892     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4893       return FoldedVOp;
4894 
4895   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4896   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4897   if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4898     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4899                          DAG.getConstant(1, DL, VT),
4900                          DAG.getConstant(0, DL, VT));
4901   }
4902 
4903   if (SDValue V = simplifyDivRem(N, DAG))
4904     return V;
4905 
4906   if (SDValue NewSel = foldBinOpIntoSelect(N))
4907     return NewSel;
4908 
4909   if (SDValue V = visitUDIVLike(N0, N1, N)) {
    // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4912     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4913                                               { N0, N1 })) {
4914       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4915       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4916       AddToWorklist(Mul.getNode());
4917       AddToWorklist(Sub.getNode());
4918       CombineTo(RemNode, Sub);
4919     }
4920     return V;
4921   }
4922 
  // udiv, urem -> udivrem
4924   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4925   // true.  Otherwise, we break the simplification logic in visitREM().
4926   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4927   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4928     if (SDValue DivRem = useDivRem(N))
4929         return DivRem;
4930 
4931   return SDValue();
4932 }
4933 
4934 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4935   SDLoc DL(N);
4936   EVT VT = N->getValueType(0);
4937 
4938   // fold (udiv x, (1 << c)) -> x >>u c
4939   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4940     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4941       AddToWorklist(LogBase2.getNode());
4942 
4943       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4944       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4945       AddToWorklist(Trunc.getNode());
4946       return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4947     }
4948   }
4949 
4950   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4951   if (N1.getOpcode() == ISD::SHL) {
4952     SDValue N10 = N1.getOperand(0);
4953     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4954       if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4955         AddToWorklist(LogBase2.getNode());
4956 
4957         EVT ADDVT = N1.getOperand(1).getValueType();
4958         SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4959         AddToWorklist(Trunc.getNode());
4960         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4961         AddToWorklist(Add.getNode());
4962         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4963       }
4964     }
4965   }
4966 
4967   // fold (udiv x, c) -> alternate
4968   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4969   if (isConstantOrConstantVector(N1) &&
4970       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4971     if (SDValue Op = BuildUDIV(N))
4972       return Op;
4973 
4974   return SDValue();
4975 }
4976 
4977 SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4978   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4979       !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4980     // Target-specific implementation of srem x, pow2.
4981     if (SDValue Res = BuildSREMPow2(N))
4982       return Res;
4983   }
4984   return SDValue();
4985 }
4986 
4987 // handles ISD::SREM and ISD::UREM
4988 SDValue DAGCombiner::visitREM(SDNode *N) {
4989   unsigned Opcode = N->getOpcode();
4990   SDValue N0 = N->getOperand(0);
4991   SDValue N1 = N->getOperand(1);
4992   EVT VT = N->getValueType(0);
4993   EVT CCVT = getSetCCResultType(VT);
4994 
4995   bool isSigned = (Opcode == ISD::SREM);
4996   SDLoc DL(N);
4997 
4998   // fold (rem c1, c2) -> c1%c2
4999   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5000     return C;
5001 
5002   // fold (urem X, -1) -> select(FX == -1, 0, FX)
5003   // Freeze the numerator to avoid a miscompile with an undefined value.
5004   if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5005       CCVT.isVector() == VT.isVector()) {
5006     SDValue F0 = DAG.getFreeze(N0);
5007     SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5008     return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5009   }
5010 
5011   if (SDValue V = simplifyDivRem(N, DAG))
5012     return V;
5013 
5014   if (SDValue NewSel = foldBinOpIntoSelect(N))
5015     return NewSel;
5016 
5017   if (isSigned) {
5018     // If we know the sign bits of both operands are zero, strength reduce to a
5019     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5020     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5021       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5022   } else {
5023     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5024       // fold (urem x, pow2) -> (and x, pow2-1)
5025       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5026       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5027       AddToWorklist(Add.getNode());
5028       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5029     }
5030     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5031     // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5032     // TODO: We should sink the following into isKnownToBePowerOfTwo
5033     // using a OrZero parameter analogous to our handling in ValueTracking.
5034     if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5035         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5036       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5037       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5038       AddToWorklist(Add.getNode());
5039       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5040     }
5041   }
5042 
5043   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5044 
5045   // If X/C can be simplified by the division-by-constant logic, lower
5046   // X%C to the equivalent of X-X/C*C.
5047   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5048   // speculative DIV must not cause a DIVREM conversion.  We guard against this
5049   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
5050   // combine will not return a DIVREM.  Regardless, checking cheapness here
5051   // makes sense since the simplification results in fatter code.
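  // E.g. urem x, 10 becomes x - (x / 10) * 10, with the udiv expanded by the
  // magic-number multiply sequence from BuildUDIV.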
5052   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5053     if (isSigned) {
5054       // check if we can build faster implementation for srem
5055       if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5056         return OptimizedRem;
5057     }
5058 
5059     SDValue OptimizedDiv =
5060         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5061     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5062       // If the equivalent Div node also exists, update its users.
5063       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5064       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5065                                                 { N0, N1 }))
5066         CombineTo(DivNode, OptimizedDiv);
5067       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5068       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5069       AddToWorklist(OptimizedDiv.getNode());
5070       AddToWorklist(Mul.getNode());
5071       return Sub;
5072     }
5073   }
5074 
  // srem -> sdivrem, urem -> udivrem
5076   if (SDValue DivRem = useDivRem(N))
5077     return DivRem.getValue(1);
5078 
5079   return SDValue();
5080 }
5081 
5082 SDValue DAGCombiner::visitMULHS(SDNode *N) {
5083   SDValue N0 = N->getOperand(0);
5084   SDValue N1 = N->getOperand(1);
5085   EVT VT = N->getValueType(0);
5086   SDLoc DL(N);
5087 
5088   // fold (mulhs c1, c2)
5089   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5090     return C;
5091 
5092   // canonicalize constant to RHS.
5093   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5094       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5095     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5096 
5097   if (VT.isVector()) {
5098     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5099       return FoldedVOp;
5100 
5101     // fold (mulhs x, 0) -> 0
    // do not return N1, because it may contain undef elements.
5103     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5104       return DAG.getConstant(0, DL, VT);
5105   }
5106 
5107   // fold (mulhs x, 0) -> 0
5108   if (isNullConstant(N1))
5109     return N1;
5110 
5111   // fold (mulhs x, 1) -> (sra x, size(x)-1)
5112   if (isOneConstant(N1))
5113     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
5114                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
5115                                        getShiftAmountTy(N0.getValueType())));
5116 
5117   // fold (mulhs x, undef) -> 0
5118   if (N0.isUndef() || N1.isUndef())
5119     return DAG.getConstant(0, DL, VT);
5120 
5121   // If the type twice as wide is legal, transform the mulhs to a wider multiply
5122   // plus a shift.
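  // E.g. i16 mulhs with a legal i32 multiply becomes
  //   trunc (srl (mul (sext x), (sext y)), 16).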
5123   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5124       !VT.isVector()) {
5125     MVT Simple = VT.getSimpleVT();
5126     unsigned SimpleSize = Simple.getSizeInBits();
5127     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5128     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5129       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5130       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5131       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5132       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5133             DAG.getConstant(SimpleSize, DL,
5134                             getShiftAmountTy(N1.getValueType())));
5135       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5136     }
5137   }
5138 
5139   return SDValue();
5140 }
5141 
5142 SDValue DAGCombiner::visitMULHU(SDNode *N) {
5143   SDValue N0 = N->getOperand(0);
5144   SDValue N1 = N->getOperand(1);
5145   EVT VT = N->getValueType(0);
5146   SDLoc DL(N);
5147 
5148   // fold (mulhu c1, c2)
5149   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5150     return C;
5151 
5152   // canonicalize constant to RHS.
5153   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5154       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5155     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5156 
5157   if (VT.isVector()) {
5158     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5159       return FoldedVOp;
5160 
5161     // fold (mulhu x, 0) -> 0
    // do not return N1, because it may contain undef elements.
5163     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5164       return DAG.getConstant(0, DL, VT);
5165   }
5166 
5167   // fold (mulhu x, 0) -> 0
5168   if (isNullConstant(N1))
5169     return N1;
5170 
5171   // fold (mulhu x, 1) -> 0
5172   if (isOneConstant(N1))
5173     return DAG.getConstant(0, DL, N0.getValueType());
5174 
5175   // fold (mulhu x, undef) -> 0
5176   if (N0.isUndef() || N1.isUndef())
5177     return DAG.getConstant(0, DL, VT);
5178 
5179   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
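  // (mulhu by 2^c keeps the top c bits of x; e.g. for i32 and c = 4,
  // mulhu x, 16 == srl x, 28.)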
5180   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5181       hasOperation(ISD::SRL, VT)) {
5182     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5183       unsigned NumEltBits = VT.getScalarSizeInBits();
5184       SDValue SRLAmt = DAG.getNode(
5185           ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5186       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5187       SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5188       return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5189     }
5190   }
5191 
5192   // If the type twice as wide is legal, transform the mulhu to a wider multiply
5193   // plus a shift.
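  // E.g. i16 mulhu with a legal i32 multiply becomes
  //   trunc (srl (mul (zext x), (zext y)), 16).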
5194   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5195       !VT.isVector()) {
5196     MVT Simple = VT.getSimpleVT();
5197     unsigned SimpleSize = Simple.getSizeInBits();
5198     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5199     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5200       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5201       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5202       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5203       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5204             DAG.getConstant(SimpleSize, DL,
5205                             getShiftAmountTy(N1.getValueType())));
5206       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5207     }
5208   }
5209 
5210   // Simplify the operands using demanded-bits information.
5211   // We don't have demanded bits support for MULHU so this just enables constant
5212   // folding based on known bits.
5213   if (SimplifyDemandedBits(SDValue(N, 0)))
5214     return SDValue(N, 0);
5215 
5216   return SDValue();
5217 }
5218 
5219 SDValue DAGCombiner::visitAVG(SDNode *N) {
5220   unsigned Opcode = N->getOpcode();
5221   SDValue N0 = N->getOperand(0);
5222   SDValue N1 = N->getOperand(1);
5223   EVT VT = N->getValueType(0);
5224   SDLoc DL(N);
5225 
5226   // fold (avg c1, c2)
5227   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5228     return C;
5229 
5230   // canonicalize constant to RHS.
5231   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5232       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5233     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5234 
5235   if (VT.isVector()) {
5236     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5237       return FoldedVOp;
5238 
5239     // fold (avgfloor x, 0) -> x >> 1
5240     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5241       if (Opcode == ISD::AVGFLOORS)
5242         return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5243       if (Opcode == ISD::AVGFLOORU)
5244         return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5245     }
5246   }
5247 
5248   // fold (avg x, undef) -> x
5249   if (N0.isUndef())
5250     return N1;
5251   if (N1.isUndef())
5252     return N0;
5253 
5254   // Fold (avg x, x) --> x
5255   if (N0 == N1 && Level >= AfterLegalizeTypes)
5256     return N0;
5257 
  // TODO: If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
5259 
5260   return SDValue();
5261 }
5262 
5263 SDValue DAGCombiner::visitABD(SDNode *N) {
5264   unsigned Opcode = N->getOpcode();
5265   SDValue N0 = N->getOperand(0);
5266   SDValue N1 = N->getOperand(1);
5267   EVT VT = N->getValueType(0);
5268   SDLoc DL(N);
5269 
5270   // fold (abd c1, c2)
5271   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5272     return C;
5273 
5274   // canonicalize constant to RHS.
5275   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5276       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5277     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5278 
5279   if (VT.isVector()) {
5280     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5281       return FoldedVOp;
5282 
5283     // fold (abds x, 0) -> abs x
5284     // fold (abdu x, 0) -> x
5285     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5286       if (Opcode == ISD::ABDS)
5287         return DAG.getNode(ISD::ABS, DL, VT, N0);
5288       if (Opcode == ISD::ABDU)
5289         return N0;
5290     }
5291   }
5292 
5293   // fold (abd x, undef) -> 0
5294   if (N0.isUndef() || N1.isUndef())
5295     return DAG.getConstant(0, DL, VT);
5296 
  // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
5298   if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5299       DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5300     return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5301 
5302   return SDValue();
5303 }
5304 
/// Perform optimizations common to nodes that compute two values. LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Return the replacement value if a simplification was made.
5308 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5309                                                 unsigned HiOp) {
5310   // If the high half is not needed, just compute the low half.
5311   bool HiExists = N->hasAnyUseOfValue(1);
5312   if (!HiExists && (!LegalOperations ||
5313                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5314     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5315     return CombineTo(N, Res, Res);
5316   }
5317 
5318   // If the low half is not needed, just compute the high half.
5319   bool LoExists = N->hasAnyUseOfValue(0);
5320   if (!LoExists && (!LegalOperations ||
5321                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5322     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5323     return CombineTo(N, Res, Res);
5324   }
5325 
  // If both halves are used, leave the node as it is.
5327   if (LoExists && HiExists)
5328     return SDValue();
5329 
5330   // If the two computed results can be simplified separately, separate them.
5331   if (LoExists) {
5332     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5333     AddToWorklist(Lo.getNode());
5334     SDValue LoOpt = combine(Lo.getNode());
5335     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5336         (!LegalOperations ||
5337          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5338       return CombineTo(N, LoOpt, LoOpt);
5339   }
5340 
5341   if (HiExists) {
5342     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5343     AddToWorklist(Hi.getNode());
5344     SDValue HiOpt = combine(Hi.getNode());
5345     if (HiOpt.getNode() && HiOpt != Hi &&
5346         (!LegalOperations ||
5347          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5348       return CombineTo(N, HiOpt, HiOpt);
5349   }
5350 
5351   return SDValue();
5352 }
5353 
5354 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5355   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5356     return Res;
5357 
5358   SDValue N0 = N->getOperand(0);
5359   SDValue N1 = N->getOperand(1);
5360   EVT VT = N->getValueType(0);
5361   SDLoc DL(N);
5362 
5363   // Constant fold.
5364   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5365     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5366 
5367   // canonicalize constant to RHS (vector doesn't have to splat)
5368   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5369       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5370     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5371 
  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
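  // For example (illustrative, i32 with a legal i64 multiply):
  //   (smul_lohi x, y) --> t = mul (sext x), (sext y);
  //                        lo = trunc t, hi = trunc (srl t, 32)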
5374   if (VT.isSimple() && !VT.isVector()) {
5375     MVT Simple = VT.getSimpleVT();
5376     unsigned SimpleSize = Simple.getSizeInBits();
5377     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5378     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5379       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5380       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5381       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // The high part (result 1) is the upper half of the wide product.
5383       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5384             DAG.getConstant(SimpleSize, DL,
5385                             getShiftAmountTy(Lo.getValueType())));
5386       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // The low part (result 0) is the truncated wide product.
5388       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5389       return CombineTo(N, Lo, Hi);
5390     }
5391   }
5392 
5393   return SDValue();
5394 }
5395 
5396 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5397   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5398     return Res;
5399 
5400   SDValue N0 = N->getOperand(0);
5401   SDValue N1 = N->getOperand(1);
5402   EVT VT = N->getValueType(0);
5403   SDLoc DL(N);
5404 
5405   // Constant fold.
5406   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5407     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5408 
5409   // canonicalize constant to RHS (vector doesn't have to splat)
5410   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5411       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5412     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5413 
5414   // (umul_lohi N0, 0) -> (0, 0)
5415   if (isNullConstant(N1)) {
5416     SDValue Zero = DAG.getConstant(0, DL, VT);
5417     return CombineTo(N, Zero, Zero);
5418   }
5419 
5420   // (umul_lohi N0, 1) -> (N0, 0)
5421   if (isOneConstant(N1)) {
5422     SDValue Zero = DAG.getConstant(0, DL, VT);
5423     return CombineTo(N, N0, Zero);
5424   }
5425 
  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
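  // For example (illustrative, i32 with a legal i64 multiply):
  //   (umul_lohi x, y) --> t = mul (zext x), (zext y);
  //                        lo = trunc t, hi = trunc (srl t, 32)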
5428   if (VT.isSimple() && !VT.isVector()) {
5429     MVT Simple = VT.getSimpleVT();
5430     unsigned SimpleSize = Simple.getSizeInBits();
5431     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5432     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5433       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5434       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5435       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // The high part (result 1) is the upper half of the wide product.
5437       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5438             DAG.getConstant(SimpleSize, DL,
5439                             getShiftAmountTy(Lo.getValueType())));
5440       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // The low part (result 0) is the truncated wide product.
5442       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5443       return CombineTo(N, Lo, Hi);
5444     }
5445   }
5446 
5447   return SDValue();
5448 }
5449 
5450 SDValue DAGCombiner::visitMULO(SDNode *N) {
5451   SDValue N0 = N->getOperand(0);
5452   SDValue N1 = N->getOperand(1);
5453   EVT VT = N0.getValueType();
5454   bool IsSigned = (ISD::SMULO == N->getOpcode());
5455 
5456   EVT CarryVT = N->getValueType(1);
5457   SDLoc DL(N);
5458 
5459   ConstantSDNode *N0C = isConstOrConstSplat(N0);
5460   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5461 
5462   // fold operation with constant operands.
5463   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5464   // multiple results.
5465   if (N0C && N1C) {
5466     bool Overflow;
5467     APInt Result =
5468         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5469                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5470     return CombineTo(N, DAG.getConstant(Result, DL, VT),
5471                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5472   }
5473 
5474   // canonicalize constant to RHS.
5475   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5476       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5477     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5478 
5479   // fold (mulo x, 0) -> 0 + no carry out
5480   if (isNullOrNullSplat(N1))
5481     return CombineTo(N, DAG.getConstant(0, DL, VT),
5482                      DAG.getConstant(0, DL, CarryVT));
5483 
5484   // (mulo x, 2) -> (addo x, x)
5485   // FIXME: This needs a freeze.
5486   if (N1C && N1C->getAPIntValue() == 2 &&
5487       (!IsSigned || VT.getScalarSizeInBits() > 2))
5488     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5489                        N->getVTList(), N0, N0);
5490 
  // A 1-bit SMULO overflows if both inputs are 1 (i.e. both are -1 as signed).
5492   if (IsSigned && VT.getScalarSizeInBits() == 1) {
5493     SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5494     SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5495                                DAG.getConstant(0, DL, VT), ISD::SETNE);
5496     return CombineTo(N, And, Cmp);
5497   }
5498 
5499   // If it cannot overflow, transform into a mul.
5500   if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5501     return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5502                      DAG.getConstant(0, DL, CarryVT));
5503   return SDValue();
5504 }
5505 
// Function to calculate whether the Min/Max pair of SDNodes (potentially
// swapped around) makes a signed saturate pattern, clamping to between a
// signed saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0
// and 2^BW-1. Returns the node being clamped and the bitwidth of the clamp
// in BW. Works with both SMIN/SMAX nodes and the setcc/select combo. The
// operands are the same as SimplifySelectCC: N0 < N1 ? N2 : N3.
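// For example (illustrative, i32 input clamped to 8 bits):
//   smin(smax(x, -128), 127) --> returns x with BW = 8, Unsigned = false
//   smax(smin(x, 255), 0)    --> returns x with BW = 8, Unsigned = true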
5512 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5513                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
5514                                   bool &Unsigned, SelectionDAG &DAG) {
5515   auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5516                             ISD::CondCode CC) {
    // The compare and select operands should be the same, or the select
    // operands should be truncated versions of the comparison operands.
5519     if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5520       return 0;
5521     // The constants need to be the same or a truncated version of each other.
5522     ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5523     ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5524     if (!N1C || !N3C)
5525       return 0;
5526     const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5527     const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5528     if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5529       return 0;
5530     return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5531   };
5532 
  // Check that the initial value is an SMIN/SMAX equivalent.
5534   unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5535   if (!Opcode0)
5536     return SDValue();
5537 
  // We may need only one range check if the fptosi can never produce the
  // out-of-range upper value.
5540   if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5541     if (isNullOrNullSplat(N3)) {
5542       EVT IntVT = N0.getValueType().getScalarType();
5543       EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5544       if (FPVT.isSimple()) {
5545         Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5546         const fltSemantics &Semantics = InputTy->getFltSemantics();
5547         uint32_t MinBitWidth =
5548           APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5549         if (IntVT.getSizeInBits() >= MinBitWidth) {
5550           Unsigned = true;
5551           BW = PowerOf2Ceil(MinBitWidth);
5552           return N0;
5553         }
5554       }
5555     }
5556   }
5557 
5558   SDValue N00, N01, N02, N03;
5559   ISD::CondCode N0CC;
5560   switch (N0.getOpcode()) {
5561   case ISD::SMIN:
5562   case ISD::SMAX:
5563     N00 = N02 = N0.getOperand(0);
5564     N01 = N03 = N0.getOperand(1);
5565     N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5566     break;
5567   case ISD::SELECT_CC:
5568     N00 = N0.getOperand(0);
5569     N01 = N0.getOperand(1);
5570     N02 = N0.getOperand(2);
5571     N03 = N0.getOperand(3);
5572     N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5573     break;
5574   case ISD::SELECT:
5575   case ISD::VSELECT:
5576     if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5577       return SDValue();
5578     N00 = N0.getOperand(0).getOperand(0);
5579     N01 = N0.getOperand(0).getOperand(1);
5580     N02 = N0.getOperand(1);
5581     N03 = N0.getOperand(2);
5582     N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5583     break;
5584   default:
5585     return SDValue();
5586   }
5587 
5588   unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5589   if (!Opcode1 || Opcode0 == Opcode1)
5590     return SDValue();
5591 
5592   ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5593   ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5594   if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5595     return SDValue();
5596 
5597   const APInt &MinC = MinCOp->getAPIntValue();
5598   const APInt &MaxC = MaxCOp->getAPIntValue();
5599   APInt MinCPlus1 = MinC + 1;
5600   if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5601     BW = MinCPlus1.exactLogBase2() + 1;
5602     Unsigned = false;
5603     return N02;
5604   }
5605 
5606   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5607     BW = MinCPlus1.exactLogBase2();
5608     Unsigned = true;
5609     return N02;
5610   }
5611 
5612   return SDValue();
5613 }
5614 
5615 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5616                                            SDValue N3, ISD::CondCode CC,
5617                                            SelectionDAG &DAG) {
5618   unsigned BW;
5619   bool Unsigned;
5620   SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5621   if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5622     return SDValue();
5623   EVT FPVT = Fp.getOperand(0).getValueType();
5624   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5625   if (FPVT.isVector())
5626     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5627                              FPVT.getVectorElementCount());
5628   unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5629   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5630     return SDValue();
5631   SDLoc DL(Fp);
5632   SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5633                             DAG.getValueType(NewVT.getScalarType()));
5634   return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5635 }
5636 
5637 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5638                                          SDValue N3, ISD::CondCode CC,
5639                                          SelectionDAG &DAG) {
  // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
  // select/vselect/select_cc. The select operands (N2/N3) may be truncated
  // versions of the setcc operands (N0/N1).
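  // For example (illustrative):
  //   umin (fptoui x), 255 --> zext/trunc (fp_to_uint_sat x to i8)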
5643   if ((N0 != N2 &&
5644        (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5645       N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5646     return SDValue();
5647   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5648   ConstantSDNode *N3C = isConstOrConstSplat(N3);
5649   if (!N1C || !N3C)
5650     return SDValue();
5651   const APInt &C1 = N1C->getAPIntValue();
5652   const APInt &C3 = N3C->getAPIntValue();
5653   if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5654       C1 != C3.zext(C1.getBitWidth()))
5655     return SDValue();
5656 
5657   unsigned BW = (C1 + 1).exactLogBase2();
5658   EVT FPVT = N0.getOperand(0).getValueType();
5659   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5660   if (FPVT.isVector())
5661     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5662                              FPVT.getVectorElementCount());
5663   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5664                                                         FPVT, NewVT))
5665     return SDValue();
5666 
5667   SDValue Sat =
5668       DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5669                   DAG.getValueType(NewVT.getScalarType()));
5670   return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5671 }
5672 
5673 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5674   SDValue N0 = N->getOperand(0);
5675   SDValue N1 = N->getOperand(1);
5676   EVT VT = N0.getValueType();
5677   unsigned Opcode = N->getOpcode();
5678   SDLoc DL(N);
5679 
5680   // fold operation with constant operands.
5681   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5682     return C;
5683 
5684   // If the operands are the same, this is a no-op.
5685   if (N0 == N1)
5686     return N0;
5687 
5688   // canonicalize constant to RHS
5689   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5690       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5691     return DAG.getNode(Opcode, DL, VT, N1, N0);
5692 
5693   // fold vector ops
5694   if (VT.isVector())
5695     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5696       return FoldedVOp;
5697 
  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
  // Only do this if the current op isn't legal and the flipped one is.
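  // For example (illustrative): if x and y are known non-negative, then
  // (smax x, y) == (umax x, y), so an illegal SMAX can become a legal UMAX.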
5700   if (!TLI.isOperationLegal(Opcode, VT) &&
5701       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5702       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5703     unsigned AltOpcode;
5704     switch (Opcode) {
5705     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5706     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5707     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5708     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5709     default: llvm_unreachable("Unknown MINMAX opcode");
5710     }
5711     if (TLI.isOperationLegal(AltOpcode, VT))
5712       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5713   }
5714 
5715   if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5716     if (SDValue S = PerformMinMaxFpToSatCombine(
5717             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5718       return S;
5719   if (Opcode == ISD::UMIN)
5720     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5721       return S;
5722 
5723   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5724   auto ReductionOpcode = [](unsigned Opcode) {
5725     switch (Opcode) {
5726     case ISD::SMIN:
5727       return ISD::VECREDUCE_SMIN;
5728     case ISD::SMAX:
5729       return ISD::VECREDUCE_SMAX;
5730     case ISD::UMIN:
5731       return ISD::VECREDUCE_UMIN;
5732     case ISD::UMAX:
5733       return ISD::VECREDUCE_UMAX;
5734     default:
5735       llvm_unreachable("Unexpected opcode");
5736     }
5737   };
5738   if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5739                                         SDLoc(N), VT, N0, N1))
5740     return SD;
5741 
5742   // Simplify the operands using demanded-bits information.
5743   if (SimplifyDemandedBits(SDValue(N, 0)))
5744     return SDValue(N, 0);
5745 
5746   return SDValue();
5747 }
5748 
5749 /// If this is a bitwise logic instruction and both operands have the same
5750 /// opcode, try to sink the other opcode after the logic instruction.
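/// For example (illustrative):
///   (or (zext x), (zext y)) --> (zext (or x, y))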
5751 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5752   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5753   EVT VT = N0.getValueType();
5754   unsigned LogicOpcode = N->getOpcode();
5755   unsigned HandOpcode = N0.getOpcode();
5756   assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5757   assert(HandOpcode == N1.getOpcode() && "Bad input!");
5758 
5759   // Bail early if none of these transforms apply.
5760   if (N0.getNumOperands() == 0)
5761     return SDValue();
5762 
5763   // FIXME: We should check number of uses of the operands to not increase
5764   //        the instruction count for all transforms.
5765 
5766   // Handle size-changing casts (or sign_extend_inreg).
5767   SDValue X = N0.getOperand(0);
5768   SDValue Y = N1.getOperand(0);
5769   EVT XVT = X.getValueType();
5770   SDLoc DL(N);
5771   if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5772       (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5773        N0.getOperand(1) == N1.getOperand(1))) {
5774     // If both operands have other uses, this transform would create extra
5775     // instructions without eliminating anything.
5776     if (!N0.hasOneUse() && !N1.hasOneUse())
5777       return SDValue();
5778     // We need matching integer source types.
5779     if (XVT != Y.getValueType())
5780       return SDValue();
5781     // Don't create an illegal op during or after legalization. Don't ever
5782     // create an unsupported vector op.
5783     if ((VT.isVector() || LegalOperations) &&
5784         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5785       return SDValue();
5786     // Avoid infinite looping with PromoteIntBinOp.
5787     // TODO: Should we apply desirable/legal constraints to all opcodes?
5788     if ((HandOpcode == ISD::ANY_EXTEND ||
5789          HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5790         LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5791       return SDValue();
5792     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5793     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5794     if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5795       return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5796     return DAG.getNode(HandOpcode, DL, VT, Logic);
5797   }
5798 
5799   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5800   if (HandOpcode == ISD::TRUNCATE) {
5801     // If both operands have other uses, this transform would create extra
5802     // instructions without eliminating anything.
5803     if (!N0.hasOneUse() && !N1.hasOneUse())
5804       return SDValue();
5805     // We need matching source types.
5806     if (XVT != Y.getValueType())
5807       return SDValue();
5808     // Don't create an illegal op during or after legalization.
5809     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5810       return SDValue();
5811     // Be extra careful sinking truncate. If it's free, there's no benefit in
5812     // widening a binop. Also, don't create a logic op on an illegal type.
5813     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5814       return SDValue();
5815     if (!TLI.isTypeLegal(XVT))
5816       return SDValue();
5817     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5818     return DAG.getNode(HandOpcode, DL, VT, Logic);
5819   }
5820 
5821   // For binops SHL/SRL/SRA/AND:
5822   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5823   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5824        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5825       N0.getOperand(1) == N1.getOperand(1)) {
5826     // If either operand has other uses, this transform is not an improvement.
5827     if (!N0.hasOneUse() || !N1.hasOneUse())
5828       return SDValue();
5829     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5830     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5831   }
5832 
5833   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5834   if (HandOpcode == ISD::BSWAP) {
5835     // If either operand has other uses, this transform is not an improvement.
5836     if (!N0.hasOneUse() || !N1.hasOneUse())
5837       return SDValue();
5838     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5839     return DAG.getNode(HandOpcode, DL, VT, Logic);
5840   }
5841 
5842   // For funnel shifts FSHL/FSHR:
5843   // logic_op (OP x, x1, s), (OP y, y1, s) -->
  // --> OP (logic_op x, y), (logic_op x1, y1), s
5845   if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5846       N0.getOperand(2) == N1.getOperand(2)) {
5847     if (!N0.hasOneUse() || !N1.hasOneUse())
5848       return SDValue();
5849     SDValue X1 = N0.getOperand(1);
5850     SDValue Y1 = N1.getOperand(1);
5851     SDValue S = N0.getOperand(2);
5852     SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5853     SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5854     return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5855   }
5856 
5857   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
5862   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5863   // on scalars.
5864   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5865        Level <= AfterLegalizeTypes) {
5866     // Input types must be integer and the same.
5867     if (XVT.isInteger() && XVT == Y.getValueType() &&
5868         !(VT.isVector() && TLI.isTypeLegal(VT) &&
5869           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5870       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5871       return DAG.getNode(HandOpcode, DL, VT, Logic);
5872     }
5873   }
5874 
5875   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5876   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5877   // If both shuffles use the same mask, and both shuffle within a single
5878   // vector, then it is worthwhile to move the swizzle after the operation.
5879   // The type-legalizer generates this pattern when loading illegal
5880   // vector types from memory. In many cases this allows additional shuffle
5881   // optimizations.
5882   // There are other cases where moving the shuffle after the xor/and/or
5883   // is profitable even if shuffles don't perform a swizzle.
5884   // If both shuffles use the same mask, and both shuffles have the same first
5885   // or second operand, then it might still be profitable to move the shuffle
5886   // after the xor/and/or operation.
5887   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5888     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5889     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5890     assert(X.getValueType() == Y.getValueType() &&
5891            "Inputs to shuffles are not the same type");
5892 
5893     // Check that both shuffles use the same mask. The masks are known to be of
5894     // the same length because the result vector type is the same.
5895     // Check also that shuffles have only one use to avoid introducing extra
5896     // instructions.
5897     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5898         !SVN0->getMask().equals(SVN1->getMask()))
5899       return SDValue();
5900 
5901     // Don't try to fold this node if it requires introducing a
5902     // build vector of all zeros that might be illegal at this stage.
5903     SDValue ShOp = N0.getOperand(1);
5904     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5905       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5906 
5907     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5908     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5909       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5910                                   N0.getOperand(0), N1.getOperand(0));
5911       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5912     }
5913 
5914     // Don't try to fold this node if it requires introducing a
5915     // build vector of all zeros that might be illegal at this stage.
5916     ShOp = N0.getOperand(0);
5917     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5918       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5919 
5920     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5921     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5922       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5923                                   N1.getOperand(1));
5924       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5925     }
5926   }
5927 
5928   return SDValue();
5929 }
5930 
5931 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5932 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5933                                        const SDLoc &DL) {
5934   SDValue LL, LR, RL, RR, N0CC, N1CC;
5935   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5936       !isSetCCEquivalent(N1, RL, RR, N1CC))
5937     return SDValue();
5938 
5939   assert(N0.getValueType() == N1.getValueType() &&
5940          "Unexpected operand types for bitwise logic op");
5941   assert(LL.getValueType() == LR.getValueType() &&
5942          RL.getValueType() == RR.getValueType() &&
5943          "Unexpected operand types for setcc");
5944 
5945   // If we're here post-legalization or the logic op type is not i1, the logic
5946   // op type must match a setcc result type. Also, all folds require new
5947   // operations on the left and right operands, so those types must match.
5948   EVT VT = N0.getValueType();
5949   EVT OpVT = LL.getValueType();
5950   if (LegalOperations || VT.getScalarType() != MVT::i1)
5951     if (VT != getSetCCResultType(OpVT))
5952       return SDValue();
5953   if (OpVT != RL.getValueType())
5954     return SDValue();
5955 
5956   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5957   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5958   bool IsInteger = OpVT.isInteger();
5959   if (LR == RR && CC0 == CC1 && IsInteger) {
5960     bool IsZero = isNullOrNullSplat(LR);
5961     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5962 
5963     // All bits clear?
5964     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5965     // All sign bits clear?
5966     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5967     // Any bits set?
5968     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5969     // Any sign bits set?
5970     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5971 
5972     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5973     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5974     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5975     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5976     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5977       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5978       AddToWorklist(Or.getNode());
5979       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5980     }
5981 
5982     // All bits set?
5983     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5984     // All sign bits set?
5985     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5986     // Any bits clear?
5987     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5988     // Any sign bits clear?
5989     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5990 
5991     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5992     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5993     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
5995     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
5996       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
5997       AddToWorklist(And.getNode());
5998       return DAG.getSetCC(DL, VT, And, LR, CC1);
5999     }
6000   }
6001 
6002   // TODO: What is the 'or' equivalent of this fold?
6003   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6004   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6005       IsInteger && CC0 == ISD::SETNE &&
6006       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6007        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6008     SDValue One = DAG.getConstant(1, DL, OpVT);
6009     SDValue Two = DAG.getConstant(2, DL, OpVT);
6010     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6011     AddToWorklist(Add.getNode());
6012     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6013   }
6014 
6015   // Try more general transforms if the predicates match and the only user of
6016   // the compares is the 'and' or 'or'.
6017   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6018       N0.hasOneUse() && N1.hasOneUse()) {
6019     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6020     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6021     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6022       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6023       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6024       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6025       SDValue Zero = DAG.getConstant(0, DL, OpVT);
6026       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6027     }
6028 
6029     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6030     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6031       // Match a shared variable operand and 2 non-opaque constant operands.
6032       auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6033         // The difference of the constants must be a single bit.
6034         const APInt &CMax =
6035             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6036         const APInt &CMin =
6037             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6038         return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6039       };
6040       if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6041         // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
6042         // setcc ((sub X, CMin), ~(CMax - CMin)), 0, ne/eq
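        // For example (illustrative, X != 8 & X != 12): CMax - CMin = 4 is a
        // power of 2, so this becomes ((X - 8) & ~4) != 0.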
6043         SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6044         SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6045         SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6046         SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6047         SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6048         SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6049         SDValue Zero = DAG.getConstant(0, DL, OpVT);
6050         return DAG.getSetCC(DL, VT, And, Zero, CC0);
6051       }
6052     }
6053   }
6054 
6055   // Canonicalize equivalent operands to LL == RL.
6056   if (LL == RR && LR == RL) {
6057     CC1 = ISD::getSetCCSwappedOperands(CC1);
6058     std::swap(RL, RR);
6059   }
6060 
6061   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6062   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6063   if (LL == RL && LR == RR) {
6064     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6065                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6066     if (NewCC != ISD::SETCC_INVALID &&
6067         (!LegalOperations ||
6068          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6069           TLI.isOperationLegal(ISD::SETCC, OpVT))))
6070       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6071   }
6072 
6073   return SDValue();
6074 }
6075 
6076 static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6077                                    SelectionDAG &DAG) {
6078   return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6079 }
6080 
6081 static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6082                                   SelectionDAG &DAG) {
6083   return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6084 }
6085 
6086 static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6087                                      ISD::CondCode CC, unsigned OrAndOpcode,
6088                                      SelectionDAG &DAG,
6089                                      bool isFMAXNUMFMINNUM_IEEE,
6090                                      bool isFMAXNUMFMINNUM) {
6091   // The optimization cannot be applied for all the predicates because
6092   // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6093   // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6094   // applied at all if one of the operands is a signaling NaN.
6095 
  // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
  // are known to be non-NaN.
6098   if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6099       ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6100     return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6101                    isFMAXNUMFMINNUM_IEEE
6102                ? ISD::FMINNUM_IEEE
6103                : ISD::DELETED_NODE;
6104   else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6105             (OrAndOpcode == ISD::OR)) ||
6106            ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6107             (OrAndOpcode == ISD::AND)))
6108     return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6109                    isFMAXNUMFMINNUM_IEEE
6110                ? ISD::FMAXNUM_IEEE
6111                : ISD::DELETED_NODE;
6112   // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6113   // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6114   // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6115   // that there are not any sNaNs, then the optimization is not valid
6116   // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6117   // the optimization using FMINNUM/FMAXNUM for the following cases. If
6118   // we can prove that we do not have any sNaNs, then we can do the
6119   // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6120   // cases.
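  // For example (illustrative): (setolt a, b) | (setolt c, b) can become
  // (setolt (fminnum a, c), b): if a is a quiet NaN, (setolt a, b) is false
  // and fminnum returns c, so both forms reduce to (setolt c, b).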
6121   else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6122             (OrAndOpcode == ISD::OR)) ||
6123            ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6124             (OrAndOpcode == ISD::AND)))
6125     return isFMAXNUMFMINNUM ? ISD::FMINNUM
6126                             : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6127                                       isFMAXNUMFMINNUM_IEEE
6128                                   ? ISD::FMINNUM_IEEE
6129                                   : ISD::DELETED_NODE;
6130   else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6131             (OrAndOpcode == ISD::OR)) ||
6132            ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6133             (OrAndOpcode == ISD::AND)))
6134     return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6135                             : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6136                                       isFMAXNUMFMINNUM_IEEE
6137                                   ? ISD::FMAXNUM_IEEE
6138                                   : ISD::DELETED_NODE;
6139   return ISD::DELETED_NODE;
6140 }
6141 
6142 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6143   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6144   assert(
6145       (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6146       "Invalid Op to combine SETCC with");
6147 
6148   // TODO: Search past casts/truncates.
6149   SDValue LHS = LogicOp->getOperand(0);
6150   SDValue RHS = LogicOp->getOperand(1);
6151   if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6152       !LHS->hasOneUse() || !RHS->hasOneUse())
6153     return SDValue();
6154 
6155   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6156   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6157       LogicOp, LHS.getNode(), RHS.getNode());
6158 
6159   SDValue LHS0 = LHS->getOperand(0);
6160   SDValue RHS0 = RHS->getOperand(0);
6161   SDValue LHS1 = LHS->getOperand(1);
6162   SDValue RHS1 = RHS->getOperand(1);
6163   // TODO: We don't actually need a splat here, for vectors we just need the
6164   // invariants to hold for each element.
6165   auto *LHS1C = isConstOrConstSplat(LHS1);
6166   auto *RHS1C = isConstOrConstSplat(RHS1);
6167   ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6168   ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6169   EVT VT = LogicOp->getValueType(0);
6170   EVT OpVT = LHS0.getValueType();
6171   SDLoc DL(LogicOp);
6172 
  // Check if the operands of an and/or operation are comparisons and if they
  // compare against the same value. Replace the and/or-cmp-cmp sequence with
  // a min/max-cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
  // sequence will be replaced with a min-cmp sequence:
  // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
  // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
  // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
  // The optimization does not work for `==` or `!=`.
  // The two comparisons should either have the same predicate, or the
  // predicate of one should be the swapped form of the other.
6183   bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6184                                TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6185   bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6186                           TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6187   if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6188         TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6189         TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6190         TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6191        (OpVT.isFloatingPoint() &&
6192         (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6193       !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6194       CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6195       CCL != ISD::SETTRUE &&
6196       (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6197 
6198     SDValue CommonValue, Operand1, Operand2;
6199     ISD::CondCode CC = ISD::SETCC_INVALID;
6200     if (CCL == CCR) {
6201       if (LHS0 == RHS0) {
6202         CommonValue = LHS0;
6203         Operand1 = LHS1;
6204         Operand2 = RHS1;
6205         CC = ISD::getSetCCSwappedOperands(CCL);
6206       } else if (LHS1 == RHS1) {
6207         CommonValue = LHS1;
6208         Operand1 = LHS0;
6209         Operand2 = RHS0;
6210         CC = CCL;
6211       }
6212     } else {
6213       assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6214       if (LHS0 == RHS1) {
6215         CommonValue = LHS0;
6216         Operand1 = LHS1;
6217         Operand2 = RHS0;
6218         CC = CCR;
6219       } else if (RHS0 == LHS1) {
6220         CommonValue = LHS1;
6221         Operand1 = LHS0;
6222         Operand2 = RHS1;
6223         CC = CCL;
6224       }
6225     }
6226 
6227     // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6228     // handle it using OR/AND.
6229     if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6230       CC = ISD::SETCC_INVALID;
6231     else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6232       CC = ISD::SETCC_INVALID;
6233 
6234     if (CC != ISD::SETCC_INVALID) {
6235       unsigned NewOpcode = ISD::DELETED_NODE;
6236       bool IsSigned = isSignedIntSetCC(CC);
6237       if (OpVT.isInteger()) {
6238         bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6239                        CC == ISD::SETLT || CC == ISD::SETULT);
6240         bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6241         if (IsLess == IsOr)
6242           NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6243         else
6244           NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6245       } else if (OpVT.isFloatingPoint())
6246         NewOpcode =
6247             getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6248                                  DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6249 
6250       if (NewOpcode != ISD::DELETED_NODE) {
6251         SDValue MinMaxValue =
6252             DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6253         return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6254       }
6255     }
6256   }
6257 
6258   if (TargetPreference == AndOrSETCCFoldKind::None)
6259     return SDValue();
6260 
6261   if (CCL == CCR &&
6262       CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6263       LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6264     const APInt &APLhs = LHS1C->getAPIntValue();
6265     const APInt &APRhs = RHS1C->getAPIntValue();
6266 
6267     // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6268     // case this is just a compare).
6269     if (APLhs == (-APRhs) &&
6270         ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6271          DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6272       const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6273       // (icmp eq A, C) | (icmp eq A, -C)
6274       //    -> (icmp eq Abs(A), C)
6275       // (icmp ne A, C) & (icmp ne A, -C)
6276       //    -> (icmp ne Abs(A), C)
6277       SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6278       return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6279                          DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6280     } else if (TargetPreference &
6281                (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6282 
6283       // AndOrSETCCFoldKind::AddAnd:
6284       // A == C0 | A == C1
6285       //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6286       //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6287       // A != C0 & A != C1
6288       //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6289       //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6290 
6291       // AndOrSETCCFoldKind::NotAnd:
6292       // A == C0 | A == C1
6293       //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6294       //    -> ~A & smin(C0, C1) == 0
6295       // A != C0 & A != C1
6296       //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6297       //    -> ~A & smin(C0, C1) != 0
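
      // For example (illustrative, AddAnd with A == 5 | A == 13):
      //   smax - smin = 8 is a power of 2, so this becomes
      //   ((A - 5) & ~8) == 0.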
6298 
6299       const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6300       const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6301       APInt Dif = MaxC - MinC;
6302       if (!Dif.isZero() && Dif.isPowerOf2()) {
6303         if (MaxC.isAllOnes() &&
6304             (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6305           SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6306           SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6307                                       DAG.getConstant(MinC, DL, OpVT));
6308           return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6309                              DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6310         } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6311 
6312           SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6313                                       DAG.getConstant(-MinC, DL, OpVT));
6314           SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6315                                       DAG.getConstant(~Dif, DL, OpVT));
6316           return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6317                              DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6318         }
6319       }
6320     }
6321   }
6322 
6323   return SDValue();
6324 }
6325 
// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
// We canonicalize to the `select` form in the middle end, but the `and` form
// gets better codegen on all tested targets (arm, x86, riscv).
6329 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6330                                      const SDLoc &DL, SelectionDAG &DAG) {
6331   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6332   if (!isNullConstant(F))
6333     return SDValue();
6334 
6335   EVT CondVT = Cond.getValueType();
6336   if (TLI.getBooleanContents(CondVT) !=
6337       TargetLoweringBase::ZeroOrOneBooleanContent)
6338     return SDValue();
6339 
6340   if (T.getOpcode() != ISD::AND)
6341     return SDValue();
6342 
6343   if (!isOneConstant(T.getOperand(1)))
6344     return SDValue();
6345 
6346   EVT OpVT = T.getValueType();
6347 
6348   SDValue CondMask =
6349       OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6350   return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6351 }
6352 
6353 /// This contains all DAGCombine rules which reduce two values combined by
6354 /// an And operation to a single value. This makes them reusable in the context
6355 /// of visitSELECT(). Rules involving constants are not included as
6356 /// visitSELECT() already handles those cases.
6357 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6358   EVT VT = N1.getValueType();
6359   SDLoc DL(N);
6360 
6361   // fold (and x, undef) -> 0
6362   if (N0.isUndef() || N1.isUndef())
6363     return DAG.getConstant(0, DL, VT);
6364 
6365   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6366     return V;
6367 
6368   // Canonicalize:
6369   //   and(x, add) -> and(add, x)
6370   if (N1.getOpcode() == ISD::ADD)
6371     std::swap(N0, N1);
6372 
6373   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6374   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6375       VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6376     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6377       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6378         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6379         // immediate for an add, but it is legal if its top c2 bits are set,
6380         // transform the ADD so the immediate doesn't need to be materialized
6381         // in a register.
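        // For example (illustrative, i32 with c2 == 16): the and clears the
        // top 16 bits, so c1 == 0x0000FFFF can be widened to 0xFFFFFFFF (-1)
        // when -1 is a legal add immediate and 0x0000FFFF is not.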
6382         APInt ADDC = ADDI->getAPIntValue();
6383         APInt SRLC = SRLI->getAPIntValue();
6384         if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6385             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6386           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6387                                              SRLC.getZExtValue());
6388           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6389             ADDC |= Mask;
6390             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6391               SDLoc DL0(N0);
6392               SDValue NewAdd =
6393                 DAG.getNode(ISD::ADD, DL0, VT,
6394                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6395               CombineTo(N0.getNode(), NewAdd);
6396               // Return N so it doesn't get rechecked!
6397               return SDValue(N, 0);
6398             }
6399           }
6400         }
6401       }
6402     }
6403   }
6404 
6405   return SDValue();
6406 }
6407 
6408 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6409                                    EVT LoadResultTy, EVT &ExtVT) {
6410   if (!AndC->getAPIntValue().isMask())
6411     return false;
6412 
6413   unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6414 
6415   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6416   EVT LoadedVT = LoadN->getMemoryVT();
6417 
6418   if (ExtVT == LoadedVT &&
6419       (!LegalOperations ||
6420        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6421     // ZEXTLOAD will match without needing to change the size of the value being
6422     // loaded.
6423     return true;
6424   }
6425 
  // Do not change the width of a volatile or atomic load.
6427   if (!LoadN->isSimple())
6428     return false;
6429 
6430   // Do not generate loads of non-round integer types since these can
6431   // be expensive (and would be wrong if the type is not byte sized).
6432   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6433     return false;
6434 
6435   if (LegalOperations &&
6436       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6437     return false;
6438 
6439   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6440     return false;
6441 
6442   return true;
6443 }
6444 
6445 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6446                                     ISD::LoadExtType ExtType, EVT &MemVT,
6447                                     unsigned ShAmt) {
6448   if (!LDST)
6449     return false;
6450   // Only allow byte offsets.
6451   if (ShAmt % 8)
6452     return false;
6453 
6454   // Do not generate loads of non-round integer types since these can
6455   // be expensive (and would be wrong if the type is not byte sized).
6456   if (!MemVT.isRound())
6457     return false;
6458 
  // Don't change the width of a volatile or atomic load.
6460   if (!LDST->isSimple())
6461     return false;
6462 
6463   EVT LdStMemVT = LDST->getMemoryVT();
6464 
6465   // Bail out when changing the scalable property, since we can't be sure that
6466   // we're actually narrowing here.
6467   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6468     return false;
6469 
6470   // Verify that we are actually reducing a load width here.
6471   if (LdStMemVT.bitsLT(MemVT))
6472     return false;
6473 
6474   // Ensure that this isn't going to produce an unsupported memory access.
6475   if (ShAmt) {
6476     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6477     const unsigned ByteShAmt = ShAmt / 8;
6478     const Align LDSTAlign = LDST->getAlign();
6479     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6480     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6481                                 LDST->getAddressSpace(), NarrowAlign,
6482                                 LDST->getMemOperand()->getFlags()))
6483       return false;
6484   }
6485 
6486   // It's not possible to generate a constant of extended or untyped type.
6487   EVT PtrType = LDST->getBasePtr().getValueType();
6488   if (PtrType == MVT::Untyped || PtrType.isExtended())
6489     return false;
6490 
6491   if (isa<LoadSDNode>(LDST)) {
6492     LoadSDNode *Load = cast<LoadSDNode>(LDST);
    // Don't transform a load with multiple uses, since that would require
    // adding a new load.
6495     if (!SDValue(Load, 0).hasOneUse())
6496       return false;
6497 
6498     if (LegalOperations &&
6499         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6500       return false;
6501 
6502     // For the transform to be legal, the load must produce only two values
6503     // (the value loaded and the chain).  Don't transform a pre-increment
6504     // load, for example, which produces an extra value.  Otherwise the
6505     // transformation is not equivalent, and the downstream logic to replace
6506     // uses gets things wrong.
6507     if (Load->getNumValues() > 2)
6508       return false;
6509 
6510     // If the load that we're shrinking is an extload and we're not just
6511     // discarding the extension we can't simply shrink the load. Bail.
6512     // TODO: It would be possible to merge the extensions in some cases.
6513     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6514         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6515       return false;
6516 
6517     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6518       return false;
6519   } else {
6520     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6521     StoreSDNode *Store = cast<StoreSDNode>(LDST);
6522     // Can't write outside the original store
6523     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6524       return false;
6525 
6526     if (LegalOperations &&
6527         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6528       return false;
6529   }
6530   return true;
6531 }
6532 
6533 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6534                                     SmallVectorImpl<LoadSDNode*> &Loads,
6535                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6536                                     ConstantSDNode *Mask,
6537                                     SDNode *&NodeToMask) {
  // Recursively search the operands, looking for loads which can be
6539   // narrowed.
6540   for (SDValue Op : N->op_values()) {
6541     if (Op.getValueType().isVector())
6542       return false;
6543 
6544     // Some constants may need fixing up later if they are too large.
6545     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6546       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6547           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6548         NodesWithConsts.insert(N);
6549       continue;
6550     }
6551 
6552     if (!Op.hasOneUse())
6553       return false;
6554 
6555     switch(Op.getOpcode()) {
6556     case ISD::LOAD: {
6557       auto *Load = cast<LoadSDNode>(Op);
6558       EVT ExtVT;
6559       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6560           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6561 
6562         // ZEXTLOAD is already small enough.
6563         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6564             ExtVT.bitsGE(Load->getMemoryVT()))
6565           continue;
6566 
6567         // Use LE to convert equal sized loads to zext.
6568         if (ExtVT.bitsLE(Load->getMemoryVT()))
6569           Loads.push_back(Load);
6570 
6571         continue;
6572       }
6573       return false;
6574     }
6575     case ISD::ZERO_EXTEND:
6576     case ISD::AssertZext: {
6577       unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6578       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6579       EVT VT = Op.getOpcode() == ISD::AssertZext ?
6580         cast<VTSDNode>(Op.getOperand(1))->getVT() :
6581         Op.getOperand(0).getValueType();
6582 
      // We can accept extending nodes if the mask is wider than or equal in
      // width to the original type.
6585       if (ExtVT.bitsGE(VT))
6586         continue;
6587       break;
6588     }
6589     case ISD::OR:
6590     case ISD::XOR:
6591     case ISD::AND:
6592       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6593                              NodeToMask))
6594         return false;
6595       continue;
6596     }
6597 
    // Allow one node which will be masked along with any loads found.
6599     if (NodeToMask)
6600       return false;
6601 
6602     // Also ensure that the node to be masked only produces one data result.
6603     NodeToMask = Op.getNode();
6604     if (NodeToMask->getNumValues() > 1) {
6605       bool HasValue = false;
6606       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6607         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6608         if (VT != MVT::Glue && VT != MVT::Other) {
6609           if (HasValue) {
6610             NodeToMask = nullptr;
6611             return false;
6612           }
6613           HasValue = true;
6614         }
6615       }
6616       assert(HasValue && "Node to be masked has no data result?");
6617     }
6618   }
6619   return true;
6620 }
6621 
6622 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6623   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6624   if (!Mask)
6625     return false;
6626 
6627   if (!Mask->getAPIntValue().isMask())
6628     return false;
6629 
6630   // No need to do anything if the and directly uses a load.
6631   if (isa<LoadSDNode>(N->getOperand(0)))
6632     return false;
6633 
6634   SmallVector<LoadSDNode*, 8> Loads;
6635   SmallPtrSet<SDNode*, 2> NodesWithConsts;
6636   SDNode *FixupNode = nullptr;
6637   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6638     if (Loads.empty())
6639       return false;
6640 
6641     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6642     SDValue MaskOp = N->getOperand(1);
6643 
    // If it exists, fix up the single node we allow in the tree that needs
6645     // masking.
6646     if (FixupNode) {
6647       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6648       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6649                                 FixupNode->getValueType(0),
6650                                 SDValue(FixupNode, 0), MaskOp);
6651       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
      if (And.getOpcode() == ISD::AND)
6653         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6654     }
6655 
6656     // Narrow any constants that need it.
6657     for (auto *LogicN : NodesWithConsts) {
6658       SDValue Op0 = LogicN->getOperand(0);
6659       SDValue Op1 = LogicN->getOperand(1);
6660 
6661       if (isa<ConstantSDNode>(Op0))
6662         Op0 =
6663             DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6664 
6665       if (isa<ConstantSDNode>(Op1))
6666         Op1 =
6667             DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6668 
6669       if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6670         std::swap(Op0, Op1);
6671 
6672       DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6673     }
6674 
6675     // Create narrow loads.
6676     for (auto *Load : Loads) {
6677       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6678       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6679                                 SDValue(Load, 0), MaskOp);
6680       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
      if (And.getOpcode() == ISD::AND)
6682         And = SDValue(
6683             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6684       SDValue NewLoad = reduceLoadWidth(And.getNode());
6685       assert(NewLoad &&
6686              "Shouldn't be masking the load if it can't be narrowed");
6687       CombineTo(Load, NewLoad, NewLoad.getValue(1));
6688     }
6689     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6690     return true;
6691   }
6692   return false;
6693 }
6694 
6695 // Unfold
6696 //    x &  (-1 'logical shift' y)
6697 // To
6698 //    (x 'opposite logical shift' y) 'logical shift' y
6699 // if it is better for performance.
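//
// For example, clearing the low bits:
//    x & (-1 << y)    -->    (x l>> y) << y
// and clearing the high bits:
//    x & (-1 l>> y)   -->    (x << y) l>> y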
6700 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6701   assert(N->getOpcode() == ISD::AND);
6702 
6703   SDValue N0 = N->getOperand(0);
6704   SDValue N1 = N->getOperand(1);
6705 
  // Do we actually prefer shifts over a mask?
6707   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6708     return SDValue();
6709 
6710   // Try to match  (-1 '[outer] logical shift' y)
6711   unsigned OuterShift;
6712   unsigned InnerShift; // The opposite direction to the OuterShift.
6713   SDValue Y;           // Shift amount.
6714   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6715     if (!M.hasOneUse())
6716       return false;
6717     OuterShift = M->getOpcode();
6718     if (OuterShift == ISD::SHL)
6719       InnerShift = ISD::SRL;
6720     else if (OuterShift == ISD::SRL)
6721       InnerShift = ISD::SHL;
6722     else
6723       return false;
6724     if (!isAllOnesConstant(M->getOperand(0)))
6725       return false;
6726     Y = M->getOperand(1);
6727     return true;
6728   };
6729 
6730   SDValue X;
6731   if (matchMask(N1))
6732     X = N0;
6733   else if (matchMask(N0))
6734     X = N1;
6735   else
6736     return SDValue();
6737 
6738   SDLoc DL(N);
6739   EVT VT = N->getValueType(0);
6740 
6741   //     tmp = x   'opposite logical shift' y
6742   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6743   //     ret = tmp 'logical shift' y
6744   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6745 
6746   return T1;
6747 }
6748 
6749 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6750 /// For a target with a bit test, this is expected to become test + set and save
6751 /// at least 1 instruction.
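/// For example, with a constant shift amount C:
///   and (not (srl X, C)), 1  -->  (and X, 1<<C) == 0
/// with the setcc result zero-extended or truncated back to the original type.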
6752 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6753   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6754 
6755   // Look through an optional extension.
6756   SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6757   if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6758     And0 = And0.getOperand(0);
6759   if (!isOneConstant(And1) || !And0.hasOneUse())
6760     return SDValue();
6761 
6762   SDValue Src = And0;
6763 
6764   // Attempt to find a 'not' op.
6765   // TODO: Should we favor test+set even without the 'not' op?
6766   bool FoundNot = false;
6767   if (isBitwiseNot(Src)) {
6768     FoundNot = true;
6769     Src = Src.getOperand(0);
6770 
    // Look through an optional truncation. The source operand may not be the
6772     // same type as the original 'and', but that is ok because we are masking
6773     // off everything but the low bit.
6774     if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6775       Src = Src.getOperand(0);
6776   }
6777 
6778   // Match a shift-right by constant.
6779   if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6780     return SDValue();
6781 
6782   // This is probably not worthwhile without a supported type.
6783   EVT SrcVT = Src.getValueType();
6784   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6785   if (!TLI.isTypeLegal(SrcVT))
6786     return SDValue();
6787 
6788   // We might have looked through casts that make this transform invalid.
6789   unsigned BitWidth = SrcVT.getScalarSizeInBits();
6790   SDValue ShiftAmt = Src.getOperand(1);
6791   auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6792   if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6793     return SDValue();
6794 
6795   // Set source to shift source.
6796   Src = Src.getOperand(0);
6797 
6798   // Try again to find a 'not' op.
6799   // TODO: Should we favor test+set even with two 'not' ops?
6800   if (!FoundNot) {
6801     if (!isBitwiseNot(Src))
6802       return SDValue();
6803     Src = Src.getOperand(0);
6804   }
6805 
6806   if (!TLI.hasBitTest(Src, ShiftAmt))
6807     return SDValue();
6808 
6809   // Turn this into a bit-test pattern using mask op + setcc:
6810   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
  // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
6812   SDLoc DL(And);
6813   SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6814   EVT CCVT =
6815       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6816   SDValue Mask = DAG.getConstant(
6817       APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6818   SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6819   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6820   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6821   return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6822 }
6823 
6824 /// For targets that support usubsat, match a bit-hack form of that operation
6825 /// that ends in 'and' and convert it.
6826 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
6827   SDValue N0 = N->getOperand(0);
6828   SDValue N1 = N->getOperand(1);
6829   EVT VT = N1.getValueType();
6830 
6831   // Canonicalize SRA as operand 1.
6832   if (N0.getOpcode() == ISD::SRA)
6833     std::swap(N0, N1);
6834 
6835   // xor/add with SMIN (signmask) are logically equivalent.
6836   if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
6837     return SDValue();
6838 
6839   if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
6840       N0.getOperand(0) != N1.getOperand(0))
6841     return SDValue();
6842 
6843   unsigned BitWidth = VT.getScalarSizeInBits();
6844   ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
6845   ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
6846   if (!XorC || !XorC->getAPIntValue().isSignMask() ||
6847       !SraC || SraC->getAPIntValue() != BitWidth - 1)
6848     return SDValue();
6849 
6850   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6851   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6852   SDLoc DL(N);
6853   SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
6854   return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
6855 }
6856 
6857 /// Given a bitwise logic operation N with a matching bitwise logic operand,
6858 /// fold a pattern where 2 of the source operands are identically shifted
6859 /// values. For example:
6860 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6861 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6862                                  SelectionDAG &DAG) {
6863   unsigned LogicOpcode = N->getOpcode();
6864   assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6865          "Expected bitwise logic operation");
6866 
6867   if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6868     return SDValue();
6869 
6870   // Match another bitwise logic op and a shift.
6871   unsigned ShiftOpcode = ShiftOp.getOpcode();
6872   if (LogicOp.getOpcode() != LogicOpcode ||
6873       !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6874         ShiftOpcode == ISD::SRA))
6875     return SDValue();
6876 
6877   // Match another shift op inside the first logic operand. Handle both commuted
6878   // possibilities.
6879   // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6880   // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6881   SDValue X1 = ShiftOp.getOperand(0);
6882   SDValue Y = ShiftOp.getOperand(1);
6883   SDValue X0, Z;
6884   if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6885       LogicOp.getOperand(0).getOperand(1) == Y) {
6886     X0 = LogicOp.getOperand(0).getOperand(0);
6887     Z = LogicOp.getOperand(1);
6888   } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6889              LogicOp.getOperand(1).getOperand(1) == Y) {
6890     X0 = LogicOp.getOperand(1).getOperand(0);
6891     Z = LogicOp.getOperand(0);
6892   } else {
6893     return SDValue();
6894   }
6895 
6896   EVT VT = N->getValueType(0);
6897   SDLoc DL(N);
6898   SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6899   SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6900   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6901 }
6902 
6903 /// Given a tree of logic operations with shape like
6904 /// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y)))
6905 /// try to match and fold shift operations with the same shift amount.
6906 /// For example:
6907 /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6908 /// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6909 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6910                                      SDValue RightHand, SelectionDAG &DAG) {
6911   unsigned LogicOpcode = N->getOpcode();
6912   assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6913          "Expected bitwise logic operation");
6914   if (LeftHand.getOpcode() != LogicOpcode ||
6915       RightHand.getOpcode() != LogicOpcode)
6916     return SDValue();
6917   if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6918     return SDValue();
6919 
  // Try to match one of the following patterns:
6921   // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6922   // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6923   // Note that foldLogicOfShifts will handle commuted versions of the left hand
6924   // itself.
6925   SDValue CombinedShifts, W;
6926   SDValue R0 = RightHand.getOperand(0);
6927   SDValue R1 = RightHand.getOperand(1);
6928   if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6929     W = R1;
6930   else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6931     W = R0;
6932   else
6933     return SDValue();
6934 
6935   EVT VT = N->getValueType(0);
6936   SDLoc DL(N);
6937   return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6938 }
6939 
6940 SDValue DAGCombiner::visitAND(SDNode *N) {
6941   SDValue N0 = N->getOperand(0);
6942   SDValue N1 = N->getOperand(1);
6943   EVT VT = N1.getValueType();
6944 
6945   // x & x --> x
6946   if (N0 == N1)
6947     return N0;
6948 
6949   // fold (and c1, c2) -> c1&c2
6950   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6951     return C;
6952 
6953   // canonicalize constant to RHS
6954   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6955       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6956     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
6957 
6958   if (areBitwiseNotOfEachother(N0, N1))
6959     return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
6960                            VT);
6961 
6962   // fold vector ops
6963   if (VT.isVector()) {
6964     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6965       return FoldedVOp;
6966 
6967     // fold (and x, 0) -> 0, vector edition
6968     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
      // Do not return N1, because an undef node may exist in N1.
6970       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6971                              SDLoc(N), N1.getValueType());
6972 
6973     // fold (and x, -1) -> x, vector edition
6974     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6975       return N0;
6976 
6977     // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
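    // E.g. an extending masked load from v4i8 memory ANDed with a splat of
    // 0xFF can be rewritten as a zero-extending masked load.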
6978     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6979     ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6980     if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6981         N1.hasOneUse()) {
6982       EVT LoadVT = MLoad->getMemoryVT();
6983       EVT ExtVT = VT;
6984       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6985         // For this AND to be a zero extension of the masked load the elements
6986         // of the BuildVec must mask the bottom bits of the extended element
6987         // type
6988         uint64_t ElementSize =
6989             LoadVT.getVectorElementType().getScalarSizeInBits();
6990         if (Splat->getAPIntValue().isMask(ElementSize)) {
6991           auto NewLoad = DAG.getMaskedLoad(
6992               ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6993               MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6994               LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
6995               ISD::ZEXTLOAD, MLoad->isExpandingLoad());
6996           bool LoadHasOtherUsers = !N0.hasOneUse();
6997           CombineTo(N, NewLoad);
6998           if (LoadHasOtherUsers)
6999             CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7000           return SDValue(N, 0);
7001         }
7002       }
7003     }
7004   }
7005 
7006   // fold (and x, -1) -> x
7007   if (isAllOnesConstant(N1))
7008     return N0;
7009 
7010   // if (and x, c) is known to be zero, return 0
7011   unsigned BitWidth = VT.getScalarSizeInBits();
7012   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7013   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7014     return DAG.getConstant(0, SDLoc(N), VT);
7015 
7016   if (SDValue R = foldAndOrOfSETCC(N, DAG))
7017     return R;
7018 
7019   if (SDValue NewSel = foldBinOpIntoSelect(N))
7020     return NewSel;
7021 
7022   // reassociate and
7023   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
7024     return RAND;
7025 
7026   // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7027   if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
7028                                         VT, N0, N1))
7029     return SD;
7030 
7031   // fold (and (or x, C), D) -> D if (C & D) == D
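  // E.g. (and (or x, 0xFF), 0x0F) -> 0x0F, since the 'or' already guarantees
  // that every bit of D is set.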
7032   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7033     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7034   };
7035   if (N0.getOpcode() == ISD::OR &&
7036       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7037     return N1;
7038 
7039   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7040     SDValue N0Op0 = N0.getOperand(0);
7041     EVT SrcVT = N0Op0.getValueType();
7042     unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7043     APInt Mask = ~N1C->getAPIntValue();
7044     Mask = Mask.trunc(SrcBitWidth);
7045 
7046     // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
7047     if (DAG.MaskedValueIsZero(N0Op0, Mask))
7048       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
7049 
7050     // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
    if (N1C->getAPIntValue().countl_zero() >= (BitWidth - SrcBitWidth) &&
7052         TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7053         TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7054         TLI.isNarrowingProfitable(VT, SrcVT)) {
7055       SDLoc DL(N);
7056       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7057                          DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7058                                      DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7059     }
7060   }
7061 
7062   // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7063   if (ISD::isExtOpcode(N0.getOpcode())) {
7064     unsigned ExtOpc = N0.getOpcode();
7065     SDValue N0Op0 = N0.getOperand(0);
7066     if (N0Op0.getOpcode() == ISD::AND &&
7067         (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7068         DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7069         DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7070         N0->hasOneUse() && N0Op0->hasOneUse()) {
7071       SDLoc DL(N);
7072       SDValue NewMask =
7073           DAG.getNode(ISD::AND, DL, VT, N1,
7074                       DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7075       return DAG.getNode(ISD::AND, DL, VT,
7076                          DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7077                          NewMask);
7078     }
7079   }
7080 
7081   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7082   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7083   // already be zero by virtue of the width of the base type of the load.
7084   //
7085   // the 'X' node here can either be nothing or an extract_vector_elt to catch
7086   // more cases.
7087   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7088        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
7089        N0.getOperand(0).getOpcode() == ISD::LOAD &&
7090        N0.getOperand(0).getResNo() == 0) ||
7091       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7092     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
7093                                          N0 : N0.getOperand(0) );
7094 
7095     // Get the constant (if applicable) the zero'th operand is being ANDed with.
7096     // This can be a pure constant or a vector splat, in which case we treat the
7097     // vector as a scalar and use the splat value.
7098     APInt Constant = APInt::getZero(1);
7099     if (const ConstantSDNode *C = isConstOrConstSplat(
7100             N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7101       Constant = C->getAPIntValue();
7102     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7103       unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7104       APInt SplatValue, SplatUndef;
7105       unsigned SplatBitSize;
7106       bool HasAnyUndefs;
7107       // Endianness should not matter here. Code below makes sure that we only
7108       // use the result if the SplatBitSize is a multiple of the vector element
7109       // size. And after that we AND all element sized parts of the splat
7110       // together. So the end result should be the same regardless of in which
7111       // order we do those operations.
7112       const bool IsBigEndian = false;
7113       bool IsSplat =
7114           Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7115                                   HasAnyUndefs, EltBitWidth, IsBigEndian);
7116 
7117       // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
      // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
7119       if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7120         // Undef bits can contribute to a possible optimisation if set, so
7121         // set them.
7122         SplatValue |= SplatUndef;
7123 
7124         // The splat value may be something like "0x00FFFFFF", which means 0 for
7125         // the first vector value and FF for the rest, repeating. We need a mask
7126         // that will apply equally to all members of the vector, so AND all the
7127         // lanes of the constant together.
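        // E.g. with EltBitWidth == 8 and SplatValue == 0x00FFFFFF this ANDs
        // the lanes 0xFF & 0xFF & 0xFF & 0x00 == 0x00, the only mask that is
        // correct for every element.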
7128         Constant = APInt::getAllOnes(EltBitWidth);
7129         for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7130           Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7131       }
7132     }
7133 
7134     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7135     // actually legal and isn't going to get expanded, else this is a false
7136     // optimisation.
7137     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7138                                                     Load->getValueType(0),
7139                                                     Load->getMemoryVT());
7140 
7141     // Resize the constant to the same size as the original memory access before
7142     // extension. If it is still the AllOnesValue then this AND is completely
7143     // unneeded.
7144     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7145 
7146     bool B;
7147     switch (Load->getExtensionType()) {
7148     default: B = false; break;
7149     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7150     case ISD::ZEXTLOAD:
7151     case ISD::NON_EXTLOAD: B = true; break;
7152     }
7153 
7154     if (B && Constant.isAllOnes()) {
7155       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7156       // preserve semantics once we get rid of the AND.
7157       SDValue NewLoad(Load, 0);
7158 
7159       // Fold the AND away. NewLoad may get replaced immediately.
7160       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7161 
7162       if (Load->getExtensionType() == ISD::EXTLOAD) {
7163         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7164                               Load->getValueType(0), SDLoc(Load),
7165                               Load->getChain(), Load->getBasePtr(),
7166                               Load->getOffset(), Load->getMemoryVT(),
7167                               Load->getMemOperand());
7168         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7169         if (Load->getNumValues() == 3) {
7170           // PRE/POST_INC loads have 3 values.
7171           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7172                            NewLoad.getValue(2) };
7173           CombineTo(Load, To, 3, true);
7174         } else {
7175           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7176         }
7177       }
7178 
7179       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7180     }
7181   }
7182 
7183   // Try to convert a constant mask AND into a shuffle clear mask.
7184   if (VT.isVector())
7185     if (SDValue Shuffle = XformToShuffleWithZero(N))
7186       return Shuffle;
7187 
7188   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7189     return Combined;
7190 
7191   if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7192       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7193     SDValue Ext = N0.getOperand(0);
7194     EVT ExtVT = Ext->getValueType(0);
7195     SDValue Extendee = Ext->getOperand(0);
7196 
7197     unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7198     if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7199         (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7200       //    (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7201       // => (extract_subvector (iN_zeroext v))
7202       SDValue ZeroExtExtendee =
7203           DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
7204 
7205       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
7206                          N0.getOperand(1));
7207     }
7208   }
7209 
7210   // fold (and (masked_gather x)) -> (zext_masked_gather x)
7211   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7212     EVT MemVT = GN0->getMemoryVT();
7213     EVT ScalarVT = MemVT.getScalarType();
7214 
7215     if (SDValue(GN0, 0).hasOneUse() &&
7216         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7218       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
7219                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
7220 
7221       SDValue ZExtLoad = DAG.getMaskedGather(
7222           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
7223           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
7224 
7225       CombineTo(N, ZExtLoad);
7226       AddToWorklist(ZExtLoad.getNode());
7227       // Avoid recheck of N.
7228       return SDValue(N, 0);
7229     }
7230   }
7231 
7232   // fold (and (load x), 255) -> (zextload x, i8)
7233   // fold (and (extload x, i16), 255) -> (zextload x, i8)
7234   if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7235     if (SDValue Res = reduceLoadWidth(N))
7236       return Res;
7237 
7238   if (LegalTypes) {
7239     // Attempt to propagate the AND back up to the leaves which, if they're
7240     // loads, can be combined to narrow loads and the AND node can be removed.
7241     // Perform after legalization so that extend nodes will already be
7242     // combined into the loads.
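    // E.g. with an i32 mask of 0xFF, each load feeding the 'and' can be
    // rewritten as an i8 zextload, making the 'and' itself redundant.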
7243     if (BackwardsPropagateMask(N))
7244       return SDValue(N, 0);
7245   }
7246 
7247   if (SDValue Combined = visitANDLike(N0, N1, N))
7248     return Combined;
7249 
7250   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
7251   if (N0.getOpcode() == N1.getOpcode())
7252     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7253       return V;
7254 
7255   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7256     return R;
7257   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7258     return R;
7259 
7260   // Masking the negated extension of a boolean is just the zero-extended
7261   // boolean:
7262   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7263   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7264   //
7265   // Note: the SimplifyDemandedBits fold below can make an information-losing
7266   // transform, and then we have no way to find this better fold.
7267   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7268     if (isNullOrNullSplat(N0.getOperand(0))) {
7269       SDValue SubRHS = N0.getOperand(1);
7270       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7271           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7272         return SubRHS;
7273       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7274           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7275         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
7276     }
7277   }
7278 
7279   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7280   // fold (and (sra)) -> (and (srl)) when possible.
7281   if (SimplifyDemandedBits(SDValue(N, 0)))
7282     return SDValue(N, 0);
7283 
7284   // fold (zext_inreg (extload x)) -> (zextload x)
7285   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
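  // E.g. (and (extload i8 -> i32, x), 0xFF) -> (zextload i8 -> i32, x), since
  // the mask clears exactly the bits beyond the i8 memory width.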
7286   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7287       (ISD::isEXTLoad(N0.getNode()) ||
7288        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7289     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7290     EVT MemVT = LN0->getMemoryVT();
7291     // If we zero all the possible extended bits, then we can turn this into
7292     // a zextload if we are running before legalize or the operation is legal.
7293     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7294     unsigned MemBitSize = MemVT.getScalarSizeInBits();
7295     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7296     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7297         ((!LegalOperations && LN0->isSimple()) ||
7298          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7299       SDValue ExtLoad =
7300           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7301                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7302       AddToWorklist(N);
7303       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7304       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7305     }
7306   }
7307 
7308   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7309   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7310     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7311                                            N0.getOperand(1), false))
7312       return BSwap;
7313   }
7314 
7315   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7316     return Shifts;
7317 
7318   if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7319     return V;
7320 
7321   // Recognize the following pattern:
7322   //
7323   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7324   //
7325   // where bitmask is a mask that clears the upper bits of AndVT. The
7326   // number of bits in bitmask must be a power of two.
7327   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7328     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7329       return false;
7330 
7331     auto *C = dyn_cast<ConstantSDNode>(RHS);
7332     if (!C)
7333       return false;
7334 
7335     if (!C->getAPIntValue().isMask(
7336             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7337       return false;
7338 
7339     return true;
7340   };
7341 
7342   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7343   if (IsAndZeroExtMask(N0, N1))
7344     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
7345 
7346   if (hasOperation(ISD::USUBSAT, VT))
7347     if (SDValue V = foldAndToUsubsat(N, DAG))
7348       return V;
7349 
  // Postpone until legalization has completed to avoid interference with
  // bswap folding.
7352   if (LegalOperations || VT.isVector())
7353     if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7354       return R;
7355 
7356   return SDValue();
7357 }
7358 
7359 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
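/// For example, for i32:
///   ((a & 0xff00) >> 8) | ((a & 0x00ff) << 8)  -->  (bswap a) >> 16
/// with bits 31:16 of the result known to be zero.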
7360 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7361                                         bool DemandHighBits) {
7362   if (!LegalOperations)
7363     return SDValue();
7364 
7365   EVT VT = N->getValueType(0);
7366   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7367     return SDValue();
7368   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7369     return SDValue();
7370 
7371   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7372   bool LookPassAnd0 = false;
7373   bool LookPassAnd1 = false;
7374   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7375     std::swap(N0, N1);
7376   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7377     std::swap(N0, N1);
7378   if (N0.getOpcode() == ISD::AND) {
7379     if (!N0->hasOneUse())
7380       return SDValue();
7381     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7382     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7383     // This is needed for X86.
7384     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7385                   N01C->getZExtValue() != 0xFFFF))
7386       return SDValue();
7387     N0 = N0.getOperand(0);
7388     LookPassAnd0 = true;
7389   }
7390 
7391   if (N1.getOpcode() == ISD::AND) {
7392     if (!N1->hasOneUse())
7393       return SDValue();
7394     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7395     if (!N11C || N11C->getZExtValue() != 0xFF)
7396       return SDValue();
7397     N1 = N1.getOperand(0);
7398     LookPassAnd1 = true;
7399   }
7400 
7401   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7402     std::swap(N0, N1);
7403   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7404     return SDValue();
7405   if (!N0->hasOneUse() || !N1->hasOneUse())
7406     return SDValue();
7407 
7408   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7409   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7410   if (!N01C || !N11C)
7411     return SDValue();
7412   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7413     return SDValue();
7414 
7415   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7416   SDValue N00 = N0->getOperand(0);
7417   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7418     if (!N00->hasOneUse())
7419       return SDValue();
7420     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7421     if (!N001C || N001C->getZExtValue() != 0xFF)
7422       return SDValue();
7423     N00 = N00.getOperand(0);
7424     LookPassAnd0 = true;
7425   }
7426 
7427   SDValue N10 = N1->getOperand(0);
7428   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7429     if (!N10->hasOneUse())
7430       return SDValue();
7431     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7432     // Also allow 0xFFFF since the bits will be shifted out. This is needed
7433     // for X86.
7434     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7435                    N101C->getZExtValue() != 0xFFFF))
7436       return SDValue();
7437     N10 = N10.getOperand(0);
7438     LookPassAnd1 = true;
7439   }
7440 
7441   if (N00 != N10)
7442     return SDValue();
7443 
7444   // Make sure everything beyond the low halfword gets set to zero since the SRL
7445   // 16 will clear the top bits.
7446   unsigned OpSizeInBits = VT.getSizeInBits();
7447   if (OpSizeInBits > 16) {
7448     // If the left-shift isn't masked out then the only way this is a bswap is
7449     // if all bits beyond the low 8 are 0. In that case the entire pattern
7450     // reduces to a left shift anyway: leave it for other parts of the combiner.
7451     if (DemandHighBits && !LookPassAnd0)
7452       return SDValue();
7453 
7454     // However, if the right shift isn't masked out then it might be because
7455     // it's not needed. See if we can spot that too. If the high bits aren't
7456     // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7457     // upper bits to be zero.
7458     if (!LookPassAnd1) {
7459       unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7460       if (!DAG.MaskedValueIsZero(N10,
7461                                  APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7462         return SDValue();
7463     }
7464   }
7465 
7466   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7467   if (OpSizeInBits > 16) {
7468     SDLoc DL(N);
7469     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7470                       DAG.getConstant(OpSizeInBits - 16, DL,
7471                                       getShiftAmountTy(VT)));
7472   }
7473   return Res;
7474 }
7475 
7476 /// Return true if the specified node is an element that makes up a 32-bit
7477 /// packed halfword byteswap.
7478 /// ((x & 0x000000ff) << 8) |
7479 /// ((x & 0x0000ff00) >> 8) |
7480 /// ((x & 0x00ff0000) << 8) |
7481 /// ((x & 0xff000000) >> 8)
7482 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7483   if (!N->hasOneUse())
7484     return false;
7485 
7486   unsigned Opc = N.getOpcode();
7487   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7488     return false;
7489 
7490   SDValue N0 = N.getOperand(0);
7491   unsigned Opc0 = N0.getOpcode();
7492   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7493     return false;
7494 
7495   ConstantSDNode *N1C = nullptr;
7496   // SHL or SRL: look upstream for AND mask operand
7497   if (Opc == ISD::AND)
7498     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7499   else if (Opc0 == ISD::AND)
7500     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7501   if (!N1C)
7502     return false;
7503 
7504   unsigned MaskByteOffset;
7505   switch (N1C->getZExtValue()) {
7506   default:
7507     return false;
7508   case 0xFF:       MaskByteOffset = 0; break;
7509   case 0xFF00:     MaskByteOffset = 1; break;
7510   case 0xFFFF:
7511     // In case demanded bits didn't clear the bits that will be shifted out.
7512     // This is needed for X86.
7513     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7514       MaskByteOffset = 1;
7515       break;
7516     }
7517     return false;
7518   case 0xFF0000:   MaskByteOffset = 2; break;
7519   case 0xFF000000: MaskByteOffset = 3; break;
7520   }
7521 
7522   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7523   if (Opc == ISD::AND) {
7524     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7525       // (x >> 8) & 0xff
7526       // (x >> 8) & 0xff0000
7527       if (Opc0 != ISD::SRL)
7528         return false;
7529       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7530       if (!C || C->getZExtValue() != 8)
7531         return false;
7532     } else {
7533       // (x << 8) & 0xff00
7534       // (x << 8) & 0xff000000
7535       if (Opc0 != ISD::SHL)
7536         return false;
7537       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7538       if (!C || C->getZExtValue() != 8)
7539         return false;
7540     }
7541   } else if (Opc == ISD::SHL) {
7542     // (x & 0xff) << 8
7543     // (x & 0xff0000) << 8
7544     if (MaskByteOffset != 0 && MaskByteOffset != 2)
7545       return false;
7546     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7547     if (!C || C->getZExtValue() != 8)
7548       return false;
7549   } else { // Opc == ISD::SRL
7550     // (x & 0xff00) >> 8
7551     // (x & 0xff000000) >> 8
7552     if (MaskByteOffset != 1 && MaskByteOffset != 3)
7553       return false;
7554     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7555     if (!C || C->getZExtValue() != 8)
7556       return false;
7557   }
7558 
7559   if (Parts[MaskByteOffset])
7560     return false;
7561 
7562   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7563   return true;
7564 }
7565 
7566 // Match 2 elements of a packed halfword bswap.
7567 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7568   if (N.getOpcode() == ISD::OR)
7569     return isBSwapHWordElement(N.getOperand(0), Parts) &&
7570            isBSwapHWordElement(N.getOperand(1), Parts);
7571 
7572   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7573     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7574     if (!C || C->getAPIntValue() != 16)
7575       return false;
7576     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7577     return true;
7578   }
7579 
7580   return false;
7581 }
7582 
7583 // Match this pattern:
7584 //   (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff))
7585 // And rewrite this to:
7586 //   (rotr (bswap A), 16)
7587 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7588                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
7589                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
7590   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7591          "MatchBSwapHWordOrAndAnd: expecting i32");
7592   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7593     return SDValue();
7594   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7595     return SDValue();
7596   // TODO: this is too restrictive; lifting this restriction requires more tests
7597   if (!N0->hasOneUse() || !N1->hasOneUse())
7598     return SDValue();
7599   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7600   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7601   if (!Mask0 || !Mask1)
7602     return SDValue();
7603   if (Mask0->getAPIntValue() != 0xff00ff00 ||
7604       Mask1->getAPIntValue() != 0x00ff00ff)
7605     return SDValue();
7606   SDValue Shift0 = N0.getOperand(0);
7607   SDValue Shift1 = N1.getOperand(0);
7608   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7609     return SDValue();
7610   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7611   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7612   if (!ShiftAmt0 || !ShiftAmt1)
7613     return SDValue();
7614   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7615     return SDValue();
7616   if (Shift0.getOperand(0) != Shift1.getOperand(0))
7617     return SDValue();
7618 
7619   SDLoc DL(N);
7620   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7621   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7622   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7623 }
7624 
7625 /// Match a 32-bit packed halfword bswap. That is
7626 /// ((x & 0x000000ff) << 8) |
7627 /// ((x & 0x0000ff00) >> 8) |
7628 /// ((x & 0x00ff0000) << 8) |
7629 /// ((x & 0xff000000) >> 8)
7630 /// => (rotl (bswap x), 16)
7631 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7632   if (!LegalOperations)
7633     return SDValue();
7634 
7635   EVT VT = N->getValueType(0);
7636   if (VT != MVT::i32)
7637     return SDValue();
7638   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7639     return SDValue();
7640 
7641   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7642                                               getShiftAmountTy(VT)))
7643     return BSwap;
7644 
7645   // Try again with commuted operands.
7646   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7647                                               getShiftAmountTy(VT)))
    return BSwap;

7651   // Look for either
7652   // (or (bswaphpair), (bswaphpair))
7653   // (or (or (bswaphpair), (and)), (and))
7654   // (or (or (and), (bswaphpair)), (and))
7655   SDNode *Parts[4] = {};
7656 
7657   if (isBSwapHWordPair(N0, Parts)) {
7658     // (or (or (and), (and)), (or (and), (and)))
7659     if (!isBSwapHWordPair(N1, Parts))
7660       return SDValue();
7661   } else if (N0.getOpcode() == ISD::OR) {
7662     // (or (or (or (and), (and)), (and)), (and))
7663     if (!isBSwapHWordElement(N1, Parts))
7664       return SDValue();
7665     SDValue N00 = N0.getOperand(0);
7666     SDValue N01 = N0.getOperand(1);
7667     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7668         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7669       return SDValue();
7670   } else {
7671     return SDValue();
7672   }
7673 
7674   // Make sure the parts are all coming from the same node.
7675   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7676     return SDValue();
7677 
7678   SDLoc DL(N);
7679   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7680                               SDValue(Parts[0], 0));
7681 
7682   // Result of the bswap should be rotated by 16. If it's not legal, then
7683   // do  (x << 16) | (x >> 16).
7684   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7685   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7686     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7687   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7688     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7689   return DAG.getNode(ISD::OR, DL, VT,
7690                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7691                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7692 }
7693 
7694 /// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value; \see visitANDLike().
7696 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
7697   EVT VT = N1.getValueType();
7698   SDLoc DL(N);
7699 
7700   // fold (or x, undef) -> -1
7701   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7702     return DAG.getAllOnesConstant(DL, VT);
7703 
7704   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7705     return V;
7706 
7707   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
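  // E.g. with C1 = 0xF0 and C2 = 0x0F, if X is known zero in the low nibble
  // and Y is known zero in the high nibble, this becomes
  // (and (or X, Y), 0xFF).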
7708   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7709       // Don't increase # computations.
7710       (N0->hasOneUse() || N1->hasOneUse())) {
7711     // We can only do this xform if we know that bits from X that are set in C2
7712     // but not in C1 are already zero.  Likewise for Y.
7713     if (const ConstantSDNode *N0O1C =
7714         getAsNonOpaqueConstant(N0.getOperand(1))) {
7715       if (const ConstantSDNode *N1O1C =
7716           getAsNonOpaqueConstant(N1.getOperand(1))) {
7719         const APInt &LHSMask = N0O1C->getAPIntValue();
7720         const APInt &RHSMask = N1O1C->getAPIntValue();
7721 
7722         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7723             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7724           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7725                                   N0.getOperand(0), N1.getOperand(0));
7726           return DAG.getNode(ISD::AND, DL, VT, X,
7727                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
7728         }
7729       }
7730     }
7731   }
7732 
7733   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7734   if (N0.getOpcode() == ISD::AND &&
7735       N1.getOpcode() == ISD::AND &&
7736       N0.getOperand(0) == N1.getOperand(0) &&
7737       // Don't increase # computations.
7738       (N0->hasOneUse() || N1->hasOneUse())) {
7739     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7740                             N0.getOperand(1), N1.getOperand(1));
7741     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7742   }
7743 
7744   return SDValue();
7745 }
7746 
7747 /// OR combines for which the commuted variant will be tried as well.
7748 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7749                                   SDNode *N) {
7750   EVT VT = N0.getValueType();
7751 
7752   auto peekThroughResize = [](SDValue V) {
7753     if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7754       return V->getOperand(0);
7755     return V;
7756   };
7757 
7758   SDValue N0Resized = peekThroughResize(N0);
7759   if (N0Resized.getOpcode() == ISD::AND) {
7760     SDValue N1Resized = peekThroughResize(N1);
7761     SDValue N00 = N0Resized.getOperand(0);
7762     SDValue N01 = N0Resized.getOperand(1);
7763 
7764     // fold or (and x, y), x --> x
7765     if (N00 == N1Resized || N01 == N1Resized)
7766       return N1;
7767 
7768     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7769     // TODO: Set AllowUndefs = true.
7770     if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7771                                                   /* AllowUndefs */ false)) {
7772       if (peekThroughResize(NotOperand) == N1Resized)
7773         return DAG.getNode(ISD::OR, SDLoc(N), VT,
7774                            DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
7775     }
7776 
7777     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7778     if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7779                                                   /* AllowUndefs */ false)) {
7780       if (peekThroughResize(NotOperand) == N1Resized)
7781         return DAG.getNode(ISD::OR, SDLoc(N), VT,
7782                            DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
7783     }
7784   }
7785 
7786   if (N0.getOpcode() == ISD::XOR) {
7787     // fold or (xor x, y), x --> or x, y
7788     //      or (xor x, y), (x and/or y) --> or x, y
7789     SDValue N00 = N0.getOperand(0);
7790     SDValue N01 = N0.getOperand(1);
7791     if (N00 == N1)
7792       return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
7793     if (N01 == N1)
7794       return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
7795 
7796     if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
7797       SDValue N10 = N1.getOperand(0);
7798       SDValue N11 = N1.getOperand(1);
7799       if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
7800         return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
7801     }
7802   }
7803 
7804   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7805     return R;
7806 
7807   auto peekThroughZext = [](SDValue V) {
7808     if (V->getOpcode() == ISD::ZERO_EXTEND)
7809       return V->getOperand(0);
7810     return V;
7811   };
7812 
7813   // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7814   if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7815       N0.getOperand(0) == N1.getOperand(0) &&
7816       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7817     return N0;
7818 
7819   // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7820   if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7821       N0.getOperand(1) == N1.getOperand(0) &&
7822       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7823     return N0;
7824 
7825   return SDValue();
7826 }
7827 
7828 SDValue DAGCombiner::visitOR(SDNode *N) {
7829   SDValue N0 = N->getOperand(0);
7830   SDValue N1 = N->getOperand(1);
7831   EVT VT = N1.getValueType();
7832 
7833   // x | x --> x
7834   if (N0 == N1)
7835     return N0;
7836 
7837   // fold (or c1, c2) -> c1|c2
7838   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
7839     return C;
7840 
7841   // canonicalize constant to RHS
7842   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7843       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7844     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
7845 
7846   // fold vector ops
7847   if (VT.isVector()) {
7848     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
7849       return FoldedVOp;
7850 
7851     // fold (or x, 0) -> x, vector edition
7852     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7853       return N0;
7854 
7855     // fold (or x, -1) -> -1, vector edition
7856     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
      // Do not return N1, because an undef node may exist in N1.
7858       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
7859 
7860     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7861     // Do this only if the resulting type / shuffle is legal.
7862     auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7863     auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7864     if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7865       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7866       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7867       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7868       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7869       // Ensure both shuffles have a zero input.
7870       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7871         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7872         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7873         bool CanFold = true;
7874         int NumElts = VT.getVectorNumElements();
7875         SmallVector<int, 4> Mask(NumElts, -1);
7876 
7877         for (int i = 0; i != NumElts; ++i) {
7878           int M0 = SV0->getMaskElt(i);
7879           int M1 = SV1->getMaskElt(i);
7880 
7881           // Determine if either index is pointing to a zero vector.
7882           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7883           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7884 
          // If one element is zero and the other side is undef, keep undef.
7886           // This also handles the case that both are undef.
7887           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7888             continue;
7889 
7890           // Make sure only one of the elements is zero.
7891           if (M0Zero == M1Zero) {
7892             CanFold = false;
7893             break;
7894           }
7895 
7896           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7897 
7898           // We have a zero and non-zero element. If the non-zero came from
7899           // SV0 make the index a LHS index. If it came from SV1, make it
7900           // a RHS index. We need to mod by NumElts because we don't care
7901           // which operand it came from in the original shuffles.
7902           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7903         }
7904 
7905         if (CanFold) {
7906           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7907           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7908 
7909           SDValue LegalShuffle =
7910               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
7911                                           Mask, DAG);
7912           if (LegalShuffle)
7913             return LegalShuffle;
7914         }
7915       }
7916     }
7917   }
7918 
7919   // fold (or x, 0) -> x
7920   if (isNullConstant(N1))
7921     return N0;
7922 
7923   // fold (or x, -1) -> -1
7924   if (isAllOnesConstant(N1))
7925     return N1;
7926 
7927   if (SDValue NewSel = foldBinOpIntoSelect(N))
7928     return NewSel;
7929 
7930   // fold (or x, c) -> c iff (x & ~c) == 0
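  // E.g. (or (and y, 0x0F), 0x1F) -> 0x1F, since x can have no bits set
  // outside of c.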
7931   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7932   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7933     return N1;
7934 
7935   if (SDValue R = foldAndOrOfSETCC(N, DAG))
7936     return R;
7937 
7938   if (SDValue Combined = visitORLike(N0, N1, N))
7939     return Combined;
7940 
7941   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7942     return Combined;
7943 
7944   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
7945   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7946     return BSwap;
7947   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7948     return BSwap;
7949 
7950   // reassociate or
7951   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
7952     return ROR;
7953 
7954   // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7955   if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
7956                                         VT, N0, N1))
7957     return SD;
7958 
7959   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7960   // iff (c1 & c2) != 0 or c1/c2 are undef.
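  // E.g. (or (and X, 0x30), 0x10) -> (and (or X, 0x10), 0x30), using the
  // identity (X & C1) | C2 == (X | C2) & (C1 | C2).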
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                 {N1, N0.getOperand(1)})) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
    return Rot;

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.isADDLike(SDValue(N, 0)))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  // Postpone until legalization completed to avoid interference with bswap
  // folding
  if (LegalOperations || VT.isVector())
    if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
      return R;

  return SDValue();
}

static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
                                 SDValue &Mask) {
  if (Op.getOpcode() == ISD::AND &&
      DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
    Mask = Op.getOperand(1);
    return Op.getOperand(0);
  }
  return Op;
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
                            SDValue &Mask) {
  Op = stripConstantMask(DAG, Op, Mask);
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }
  return false;
}

/// Helper function for visitOR to extract the needed side of a rotate idiom
/// from a shl/srl/mul/udiv.  This is meant to handle cases where
/// InstCombine merged some outside op with one of the shifts from
/// the rotate pattern.
/// \returns An empty \c SDValue if the needed shift couldn't be extracted.
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
///   (or (add v v) (srl v bitwidth-1)):
///     expands (add v v) -> (shl v 1)
///
///   (or (mul v c0) (srl (mul v c1) c2)):
///     expands (mul v c0) -> (shl (mul v c1) c3)
///
///   (or (udiv v c0) (shl (udiv v c1) c2)):
///     expands (udiv v c0) -> (srl (udiv v c1) c3)
///
///   (or (shl v c0) (srl (shl v c1) c2)):
///     expands (shl v c0) -> (shl (shl v c1) c3)
///
///   (or (srl v c0) (shl (srl v c1) c2)):
///     expands (srl v c0) -> (srl (srl v c1) c3)
///
/// Such that in all cases, c3+c2==bitwidth(op v c1).
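///
/// Illustrative i32 instance of the mul pattern (constants picked by hand to
/// satisfy the constraint): in (or (mul v 40) (srl (mul v 5) 29)), we have
/// c3 = 32 - 29 = 3 and 40 == 5 << 3, so (mul v 40) is expanded to
/// (shl (mul v 5) 3), which then pairs with the srl to form a rotate by 3.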
static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
                                     SDValue ExtractFrom, SDValue &Mask,
                                     const SDLoc &DL) {
  assert(OppShift && ExtractFrom && "Empty SDValue");
  if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
    return SDValue();

  ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);

  // Value and Type of the shift.
  SDValue OppShiftLHS = OppShift.getOperand(0);
  EVT ShiftedVT = OppShiftLHS.getValueType();

  // Amount of the existing shift.
  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));

  // (add v v) -> (shl v 1)
  // TODO: Should this be a general DAG canonicalization?
  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
      ExtractFrom.getOpcode() == ISD::ADD &&
      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
      ExtractFrom.getOperand(0) == OppShiftLHS &&
      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));

  // Preconditions:
  //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
  //
  // Find opcode of the needed shift to be extracted from (op0 v c0).
  unsigned Opcode = ISD::DELETED_NODE;
  bool IsMulOrDiv = false;
  // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
  // opcode or its arithmetic (mul or udiv) variant.
  auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
    IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
    if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
      return false;
    Opcode = NeededShift;
    return true;
  };
  // op0 must be either the needed shift opcode or the mul/udiv equivalent
  // that the needed shift can be extracted from.
  if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
      (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
    return SDValue();

  // op0 must be the same opcode on both sides, have the same LHS argument,
  // and produce the same value type.
  if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
      OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
      ShiftedVT != ExtractFrom.getValueType())
    return SDValue();

  // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
  ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
  // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
  ConstantSDNode *ExtractFromCst =
      isConstOrConstSplat(ExtractFrom.getOperand(1));
  // TODO: We should be able to handle non-uniform constant vectors for these
  // values.
  // Check that we have constant values.
  if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
      !OppLHSCst || !OppLHSCst->getAPIntValue() ||
      !ExtractFromCst || !ExtractFromCst->getAPIntValue())
    return SDValue();

  // Compute the shift amount we need to extract to complete the rotate.
  const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
  if (OppShiftCst->getAPIntValue().ugt(VTWidth))
    return SDValue();
  APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
  // Normalize the bitwidth of the two mul/udiv/shift constant operands.
  APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
  APInt OppLHSAmt = OppLHSCst->getAPIntValue();
  zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);

  // Now try to extract the needed shift from the ExtractFrom op and see if
  // the result matches up with the existing shift's LHS op.
  if (IsMulOrDiv) {
    // Op to extract from is a mul or udiv by a constant.
    // Check:
    //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
    //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
    const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
                                                 NeededShiftAmt.getZExtValue());
    APInt ResultAmt;
    APInt Rem;
    APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
    if (Rem != 0 || ResultAmt != OppLHSAmt)
      return SDValue();
  } else {
    // Op to extract from is a shift by a constant.
    // Check:
    //      c2 - (bitwidth(op0 v c0) - c1) == c0
    if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
                                          ExtractFromAmt.getBitWidth()))
      return SDValue();
  }

  // Return the expanded shift op that should allow a rotate to be formed.
  EVT ShiftVT = OppShift.getOperand(1).getValueType();
  EVT ResVT = ExtractFrom.getValueType();
  SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
  return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
//
// The IsRotate flag should be set when the LHS of both shifts is the same.
// Otherwise if matching a general funnel shift, it should be clear.
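// For example, with EltSize == 32, Pos == Y and Neg == (and (sub 0, Y), 31)
// is accepted: peeking through the mask leaves Neg == (sub 0, Y), so
// NegC == 0 and NegOp1 == Pos, and the masked equality below reduces to
// checking (0 & 31) == 0.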
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
                           SelectionDAG &DAG, bool IsRotate) {
  const auto &TLI = DAG.getTargetLoweringInfo();
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  // This allows us to peek through any operations that only affect Mask's
  // un-demanded bits.
  //
  // NOTE: We can only do this when matching operations which won't modify the
  // least Log2(EltSize) significant bits and not a general funnel shift.
  unsigned MaskLoBits = 0;
  if (IsRotate && isPowerOf2_64(EltSize)) {
    unsigned Bits = Log2_64(EltSize);
    unsigned NegBits = Neg.getScalarValueSizeInBits();
    if (NegBits >= Bits) {
      APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
      if (SDValue Inner =
              TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
        Neg = Inner;
        MaskLoBits = Bits;
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is the result of an operation on Pos' that
  // won't affect Mask's demanded bits, just replace Pos with Pos'. These
  // operations are redundant for the purpose of the equality.
  if (MaskLoBits) {
    unsigned PosBits = Pos.getScalarValueSizeInBits();
    if (PosBits >= MaskLoBits) {
      APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
      if (SDValue Inner =
              TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
        Pos = Inner;
      }
    }
  }

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // We also need to account for a potential truncation of NegOp1 if the amount
  // has already been legalized to a shift amount type.
  APInt Width;
  if ((Pos == NegOp1) ||
      (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, bool HasPos,
                                       unsigned PosOpcode, unsigned NegOpcode,
                                       const SDLoc &DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
                     /*IsRotate*/ true)) {
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg);
  }

  return SDValue();
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
// former being preferred if supported.  InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
// TODO: Merge with MatchRotatePosNeg.
SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, bool HasPos,
                                       unsigned PosOpcode, unsigned NegOpcode,
                                       const SDLoc &DL) {
  EVT VT = N0.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();

  // fold (or (shl x0, (*ext y)),
  //          (srl x1, (*ext (sub 32, y)))) ->
  //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
  //
  // fold (or (shl x0, (*ext (sub 32, y))),
  //          (srl x1, (*ext y))) ->
  //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                       HasPos ? Pos : Neg);
  }

  // Matching the shift+xor cases, we can't easily use the xor'd shift amount
  // so for now just use the PosOpcode case if it's legal.
  // TODO: When can we use the NegOpcode case?
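  // A sketch of why the xor form below is a funnel shift, for i32:
  // (xor y, 31) == 31 - y for y in [0, 31], so srl-by-1 followed by
  // srl-by-(31 - y) shifts right by a total of 32 - y; at y == 0 the srl
  // chain produces 0 and the OR degenerates to x0, matching fshl(x0, x1, 0).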
  if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
    auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
      if (Op.getOpcode() != BinOpc)
        return false;
      ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
      return Cst && (Cst->getAPIntValue() == Imm);
    };

    // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
    //   -> (fshl x0, x1, y)
    if (IsBinOpImm(N1, ISD::SRL, 1) &&
        IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
        InnerPos == InnerNeg.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
      return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
    }

    // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    if (IsBinOpImm(N0, ISD::SHL, 1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }

    // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
    //   -> (fshr x0, x1, y)
    // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
        IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
        InnerNeg == InnerPos.getOperand(0) &&
        TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
      return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
    }
  }

  return SDValue();
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr]. This also matches funnel shift patterns, similar to rotation but
// with different shifted sources.
SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  EVT VT = LHS.getValueType();

  // The target must have at least one rotate/funnel flavor.
  // We still try to match rotate by constant pre-legalization.
  // TODO: Support pre-legalization funnel-shift by constant.
  bool HasROTL = hasOperation(ISD::ROTL, VT);
  bool HasROTR = hasOperation(ISD::ROTR, VT);
  bool HasFSHL = hasOperation(ISD::FSHL, VT);
  bool HasFSHR = hasOperation(ISD::FSHR, VT);

  // If the type is going to be promoted and the target has enabled custom
  // lowering for rotate, allow matching rotate by non-constants. Only allow
  // this for scalar types.
  if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
                                  TargetLowering::TypePromoteInteger) {
    HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
    HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
  }

  if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // Check for truncated rotate.
  if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
    }
  }

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  matchRotateHalf(DAG, LHS, LHSShift, LHSMask);

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  matchRotateHalf(DAG, RHS, RHSShift, RHSMask);

  // If neither side matched a rotate half, bail
  if (!LHSShift && !RHSShift)
    return SDValue();

  // InstCombine may have combined a constant shl, srl, mul, or udiv with one
  // side of the rotate, so try to handle that here. In all cases we need to
  // pass the matched shift from the opposite side to compute the opcode and
  // needed shift amount to extract.  We still want to do this if both sides
  // matched a rotate half because one half may be a potential overshift that
  // can be broken down (i.e., if InstCombine merged two shl or srl ops into a
  // single one).

  // Have LHS side of the rotate, try to extract the needed shift from the RHS.
  if (LHSShift)
    if (SDValue NewRHSShift =
            extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
      RHSShift = NewRHSShift;
  // Have RHS side of the rotate, try to extract the needed shift from the LHS.
  if (RHSShift)
    if (SDValue NewLHSShift =
            extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
      LHSShift = NewLHSShift;

  // If a side is still missing, nothing else we can do.
  if (!RHSShift || !LHSShift)
    return SDValue();

  // At this point we've matched or extracted a shift op on each side.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return SDValue(); // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  // Something has gone wrong - we've lost the shl/srl pair - bail.
  if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
    return SDValue();

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
                                        ConstantSDNode *RHS) {
    return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
  };

  auto ApplyMasks = [&](SDValue Res) {
    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
      SDValue Mask = AllOnes;

      if (LHSMask.getNode()) {
        SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
      }
      if (RHSMask.getNode()) {
        SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
      }

      Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
    }

    return Res;
  };

  // TODO: Support pre-legalization funnel-shift by constant.
  bool IsRotate = LHSShiftArg == RHSShiftArg;
  if (!IsRotate && !(HasFSHL || HasFSHR)) {
    if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
        ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
      // Look for a disguised rotate by constant.
      // The common shifted operand X may be hidden inside another 'or'.
      SDValue X, Y;
      auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
        if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
          return false;
        if (CommonOp == Or.getOperand(0)) {
          X = CommonOp;
          Y = Or.getOperand(1);
          return true;
        }
        if (CommonOp == Or.getOperand(1)) {
          X = CommonOp;
          Y = Or.getOperand(0);
          return true;
        }
        return false;
      };

      SDValue Res;
      if (matchOr(LHSShiftArg, RHSShiftArg)) {
        // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
      } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
        // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
        SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
        SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
        Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
      } else {
        return SDValue();
      }

      return ApplyMasks(Res);
    }

    return SDValue(); // Requires funnel shift support.
  }

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
  // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
  // iff C1+C2 == EltSizeInBits
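  // e.g. for i32 (illustrative): (or (shl x, 8), (srl x, 24)) becomes
  // (rotl x, 8) or (rotr x, 24), and with distinct sources x and y it
  // becomes (fshl x, y, 8) or (fshr x, y, 24).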
  if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
    SDValue Res;
    if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
      bool UseROTL = !LegalOperations || HasROTL;
      Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                        UseROTL ? LHSShiftAmt : RHSShiftAmt);
    } else {
      bool UseFSHL = !LegalOperations || HasFSHL;
      Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
                        RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
    }

    return ApplyMasks(Res);
  }

  // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
  // shift.
  if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
    return SDValue();

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return SDValue();

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  if (IsRotate && (HasROTL || HasROTR)) {
    SDValue TryL =
        MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
                          RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
    if (TryL)
      return TryL;

    SDValue TryR =
        MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
                          LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
    if (TryR)
      return TryR;
  }

  SDValue TryL =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                        LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
  if (TryL)
    return TryL;

  SDValue TryR =
      MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                        RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
  if (TryR)
    return TryR;

  return SDValue();
}

/// Recursively traverses the expression calculating the origin of the
/// requested byte of the given value. Returns std::nullopt if the provider
/// can't be calculated.
///
/// For all the values except the root of the expression, we verify that the
/// value has exactly one use and if not then return std::nullopt. This way if
/// the origin of the byte is returned it's guaranteed that the values which
/// contribute to the byte are not used outside of this expression.
///
/// However, there is a special case when dealing with vector loads -- we allow
/// more than one use if the load is a vector type.  Since the values that
/// contribute to the byte ultimately come from the ExtractVectorElements of the
/// Load, we don't care if the Load has uses other than ExtractVectorElements,
/// because those operations are independent from the pattern to be combined.
/// For vector loads, we simply care that the ByteProviders are adjacent
/// positions of the same vector, and their index matches the byte that is being
/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
/// is the index used in an ExtractVectorElement, and \p StartingIndex is the
/// byte position we are trying to provide for the LoadCombine. If these do
/// not match, then we can not combine the vector loads. \p Index uses the
/// byte position we are trying to provide for and is matched against the
/// shl and load size. The \p Index algorithm ensures the requested byte is
/// provided for by the pattern, and the pattern does not over-provide bytes.
///
/// The supported LoadCombine pattern for vector loads is as follows
///                              or
///                          /        \
///                         or        shl
///                       /     \      |
///                     or      shl   zext
///                   /    \     |     |
///                 shl   zext  zext  EVE*
///                  |     |     |     |
///                 zext  EVE*  EVE*  LOAD
///                  |     |     |
///                 EVE*  LOAD  LOAD
///                  |
///                 LOAD
///
/// *ExtractVectorElement
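///
/// As a sketch of the index bookkeeping: byte 2 of the combined value can be
/// provided by (shl (zext (extract_elt (load v4i8), 2)), 16). The shl
/// consumes two bytes of \p Index, and the element index 2 matches
/// \p StartingIndex == 2, so the byte resolves to byte 0 of vector element 2.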
using SDByteProvider = ByteProvider<SDNode *>;

static std::optional<SDByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                      std::optional<uint64_t> VectorIndex,
                      unsigned StartingIndex = 0) {

  // Typical i64 by i8 pattern requires recursion up to 8 calls depth
  if (Depth == 10)
    return std::nullopt;

  // Only allow multiple uses if the instruction is a vector load (in which
  // case we will use the load for every ExtractVectorElement)
  if (Depth && !Op.hasOneUse() &&
      (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
    return std::nullopt;

  // Fail to combine if we have encountered anything but a LOAD after handling
  // an ExtractVectorElement.
  if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
    return std::nullopt;

  unsigned BitWidth = Op.getValueSizeInBits();
  if (BitWidth % 8 != 0)
    return std::nullopt;
  unsigned ByteWidth = BitWidth / 8;
  assert(Index < ByteWidth && "invalid index requested");
  (void) ByteWidth;

  switch (Op.getOpcode()) {
  case ISD::OR: {
    auto LHS =
        calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
    if (!LHS)
      return std::nullopt;
    auto RHS =
        calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
    if (!RHS)
      return std::nullopt;

    if (LHS->isConstantZero())
      return RHS;
    if (RHS->isConstantZero())
      return LHS;
    return std::nullopt;
  }
  case ISD::SHL: {
    auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!ShiftOp)
      return std::nullopt;

    uint64_t BitShift = ShiftOp->getZExtValue();

    if (BitShift % 8 != 0)
      return std::nullopt;
    uint64_t ByteShift = BitShift / 8;

    // If we are shifting by an amount greater than the index we are trying to
    // provide, then do not provide anything. Otherwise, reduce the index by
    // the amount we shifted by.
    return Index < ByteShift
               ? SDByteProvider::getConstantZero()
               : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
                                       Depth + 1, VectorIndex, Index);
  }
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND: {
    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return std::nullopt;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    if (Index >= NarrowByteWidth)
      return Op.getOpcode() == ISD::ZERO_EXTEND
                 ? std::optional<SDByteProvider>(
                       SDByteProvider::getConstantZero())
                 : std::nullopt;
    return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
                                 StartingIndex);
  }
  case ISD::BSWAP:
    return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
                                 Depth + 1, VectorIndex, StartingIndex);
  case ISD::EXTRACT_VECTOR_ELT: {
    auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
    if (!OffsetOp)
      return std::nullopt;

    VectorIndex = OffsetOp->getZExtValue();

    SDValue NarrowOp = Op->getOperand(0);
    unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return std::nullopt;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // Check to see if the position of the element in the vector corresponds
    // with the byte we are trying to provide for. In the case of a vector of
    // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases,
    // the element will provide a range of bytes. For example, if we have a
    // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
    // 3).
    if (*VectorIndex * NarrowByteWidth > StartingIndex)
      return std::nullopt;
    if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
      return std::nullopt;

    return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
                                 VectorIndex, StartingIndex);
  }
  case ISD::LOAD: {
    auto L = cast<LoadSDNode>(Op.getNode());
    if (!L->isSimple() || L->isIndexed())
      return std::nullopt;

    unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
    if (NarrowBitWidth % 8 != 0)
      return std::nullopt;
    uint64_t NarrowByteWidth = NarrowBitWidth / 8;

    // If the width of the load does not reach the byte we are trying to
    // provide for and it is not a ZEXTLOAD, then the load does not provide
    // for the byte in question.
    if (Index >= NarrowByteWidth)
      return L->getExtensionType() == ISD::ZEXTLOAD
                 ? std::optional<SDByteProvider>(
                       SDByteProvider::getConstantZero())
                 : std::nullopt;

    unsigned BPVectorIndex = VectorIndex.value_or(0U);
    return SDByteProvider::getSrc(L, Index, BPVectorIndex);
  }
  }

  return std::nullopt;
}

static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
  return i;
}

static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
  return BW - i - 1;
}

// Check if the byte offsets we are looking at match with either a big or
// little endian value load. Return true for big endian, false for little
// endian, and std::nullopt if the match failed.
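// For example, offsets {0, 1, 2, 3} relative to FirstOffset match the little
// endian layout (returning false), while {3, 2, 1, 0} match the big endian
// layout (returning true).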
static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
                                       int64_t FirstOffset) {
  // Endianness can be decided only when there are at least 2 bytes.
  unsigned Width = ByteOffsets.size();
  if (Width < 2)
    return std::nullopt;

  bool BigEndian = true, LittleEndian = true;
  for (unsigned i = 0; i < Width; i++) {
    int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
    LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
    BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
    if (!BigEndian && !LittleEndian)
      return std::nullopt;
  }

  assert((BigEndian != LittleEndian) && "It should be either big endian or "
                                        "little endian");
  return BigEndian;
}

static SDValue stripTruncAndExt(SDValue Value) {
  switch (Value.getOpcode()) {
  case ISD::TRUNCATE:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND:
    return stripTruncAndExt(Value.getOperand(0));
  }
  return Value;
}

/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
/// supports it.
///
/// Assuming little endian target:
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 0) & 0xFF;
///  p[1] = (val >> 8) & 0xFF;
///  p[2] = (val >> 16) & 0xFF;
///  p[3] = (val >> 24) & 0xFF;
/// =>
///  *((i32)p) = val;
///
///  i8 *p = ...
///  i32 val = ...
///  p[0] = (val >> 24) & 0xFF;
///  p[1] = (val >> 16) & 0xFF;
///  p[2] = (val >> 8) & 0xFF;
///  p[3] = (val >> 0) & 0xFF;
/// =>
///  *((i32)p) = BSWAP(val);
SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
  // The matching looks for "store (trunc x)" patterns that appear early but are
  // likely to be replaced by truncating store nodes during combining.
  // TODO: If there is evidence that running this later would help, this
  //       limitation could be removed. Legality checks may need to be added
  //       for the created store and optional bswap/rotate.
  if (LegalOperations || OptLevel == CodeGenOptLevel::None)
    return SDValue();

  // We only handle merging simple stores of 1-4 bytes.
  // TODO: Allow unordered atomics when wider type is legal (see D66309)
  EVT MemVT = N->getMemoryVT();
  if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
      !N->isSimple() || N->isIndexed())
    return SDValue();

  // Collect all of the stores in the chain, up to the maximum store width
  // (i64).
  SDValue Chain = N->getChain();
  SmallVector<StoreSDNode *, 8> Stores = {N};
  unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
  unsigned MaxWideNumBits = 64;
  unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
  while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
    // All stores must be the same size to ensure that we are writing all of the
    // bytes in the wide value.
    // This store should have exactly one use as a chain operand for another
    // store in the merging set. If there are other chain uses, then the
    // transform may not be safe because order of loads/stores outside of this
    // set may not be preserved.
    // TODO: We could allow multiple sizes by tracking each stored byte.
    if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
        Store->isIndexed() || !Store->hasOneUse())
      return SDValue();
    Stores.push_back(Store);
    Chain = Store->getChain();
    if (MaxStores < Stores.size())
      return SDValue();
  }
  // There is no reason to continue if we do not have at least a pair of stores.
  if (Stores.size() < 2)
    return SDValue();

  // Handle simple types only.
  LLVMContext &Context = *DAG.getContext();
  unsigned NumStores = Stores.size();
  unsigned WideNumBits = NumStores * NarrowNumBits;
  EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
  if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
    return SDValue();

  // Check if all bytes of the source value that we are looking at are stored
  // to the same base address. Collect offsets from Base address into OffsetMap.
  SDValue SourceValue;
  SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
  int64_t FirstOffset = INT64_MAX;
  StoreSDNode *FirstStore = nullptr;
  std::optional<BaseIndexOffset> Base;
  for (auto *Store : Stores) {
    // All the stores store different parts of the CombinedValue. A truncate is
    // required to get the partial value.
    SDValue Trunc = Store->getValue();
    if (Trunc.getOpcode() != ISD::TRUNCATE)
      return SDValue();
    // Other than the first/last part, a shift operation is required to get the
    // offset.
    int64_t Offset = 0;
    SDValue WideVal = Trunc.getOperand(0);
    if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
        isa<ConstantSDNode>(WideVal.getOperand(1))) {
      // The shift amount must be a constant multiple of the narrow type.
      // It is translated to the offset address in the wide source value "y".
      //
      // x = srl y, ShiftAmtC
      // i8 z = trunc x
      // store z, ...
      uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
      if (ShiftAmtC % NarrowNumBits != 0)
        return SDValue();

      Offset = ShiftAmtC / NarrowNumBits;
      WideVal = WideVal.getOperand(0);
    }

    // Stores must share the same source value with different offsets.
    // Truncates and extends should be stripped to get the single source value.
    if (!SourceValue)
      SourceValue = WideVal;
    else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
      return SDValue();
    else if (SourceValue.getValueType() != WideVT) {
      if (WideVal.getValueType() == WideVT ||
          WideVal.getScalarValueSizeInBits() >
              SourceValue.getScalarValueSizeInBits())
        SourceValue = WideVal;
      // Give up if the source value type is smaller than the store size.
      if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
        return SDValue();
    }

    // Stores must share the same base address.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Remember the first store.
    if (ByteOffsetFromBase < FirstOffset) {
      FirstStore = Store;
      FirstOffset = ByteOffsetFromBase;
    }
    // Map the offset in the store and the offset in the combined value, and
    // early return if it has been set before.
    if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
      return SDValue();
    OffsetMap[Offset] = ByteOffsetFromBase;
  }

  assert(FirstOffset != INT64_MAX && "First byte offset must be set");
  assert(FirstStore && "First store must be set");

  // Check that a store of the wide type is both allowed and fast on the target
  const DataLayout &Layout = DAG.getDataLayout();
  unsigned Fast = 0;
  bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
                                        *FirstStore->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  // Check if the pieces of the value are going to the expected places in memory
  // to merge the stores.
  auto checkOffsets = [&](bool MatchLittleEndian) {
    if (MatchLittleEndian) {
      for (unsigned i = 0; i != NumStores; ++i)
        if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
          return false;
    } else { // MatchBigEndian by reversing loop counter.
      for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
        if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
          return false;
    }
    return true;
  };

  // Check if the offsets line up for the native data layout of this target.
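  // Illustrative little endian case: two i16 stores of (val >> 16) at p and
  // (trunc val) at p + 2 line up in big endian order, so a single i32 store
  // of (rotr val, 16) at p preserves the same memory image.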
  bool NeedBswap = false;
  bool NeedRotate = false;
  if (!checkOffsets(Layout.isLittleEndian())) {
    // Special-case: check if byte offsets line up for the opposite endian.
    if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
      NeedBswap = true;
    else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
      NeedRotate = true;
    else
      return SDValue();
  }

  SDLoc DL(N);
  if (WideVT != SourceValue.getValueType()) {
    assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
           "Unexpected store value to merge");
    SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
  }

  // Before legalize we can introduce illegal bswaps/rotates which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // store and byte shuffling instead of several stores and byte shuffling.
  if (NeedBswap) {
    SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
  } else if (NeedRotate) {
    assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
    SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
    SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
  }

  SDValue NewStore =
      DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
                   FirstStore->getPointerInfo(), FirstStore->getAlign());

  // Rely on other DAG combine rules to remove the other individual stores.
  DAG.ReplaceAllUsesWith(N, NewStore.getNode());
  return NewStore;
}

/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  auto MemoryByteOffset = [&](SDByteProvider P) {
    assert(P.hasSrc() && "Must be a memory byte provider");
    auto *Load = cast<LoadSDNode>(P.Src.value());

    unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();

    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes, not bits");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
                             : littleEndianByteAt(LoadByteWidth, P.DestOffset);
  };

  std::optional<BaseIndexOffset> Base;
  SDValue Chain;

  SmallPtrSet<LoadSDNode *, 8> Loads;
  std::optional<SDByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the
  // same base address. Collect byte offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
  unsigned ZeroExtendedBytes = 0;
  for (int i = ByteWidth - 1; i >= 0; --i) {
    auto P =
        calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
                              /*StartingIndex*/ i);
    if (!P)
      return SDValue();

    if (P->isConstantZero()) {
      // It's OK for the N most significant bytes to be 0; we can just
      // zero-extend the load.
      if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
        return SDValue();
      continue;
    }
    assert(P->hasSrc() && "provenance should either be memory or zero");
    auto *L = cast<LoadSDNode>(P->Src.value());

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;

    // For vector loads, the expected load combine pattern will have an
    // ExtractElement for each index in the vector. While each of these
    // ExtractElements will be accessing the same base address as determined
    // by the load instruction, the actual bytes they interact with will differ
    // due to different ExtractElement indices. To accurately determine the
    // byte position of an ExtractElement, we offset the base load ptr with
    // the index multiplied by the byte size of each element in the vector.
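    // For example, extracting element 1 of a v2i16 load adds
    // 1 * 16 / 8 == 2 bytes to the pointer offset computed below.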
    if (L->getMemoryVT().isVector()) {
      unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
      if (LoadWidthInBit % 8 != 0)
        return SDValue();
      unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
      Ptr.addToOffset(ByteOffsetFromVector);
    }

    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9168       return SDValue();
9169 
9170     // Calculate the offset of the current byte from the base address
9171     ByteOffsetFromBase += MemoryByteOffset(*P);
9172     ByteOffsets[i] = ByteOffsetFromBase;
9173 
9174     // Remember the first byte load
9175     if (ByteOffsetFromBase < FirstOffset) {
9176       FirstByteProvider = P;
9177       FirstOffset = ByteOffsetFromBase;
9178     }
9179 
9180     Loads.insert(L);
9181   }
9182 
9183   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9184          "memory, so there must be at least one load which produces the value");
9185   assert(Base && "Base address of the accessed memory location must be set");
9186   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9187 
9188   bool NeedsZext = ZeroExtendedBytes > 0;
9189 
9190   EVT MemVT =
9191       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9192 
9193   if (!MemVT.isSimple())
9194     return SDValue();
9195 
9196   // Before legalize we can introduce too wide illegal loads which will be later
9197   // split into legal sized loads. This enables us to combine i64 load by i8
9198   // patterns to a couple of i32 loads on 32 bit targets.
9199   if (LegalOperations &&
9200       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9201                             MemVT))
9202     return SDValue();
9203 
9204   // Check if the bytes of the OR we are looking at match with either big or
9205   // little endian value load
9206   std::optional<bool> IsBigEndian = isBigEndian(
9207       ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9208   if (!IsBigEndian)
9209     return SDValue();
9210 
9211   assert(FirstByteProvider && "must be set");
9212 
9213   // Ensure that the first byte is loaded from zero offset of the first load.
9214   // So the combined value can be loaded from the first load address.
9215   if (MemoryByteOffset(*FirstByteProvider) != 0)
9216     return SDValue();
9217   auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9218 
9219   // The node we are looking at matches with the pattern, check if we can
9220   // replace it with a single (possibly zero-extended) load and bswap + shift if
9221   // needed.
9222 
9223   // If the load needs byte swap check if the target supports it
9224   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9225 
9226   // Before legalize we can introduce illegal bswaps which will be later
9227   // converted to an explicit bswap sequence. This way we end up with a single
9228   // load and byte shuffling instead of several loads and byte shuffling.
9229   // We do not introduce illegal bswaps when zero-extending as this tends to
9230   // introduce too many arithmetic instructions.
9231   if (NeedsBswap && (LegalOperations || NeedsZext) &&
9232       !TLI.isOperationLegal(ISD::BSWAP, VT))
9233     return SDValue();
9234 
9235   // If we need to bswap and zero extend, we have to insert a shift. Check that
9236   // it is legal.
9237   if (NeedsBswap && NeedsZext && LegalOperations &&
9238       !TLI.isOperationLegal(ISD::SHL, VT))
9239     return SDValue();
9240 
9241   // Check that a load of the wide type is both allowed and fast on the target
9242   unsigned Fast = 0;
9243   bool Allowed =
9244       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9245                              *FirstLoad->getMemOperand(), &Fast);
9246   if (!Allowed || !Fast)
9247     return SDValue();
9248 
9249   SDValue NewLoad =
9250       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9251                      Chain, FirstLoad->getBasePtr(),
9252                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9253 
9254   // Transfer chain users from old loads to the new load.
9255   for (LoadSDNode *L : Loads)
9256     DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9257 
9258   if (!NeedsBswap)
9259     return NewLoad;
9260 
9261   SDValue ShiftedLoad =
9262       NeedsZext
9263           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9264                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9265                                                    SDLoc(N), LegalOperations))
9266           : NewLoad;
9267   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9268 }
9269 
9270 // If the target has andn, bsl, or a similar bit-select instruction,
9271 // we want to unfold masked merge, with canonical pattern of:
9272 //   |        A  |  |B|
9273 //   ((x ^ y) & m) ^ y
9274 //    |  D  |
9275 // Into:
9276 //   (x & m) | (y & ~m)
9277 // If y is a constant, m is not a 'not', and the 'andn' does not work with
9278 // immediates, we unfold into a different pattern:
9279 //   ~(~x & m) & (m | y)
9280 // If x is a constant, m is a 'not', and the 'andn' does not work with
9281 // immediates, we unfold into a different pattern:
9282 //   (x | ~m) & ~(~m & ~y)
9283 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9284 //       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
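// A concrete example with 4-bit values x = 0b1100, y = 0b1010, m = 0b0110:
//   ((x ^ y) & m) ^ y  = (0b0110 & 0b0110) ^ 0b1010 = 0b1100
//   (x & m) | (y & ~m) = 0b0100 | 0b1000            = 0b1100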
9286 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9287   assert(N->getOpcode() == ISD::XOR);
9288 
9289   // Don't touch 'not' (i.e. where y = -1).
9290   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9291     return SDValue();
9292 
9293   EVT VT = N->getValueType(0);
9294 
9295   // There are 3 commutable operators in the pattern,
9296   // so we have to deal with 8 possible variants of the basic pattern.
9297   SDValue X, Y, M;
9298   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9299     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9300       return false;
9301     SDValue Xor = And.getOperand(XorIdx);
9302     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9303       return false;
9304     SDValue Xor0 = Xor.getOperand(0);
9305     SDValue Xor1 = Xor.getOperand(1);
9306     // Don't touch 'not' (i.e. where y = -1).
9307     if (isAllOnesOrAllOnesSplat(Xor1))
9308       return false;
9309     if (Other == Xor0)
9310       std::swap(Xor0, Xor1);
9311     if (Other != Xor1)
9312       return false;
9313     X = Xor0;
9314     Y = Xor1;
9315     M = And.getOperand(XorIdx ? 0 : 1);
9316     return true;
9317   };
9318 
9319   SDValue N0 = N->getOperand(0);
9320   SDValue N1 = N->getOperand(1);
9321   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9322       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9323     return SDValue();
9324 
  // Don't do anything if the mask is constant. This should not be reachable.
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
9328   if (isa<ConstantSDNode>(M.getNode()))
9329     return SDValue();
9330 
9331   // We can transform if the target has AndNot
9332   if (!TLI.hasAndNot(M))
9333     return SDValue();
9334 
9335   SDLoc DL(N);
9336 
  // If Y is a constant, check that 'andn' works with immediates, unless M is
  // a bitwise not that would already allow ANDN to be used.
9339   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9340     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9341     // If not, we need to do a bit more work to make sure andn is still used.
9342     SDValue NotX = DAG.getNOT(DL, X, VT);
9343     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9344     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9345     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9346     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9347   }
9348 
9349   // If X is a constant and M is a bitwise not, check that 'andn' works with
9350   // immediates.
9351   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9352     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9353     // If not, we need to do a bit more work to make sure andn is still used.
9354     SDValue NotM = M.getOperand(0);
9355     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9356     SDValue NotY = DAG.getNOT(DL, Y, VT);
9357     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9358     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9359     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9360   }
9361 
9362   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9363   SDValue NotM = DAG.getNOT(DL, M, VT);
9364   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9365 
9366   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9367 }
9368 
9369 SDValue DAGCombiner::visitXOR(SDNode *N) {
9370   SDValue N0 = N->getOperand(0);
9371   SDValue N1 = N->getOperand(1);
9372   EVT VT = N0.getValueType();
9373   SDLoc DL(N);
9374 
9375   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9376   if (N0.isUndef() && N1.isUndef())
9377     return DAG.getConstant(0, DL, VT);
9378 
9379   // fold (xor x, undef) -> undef
9380   if (N0.isUndef())
9381     return N0;
9382   if (N1.isUndef())
9383     return N1;
9384 
9385   // fold (xor c1, c2) -> c1^c2
9386   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9387     return C;
9388 
9389   // canonicalize constant to RHS
9390   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9391       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9392     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9393 
9394   // fold vector ops
9395   if (VT.isVector()) {
9396     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9397       return FoldedVOp;
9398 
9399     // fold (xor x, 0) -> x, vector edition
9400     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9401       return N0;
9402   }
9403 
9404   // fold (xor x, 0) -> x
9405   if (isNullConstant(N1))
9406     return N0;
9407 
9408   if (SDValue NewSel = foldBinOpIntoSelect(N))
9409     return NewSel;
9410 
9411   // reassociate xor
9412   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9413     return RXOR;
9414 
9415   // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9416   if (SDValue SD =
9417           reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9418     return SD;
9419 
9420   // fold (a^b) -> (a|b) iff a and b share no bits.
9421   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9422       DAG.haveNoCommonBitsSet(N0, N1))
9423     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
9424 
9425   // look for 'add-like' folds:
9426   // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
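  // e.g. for i8: x ^ 0x80 == x + 0x80, since only the sign bit flips and no
  // carry can propagate into it.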
9427   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9428       isMinSignedConstant(N1))
9429     if (SDValue Combined = visitADDLike(N))
9430       return Combined;
9431 
9432   // fold !(x cc y) -> (x !cc y)
9433   unsigned N0Opcode = N0.getOpcode();
9434   SDValue LHS, RHS, CC;
9435   if (TLI.isConstTrueVal(N1) &&
9436       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9437     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9438                                                LHS.getValueType());
9439     if (!LegalOperations ||
9440         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9441       switch (N0Opcode) {
9442       default:
9443         llvm_unreachable("Unhandled SetCC Equivalent!");
9444       case ISD::SETCC:
9445         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9446       case ISD::SELECT_CC:
9447         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9448                                N0.getOperand(3), NotCC);
9449       case ISD::STRICT_FSETCC:
9450       case ISD::STRICT_FSETCCS: {
9451         if (N0.hasOneUse()) {
9452           // FIXME Can we handle multiple uses? Could we token factor the chain
9453           // results from the new/old setcc?
9454           SDValue SetCC =
9455               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9456                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9457           CombineTo(N, SetCC);
9458           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9459           recursivelyDeleteUnusedNodes(N0.getNode());
9460           return SDValue(N, 0); // Return N so it doesn't get rechecked!
9461         }
9462         break;
9463       }
9464       }
9465     }
9466   }
9467 
9468   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9469   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)) {
9471     SDValue V = N0.getOperand(0);
9472     SDLoc DL0(N0);
9473     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9474                     DAG.getConstant(1, DL0, V.getValueType()));
9475     AddToWorklist(V.getNode());
9476     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9477   }
9478 
9479   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9480   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9481       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9482     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9483     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9484       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9485       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9486       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode());
      AddToWorklist(N01.getNode());
9488       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9489     }
9490   }
9491   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9492   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9493       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9494     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9495     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9496       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9497       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9498       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
      AddToWorklist(N00.getNode());
      AddToWorklist(N01.getNode());
9500       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9501     }
9502   }
9503 
9504   // fold (not (neg x)) -> (add X, -1)
9505   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9506   // Y is a constant or the subtract has a single use.
9507   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9508       isNullConstant(N0.getOperand(0))) {
9509     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9510                        DAG.getAllOnesConstant(DL, VT));
9511   }
9512 
9513   // fold (not (add X, -1)) -> (neg X)
9514   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9515       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9516     return DAG.getNegative(N0.getOperand(0), DL, VT);
9517   }
9518 
9519   // fold (xor (and x, y), y) -> (and (not x), y)
9520   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9521     SDValue X = N0.getOperand(0);
9522     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9523     AddToWorklist(NotX.getNode());
9524     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9525   }
9526 
9527   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
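  // e.g. for i8 X = -5 (0xFB): Y = sra(X, 7) = 0xFF, add(X, Y) = 0xFA, and
  // xor(0xFA, 0xFF) = 0x05 == abs(-5).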
9528   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9529     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9530     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9531     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9532       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9533       SDValue S0 = S.getOperand(0);
9534       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9535         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9536           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9537             return DAG.getNode(ISD::ABS, DL, VT, S0);
9538     }
9539   }
9540 
9541   // fold (xor x, x) -> 0
9542   if (N0 == N1)
9543     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9544 
9545   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9546   // Here is a concrete example of this equivalence:
9547   // i16   x ==  14
9548   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
9549   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9550   //
9551   // =>
9552   //
9553   // i16     ~1      == 0b1111111111111110
9554   // i16 rol(~1, 14) == 0b1011111111111111
9555   //
9556   // Some additional tips to help conceptualize this transform:
9557   // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x for which the result would be zero.
9559   // - Values of x larger than the bitwidth are undefined and do not require a
9560   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
9562   // A rotate left of ~1 is a nice way of achieving the desired result.
9563   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9564       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9565     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9566                        N0.getOperand(1));
9567   }
9568 
9569   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
9570   if (N0Opcode == N1.getOpcode())
9571     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9572       return V;
9573 
9574   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9575     return R;
9576   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9577     return R;
9578   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9579     return R;
9580 
9581   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
9582   if (SDValue MM = unfoldMaskedMerge(N))
9583     return MM;
9584 
9585   // Simplify the expression using non-local knowledge.
9586   if (SimplifyDemandedBits(SDValue(N, 0)))
9587     return SDValue(N, 0);
9588 
9589   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9590     return Combined;
9591 
9592   return SDValue();
9593 }
9594 
9595 /// If we have a shift-by-constant of a bitwise logic op that itself has a
9596 /// shift-by-constant operand with identical opcode, we may be able to convert
9597 /// that into 2 independent shifts followed by the logic op. This is a
9598 /// throughput improvement.
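/// For example: (shl (and (shl X, 2), Y), 3) --> (and (shl X, 5), (shl Y, 3)),
/// replacing a serial shift/logic/shift chain with two independent shifts.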
9599 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9600   // Match a one-use bitwise logic op.
9601   SDValue LogicOp = Shift->getOperand(0);
9602   if (!LogicOp.hasOneUse())
9603     return SDValue();
9604 
9605   unsigned LogicOpcode = LogicOp.getOpcode();
9606   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9607       LogicOpcode != ISD::XOR)
9608     return SDValue();
9609 
9610   // Find a matching one-use shift by constant.
9611   unsigned ShiftOpcode = Shift->getOpcode();
9612   SDValue C1 = Shift->getOperand(1);
9613   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9614   assert(C1Node && "Expected a shift with constant operand");
9615   const APInt &C1Val = C1Node->getAPIntValue();
9616   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9617                              const APInt *&ShiftAmtVal) {
9618     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9619       return false;
9620 
9621     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9622     if (!ShiftCNode)
9623       return false;
9624 
9625     // Capture the shifted operand and shift amount value.
9626     ShiftOp = V.getOperand(0);
9627     ShiftAmtVal = &ShiftCNode->getAPIntValue();
9628 
9629     // Shift amount types do not have to match their operand type, so check that
9630     // the constants are the same width.
9631     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9632       return false;
9633 
9634     // The fold is not valid if the sum of the shift values exceeds bitwidth.
9635     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
9636       return false;
9637 
9638     return true;
9639   };
9640 
9641   // Logic ops are commutative, so check each operand for a match.
9642   SDValue X, Y;
9643   const APInt *C0Val;
9644   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9645     Y = LogicOp.getOperand(1);
9646   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9647     Y = LogicOp.getOperand(0);
9648   else
9649     return SDValue();
9650 
9651   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9652   SDLoc DL(Shift);
9653   EVT VT = Shift->getValueType(0);
9654   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9655   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9656   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9657   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9658   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
9659 }
9660 
9661 /// Handle transforms common to the three shifts, when the shift amount is a
9662 /// constant.
9663 /// We are looking for: (shift being one of shl/sra/srl)
9664 ///   shift (binop X, C0), C1
9665 /// And want to transform into:
9666 ///   binop (shift X, C1), (shift C0, C1)
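/// For example: (shl (or X, 7), 2) --> (or (shl X, 2), 28).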
9667 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9668   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9669 
9670   // Do not turn a 'not' into a regular xor.
9671   if (isBitwiseNot(N->getOperand(0)))
9672     return SDValue();
9673 
9674   // The inner binop must be one-use, since we want to replace it.
9675   SDValue LHS = N->getOperand(0);
9676   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9677     return SDValue();
9678 
9679   // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9680   if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9681     return R;
9682 
9683   // We want to pull some binops through shifts, so that we have (and (shift))
9684   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
9685   // thing happens with address calculations, so it's important to canonicalize
9686   // it.
9687   switch (LHS.getOpcode()) {
9688   default:
9689     return SDValue();
9690   case ISD::OR:
9691   case ISD::XOR:
9692   case ISD::AND:
9693     break;
9694   case ISD::ADD:
9695     if (N->getOpcode() != ISD::SHL)
9696       return SDValue(); // only shl(add) not sr[al](add).
9697     break;
9698   }
9699 
  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases once we figure out when it
  // is exactly profitable.
9703   SDValue BinOpLHSVal = LHS.getOperand(0);
9704   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9705                             BinOpLHSVal.getOpcode() == ISD::SRA ||
9706                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
9707                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9708   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9709                         BinOpLHSVal.getOpcode() == ISD::SELECT;
9710 
9711   if (!IsShiftByConstant && !IsCopyOrSelect)
9712     return SDValue();
9713 
9714   if (IsCopyOrSelect && N->hasOneUse())
9715     return SDValue();
9716 
9717   // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9718   SDLoc DL(N);
9719   EVT VT = N->getValueType(0);
9720   if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9721           N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9722     SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9723                                    N->getOperand(1));
9724     return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9725   }
9726 
9727   return SDValue();
9728 }
9729 
9730 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9731   assert(N->getOpcode() == ISD::TRUNCATE);
9732   assert(N->getOperand(0).getOpcode() == ISD::AND);
9733 
9734   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
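  // e.g. (truncate:i16 (and:i32 N00, 0xFF)) ->
  //      (and:i16 (truncate:i16 N00), 0xFF)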
9735   EVT TruncVT = N->getValueType(0);
9736   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9737       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9738     SDValue N01 = N->getOperand(0).getOperand(1);
9739     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9740       SDLoc DL(N);
9741       SDValue N00 = N->getOperand(0).getOperand(0);
9742       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9743       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9744       AddToWorklist(Trunc00.getNode());
9745       AddToWorklist(Trunc01.getNode());
9746       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9747     }
9748   }
9749 
9750   return SDValue();
9751 }
9752 
9753 SDValue DAGCombiner::visitRotate(SDNode *N) {
9754   SDLoc dl(N);
9755   SDValue N0 = N->getOperand(0);
9756   SDValue N1 = N->getOperand(1);
9757   EVT VT = N->getValueType(0);
9758   unsigned Bitsize = VT.getScalarSizeInBits();
9759 
9760   // fold (rot x, 0) -> x
9761   if (isNullOrNullSplat(N1))
9762     return N0;
9763 
9764   // fold (rot x, c) -> x iff (c % BitSize) == 0
9765   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9766     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9767     if (DAG.MaskedValueIsZero(N1, ModuloMask))
9768       return N0;
9769   }
9770 
9771   // fold (rot x, c) -> (rot x, c % BitSize)
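  // e.g. (rotl i8 x, 11) -> (rotl i8 x, 3), since 11 % 8 == 3.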
9772   bool OutOfRange = false;
9773   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9774     OutOfRange |= C->getAPIntValue().uge(Bitsize);
9775     return true;
9776   };
9777   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9778     EVT AmtVT = N1.getValueType();
9779     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9780     if (SDValue Amt =
9781             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9782       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9783   }
9784 
9785   // rot i16 X, 8 --> bswap X
9786   auto *RotAmtC = isConstOrConstSplat(N1);
9787   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9788       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9789     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9790 
9791   // Simplify the operands using demanded-bits information.
9792   if (SimplifyDemandedBits(SDValue(N, 0)))
9793     return SDValue(N, 0);
9794 
9795   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9796   if (N1.getOpcode() == ISD::TRUNCATE &&
9797       N1.getOperand(0).getOpcode() == ISD::AND) {
9798     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9799       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9800   }
9801 
9802   unsigned NextOp = N0.getOpcode();
9803 
9804   // fold (rot* (rot* x, c2), c1)
9805   //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
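  // e.g. for i8: (rotl (rotr x, 3), 5)
  //   -> (rotl x, (5 - 3 + 8) % 8) == (rotl x, 2).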
9806   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9807     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9808     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9809     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9810       EVT ShiftVT = C1->getValueType(0);
9811       bool SameSide = (N->getOpcode() == NextOp);
9812       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9813       SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9814       SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9815                                                  {N1, BitsizeC});
9816       SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9817                                                  {N0.getOperand(1), BitsizeC});
9818       if (Norm1 && Norm2)
9819         if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9820                 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9821           CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9822                                                      {CombinedShift, BitsizeC});
9823           SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9824               ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9825           return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9826                              CombinedShiftNorm);
9827         }
9828     }
9829   }
9830   return SDValue();
9831 }
9832 
9833 SDValue DAGCombiner::visitSHL(SDNode *N) {
9834   SDValue N0 = N->getOperand(0);
9835   SDValue N1 = N->getOperand(1);
9836   if (SDValue V = DAG.simplifyShift(N0, N1))
9837     return V;
9838 
9839   EVT VT = N0.getValueType();
9840   EVT ShiftVT = N1.getValueType();
9841   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9842 
9843   // fold (shl c1, c2) -> c1<<c2
9844   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
9845     return C;
9846 
9847   // fold vector ops
9848   if (VT.isVector()) {
9849     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9850       return FoldedVOp;
9851 
9852     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9853     // If setcc produces all-one true value then:
9854     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9855     if (N1CV && N1CV->isConstant()) {
9856       if (N0.getOpcode() == ISD::AND) {
9857         SDValue N00 = N0->getOperand(0);
9858         SDValue N01 = N0->getOperand(1);
9859         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9860 
9861         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9862             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9863                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9864           if (SDValue C =
9865                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
9866             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
9867         }
9868       }
9869     }
9870   }
9871 
9872   if (SDValue NewSel = foldBinOpIntoSelect(N))
9873     return NewSel;
9874 
9875   // if (shl x, c) is known to be zero, return 0
9876   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9877     return DAG.getConstant(0, SDLoc(N), VT);
9878 
9879   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9880   if (N1.getOpcode() == ISD::TRUNCATE &&
9881       N1.getOperand(0).getOpcode() == ISD::AND) {
9882     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9883       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
9884   }
9885 
9886   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
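  // e.g. (shl (shl x, 3), 2) -> (shl x, 5), while for i32
  //      (shl (shl x, 30), 5) -> 0 because 30 + 5 >= 32.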
9887   if (N0.getOpcode() == ISD::SHL) {
9888     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9889                                           ConstantSDNode *RHS) {
9890       APInt c1 = LHS->getAPIntValue();
9891       APInt c2 = RHS->getAPIntValue();
9892       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9893       return (c1 + c2).uge(OpSizeInBits);
9894     };
9895     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9896       return DAG.getConstant(0, SDLoc(N), VT);
9897 
9898     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9899                                        ConstantSDNode *RHS) {
9900       APInt c1 = LHS->getAPIntValue();
9901       APInt c2 = RHS->getAPIntValue();
9902       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9903       return (c1 + c2).ult(OpSizeInBits);
9904     };
9905     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9906       SDLoc DL(N);
9907       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9908       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9909     }
9910   }
9911 
9912   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9913   // For this to be valid, the second form must not preserve any of the bits
9914   // that are shifted out by the inner shift in the first form.  This means
9915   // the outer shift size must be >= the number of bits added by the ext.
9916   // As a corollary, we don't care what kind of ext it is.
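  // e.g. (shl (zext:i64 (shl:i32 x, 4)), 36) -> (shl (zext:i64 x), 40): any
  // bit discarded by the inner i32 shift would be shifted past bit 63 anyway.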
9917   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9918        N0.getOpcode() == ISD::ANY_EXTEND ||
9919        N0.getOpcode() == ISD::SIGN_EXTEND) &&
9920       N0.getOperand(0).getOpcode() == ISD::SHL) {
9921     SDValue N0Op0 = N0.getOperand(0);
9922     SDValue InnerShiftAmt = N0Op0.getOperand(1);
9923     EVT InnerVT = N0Op0.getValueType();
9924     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9925 
9926     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9927                                                          ConstantSDNode *RHS) {
9928       APInt c1 = LHS->getAPIntValue();
9929       APInt c2 = RHS->getAPIntValue();
9930       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9931       return c2.uge(OpSizeInBits - InnerBitwidth) &&
9932              (c1 + c2).uge(OpSizeInBits);
9933     };
9934     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9935                                   /*AllowUndefs*/ false,
9936                                   /*AllowTypeMismatch*/ true))
9937       return DAG.getConstant(0, SDLoc(N), VT);
9938 
9939     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9940                                                       ConstantSDNode *RHS) {
9941       APInt c1 = LHS->getAPIntValue();
9942       APInt c2 = RHS->getAPIntValue();
9943       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9944       return c2.uge(OpSizeInBits - InnerBitwidth) &&
9945              (c1 + c2).ult(OpSizeInBits);
9946     };
9947     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9948                                   /*AllowUndefs*/ false,
9949                                   /*AllowTypeMismatch*/ true)) {
9950       SDLoc DL(N);
9951       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9952       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9953       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9954       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9955     }
9956   }
9957 
9958   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9959   // Only fold this if the inner zext has no other uses to avoid increasing
9960   // the total number of instructions.
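  // e.g. (shl (zext:i32 (srl:i16 x, 4)), 4)
  //   -> (zext:i32 (shl:i16 (srl:i16 x, 4), 4)); the i16 shl cannot lose bits
  //      because the srl cleared the top 4 bits.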
9961   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9962       N0.getOperand(0).getOpcode() == ISD::SRL) {
9963     SDValue N0Op0 = N0.getOperand(0);
9964     SDValue InnerShiftAmt = N0Op0.getOperand(1);
9965 
9966     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9967       APInt c1 = LHS->getAPIntValue();
9968       APInt c2 = RHS->getAPIntValue();
9969       zeroExtendToMatch(c1, c2);
9970       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9971     };
9972     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9973                                   /*AllowUndefs*/ false,
9974                                   /*AllowTypeMismatch*/ true)) {
9975       SDLoc DL(N);
9976       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9977       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9978       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9979       AddToWorklist(NewSHL.getNode());
9980       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9981     }
9982   }
9983 
9984   if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9985     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9986                                            ConstantSDNode *RHS) {
9987       const APInt &LHSC = LHS->getAPIntValue();
9988       const APInt &RHSC = RHS->getAPIntValue();
9989       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9990              LHSC.getZExtValue() <= RHSC.getZExtValue();
9991     };
9992 
9993     SDLoc DL(N);
9994 
    // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
    // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1 >= C2
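    // e.g. (shl (srl exact X, 2), 5) -> (shl X, 3), and
    //      (shl (srl exact X, 5), 2) -> (srl X, 3).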
9997     if (N0->getFlags().hasExact()) {
9998       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
9999                                     /*AllowUndefs*/ false,
10000                                     /*AllowTypeMismatch*/ true)) {
10001         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10002         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10003         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10004       }
10005       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10006                                     /*AllowUndefs*/ false,
10007                                     /*AllowTypeMismatch*/ true)) {
10008         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10009         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10010         return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10011       }
10012     }
10013 
    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    //                               (and (srl x, (sub c1, c2)), MASK)
10016     // Only fold this if the inner shift has no other uses -- if it does,
10017     // folding this will increase the total number of instructions.
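    // e.g. for i32: (shl (srl x, 4), 4) -> (and x, 0xFFFFFFF0).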
10018     if (N0.getOpcode() == ISD::SRL &&
10019         (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10020         TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10021       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10022                                     /*AllowUndefs*/ false,
10023                                     /*AllowTypeMismatch*/ true)) {
10024         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10025         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10026         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10027         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10028         Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10029         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10030         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10031       }
10032       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10033                                     /*AllowUndefs*/ false,
10034                                     /*AllowTypeMismatch*/ true)) {
10035         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10036         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10037         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10038         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10039         SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10040         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10041       }
10042     }
10043   }
10044 
10045   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
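  // e.g. for i8: (shl (sra x, 3), 3) -> (and x, 0xF8).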
10046   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10047       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10048     SDLoc DL(N);
10049     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10050     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10051     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10052   }
10053 
10054   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10055   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10056   // Variant of version done on multiply, except mul by a power of 2 is turned
10057   // into a shift.
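  // e.g. (shl (add x, 5), 2) -> (add (shl x, 2), 20).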
10058   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10059       N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10060     SDValue N01 = N0.getOperand(1);
10061     if (SDValue Shl1 =
10062             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10063       SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10064       AddToWorklist(Shl0.getNode());
10065       SDNodeFlags Flags;
10066       // Preserve the disjoint flag for Or.
10067       if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10068         Flags.setDisjoint(true);
10069       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
10070     }
10071   }
10072 
10073   // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10074   // TODO: Add zext/add_nuw variant with suitable test coverage
10075   // TODO: Should we limit this with isLegalAddImmediate?
10076   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10077       N0.getOperand(0).getOpcode() == ISD::ADD &&
10078       N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10079       N0.getOperand(0)->hasOneUse() &&
10080       TLI.isDesirableToCommuteWithShift(N, Level)) {
10081     SDValue Add = N0.getOperand(0);
10082     SDLoc DL(N0);
10083     if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10084                                                   {Add.getOperand(1)})) {
10085       if (SDValue ShlC =
10086               DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10087         SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10088         SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10089         return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10090       }
10091     }
10092   }
10093 
10094   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10095   if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10096     SDValue N01 = N0.getOperand(1);
10097     if (SDValue Shl =
10098             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10099       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
10100   }
10101 
10102   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10103   if (N1C && !N1C->isOpaque())
10104     if (SDValue NewSHL = visitShiftByConstant(N))
10105       return NewSHL;
10106 
10107   if (SimplifyDemandedBits(SDValue(N, 0)))
10108     return SDValue(N, 0);
10109 
10110   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10111   if (N0.getOpcode() == ISD::VSCALE && N1C) {
10112     const APInt &C0 = N0.getConstantOperandAPInt(0);
10113     const APInt &C1 = N1C->getAPIntValue();
10114     return DAG.getVScale(SDLoc(N), VT, C0 << C1);
10115   }
10116 
10117   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10118   APInt ShlVal;
10119   if (N0.getOpcode() == ISD::STEP_VECTOR &&
10120       ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10121     const APInt &C0 = N0.getConstantOperandAPInt(0);
10122     if (ShlVal.ult(C0.getBitWidth())) {
10123       APInt NewStep = C0 << ShlVal;
10124       return DAG.getStepVector(SDLoc(N), VT, NewStep);
10125     }
10126   }
10127 
10128   return SDValue();
10129 }
10130 
10131 // Transform a right shift of a multiply into a multiply-high.
10132 // Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10135 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
10136                                   const TargetLowering &TLI) {
10137   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10138          "SRL or SRA node is required here!");
10139 
10140   // Check the shift amount. Proceed with the transformation if the shift
10141   // amount is constant.
10142   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10143   if (!ShiftAmtSrc)
10144     return SDValue();
10145 
10146   SDLoc DL(N);
10147 
10148   // The operation feeding into the shift must be a multiply.
10149   SDValue ShiftOperand = N->getOperand(0);
10150   if (ShiftOperand.getOpcode() != ISD::MUL)
10151     return SDValue();
10152 
10153   // Both operands must be equivalent extend nodes.
10154   SDValue LeftOp = ShiftOperand.getOperand(0);
10155   SDValue RightOp = ShiftOperand.getOperand(1);
10156 
10157   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10158   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10159 
10160   if (!IsSignExt && !IsZeroExt)
10161     return SDValue();
10162 
10163   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10164   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10165 
  // Return true if U may use the lower bits of its operands.
10167   auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10168     if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10169       return true;
10170     }
10171     ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10172     if (!UShiftAmtSrc) {
10173       return true;
10174     }
10175     unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10176     return UShiftAmt < NarrowVTSize;
10177   };
10178 
  // If the lower part of the MUL is also used and MUL_LOHI is supported,
  // do not introduce the MULH in favor of MUL_LOHI.
10181   unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10182   if (!ShiftOperand.hasOneUse() &&
10183       TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10184       llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10185     return SDValue();
10186   }
10187 
10188   SDValue MulhRightOp;
10189   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10190     unsigned ActiveBits = IsSignExt
10191                               ? Constant->getAPIntValue().getSignificantBits()
10192                               : Constant->getAPIntValue().getActiveBits();
10193     if (ActiveBits > NarrowVTSize)
10194       return SDValue();
10195     MulhRightOp = DAG.getConstant(
10196         Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10197         NarrowVT);
10198   } else {
10199     if (LeftOp.getOpcode() != RightOp.getOpcode())
10200       return SDValue();
10201     // Check that the two extend nodes are the same type.
10202     if (NarrowVT != RightOp.getOperand(0).getValueType())
10203       return SDValue();
10204     MulhRightOp = RightOp.getOperand(0);
10205   }
10206 
10207   EVT WideVT = LeftOp.getValueType();
10208   // Proceed with the transformation if the wide types match.
10209   assert((WideVT == RightOp.getValueType()) &&
10210          "Cannot have a multiply node with two different operand types.");
10211 
10212   // Proceed with the transformation if the wide type is twice as large
10213   // as the narrow type.
10214   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10215     return SDValue();
10216 
10217   // Check the shift amount with the narrow type size.
10218   // Proceed with the transformation if the shift amount is the width
10219   // of the narrow type.
10220   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10221   if (ShiftAmt != NarrowVTSize)
10222     return SDValue();
10223 
10224   // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10226   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10227 
  // Combine to mulh if mulh is legal/custom for the narrow type on the target.
  // If it is a vector type, we could instead transform to an acceptable type
  // and rely on legalization to split/combine the result.
10231   if (NarrowVT.isVector()) {
10232     EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10233     if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10234         !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10235       return SDValue();
10236   } else {
10237     if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10238       return SDValue();
10239   }
10240 
10241   SDValue Result =
10242       DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10243   bool IsSigned = N->getOpcode() == ISD::SRA;
10244   return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10245 }
10246 
10247 // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
// This helper function accepts SDNodes with opcode ISD::BSWAP or
// ISD::BITREVERSE.
10249 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10250   unsigned Opcode = N->getOpcode();
10251   if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10252     return SDValue();
10253 
10254   SDValue N0 = N->getOperand(0);
10255   EVT VT = N->getValueType(0);
10256   SDLoc DL(N);
10257   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10258     SDValue OldLHS = N0.getOperand(0);
10259     SDValue OldRHS = N0.getOperand(1);
10260 
    // If both operands are bswap/bitreverse, ignore the multiuse.
    // Otherwise we need to ensure logic_op and bswap/bitreverse(x) have one
    // use.
10263     if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10264       return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10265                          OldRHS.getOperand(0));
10266     }
10267 
10268     if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10269       SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10270       return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10271                          NewBitReorder);
10272     }
10273 
10274     if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10275       SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10276       return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10277                          OldRHS.getOperand(0));
10278     }
10279   }
10280   return SDValue();
10281 }
10282 
10283 SDValue DAGCombiner::visitSRA(SDNode *N) {
10284   SDValue N0 = N->getOperand(0);
10285   SDValue N1 = N->getOperand(1);
10286   if (SDValue V = DAG.simplifyShift(N0, N1))
10287     return V;
10288 
10289   EVT VT = N0.getValueType();
10290   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10291 
  // fold (sra c1, c2) -> c1 >>s c2
10293   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
10294     return C;
10295 
10296   // Arithmetic shifting an all-sign-bit value is a no-op.
10297   // fold (sra 0, x) -> 0
10298   // fold (sra -1, x) -> -1
10299   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10300     return N0;
10301 
10302   // fold vector ops
10303   if (VT.isVector())
10304     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10305       return FoldedVOp;
10306 
10307   if (SDValue NewSel = foldBinOpIntoSelect(N))
10308     return NewSel;
10309 
10310   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10311 
10312   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10313   // clamp (add c1, c2) to max shift.
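  // e.g. for i8: (sra (sra x, 3), 6) -> (sra x, 7), since 3 + 6 >= 8 clamps
  // to the maximum shift of 7.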
10314   if (N0.getOpcode() == ISD::SRA) {
10315     SDLoc DL(N);
10316     EVT ShiftVT = N1.getValueType();
10317     EVT ShiftSVT = ShiftVT.getScalarType();
10318     SmallVector<SDValue, 16> ShiftValues;
10319 
10320     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10321       APInt c1 = LHS->getAPIntValue();
10322       APInt c2 = RHS->getAPIntValue();
10323       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10324       APInt Sum = c1 + c2;
10325       unsigned ShiftSum =
10326           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10327       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10328       return true;
10329     };
10330     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10331       SDValue ShiftValue;
10332       if (N1.getOpcode() == ISD::BUILD_VECTOR)
10333         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10334       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10335         assert(ShiftValues.size() == 1 &&
10336                "Expected matchBinaryPredicate to return one element for "
10337                "SPLAT_VECTORs");
10338         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10339       } else
10340         ShiftValue = ShiftValues[0];
10341       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10342     }
10343   }
10344 
  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n > m.
  // If truncate is free for the target, this is likely to result in better
  // code than the sra(shl) form.
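  // e.g. for i32 with m = 8 and n = 16:
  //   (sra (shl X, 8), 16) -> (sign_extend (trunc:i16 (srl X, 8))).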
10350   if (N0.getOpcode() == ISD::SHL && N1C) {
10351     // Get the two constants of the shifts, CN0 = m, CN = n.
10352     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10353     if (N01C) {
10354       LLVMContext &Ctx = *DAG.getContext();
10355       // Determine what the truncate's result bitsize and type would be.
10356       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10357 
10358       if (VT.isVector())
10359         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10360 
10361       // Determine the residual right-shift amount.
10362       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10363 
      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncate's result type is legal, sign_extend is
      // legal on that type, and the truncate to that type is both legal and
      // free, perform the transform.
10368       if ((ShiftAmt > 0) &&
10369           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10370           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10371           TLI.isTruncateFree(VT, TruncVT)) {
10372         SDLoc DL(N);
        SDValue Amt = DAG.getConstant(
            ShiftAmt, DL, getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), Trunc);
10381       }
10382     }
10383   }
10384 
10385   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
10386   //   sra (add (shl X, N1C), AddC), N1C -->
10387   //   sext (add (trunc X to (width - N1C)), AddC')
10388   //   sra (sub AddC, (shl X, N1C)), N1C -->
  //   sext (sub AddC', (trunc X to (width - N1C)))
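  // e.g. for i32 with N1C = 16 and AddC = 0x00120000:
  //   (sra (add (shl X, 16), 0x00120000), 16)
  //     -> (sext:i32 (add (trunc:i16 X), 0x0012))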
10390   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10391       N0.hasOneUse()) {
10392     bool IsAdd = N0.getOpcode() == ISD::ADD;
10393     SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10394     if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10395         Shl.hasOneUse()) {
10396       // TODO: AddC does not need to be a splat.
10397       if (ConstantSDNode *AddC =
10398               isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10399         // Determine what the truncate's type would be and ask the target if
10400         // that is a free operation.
10401         LLVMContext &Ctx = *DAG.getContext();
10402         unsigned ShiftAmt = N1C->getZExtValue();
10403         EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10404         if (VT.isVector())
10405           TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10406 
10407         // TODO: The simple type check probably belongs in the default hook
10408         //       implementation and/or target-specific overrides (because
10409         //       non-simple types likely require masking when legalized), but
10410         //       that restriction may conflict with other transforms.
10411         if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10412             TLI.isTruncateFree(VT, TruncVT)) {
10413           SDLoc DL(N);
10414           SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10415           SDValue ShiftC =
10416               DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10417                                   TruncVT.getScalarSizeInBits()),
10418                               DL, TruncVT);
10419           SDValue Add;
10420           if (IsAdd)
10421             Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10422           else
10423             Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10424           return DAG.getSExtOrTrunc(Add, DL, VT);
10425         }
10426       }
10427     }
10428   }
10429 
10430   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10431   if (N1.getOpcode() == ISD::TRUNCATE &&
10432       N1.getOperand(0).getOpcode() == ISD::AND) {
10433     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10434       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
10435   }
10436 
10437   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10438   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10439   //      if c1 is equal to the number of bits the trunc removes
10440   // TODO - support non-uniform vector shift amounts.
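  // e.g. (sra (trunc:i32 (srl:i64 x, 32)), 8) -> (trunc:i32 (sra:i64 x, 40)),
  // since the trunc removes exactly 32 bits.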
10441   if (N0.getOpcode() == ISD::TRUNCATE &&
10442       (N0.getOperand(0).getOpcode() == ISD::SRL ||
10443        N0.getOperand(0).getOpcode() == ISD::SRA) &&
10444       N0.getOperand(0).hasOneUse() &&
10445       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10446     SDValue N0Op0 = N0.getOperand(0);
10447     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10448       EVT LargeVT = N0Op0.getValueType();
10449       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10450       if (LargeShift->getAPIntValue() == TruncBits) {
10451         SDLoc DL(N);
10452         EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10453         SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10454         Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10455                           DAG.getConstant(TruncBits, DL, LargeShiftVT));
10456         SDValue SRA =
10457             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10458         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10459       }
10460     }
10461   }
10462 
10463   // Simplify, based on bits shifted out of the LHS.
10464   if (SimplifyDemandedBits(SDValue(N, 0)))
10465     return SDValue(N, 0);
10466 
10467   // If the sign bit is known to be zero, switch this to a SRL.
10468   if (DAG.SignBitIsZero(N0))
10469     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
10470 
10471   if (N1C && !N1C->isOpaque())
10472     if (SDValue NewSRA = visitShiftByConstant(N))
10473       return NewSRA;
10474 
10475   // Try to transform this shift into a multiply-high if
10476   // it matches the appropriate pattern detected in combineShiftToMULH.
10477   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10478     return MULH;
10479 
10480   // Attempt to convert a sra of a load into a narrower sign-extending load.
10481   if (SDValue NarrowLoad = reduceLoadWidth(N))
10482     return NarrowLoad;
10483 
10484   return SDValue();
10485 }
10486 
10487 SDValue DAGCombiner::visitSRL(SDNode *N) {
10488   SDValue N0 = N->getOperand(0);
10489   SDValue N1 = N->getOperand(1);
10490   if (SDValue V = DAG.simplifyShift(N0, N1))
10491     return V;
10492 
10493   EVT VT = N0.getValueType();
10494   EVT ShiftVT = N1.getValueType();
10495   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10496 
10497   // fold (srl c1, c2) -> c1 >>u c2
10498   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
10499     return C;
10500 
10501   // fold vector ops
10502   if (VT.isVector())
10503     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10504       return FoldedVOp;
10505 
10506   if (SDValue NewSel = foldBinOpIntoSelect(N))
10507     return NewSel;
10508 
10509   // if (srl x, c) is known to be zero, return 0
10510   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10511   if (N1C &&
10512       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10513     return DAG.getConstant(0, SDLoc(N), VT);
10514 
10515   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
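  // For example, for i32: (srl (srl x, 2), 3) -> (srl x, 5), while
  // (srl (srl x, 30), 3) -> 0 because 30 + 3 >= 32.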
10516   if (N0.getOpcode() == ISD::SRL) {
10517     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10518                                           ConstantSDNode *RHS) {
10519       APInt c1 = LHS->getAPIntValue();
10520       APInt c2 = RHS->getAPIntValue();
10521       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10522       return (c1 + c2).uge(OpSizeInBits);
10523     };
10524     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10525       return DAG.getConstant(0, SDLoc(N), VT);
10526 
10527     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10528                                        ConstantSDNode *RHS) {
10529       APInt c1 = LHS->getAPIntValue();
10530       APInt c2 = RHS->getAPIntValue();
10531       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10532       return (c1 + c2).ult(OpSizeInBits);
10533     };
10534     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10535       SDLoc DL(N);
10536       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10537       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10538     }
10539   }
10540 
10541   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10542       N0.getOperand(0).getOpcode() == ISD::SRL) {
10543     SDValue InnerShift = N0.getOperand(0);
10544     // TODO - support non-uniform vector shift amounts.
10545     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10546       uint64_t c1 = N001C->getZExtValue();
10547       uint64_t c2 = N1C->getZExtValue();
10548       EVT InnerShiftVT = InnerShift.getValueType();
10549       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10550       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10551       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if OpSizeInBits + c1 equals the inner shift size.
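      // For example, with x:i64 truncated to i32 and c1 == 32 (32 + 32 == 64):
      //   srl (trunc (srl x, 32)), 5 --> trunc (srl x, 37)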
10553       if (c1 + OpSizeInBits == InnerShiftSize) {
10554         SDLoc DL(N);
10555         if (c1 + c2 >= InnerShiftSize)
10556           return DAG.getConstant(0, DL, VT);
10557         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10558         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10559                                        InnerShift.getOperand(0), NewShiftAmt);
10560         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10561       }
10562       // In the more general case, we can clear the high bits after the shift:
10563       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
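      // For example, with x:i64 truncated to i32, c1 == 16 and c2 == 4:
      //   srl (trunc (srl x, 16)), 4 --> trunc (and (srl x, 20), Mask)
      // where Mask is an i64 constant with only the low 28
      // (OpSizeInBits - c2) bits set.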
10564       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10565           c1 + c2 < InnerShiftSize) {
10566         SDLoc DL(N);
10567         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10568         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10569                                        InnerShift.getOperand(0), NewShiftAmt);
10570         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10571                                                             OpSizeInBits - c2),
10572                                        DL, InnerShiftVT);
10573         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10574         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10575       }
10576     }
10577   }
10578 
  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
  //                               (and (srl x, (sub c2, c1)), MASK)
10581   if (N0.getOpcode() == ISD::SHL &&
10582       (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10583       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10584     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10585                                            ConstantSDNode *RHS) {
10586       const APInt &LHSC = LHS->getAPIntValue();
10587       const APInt &RHSC = RHS->getAPIntValue();
10588       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10589              LHSC.getZExtValue() <= RHSC.getZExtValue();
10590     };
10591     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10592                                   /*AllowUndefs*/ false,
10593                                   /*AllowTypeMismatch*/ true)) {
10594       SDLoc DL(N);
10595       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10596       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10597       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10598       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10599       Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10600       SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10601       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10602     }
10603     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10604                                   /*AllowUndefs*/ false,
10605                                   /*AllowTypeMismatch*/ true)) {
10606       SDLoc DL(N);
10607       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10608       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10609       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10610       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10611       SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10612       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10613     }
10614   }
10615 
10616   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10617   // TODO - support non-uniform vector shift amounts.
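  // For example:
  //   srl (any_extend x:i32 to i64), 8
  //     --> and (any_extend (srl x, 8)), 0x00FFFFFFFFFFFFFF
  // so the undef high bits brought in by any_extend are masked off.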
10618   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10619     // Shifting in all undef bits?
10620     EVT SmallVT = N0.getOperand(0).getValueType();
10621     unsigned BitSize = SmallVT.getScalarSizeInBits();
10622     if (N1C->getAPIntValue().uge(BitSize))
10623       return DAG.getUNDEF(VT);
10624 
10625     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10626       uint64_t ShiftAmt = N1C->getZExtValue();
10627       SDLoc DL0(N0);
10628       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10629                                        N0.getOperand(0),
10630                           DAG.getConstant(ShiftAmt, DL0,
10631                                           getShiftAmountTy(SmallVT)));
10632       AddToWorklist(SmallShift.getNode());
10633       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10634       SDLoc DL(N);
10635       return DAG.getNode(ISD::AND, DL, VT,
10636                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10637                          DAG.getConstant(Mask, DL, VT));
10638     }
10639   }
10640 
10641   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
10642   // bit, which is unmodified by sra.
10643   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10644     if (N0.getOpcode() == ISD::SRA)
10645       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
10646   }
10647 
  // fold (srl (ctlz x), "5") -> (xor x, 1) iff the only bit that may be set
  // in x is the low bit, and x has a power of two bitwidth. The "5"
  // represents (log2 (bitwidth x)).
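  // For example, for i32, if the only bit of x that may be nonzero is bit 3:
  //   srl (ctlz x), 5 --> xor (srl x, 3), 1
  // since ctlz yields 28 (bit 3 set) or 32 (x == 0).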
10650   if (N1C && N0.getOpcode() == ISD::CTLZ &&
10651       isPowerOf2_32(OpSizeInBits) &&
10652       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10653     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10654 
10655     // If any of the input bits are KnownOne, then the input couldn't be all
10656     // zeros, thus the result of the srl will always be zero.
10657     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10658 
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is the bitwidth and the result of the shift is
    // one.
10661     APInt UnknownBits = ~Known.Zero;
10662     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10663 
10664     // Otherwise, check to see if there is exactly one bit input to the ctlz.
10665     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on input to the CTLZ node. If this bit is set, the SRL will
      // return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair to
      // an SRL/XOR pair, which is likely to simplify more.
10670       unsigned ShAmt = UnknownBits.countr_zero();
10671       SDValue Op = N0.getOperand(0);
10672 
10673       if (ShAmt) {
10674         SDLoc DL(N0);
10675         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10676                   DAG.getConstant(ShAmt, DL,
10677                                   getShiftAmountTy(Op.getValueType())));
10678         AddToWorklist(Op.getNode());
10679       }
10680 
10681       SDLoc DL(N);
10682       return DAG.getNode(ISD::XOR, DL, VT,
10683                          Op, DAG.getConstant(1, DL, VT));
10684     }
10685   }
10686 
10687   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10688   if (N1.getOpcode() == ISD::TRUNCATE &&
10689       N1.getOperand(0).getOpcode() == ISD::AND) {
10690     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10691       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
10692   }
10693 
10694   // fold operands of srl based on knowledge that the low bits are not
10695   // demanded.
10696   if (SimplifyDemandedBits(SDValue(N, 0)))
10697     return SDValue(N, 0);
10698 
10699   if (N1C && !N1C->isOpaque())
10700     if (SDValue NewSRL = visitShiftByConstant(N))
10701       return NewSRL;
10702 
10703   // Attempt to convert a srl of a load into a narrower zero-extending load.
10704   if (SDValue NarrowLoad = reduceLoadWidth(N))
10705     return NarrowLoad;
10706 
10707   // Here is a common situation. We want to optimize:
10708   //
10709   //   %a = ...
10710   //   %b = and i32 %a, 2
10711   //   %c = srl i32 %b, 1
10712   //   brcond i32 %c ...
10713   //
10714   // into
10715   //
10716   //   %a = ...
10717   //   %b = and %a, 2
10718   //   %c = setcc eq %b, 0
10719   //   brcond %c ...
10720   //
  // However, once the source operand of the SRL is optimized into an AND, the
  // SRL itself may not be optimized further. Look for it and add the BRCOND
  // into the worklist.
  //
  // This also tends to happen for binary operations when SimplifyDemandedBits
  // is involved.
10727   //
  // FIXME: This is unnecessary if we process the DAG in topological order,
10729   // which we plan to do. This workaround can be removed once the DAG is
10730   // processed in topological order.
10731   if (N->hasOneUse()) {
10732     SDNode *Use = *N->use_begin();
10733 
    // Look past the truncate.
10735     if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10736       Use = *Use->use_begin();
10737 
10738     if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10739         Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10740       AddToWorklist(Use);
10741   }
10742 
10743   // Try to transform this shift into a multiply-high if
10744   // it matches the appropriate pattern detected in combineShiftToMULH.
10745   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10746     return MULH;
10747 
10748   return SDValue();
10749 }
10750 
10751 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10752   EVT VT = N->getValueType(0);
10753   SDValue N0 = N->getOperand(0);
10754   SDValue N1 = N->getOperand(1);
10755   SDValue N2 = N->getOperand(2);
10756   bool IsFSHL = N->getOpcode() == ISD::FSHL;
10757   unsigned BitWidth = VT.getScalarSizeInBits();
10758 
10759   // fold (fshl N0, N1, 0) -> N0
10760   // fold (fshr N0, N1, 0) -> N1
10761   if (isPowerOf2_32(BitWidth))
10762     if (DAG.MaskedValueIsZero(
10763             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10764       return IsFSHL ? N0 : N1;
10765 
10766   auto IsUndefOrZero = [](SDValue V) {
10767     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10768   };
10769 
10770   // TODO - support non-uniform vector shift amounts.
10771   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10772     EVT ShAmtTy = N2.getValueType();
10773 
10774     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
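    // For example, for i32: fshl(x, y, 37) -> fshl(x, y, 5).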
10775     if (Cst->getAPIntValue().uge(BitWidth)) {
10776       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10777       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10778                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10779     }
10780 
10781     unsigned ShAmt = Cst->getZExtValue();
10782     if (ShAmt == 0)
10783       return IsFSHL ? N0 : N1;
10784 
10785     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10786     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10787     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10788     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
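    // For example, for i32 with C == 8:
    //   fshl(0, y, 8) -> lshr(y, 24) and fshr(x, 0, 8) -> shl(x, 24).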
10789     if (IsUndefOrZero(N0))
10790       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10791                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10792                                          SDLoc(N), ShAmtTy));
10793     if (IsUndefOrZero(N1))
10794       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10795                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10796                                          SDLoc(N), ShAmtTy));
10797 
10798     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10799     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10800     // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10802     // TODO - permit LHS EXTLOAD if extensions are shifted out.
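    // For example (little-endian), if ld0 loads i32 from p and ld1 loads i32
    // from p+4, then fshl(ld1, ld0, 8) == (ld1 << 8) | (ld0 >> 24), which is
    // the i32 stored at p+3, so PtrOff below evaluates to 3.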
10803     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10804         !DAG.getDataLayout().isBigEndian()) {
10805       auto *LHS = dyn_cast<LoadSDNode>(N0);
10806       auto *RHS = dyn_cast<LoadSDNode>(N1);
10807       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10808           LHS->getAddressSpace() == RHS->getAddressSpace() &&
10809           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10810           ISD::isNON_EXTLoad(LHS)) {
10811         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10812           SDLoc DL(RHS);
10813           uint64_t PtrOff =
10814               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10815           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10816           unsigned Fast = 0;
10817           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10818                                      RHS->getAddressSpace(), NewAlign,
10819                                      RHS->getMemOperand()->getFlags(), &Fast) &&
10820               Fast) {
10821             SDValue NewPtr = DAG.getMemBasePlusOffset(
10822                 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10823             AddToWorklist(NewPtr.getNode());
10824             SDValue Load = DAG.getLoad(
10825                 VT, DL, RHS->getChain(), NewPtr,
10826                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10827                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10828             // Replace the old load's chain with the new load's chain.
10829             WorklistRemover DeadNodes(*this);
10830             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10831             return Load;
10832           }
10833         }
10834       }
10835     }
10836   }
10837 
10838   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10839   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
10841   // TODO: when is it worth doing SUB(BW, N2) as well?
10842   if (isPowerOf2_32(BitWidth)) {
10843     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10844     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10845       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10846     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10847       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10848   }
10849 
10850   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10851   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the
  // funnel shift is legal as well, we might be better off avoiding the
  // non-constant (BW - N2).
10854   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10855   if (N0 == N1 && hasOperation(RotOpc, VT))
10856     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10857 
10858   // Simplify, based on bits shifted out of N0/N1.
10859   if (SimplifyDemandedBits(SDValue(N, 0)))
10860     return SDValue(N, 0);
10861 
10862   return SDValue();
10863 }
10864 
10865 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10866   SDValue N0 = N->getOperand(0);
10867   SDValue N1 = N->getOperand(1);
10868   if (SDValue V = DAG.simplifyShift(N0, N1))
10869     return V;
10870 
10871   EVT VT = N0.getValueType();
10872 
10873   // fold (*shlsat c1, c2) -> c1<<c2
10874   if (SDValue C =
10875           DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
10876     return C;
10877 
10878   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10879 
10880   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10881     // fold (sshlsat x, c) -> (shl x, c)
10882     if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10883         N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10884       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10885 
10886     // fold (ushlsat x, c) -> (shl x, c)
10887     if (N->getOpcode() == ISD::USHLSAT && N1C &&
10888         N1C->getAPIntValue().ule(
10889             DAG.computeKnownBits(N0).countMinLeadingZeros()))
10890       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10891   }
10892 
10893   return SDValue();
10894 }
10895 
// Given an ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates a UABD/SABD instruction.
10900 SDValue DAGCombiner::foldABSToABD(SDNode *N) {
10901   EVT SrcVT = N->getValueType(0);
10902 
10903   if (N->getOpcode() == ISD::TRUNCATE)
10904     N = N->getOperand(0).getNode();
10905 
10906   if (N->getOpcode() != ISD::ABS)
10907     return SDValue();
10908 
10909   EVT VT = N->getValueType(0);
10910   SDValue AbsOp1 = N->getOperand(0);
10911   SDValue Op0, Op1;
10912   SDLoc DL(N);
10913 
10914   if (AbsOp1.getOpcode() != ISD::SUB)
10915     return SDValue();
10916 
10917   Op0 = AbsOp1.getOperand(0);
10918   Op1 = AbsOp1.getOperand(1);
10919 
10920   unsigned Opc0 = Op0.getOpcode();
10921 
10922   // Check if the operands of the sub are (zero|sign)-extended.
10923   // TODO: Should we use ValueTracking instead?
10924   if (Opc0 != Op1.getOpcode() ||
10925       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10926        Opc0 != ISD::SIGN_EXTEND_INREG)) {
10927     // fold (abs (sub nsw x, y)) -> abds(x, y)
10928     if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10929         TLI.preferABDSToABSWithNSW(VT)) {
10930       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10931       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10932     }
10933     return SDValue();
10934   }
10935 
10936   EVT VT0, VT1;
10937   if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10938     VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10939     VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10940   } else {
10941     VT0 = Op0.getOperand(0).getValueType();
10942     VT1 = Op1.getOperand(0).getValueType();
10943   }
10944   unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10945 
10946   // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10947   // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
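  // For example, with x and y both i8 (so MaxVT == i8):
  //   abs (sub (zext x to i32), (zext y to i32)) --> zext (abdu x, y) to i32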
10948   EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10949   if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10950       (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10951     SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10952                               DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10953                               DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10954     ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10955     return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10956   }
10957 
10958   // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10959   // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10960   if (hasOperation(ABDOpcode, VT)) {
10961     SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10962     return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10963   }
10964 
10965   return SDValue();
10966 }
10967 
10968 SDValue DAGCombiner::visitABS(SDNode *N) {
10969   SDValue N0 = N->getOperand(0);
10970   EVT VT = N->getValueType(0);
10971 
10972   // fold (abs c1) -> c2
10973   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
10974     return C;
10975   // fold (abs (abs x)) -> (abs x)
10976   if (N0.getOpcode() == ISD::ABS)
10977     return N0;
10978   // fold (abs x) -> x iff not-negative
10979   if (DAG.SignBitIsZero(N0))
10980     return N0;
10981 
10982   if (SDValue ABD = foldABSToABD(N))
10983     return ABD;
10984 
10985   // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10986   // iff zero_extend/truncate are free.
10987   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10988     EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10989     if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10990         TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10991         hasOperation(ISD::ABS, ExtVT)) {
10992       SDLoc DL(N);
10993       return DAG.getNode(
10994           ISD::ZERO_EXTEND, DL, VT,
10995           DAG.getNode(ISD::ABS, DL, ExtVT,
10996                       DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
10997     }
10998   }
10999 
11000   return SDValue();
11001 }
11002 
11003 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11004   SDValue N0 = N->getOperand(0);
11005   EVT VT = N->getValueType(0);
11006   SDLoc DL(N);
11007 
11008   // fold (bswap c1) -> c2
11009   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11010     return C;
11011   // fold (bswap (bswap x)) -> x
11012   if (N0.getOpcode() == ISD::BSWAP)
11013     return N0.getOperand(0);
11014 
11015   // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11016   // isn't supported, it will be expanded to bswap followed by a manual reversal
11017   // of bits in each byte. By placing bswaps before bitreverse, we can remove
11018   // the two bswaps if the bitreverse gets expanded.
11019   if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11020     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11021     return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11022   }
11023 
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff c >= bw/2 (i.e. lower half is known zero)
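  // For example, for i64 with c == 40:
  //   bswap (shl x, 40) --> zext (bswap (trunc.i32 (shl x, 8)))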
11026   unsigned BW = VT.getScalarSizeInBits();
11027   if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11028     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11029     EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11030     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11031         ShAmt->getZExtValue() >= (BW / 2) &&
11032         (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11033         TLI.isTruncateFree(VT, HalfVT) &&
11034         (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11035       SDValue Res = N0.getOperand(0);
11036       if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11037         Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11038                           DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
11039       Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11040       Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11041       return DAG.getZExtOrTrunc(Res, DL, VT);
11042     }
11043   }
11044 
11045   // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11046   // inverse-shift-of-bswap:
11047   // bswap (X u<< C) --> (bswap X) u>> C
11048   // bswap (X u>> C) --> (bswap X) u<< C
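  // For example, for i32: bswap (x u<< 8) --> (bswap x) u>> 8.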
11049   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11050       N0.hasOneUse()) {
11051     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11052     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11053         ShAmt->getZExtValue() % 8 == 0) {
11054       SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11055       unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11056       return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11057     }
11058   }
11059 
11060   if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11061     return V;
11062 
11063   return SDValue();
11064 }
11065 
11066 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11067   SDValue N0 = N->getOperand(0);
11068   EVT VT = N->getValueType(0);
11069   SDLoc DL(N);
11070 
11071   // fold (bitreverse c1) -> c2
11072   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11073     return C;
11074   // fold (bitreverse (bitreverse x)) -> x
11075   if (N0.getOpcode() == ISD::BITREVERSE)
11076     return N0.getOperand(0);
11077   return SDValue();
11078 }
11079 
11080 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11081   SDValue N0 = N->getOperand(0);
11082   EVT VT = N->getValueType(0);
11083   SDLoc DL(N);
11084 
11085   // fold (ctlz c1) -> c2
11086   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11087     return C;
11088 
11089   // If the value is known never to be zero, switch to the undef version.
11090   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11091     if (DAG.isKnownNeverZero(N0))
11092       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11093 
11094   return SDValue();
11095 }
11096 
11097 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11098   SDValue N0 = N->getOperand(0);
11099   EVT VT = N->getValueType(0);
11100   SDLoc DL(N);
11101 
11102   // fold (ctlz_zero_undef c1) -> c2
11103   if (SDValue C =
11104           DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11105     return C;
11106   return SDValue();
11107 }
11108 
11109 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11110   SDValue N0 = N->getOperand(0);
11111   EVT VT = N->getValueType(0);
11112   SDLoc DL(N);
11113 
11114   // fold (cttz c1) -> c2
11115   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11116     return C;
11117 
11118   // If the value is known never to be zero, switch to the undef version.
11119   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11120     if (DAG.isKnownNeverZero(N0))
11121       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11122 
11123   return SDValue();
11124 }
11125 
11126 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11127   SDValue N0 = N->getOperand(0);
11128   EVT VT = N->getValueType(0);
11129   SDLoc DL(N);
11130 
11131   // fold (cttz_zero_undef c1) -> c2
11132   if (SDValue C =
11133           DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11134     return C;
11135   return SDValue();
11136 }
11137 
11138 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11139   SDValue N0 = N->getOperand(0);
11140   EVT VT = N->getValueType(0);
11141   SDLoc DL(N);
11142 
11143   // fold (ctpop c1) -> c2
11144   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11145     return C;
11146   return SDValue();
11147 }
11148 
// FIXME: This should be checking for no signed zeros on individual operands,
// as well as no NaNs.
11151 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11152                                          SDValue RHS,
11153                                          const TargetLowering &TLI) {
11154   const TargetOptions &Options = DAG.getTarget().Options;
11155   EVT VT = LHS.getValueType();
11156 
11157   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11158          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11159          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
11160 }
11161 
11162 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11163                                        SDValue RHS, SDValue True, SDValue False,
11164                                        ISD::CondCode CC,
11165                                        const TargetLowering &TLI,
11166                                        SelectionDAG &DAG) {
11167   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11168   switch (CC) {
11169   case ISD::SETOLT:
11170   case ISD::SETOLE:
11171   case ISD::SETLT:
11172   case ISD::SETLE:
11173   case ISD::SETULT:
11174   case ISD::SETULE: {
    // Since it's already known that neither operand is NaN if we get here,
    // either fminnum or fminnum_ieee is OK. Try the ieee version first, since
    // fminnum is expanded in terms of it.
11178     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11179     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11180       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11181 
11182     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11183     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11184       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11185     return SDValue();
11186   }
11187   case ISD::SETOGT:
11188   case ISD::SETOGE:
11189   case ISD::SETGT:
11190   case ISD::SETGE:
11191   case ISD::SETUGT:
11192   case ISD::SETUGE: {
11193     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11194     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11195       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11196 
11197     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11198     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11199       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11200     return SDValue();
11201   }
11202   default:
11203     return SDValue();
11204   }
11205 }
11206 
11207 /// Generate Min/Max node
11208 SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11209                                          SDValue RHS, SDValue True,
11210                                          SDValue False, ISD::CondCode CC) {
11211   if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11212     return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11213 
11214   // If we can't directly match this, try to see if we can pull an fneg out of
11215   // the select.
11216   SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
11217       True, DAG, LegalOperations, ForCodeSize);
11218   if (!NegTrue)
11219     return SDValue();
11220 
11221   HandleSDNode NegTrueHandle(NegTrue);
11222 
11223   // Try to unfold an fneg from the select if we are comparing the negated
11224   // constant.
11225   //
11226   // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11227   //
11228   // TODO: Handle fabs
11229   if (LHS == NegTrue) {
11230     // If we can't directly match this, try to see if we can pull an fneg out of
11231     // the select.
11232     SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
11233         RHS, DAG, LegalOperations, ForCodeSize);
11234     if (NegRHS) {
11235       HandleSDNode NegRHSHandle(NegRHS);
11236       if (NegRHS == False) {
11237         SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11238                                                    False, CC, TLI, DAG);
11239         if (Combined)
11240           return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11241       }
11242     }
11243   }
11244 
11245   return SDValue();
11246 }
11247 
11248 /// If a (v)select has a condition value that is a sign-bit test, try to smear
11249 /// the condition operand sign-bit across the value width and use it as a mask.
11250 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
11251   SDValue Cond = N->getOperand(0);
11252   SDValue C1 = N->getOperand(1);
11253   SDValue C2 = N->getOperand(2);
11254   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11255     return SDValue();
11256 
11257   EVT VT = N->getValueType(0);
11258   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11259       VT != Cond.getOperand(0).getValueType())
11260     return SDValue();
11261 
11262   // The inverted-condition + commuted-select variants of these patterns are
11263   // canonicalized to these forms in IR.
11264   SDValue X = Cond.getOperand(0);
11265   SDValue CondC = Cond.getOperand(1);
11266   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11267   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11268       isAllOnesOrAllOnesSplat(C2)) {
11269     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11270     SDLoc DL(N);
11271     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11272     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11273     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11274   }
11275   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11276     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11277     SDLoc DL(N);
11278     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11279     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11280     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11281   }
11282   return SDValue();
11283 }
11284 
11285 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11286                                                  const TargetLowering &TLI) {
11287   if (!TLI.convertSelectOfConstantsToMath(VT))
11288     return false;
11289 
11290   if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11291     return true;
11292   if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11293     return true;
11294 
11295   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11296   if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11297     return true;
11298   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11299     return true;
11300 
11301   return false;
11302 }
11303 
11304 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11305   SDValue Cond = N->getOperand(0);
11306   SDValue N1 = N->getOperand(1);
11307   SDValue N2 = N->getOperand(2);
11308   EVT VT = N->getValueType(0);
11309   EVT CondVT = Cond.getValueType();
11310   SDLoc DL(N);
11311 
11312   if (!VT.isInteger())
11313     return SDValue();
11314 
11315   auto *C1 = dyn_cast<ConstantSDNode>(N1);
11316   auto *C2 = dyn_cast<ConstantSDNode>(N2);
11317   if (!C1 || !C2)
11318     return SDValue();
11319 
11320   if (CondVT != MVT::i1 || LegalOperations) {
11321     // fold (select Cond, 0, 1) -> (xor Cond, 1)
    // We can't do this reliably if integer based booleans have different
    // contents to floating point based booleans. This is because we can't
    // tell whether we have an integer-based boolean or a floating-point-based
    // boolean unless we can find the SETCC that produced it and inspect its
    // operands. This is fairly easy if Cond is the SETCC node, but it can
    // potentially be undiscoverable (or not reasonably discoverable). For
    // example, it could be in another basic block or it could require
    // searching a complicated expression.
11330     if (CondVT.isInteger() &&
11331         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11332             TargetLowering::ZeroOrOneBooleanContent &&
11333         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11334             TargetLowering::ZeroOrOneBooleanContent &&
11335         C1->isZero() && C2->isOne()) {
11336       SDValue NotCond =
11337           DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11338       if (VT.bitsEq(CondVT))
11339         return NotCond;
11340       return DAG.getZExtOrTrunc(NotCond, DL, VT);
11341     }
11342 
11343     return SDValue();
11344   }
11345 
11346   // Only do this before legalization to avoid conflicting with target-specific
11347   // transforms in the other direction (create a select from a zext/sext). There
11348   // is also a target-independent combine here in DAGCombiner in the other
11349   // direction for (select Cond, -1, 0) when the condition is not i1.
11350   assert(CondVT == MVT::i1 && !LegalOperations);
11351 
11352   // select Cond, 1, 0 --> zext (Cond)
11353   if (C1->isOne() && C2->isZero())
11354     return DAG.getZExtOrTrunc(Cond, DL, VT);
11355 
11356   // select Cond, -1, 0 --> sext (Cond)
11357   if (C1->isAllOnes() && C2->isZero())
11358     return DAG.getSExtOrTrunc(Cond, DL, VT);
11359 
11360   // select Cond, 0, 1 --> zext (!Cond)
11361   if (C1->isZero() && C2->isOne()) {
11362     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11363     NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11364     return NotCond;
11365   }
11366 
11367   // select Cond, 0, -1 --> sext (!Cond)
11368   if (C1->isZero() && C2->isAllOnes()) {
11369     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11370     NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11371     return NotCond;
11372   }
11373 
11374   // Use a target hook because some targets may prefer to transform in the
11375   // other direction.
11376   if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11377     return SDValue();
11378 
11379   // For any constants that differ by 1, we can transform the select into
11380   // an extend and add.
11381   const APInt &C1Val = C1->getAPIntValue();
11382   const APInt &C2Val = C2->getAPIntValue();
11383 
11384   // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11385   if (C1Val - 1 == C2Val) {
11386     Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11387     return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11388   }
11389 
11390   // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11391   if (C1Val + 1 == C2Val) {
11392     Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11393     return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11394   }
11395 
11396   // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11397   if (C1Val.isPowerOf2() && C2Val.isZero()) {
11398     Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11399     SDValue ShAmtC =
11400         DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11401     return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11402   }
11403 
11404   // select Cond, -1, C --> or (sext Cond), C
11405   if (C1->isAllOnes()) {
11406     Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11407     return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11408   }
11409 
11410   // select Cond, C, -1 --> or (sext (not Cond)), C
11411   if (C2->isAllOnes()) {
11412     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11413     NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11414     return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11415   }
11416 
11417   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
11418     return V;
11419 
11420   return SDValue();
11421 }
11422 
11423 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11424   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
11425          "Expected a (v)select");
11426   SDValue Cond = N->getOperand(0);
11427   SDValue T = N->getOperand(1), F = N->getOperand(2);
11428   EVT VT = N->getValueType(0);
11429   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11430     return SDValue();
11431 
11432   // select Cond, Cond, F --> or Cond, F
11433   // select Cond, 1, F    --> or Cond, F
11434   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11435     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11436 
11437   // select Cond, T, Cond --> and Cond, T
11438   // select Cond, T, 0    --> and Cond, T
11439   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11440     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11441 
11442   // select Cond, T, 1 --> or (not Cond), T
11443   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11444     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
11445     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11446   }
11447 
11448   // select Cond, 0, F --> and (not Cond), F
11449   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11450     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
11451     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11452   }
11453 
11454   return SDValue();
11455 }
11456 
11457 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11458   SDValue N0 = N->getOperand(0);
11459   SDValue N1 = N->getOperand(1);
11460   SDValue N2 = N->getOperand(2);
11461   EVT VT = N->getValueType(0);
11462   if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11463     return SDValue();
11464 
11465   SDValue Cond0 = N0.getOperand(0);
11466   SDValue Cond1 = N0.getOperand(1);
11467   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11468   if (VT != Cond0.getValueType())
11469     return SDValue();
11470 
11471   // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11472   // compare is inverted from that pattern ("Cond0 s> -1").
11473   if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11474     ; // This is the pattern we are looking for.
11475   else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11476     std::swap(N1, N2);
11477   else
11478     return SDValue();
11479 
11480   // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11481   if (isNullOrNullSplat(N2)) {
11482     SDLoc DL(N);
11483     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11484     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11485     return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11486   }
11487 
11488   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11489   if (isAllOnesOrAllOnesSplat(N1)) {
11490     SDLoc DL(N);
11491     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11492     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11493     return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11494   }
11495 
11496   // If we have to invert the sign bit mask, only do that transform if the
11497   // target has a bitwise 'and not' instruction (the invert is free).
  // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11499   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11500   if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11501     SDLoc DL(N);
11502     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11503     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11504     SDValue Not = DAG.getNOT(DL, Sra, VT);
11505     return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11506   }
11507 
11508   // TODO: There's another pattern in this family, but it may require
11509   //       implementing hasOrNot() to check for profitability:
11510   //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11511 
11512   return SDValue();
11513 }
11514 
11515 SDValue DAGCombiner::visitSELECT(SDNode *N) {
11516   SDValue N0 = N->getOperand(0);
11517   SDValue N1 = N->getOperand(1);
11518   SDValue N2 = N->getOperand(2);
11519   EVT VT = N->getValueType(0);
11520   EVT VT0 = N0.getValueType();
11521   SDLoc DL(N);
11522   SDNodeFlags Flags = N->getFlags();
11523 
11524   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11525     return V;
11526 
11527   if (SDValue V = foldBoolSelectToLogic(N, DAG))
11528     return V;
11529 
11530   // select (not Cond), N1, N2 -> select Cond, N2, N1
11531   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11532     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11533     SelectOp->setFlags(Flags);
11534     return SelectOp;
11535   }
11536 
11537   if (SDValue V = foldSelectOfConstants(N))
11538     return V;
11539 
11540   // If we can fold this based on the true/false value, do so.
11541   if (SimplifySelectOps(N, N1, N2))
11542     return SDValue(N, 0); // Don't revisit N.
11543 
11544   if (VT0 == MVT::i1) {
11545     // The code in this block deals with the following 2 equivalences:
11546     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11547     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11548     // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always
    // transform to the right-hand form if the inner select already exists in
    // the DAG, and we always transform to the left-hand form if we know that
    // we can further optimize the combination of the conditions.
11553     bool normalizeToSequence =
11554         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11555     // select (and Cond0, Cond1), X, Y
11556     //   -> select Cond0, (select Cond1, X, Y), Y
11557     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11558       SDValue Cond0 = N0->getOperand(0);
11559       SDValue Cond1 = N0->getOperand(1);
11560       SDValue InnerSelect =
11561           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11562       if (normalizeToSequence || !InnerSelect.use_empty())
11563         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11564                            InnerSelect, N2, Flags);
11565       // Cleanup on failure.
11566       if (InnerSelect.use_empty())
11567         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11568     }
11569     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11570     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11571       SDValue Cond0 = N0->getOperand(0);
11572       SDValue Cond1 = N0->getOperand(1);
11573       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11574                                         Cond1, N1, N2, Flags);
11575       if (normalizeToSequence || !InnerSelect.use_empty())
11576         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11577                            InnerSelect, Flags);
11578       // Cleanup on failure.
11579       if (InnerSelect.use_empty())
11580         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11581     }
11582 
11583     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11584     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11585       SDValue N1_0 = N1->getOperand(0);
11586       SDValue N1_1 = N1->getOperand(1);
11587       SDValue N1_2 = N1->getOperand(2);
11588       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11589         // Create the actual and node if we can generate good code for it.
11590         if (!normalizeToSequence) {
11591           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11592           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11593                              N2, Flags);
11594         }
11595         // Otherwise see if we can optimize the "and" to a better pattern.
11596         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11597           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11598                              N2, Flags);
11599         }
11600       }
11601     }
11602     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11603     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11604       SDValue N2_0 = N2->getOperand(0);
11605       SDValue N2_1 = N2->getOperand(1);
11606       SDValue N2_2 = N2->getOperand(2);
11607       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11608         // Create the actual or node if we can generate good code for it.
11609         if (!normalizeToSequence) {
11610           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11611           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11612                              N2_2, Flags);
11613         }
11614         // Otherwise see if we can optimize to a better pattern.
11615         if (SDValue Combined = visitORLike(N0, N2_0, N))
11616           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11617                              N2_2, Flags);
11618       }
11619     }
11620   }
11621 
11622   // Fold selects based on a setcc into other things, such as min/max/abs.
11623   if (N0.getOpcode() == ISD::SETCC) {
11624     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11625     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11626 
11627     // select (fcmp lt x, y), x, y -> fminnum x, y
11628     // select (fcmp gt x, y), x, y -> fmaxnum x, y
11629     //
11630     // This is OK if we don't care what happens if either operand is a NaN.
11631     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11632       if (SDValue FMinMax =
11633               combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11634         return FMinMax;
11635 
11636     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11637     // This is conservatively limited to pre-legal-operations to give targets
11638     // a chance to reverse the transform if they want to do that. Also, it is
11639     // unlikely that the pattern would be formed late, so it's probably not
11640     // worth going through the other checks.
11641     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11642         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11643         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11644       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11645       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11646       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11647         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11648         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11649         //
11650         // The IR equivalent of this transform would have this form:
11651         //   %a = add %x, C
11652         //   %c = icmp ugt %x, ~C
11653         //   %r = select %c, -1, %a
11654         //   =>
11655         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11656         //   %u0 = extractvalue %u, 0
11657         //   %u1 = extractvalue %u, 1
11658         //   %r = select %u1, -1, %u0
11659         SDVTList VTs = DAG.getVTList(VT, VT0);
11660         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11661         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11662       }
11663     }
11664 
11665     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11666         (!LegalOperations &&
11667          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
11668       // Any flags available in a select/setcc fold will be on the setcc as they
11669       // migrated from fcmp
11670       Flags = N0->getFlags();
11671       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11672                                        N2, N0.getOperand(2));
11673       SelectNode->setFlags(Flags);
11674       return SelectNode;
11675     }
11676 
11677     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11678       return NewSel;
11679   }
11680 
11681   if (!VT.isVector())
11682     if (SDValue BinOp = foldSelectOfBinops(N))
11683       return BinOp;
11684 
11685   if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11686     return R;
11687 
11688   return SDValue();
11689 }
11690 
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11693 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11694   SDLoc DL(N);
11695   SDValue Cond = N->getOperand(0);
11696   SDValue LHS = N->getOperand(1);
11697   SDValue RHS = N->getOperand(2);
11698   EVT VT = N->getValueType(0);
11699   int NumElems = VT.getVectorNumElements();
11700   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11701          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11702          Cond.getOpcode() == ISD::BUILD_VECTOR);
11703 
  // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
  // about binary ones here.
11706   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11707     return SDValue();
11708 
  // We're sure we have an even number of elements due to the
  // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF. Once a non-undef
  // element is found, keep looping until we get to half the length of the BV
  // and check that all the non-undef elements are the same.
11714   ConstantSDNode *BottomHalf = nullptr;
11715   for (int i = 0; i < NumElems / 2; ++i) {
11716     if (Cond->getOperand(i)->isUndef())
11717       continue;
11718 
11719     if (BottomHalf == nullptr)
11720       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11721     else if (Cond->getOperand(i).getNode() != BottomHalf)
11722       return SDValue();
11723   }
11724 
11725   // Do the same for the second half of the BuildVector
11726   ConstantSDNode *TopHalf = nullptr;
11727   for (int i = NumElems / 2; i < NumElems; ++i) {
11728     if (Cond->getOperand(i)->isUndef())
11729       continue;
11730 
11731     if (TopHalf == nullptr)
11732       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11733     else if (Cond->getOperand(i).getNode() != TopHalf)
11734       return SDValue();
11735   }
11736 
11737   assert(TopHalf && BottomHalf &&
11738          "One half of the selector was all UNDEFs and the other was all the "
11739          "same value. This should have been addressed before this function.");
11740   return DAG.getNode(
11741       ISD::CONCAT_VECTORS, DL, VT,
11742       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11743       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11744 }
11745 
11746 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11747                        SelectionDAG &DAG, const SDLoc &DL) {
11748 
11749   // Only perform the transformation when existing operands can be reused.
11750   if (IndexIsScaled)
11751     return false;
11752 
11753   if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11754     return false;
11755 
11756   EVT VT = BasePtr.getValueType();
11757 
11758   if (SDValue SplatVal = DAG.getSplatValue(Index);
11759       SplatVal && !isNullConstant(SplatVal) &&
11760       SplatVal.getValueType() == VT) {
11761     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11762     Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11763     return true;
11764   }
11765 
11766   if (Index.getOpcode() != ISD::ADD)
11767     return false;
11768 
11769   if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11770       SplatVal && SplatVal.getValueType() == VT) {
11771     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11772     Index = Index.getOperand(1);
11773     return true;
11774   }
11775   if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11776       SplatVal && SplatVal.getValueType() == VT) {
11777     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11778     Index = Index.getOperand(0);
11779     return true;
11780   }
11781   return false;
11782 }
11783 
11784 // Fold sext/zext of index into index type.
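// For example (illustrative): for a gather/scatter whose index is (zext X)
// with a signed index type, the extend can either be dropped (when the target
// reports it as redundant) or the index type can simply be flipped to
// unsigned, since a zero-extended index is known to be non-negative.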
11785 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11786                      SelectionDAG &DAG) {
11787   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11788 
11789   // It's always safe to look through zero extends.
11790   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11791     if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11792       IndexType = ISD::UNSIGNED_SCALED;
11793       Index = Index.getOperand(0);
11794       return true;
11795     }
11796     if (ISD::isIndexTypeSigned(IndexType)) {
11797       IndexType = ISD::UNSIGNED_SCALED;
11798       return true;
11799     }
11800   }
11801 
11802   // It's only safe to look through sign extends when Index is signed.
11803   if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11804       ISD::isIndexTypeSigned(IndexType) &&
11805       TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11806     Index = Index.getOperand(0);
11807     return true;
11808   }
11809 
11810   return false;
11811 }
11812 
11813 SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11814   VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11815   SDValue Mask = MSC->getMask();
11816   SDValue Chain = MSC->getChain();
11817   SDValue Index = MSC->getIndex();
11818   SDValue Scale = MSC->getScale();
11819   SDValue StoreVal = MSC->getValue();
11820   SDValue BasePtr = MSC->getBasePtr();
11821   SDValue VL = MSC->getVectorLength();
11822   ISD::MemIndexType IndexType = MSC->getIndexType();
11823   SDLoc DL(N);
11824 
11825   // Zap scatters with a zero mask.
11826   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11827     return Chain;
11828 
11829   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11830     SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11831     return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11832                             DL, Ops, MSC->getMemOperand(), IndexType);
11833   }
11834 
11835   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11836     SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11837     return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11838                             DL, Ops, MSC->getMemOperand(), IndexType);
11839   }
11840 
11841   return SDValue();
11842 }
11843 
11844 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11845   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11846   SDValue Mask = MSC->getMask();
11847   SDValue Chain = MSC->getChain();
11848   SDValue Index = MSC->getIndex();
11849   SDValue Scale = MSC->getScale();
11850   SDValue StoreVal = MSC->getValue();
11851   SDValue BasePtr = MSC->getBasePtr();
11852   ISD::MemIndexType IndexType = MSC->getIndexType();
11853   SDLoc DL(N);
11854 
11855   // Zap scatters with a zero mask.
11856   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11857     return Chain;
11858 
11859   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11860     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11861     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11862                                 DL, Ops, MSC->getMemOperand(), IndexType,
11863                                 MSC->isTruncatingStore());
11864   }
11865 
11866   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11867     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11868     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11869                                 DL, Ops, MSC->getMemOperand(), IndexType,
11870                                 MSC->isTruncatingStore());
11871   }
11872 
11873   return SDValue();
11874 }
11875 
11876 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11877   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11878   SDValue Mask = MST->getMask();
11879   SDValue Chain = MST->getChain();
11880   SDValue Value = MST->getValue();
11881   SDValue Ptr = MST->getBasePtr();
11882   SDLoc DL(N);
11883 
11884   // Zap masked stores with a zero mask.
11885   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11886     return Chain;
11887 
11888   // Remove a masked store if base pointers and masks are equal.
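  // For example (illustrative):
  //   ch1 = masked_store v1, ptr, mask
  //   ch2 = masked_store v2, ptr, mask (chained on ch1)
  // Every lane written by the first store is overwritten by the second, so
  // the first store is dead and can be elided.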
11889   if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11890     if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11891         MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11892         !MST->getBasePtr().isUndef() &&
11893         ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11894                                          MST1->getMemoryVT().getStoreSize()) ||
11895          ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11896         TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11897                             MST->getMemoryVT().getStoreSize())) {
11898       CombineTo(MST1, MST1->getChain());
11899       if (N->getOpcode() != ISD::DELETED_NODE)
11900         AddToWorklist(N);
11901       return SDValue(N, 0);
11902     }
11903   }
11904 
  // If this is a masked store with an all-ones mask, we can use an unmasked
  // store.
11906   // FIXME: Can we do this for indexed, compressing, or truncating stores?
11907   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11908       !MST->isCompressingStore() && !MST->isTruncatingStore())
11909     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11910                         MST->getBasePtr(), MST->getPointerInfo(),
11911                         MST->getOriginalAlign(), MachineMemOperand::MOStore,
11912                         MST->getAAInfo());
11913 
11914   // Try transforming N to an indexed store.
11915   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11916     return SDValue(N, 0);
11917 
11918   if (MST->isTruncatingStore() && MST->isUnindexed() &&
11919       Value.getValueType().isInteger() &&
11920       (!isa<ConstantSDNode>(Value) ||
11921        !cast<ConstantSDNode>(Value)->isOpaque())) {
11922     APInt TruncDemandedBits =
11923         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11924                              MST->getMemoryVT().getScalarSizeInBits());
11925 
11926     // See if we can simplify the operation with
11927     // SimplifyDemandedBits, which only works if the value has a single use.
11928     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will add
      // Value's node back to the worklist if necessary, but we also need to
      // re-visit the Store node itself.
11933       if (N->getOpcode() != ISD::DELETED_NODE)
11934         AddToWorklist(N);
11935       return SDValue(N, 0);
11936     }
11937   }
11938 
11939   // If this is a TRUNC followed by a masked store, fold this into a masked
11940   // truncating store.  We can do this even if this is already a masked
11941   // truncstore.
  // TODO: Try combining to a masked compress store if possible.
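  // For example (illustrative):
  //   (masked_store (v4i16 (truncate v4i32:X)), ptr, mask)
  //     --> (masked_store v4i32:X, ptr, mask), truncating to v4i16 within the
  //         store itself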
11943   if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11944       MST->isUnindexed() && !MST->isCompressingStore() &&
11945       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11946                                MST->getMemoryVT(), LegalOperations)) {
11947     auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11948                                          Value.getOperand(0).getValueType());
11949     return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11950                               MST->getOffset(), Mask, MST->getMemoryVT(),
11951                               MST->getMemOperand(), MST->getAddressingMode(),
11952                               /*IsTruncating=*/true);
11953   }
11954 
11955   return SDValue();
11956 }
11957 
11958 SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11959   auto *SST = cast<VPStridedStoreSDNode>(N);
11960   EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11961   // Combine strided stores with unit-stride to a regular VP store.
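  // For example (illustrative): a strided store of v4i32 elements with a
  // constant stride of 4 bytes writes consecutive i32s in memory, which is
  // exactly a unit-stride (regular) VP store.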
11962   if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11963       CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11964     return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11965                           SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11966                           SST->getVectorLength(), SST->getMemoryVT(),
11967                           SST->getMemOperand(), SST->getAddressingMode(),
11968                           SST->isTruncatingStore(), SST->isCompressingStore());
11969   }
11970   return SDValue();
11971 }
11972 
11973 SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11974   VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11975   SDValue Mask = MGT->getMask();
11976   SDValue Chain = MGT->getChain();
11977   SDValue Index = MGT->getIndex();
11978   SDValue Scale = MGT->getScale();
11979   SDValue BasePtr = MGT->getBasePtr();
11980   SDValue VL = MGT->getVectorLength();
11981   ISD::MemIndexType IndexType = MGT->getIndexType();
11982   SDLoc DL(N);
11983 
11984   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11985     SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11986     return DAG.getGatherVP(
11987         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11988         Ops, MGT->getMemOperand(), IndexType);
11989   }
11990 
11991   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11992     SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11993     return DAG.getGatherVP(
11994         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11995         Ops, MGT->getMemOperand(), IndexType);
11996   }
11997 
11998   return SDValue();
11999 }
12000 
12001 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12002   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12003   SDValue Mask = MGT->getMask();
12004   SDValue Chain = MGT->getChain();
12005   SDValue Index = MGT->getIndex();
12006   SDValue Scale = MGT->getScale();
12007   SDValue PassThru = MGT->getPassThru();
12008   SDValue BasePtr = MGT->getBasePtr();
12009   ISD::MemIndexType IndexType = MGT->getIndexType();
12010   SDLoc DL(N);
12011 
12012   // Zap gathers with a zero mask.
12013   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12014     return CombineTo(N, PassThru, MGT->getChain());
12015 
12016   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12017     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12018     return DAG.getMaskedGather(
12019         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12020         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12021   }
12022 
12023   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12024     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12025     return DAG.getMaskedGather(
12026         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12027         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12028   }
12029 
12030   return SDValue();
12031 }
12032 
12033 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12034   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12035   SDValue Mask = MLD->getMask();
12036   SDLoc DL(N);
12037 
12038   // Zap masked loads with a zero mask.
12039   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12040     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12041 
  // If this is a masked load with an all-ones mask, we can use an unmasked
  // load.
12043   // FIXME: Can we do this for indexed, expanding, or extending loads?
12044   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12045       !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12046     SDValue NewLd = DAG.getLoad(
12047         N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12048         MLD->getPointerInfo(), MLD->getOriginalAlign(),
12049         MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
12050     return CombineTo(N, NewLd, NewLd.getValue(1));
12051   }
12052 
12053   // Try transforming N to an indexed load.
12054   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12055     return SDValue(N, 0);
12056 
12057   return SDValue();
12058 }
12059 
12060 SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12061   auto *SLD = cast<VPStridedLoadSDNode>(N);
12062   EVT EltVT = SLD->getValueType(0).getVectorElementType();
12063   // Combine strided loads with unit-stride to a regular VP load.
12064   if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12065       CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12066     SDValue NewLd = DAG.getLoadVP(
12067         SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12068         SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12069         SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12070         SLD->getMemOperand(), SLD->isExpandingLoad());
12071     return CombineTo(N, NewLd, NewLd.getValue(1));
12072   }
12073   return SDValue();
12074 }
12075 
12076 /// A vector select of 2 constant vectors can be simplified to math/logic to
12077 /// avoid a variable select instruction and possibly avoid constant loads.
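/// For example (illustrative):
///   vselect <4 x i1> Cond, <2,2,2,2>, <1,1,1,1>
///     --> add (zext Cond), <1,1,1,1>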
12078 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12079   SDValue Cond = N->getOperand(0);
12080   SDValue N1 = N->getOperand(1);
12081   SDValue N2 = N->getOperand(2);
12082   EVT VT = N->getValueType(0);
12083   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12084       !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12085       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12086       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12087     return SDValue();
12088 
12089   // Check if we can use the condition value to increment/decrement a single
12090   // constant value. This simplifies a select to an add and removes a constant
12091   // load/materialization from the general case.
12092   bool AllAddOne = true;
12093   bool AllSubOne = true;
12094   unsigned Elts = VT.getVectorNumElements();
12095   for (unsigned i = 0; i != Elts; ++i) {
12096     SDValue N1Elt = N1.getOperand(i);
12097     SDValue N2Elt = N2.getOperand(i);
12098     if (N1Elt.isUndef() || N2Elt.isUndef())
12099       continue;
12100     if (N1Elt.getValueType() != N2Elt.getValueType())
12101       continue;
12102 
12103     const APInt &C1 = N1Elt->getAsAPIntVal();
12104     const APInt &C2 = N2Elt->getAsAPIntVal();
12105     if (C1 != C2 + 1)
12106       AllAddOne = false;
12107     if (C1 != C2 - 1)
12108       AllSubOne = false;
12109   }
12110 
12111   // Further simplifications for the extra-special cases where the constants are
12112   // all 0 or all -1 should be implemented as folds of these patterns.
12113   SDLoc DL(N);
12114   if (AllAddOne || AllSubOne) {
12115     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12116     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
12117     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12118     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12119     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12120   }
12121 
12122   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
12123   APInt Pow2C;
12124   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12125       isNullOrNullSplat(N2)) {
12126     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12127     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12128     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12129   }
12130 
12131   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
12132     return V;
12133 
12134   // The general case for select-of-constants:
12135   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12136   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12137   // leave that to a machine-specific pass.
12138   return SDValue();
12139 }
12140 
12141 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12142   SDValue N0 = N->getOperand(0);
12143   SDValue N1 = N->getOperand(1);
12144   SDValue N2 = N->getOperand(2);
12145   EVT VT = N->getValueType(0);
12146   SDLoc DL(N);
12147 
12148   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12149     return V;
12150 
12151   if (SDValue V = foldBoolSelectToLogic(N, DAG))
12152     return V;
12153 
12154   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12155   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12156     return DAG.getSelect(DL, VT, F, N2, N1);
12157 
12158   // Canonicalize integer abs.
12159   // vselect (setg[te] X,  0),  X, -X ->
12160   // vselect (setgt    X, -1),  X, -X ->
12161   // vselect (setl[te] X,  0), -X,  X ->
12162   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
12163   if (N0.getOpcode() == ISD::SETCC) {
12164     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12165     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12166     bool isAbs = false;
12167     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12168 
12169     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12170          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12171         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12172       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12173     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12174              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12175       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12176 
12177     if (isAbs) {
12178       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12179         return DAG.getNode(ISD::ABS, DL, VT, LHS);
12180 
12181       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12182                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
12183                                                   DL, getShiftAmountTy(VT)));
12184       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12185       AddToWorklist(Shift.getNode());
12186       AddToWorklist(Add.getNode());
12187       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12188     }
12189 
    // vselect (fcmp lt x, y), x, y -> fminnum x, y
    // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
12192     //
12193     // This is OK if we don't care about what happens if either operand is a
12194     // NaN.
12195     //
12196     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12197       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12198         return FMinMax;
12199     }
12200 
12201     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12202       return S;
12203     if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12204       return S;
12205 
12206     // If this select has a condition (setcc) with narrower operands than the
12207     // select, try to widen the compare to match the select width.
12208     // TODO: This should be extended to handle any constant.
12209     // TODO: This could be extended to handle non-loading patterns, but that
12210     //       requires thorough testing to avoid regressions.
12211     if (isNullOrNullSplat(RHS)) {
12212       EVT NarrowVT = LHS.getValueType();
12213       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12214       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12215       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12216       unsigned WideWidth = WideVT.getScalarSizeInBits();
12217       bool IsSigned = isSignedIntSetCC(CC);
12218       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12219       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12220           SetCCWidth != 1 && SetCCWidth < WideWidth &&
12221           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12222           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12223         // Both compare operands can be widened for free. The LHS can use an
12224         // extended load, and the RHS is a constant:
12225         //   vselect (ext (setcc load(X), C)), N1, N2 -->
12226         //   vselect (setcc extload(X), C'), N1, N2
12227         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12228         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12229         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12230         EVT WideSetCCVT = getSetCCResultType(WideVT);
12231         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12232         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12233       }
12234     }
12235 
12236     // Match VSELECTs with absolute difference patterns.
12237     // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12238     // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12239     // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12240     // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12241     if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12242         N1.getOperand(0) == N2.getOperand(1) &&
12243         N1.getOperand(1) == N2.getOperand(0)) {
12244       bool IsSigned = isSignedIntSetCC(CC);
12245       unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12246       if (hasOperation(ABDOpc, VT)) {
12247         switch (CC) {
12248         case ISD::SETGT:
12249         case ISD::SETGE:
12250         case ISD::SETUGT:
12251         case ISD::SETUGE:
12252           if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12253             return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12254           break;
12255         case ISD::SETLT:
12256         case ISD::SETLE:
12257         case ISD::SETULT:
12258         case ISD::SETULE:
          if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12260             return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12261           break;
12262         default:
12263           break;
12264         }
12265       }
12266     }
12267 
12268     // Match VSELECTs into add with unsigned saturation.
12269     if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is a vector with all bits set.
      // If it's on the left side, invert the predicate to simplify the logic
      // below.
12272       SDValue Other;
12273       ISD::CondCode SatCC = CC;
12274       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12275         Other = N2;
12276         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12277       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12278         Other = N1;
12279       }
12280 
12281       if (Other && Other.getOpcode() == ISD::ADD) {
12282         SDValue CondLHS = LHS, CondRHS = RHS;
12283         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12284 
12285         // Canonicalize condition operands.
12286         if (SatCC == ISD::SETUGE) {
12287           std::swap(CondLHS, CondRHS);
12288           SatCC = ISD::SETULE;
12289         }
12290 
12291         // We can test against either of the addition operands.
12292         // x <= x+y ? x+y : ~0 --> uaddsat x, y
12293         // x+y >= x ? x+y : ~0 --> uaddsat x, y
12294         if (SatCC == ISD::SETULE && Other == CondRHS &&
12295             (OpLHS == CondLHS || OpRHS == CondLHS))
12296           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12297 
12298         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12299             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12300              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12301             CondLHS == OpLHS) {
12302           // If the RHS is a constant we have to reverse the const
12303           // canonicalization.
12304           // x >= ~C ? x+C : ~0 --> uaddsat x, C
12305           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12306             return Cond->getAPIntValue() == ~Op->getAPIntValue();
12307           };
12308           if (SatCC == ISD::SETULE &&
12309               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12310             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12311         }
12312       }
12313     }
12314 
12315     // Match VSELECTs into sub with unsigned saturation.
12316     if (hasOperation(ISD::USUBSAT, VT)) {
      // Check if one of the arms of the VSELECT is a zero vector. If it's on
      // the left side, invert the predicate to simplify the logic below.
12319       SDValue Other;
12320       ISD::CondCode SatCC = CC;
12321       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12322         Other = N2;
12323         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12324       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12325         Other = N1;
12326       }
12327 
12328       // zext(x) >= y ? trunc(zext(x) - y) : 0
12329       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12330       // zext(x) >  y ? trunc(zext(x) - y) : 0
12331       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12332       if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12333           Other.getOperand(0).getOpcode() == ISD::SUB &&
12334           (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12335         SDValue OpLHS = Other.getOperand(0).getOperand(0);
12336         SDValue OpRHS = Other.getOperand(0).getOperand(1);
12337         if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12338           if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12339                                               DAG, DL))
12340             return R;
12341       }
12342 
12343       if (Other && Other.getNumOperands() == 2) {
12344         SDValue CondRHS = RHS;
12345         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12346 
12347         if (OpLHS == LHS) {
12348           // Look for a general sub with unsigned saturation first.
12349           // x >= y ? x-y : 0 --> usubsat x, y
12350           // x >  y ? x-y : 0 --> usubsat x, y
12351           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12352               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12353             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12354 
12355           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12356               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12357             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12358                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12359               // If the RHS is a constant we have to reverse the const
12360               // canonicalization.
12361               // x > C-1 ? x+-C : 0 --> usubsat x, C
12362               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12363                 return (!Op && !Cond) ||
12364                        (Op && Cond &&
12365                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12366               };
12367               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12368                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12369                                             /*AllowUndefs*/ true)) {
12370                 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12371                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12372               }
12373 
12374               // Another special case: If C was a sign bit, the sub has been
12375               // canonicalized into a xor.
12376               // FIXME: Would it be better to use computeKnownBits to
12377               // determine whether it's safe to decanonicalize the xor?
12378               // x s< 0 ? x^C : 0 --> usubsat x, C
12379               APInt SplatValue;
12380               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12381                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12382                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12383                   SplatValue.isSignMask()) {
12384                 // Note that we have to rebuild the RHS constant here to
12385                 // ensure we don't rely on particular values of undef lanes.
12386                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12387                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12388               }
12389             }
12390           }
12391         }
12392       }
12393     }
12394   }
12395 
12396   if (SimplifySelectOps(N, N1, N2))
12397     return SDValue(N, 0);  // Don't revisit N.
12398 
12399   // Fold (vselect all_ones, N1, N2) -> N1
12400   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12401     return N1;
12402   // Fold (vselect all_zeros, N1, N2) -> N2
12403   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12404     return N2;
12405 
  // The ConvertSelectToConcatVector function assumes both the above
  // folds for (vselect (build_vector all_{ones,zeros}) ...) have been made
  // and addressed.
12409   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12410       N2.getOpcode() == ISD::CONCAT_VECTORS &&
12411       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12412     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12413       return CV;
12414   }
12415 
12416   if (SDValue V = foldVSelectOfConstants(N))
12417     return V;
12418 
12419   if (hasOperation(ISD::SRA, VT))
12420     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12421       return V;
12422 
12423   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12424     return SDValue(N, 0);
12425 
12426   return SDValue();
12427 }
12428 
12429 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12430   SDValue N0 = N->getOperand(0);
12431   SDValue N1 = N->getOperand(1);
12432   SDValue N2 = N->getOperand(2);
12433   SDValue N3 = N->getOperand(3);
12434   SDValue N4 = N->getOperand(4);
12435   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12436 
12437   // fold select_cc lhs, rhs, x, x, cc -> x
12438   if (N2 == N3)
12439     return N2;
12440 
12441   // select_cc bool, 0, x, y, seteq -> select bool, y, x
12442   if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12443       isNullConstant(N1))
12444     return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12445 
12446   // Determine if the condition we're dealing with is constant
12447   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12448                                   CC, SDLoc(N), false)) {
12449     AddToWorklist(SCC.getNode());
12450 
12451     // cond always true -> true val
12452     // cond always false -> false val
12453     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12454       return SCCC->isZero() ? N3 : N2;
12455 
    // When the condition is UNDEF, just return the first operand. This is
    // consistent with DAG creation: no setcc node is created in this case.
12458     if (SCC->isUndef())
12459       return N2;
12460 
12461     // Fold to a simpler select_cc
12462     if (SCC.getOpcode() == ISD::SETCC) {
12463       SDValue SelectOp = DAG.getNode(
12464           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12465           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12466       SelectOp->setFlags(SCC->getFlags());
12467       return SelectOp;
12468     }
12469   }
12470 
12471   // If we can fold this based on the true/false value, do so.
12472   if (SimplifySelectOps(N, N2, N3))
12473     return SDValue(N, 0);  // Don't revisit N.
12474 
12475   // fold select_cc into other things, such as min/max/abs
12476   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12477 }
12478 
12479 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is desirable
  // to keep the argument to a brcond as a setcc as much as possible.
12483   bool PreferSetCC =
12484       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12485 
12486   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12487   EVT VT = N->getValueType(0);
12488   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12489 
12490   SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12491 
12492   if (Combined) {
    // If we prefer to have a setcc and the combined node isn't one, we'll try
    // our best to recreate one using rebuildSetCC.
12495     if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12496       SDValue NewSetCC = rebuildSetCC(Combined);
12497 
12498       // We don't have anything interesting to combine to.
12499       if (NewSetCC.getNode() == N)
12500         return SDValue();
12501 
12502       if (NewSetCC)
12503         return NewSetCC;
12504     }
12505     return Combined;
12506   }
12507 
12508   // Optimize
12509   //    1) (icmp eq/ne (and X, C0), (shift X, C1))
12510   // or
12511   //    2) (icmp eq/ne X, (rotate X, C1))
  // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
  // Then:
  // If C1 is a power of 2, then the rotate and shift+and versions are
  // equivalent, so we can interchange them depending on target preference.
  // Otherwise, if we have the shift+and version we can interchange srl/shl,
  // which in turn affects the constant C0. We can use this to get better
  // constants again determined by target preference.
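  // For example (illustrative, 64-bit): `(x64 & UINT32_MAX) == (x64 >> 32)`
  // compares the low half of x64 against the high half, which is the same
  // predicate as `x64 == (rotr x64, 32)`; C1 == 32 is a power of 2, so the
  // two forms are interchangeable per target preference.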
12520   if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12521     auto IsAndWithShift = [](SDValue A, SDValue B) {
12522       return A.getOpcode() == ISD::AND &&
12523              (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12524              A.getOperand(0) == B.getOperand(0);
12525     };
12526     auto IsRotateWithOp = [](SDValue A, SDValue B) {
12527       return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12528              B.getOperand(0) == A;
12529     };
12530     SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12531     bool IsRotate = false;
12532 
12533     // Find either shift+and or rotate pattern.
12534     if (IsAndWithShift(N0, N1)) {
12535       AndOrOp = N0;
12536       ShiftOrRotate = N1;
12537     } else if (IsAndWithShift(N1, N0)) {
12538       AndOrOp = N1;
12539       ShiftOrRotate = N0;
12540     } else if (IsRotateWithOp(N0, N1)) {
12541       IsRotate = true;
12542       AndOrOp = N0;
12543       ShiftOrRotate = N1;
12544     } else if (IsRotateWithOp(N1, N0)) {
12545       IsRotate = true;
12546       AndOrOp = N1;
12547       ShiftOrRotate = N0;
12548     }
12549 
12550     if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12551         (IsRotate || AndOrOp.hasOneUse())) {
12552       EVT OpVT = N0.getValueType();
      // Get the constant shift/rotate amount and possibly the mask (if it's
      // the shift+and variant).
12555       auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12556         ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12557                                                     /*AllowTrunc*/ false);
12558         if (CNode == nullptr)
12559           return std::nullopt;
12560         return CNode->getAPIntValue();
12561       };
12562       std::optional<APInt> AndCMask =
12563           IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12564       std::optional<APInt> ShiftCAmt =
12565           GetAPIntValue(ShiftOrRotate.getOperand(1));
12566       unsigned NumBits = OpVT.getScalarSizeInBits();
12567 
12568       // We found constants.
12569       if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12570         unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12571         // Check that the constants meet the constraints.
12572         bool CanTransform = IsRotate;
12573         if (!CanTransform) {
          // Check that the mask and shift complement each other.
12575           CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
12576           // Check that we are comparing all bits
12577           CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
12578           // Check that the and mask is correct for the shift
12579           CanTransform &=
12580               ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12581         }
12582 
12583         // See if target prefers another shift/rotate opcode.
12584         unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12585             OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12586         // Transform is valid and we have a new preference.
12587         if (CanTransform && NewShiftOpc != ShiftOpc) {
12588           SDLoc DL(N);
12589           SDValue NewShiftOrRotate =
12590               DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12591                           ShiftOrRotate.getOperand(1));
12592           SDValue NewAndOrOp = SDValue();
12593 
12594           if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12595             APInt NewMask =
12596                 NewShiftOpc == ISD::SHL
12597                     ? APInt::getHighBitsSet(NumBits,
12598                                             NumBits - ShiftCAmt->getZExtValue())
12599                     : APInt::getLowBitsSet(NumBits,
12600                                            NumBits - ShiftCAmt->getZExtValue());
12601             NewAndOrOp =
12602                 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12603                             DAG.getConstant(NewMask, DL, OpVT));
12604           } else {
12605             NewAndOrOp = ShiftOrRotate.getOperand(0);
12606           }
12607 
12608           return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12609         }
12610       }
12611     }
12612   }
12613   return SDValue();
12614 }
12615 
12616 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12617   SDValue LHS = N->getOperand(0);
12618   SDValue RHS = N->getOperand(1);
12619   SDValue Carry = N->getOperand(2);
12620   SDValue Cond = N->getOperand(3);
12621 
12622   // If Carry is false, fold to a regular SETCC.
12623   if (isNullConstant(Carry))
12624     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12625 
12626   return SDValue();
12627 }
12628 
/// Check that N satisfies all of the following:
///   N has a single use.
///   N is a load.
///   The load is compatible with ExtOpcode: if the load has an explicit
///   zero/sign extension, ExtOpcode must perform the same kind of extension;
///   loads without an explicit extension are compatible with any ExtOpcode.
12636 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12637   if (!N.hasOneUse())
12638     return false;
12639 
12640   if (!isa<LoadSDNode>(N))
12641     return false;
12642 
12643   LoadSDNode *Load = cast<LoadSDNode>(N);
12644   ISD::LoadExtType LoadExt = Load->getExtensionType();
12645   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12646     return true;
12647 
12648   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12649   // extension.
12650   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12651       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12652     return false;
12653 
12654   return true;
12655 }
12656 
12657 /// Fold
12658 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12659 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12660 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12661 /// This function is called by the DAGCombiner when visiting sext/zext/aext
12662 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12663 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12664                                          SelectionDAG &DAG,
12665                                          CombineLevel Level) {
12666   unsigned Opcode = N->getOpcode();
12667   SDValue N0 = N->getOperand(0);
12668   EVT VT = N->getValueType(0);
12669   SDLoc DL(N);
12670 
12671   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12672           Opcode == ISD::ANY_EXTEND) &&
12673          "Expected EXTEND dag node in input!");
12674 
12675   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12676       !N0.hasOneUse())
12677     return SDValue();
12678 
12679   SDValue Op1 = N0->getOperand(1);
12680   SDValue Op2 = N0->getOperand(2);
12681   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12682     return SDValue();
12683 
12684   auto ExtLoadOpcode = ISD::EXTLOAD;
12685   if (Opcode == ISD::SIGN_EXTEND)
12686     ExtLoadOpcode = ISD::SEXTLOAD;
12687   else if (Opcode == ISD::ZERO_EXTEND)
12688     ExtLoadOpcode = ISD::ZEXTLOAD;
12689 
  // An illegal VSELECT may fail in ISel if it is created after legalization
  // (DAG Combine2), so we should conservatively check the OperationAction.
12692   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12693   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12694   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12695       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12696       (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12697        TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12698     return SDValue();
12699 
12700   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12701   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12702   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12703 }
12704 
12705 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12706 /// a build_vector of constants.
12707 /// This function is called by the DAGCombiner when visiting sext/zext/aext
12708 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12709 /// Vector extends are not folded if operations are legal; this is to
12710 /// avoid introducing illegal build_vector dag nodes.
12711 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
12712                                          SelectionDAG &DAG, bool LegalTypes) {
12713   unsigned Opcode = N->getOpcode();
12714   SDValue N0 = N->getOperand(0);
12715   EVT VT = N->getValueType(0);
12716   SDLoc DL(N);
12717 
12718   assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12719          "Expected EXTEND dag node in input!");
12720 
12721   // fold (sext c1) -> c1
12722   // fold (zext c1) -> c1
12723   // fold (aext c1) -> c1
12724   if (isa<ConstantSDNode>(N0))
12725     return DAG.getNode(Opcode, DL, VT, N0);
12726 
12727   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12728   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12729   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12730   if (N0->getOpcode() == ISD::SELECT) {
12731     SDValue Op1 = N0->getOperand(1);
12732     SDValue Op2 = N0->getOperand(2);
12733     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12734         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
12735       // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.:
12737       //
12738       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12739       // t2: i64 = any_extend t1
12740       // -->
12741       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12742       // -->
12743       // t4: i64 = sign_extend_inreg t3
12744       unsigned FoldOpc = Opcode;
12745       if (FoldOpc == ISD::ANY_EXTEND)
12746         FoldOpc = ISD::SIGN_EXTEND;
12747       return DAG.getSelect(DL, VT, N0->getOperand(0),
12748                            DAG.getNode(FoldOpc, DL, VT, Op1),
12749                            DAG.getNode(FoldOpc, DL, VT, Op2));
12750     }
12751   }
12752 
  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
12756   EVT SVT = VT.getScalarType();
12757   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12758       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12759     return SDValue();
12760 
12761   // We can fold this node into a build_vector.
12762   unsigned VTBits = SVT.getSizeInBits();
12763   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12764   SmallVector<SDValue, 8> Elts;
12765   unsigned NumElts = VT.getVectorNumElements();
12766 
12767   for (unsigned i = 0; i != NumElts; ++i) {
12768     SDValue Op = N0.getOperand(i);
12769     if (Op.isUndef()) {
12770       if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12771         Elts.push_back(DAG.getUNDEF(SVT));
12772       else
12773         Elts.push_back(DAG.getConstant(0, DL, SVT));
12774       continue;
12775     }
12776 
12777     SDLoc DL(Op);
12778     // Get the constant value and if needed trunc it to the size of the type.
12779     // Nodes like build_vector might have constants wider than the scalar type.
12780     APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12781     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12782       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12783     else
12784       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12785   }
12786 
12787   return DAG.getBuildVector(VT, DL, Elts);
12788 }
12789 
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
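// For example (illustrative): given (zext (load x)) where the load also feeds
// (setcc (load x), C), the setcc can be rewritten as
// (setcc (zextload x), (zext C)), so the extended load can replace the plain
// load without keeping both the narrow and wide values live.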
12794 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12795                                     unsigned ExtOpc,
12796                                     SmallVectorImpl<SDNode *> &ExtendNodes,
12797                                     const TargetLowering &TLI) {
12798   bool HasCopyToRegUses = false;
12799   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12800   for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12801        ++UI) {
12802     SDNode *User = *UI;
12803     if (User == N)
12804       continue;
12805     if (UI.getUse().getResNo() != N0.getResNo())
12806       continue;
12807     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12808     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12809       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12810       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12811         // Sign bits will be lost after a zext.
12812         return false;
12813       bool Add = false;
12814       for (unsigned i = 0; i != 2; ++i) {
12815         SDValue UseOp = User->getOperand(i);
12816         if (UseOp == N0)
12817           continue;
12818         if (!isa<ConstantSDNode>(UseOp))
12819           return false;
12820         Add = true;
12821       }
12822       if (Add)
12823         ExtendNodes.push_back(User);
12824       continue;
12825     }
12826     // If truncates aren't free and there are users we can't
12827     // extend, it isn't worthwhile.
12828     if (!isTruncFree)
12829       return false;
12830     // Remember if this value is live-out.
12831     if (User->getOpcode() == ISD::CopyToReg)
12832       HasCopyToRegUses = true;
12833   }
12834 
12835   if (HasCopyToRegUses) {
12836     bool BothLiveOut = false;
12837     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12838          UI != UE; ++UI) {
12839       SDUse &Use = UI.getUse();
12840       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12841         BothLiveOut = true;
12842         break;
12843       }
12844     }
12845     if (BothLiveOut)
12846       // Both unextended and extended values are live out. There had better be
12847       // a good reason for the transformation.
12848       return !ExtendNodes.empty();
12849   }
12850   return true;
12851 }
12852 
12853 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12854                                   SDValue OrigLoad, SDValue ExtLoad,
12855                                   ISD::NodeType ExtType) {
12856   // Extend SetCC uses if necessary.
12857   SDLoc DL(ExtLoad);
12858   for (SDNode *SetCC : SetCCs) {
12859     SmallVector<SDValue, 4> Ops;
12860 
12861     for (unsigned j = 0; j != 2; ++j) {
12862       SDValue SOp = SetCC->getOperand(j);
12863       if (SOp == OrigLoad)
12864         Ops.push_back(ExtLoad);
12865       else
12866         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12867     }
12868 
12869     Ops.push_back(SetCC->getOperand(2));
12870     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12871   }
12872 }
12873 
12874 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12875 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12876   SDValue N0 = N->getOperand(0);
12877   EVT DstVT = N->getValueType(0);
12878   EVT SrcVT = N0.getValueType();
12879 
12880   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12881           N->getOpcode() == ISD::ZERO_EXTEND) &&
12882          "Unexpected node type (not an extend)!");
12883 
12884   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12885   // For example, on a target with legal v4i32, but illegal v8i32, turn:
12886   //   (v8i32 (sext (v8i16 (load x))))
12887   // into:
12888   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
12889   //                          (v4i32 (sextload (x + 16)))))
12890   // Where uses of the original load, i.e.:
12891   //   (v8i16 (load x))
12892   // are replaced with:
12893   //   (v8i16 (truncate
12894   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
12895   //                            (v4i32 (sextload (x + 16)))))))
12896   //
12897   // This combine is only applicable to illegal, but splittable, vectors.
12898   // All legal types, and illegal non-vector types, are handled elsewhere.
12899   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12900   //
12901   if (N0->getOpcode() != ISD::LOAD)
12902     return SDValue();
12903 
12904   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12905 
12906   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12907       !N0.hasOneUse() || !LN0->isSimple() ||
12908       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12909       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12910     return SDValue();
12911 
12912   SmallVector<SDNode *, 4> SetCCs;
12913   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12914     return SDValue();
12915 
12916   ISD::LoadExtType ExtType =
12917       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12918 
12919   // Try to split the vector types to get down to legal types.
12920   EVT SplitSrcVT = SrcVT;
12921   EVT SplitDstVT = DstVT;
12922   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12923          SplitSrcVT.getVectorNumElements() > 1) {
12924     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12925     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12926   }
12927 
12928   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12929     return SDValue();
12930 
12931   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12932 
12933   SDLoc DL(N);
12934   const unsigned NumSplits =
12935       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12936   const unsigned Stride = SplitSrcVT.getStoreSize();
12937   SmallVector<SDValue, 4> Loads;
12938   SmallVector<SDValue, 4> Chains;
12939 
12940   SDValue BasePtr = LN0->getBasePtr();
12941   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12942     const unsigned Offset = Idx * Stride;
12943     const Align Align = commonAlignment(LN0->getAlign(), Offset);
12944 
12945     SDValue SplitLoad = DAG.getExtLoad(
12946         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
12947         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
12948         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12949 
12950     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12951 
12952     Loads.push_back(SplitLoad.getValue(0));
12953     Chains.push_back(SplitLoad.getValue(1));
12954   }
12955 
12956   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12957   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12958 
12959   // Simplify TF.
12960   AddToWorklist(NewChain.getNode());
12961 
12962   CombineTo(N, NewValue);
12963 
12964   // Replace uses of the original load (before extension)
12965   // with a truncate of the concatenated sextloaded vectors.
12966   SDValue Trunc =
12967       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12968   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12969   CombineTo(N0.getNode(), Trunc, NewChain);
12970   return SDValue(N, 0); // Return N so it doesn't get rechecked!
12971 }
12972 
12973 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12974 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12975 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12976   assert(N->getOpcode() == ISD::ZERO_EXTEND);
12977   EVT VT = N->getValueType(0);
12978   EVT OrigVT = N->getOperand(0).getValueType();
12979   if (TLI.isZExtFree(OrigVT, VT))
12980     return SDValue();
12981 
12982   // and/or/xor
12983   SDValue N0 = N->getOperand(0);
12984   if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12985       N0.getOperand(1).getOpcode() != ISD::Constant ||
12986       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12987     return SDValue();
12988 
12989   // shl/shr
12990   SDValue N1 = N0->getOperand(0);
12991   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12992       N1.getOperand(1).getOpcode() != ISD::Constant ||
12993       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12994     return SDValue();
12995 
12996   // load
12997   if (!isa<LoadSDNode>(N1.getOperand(0)))
12998     return SDValue();
12999   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13000   EVT MemVT = Load->getMemoryVT();
13001   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13002       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
13003     return SDValue();
13004 
  // If the shift op is SHL, the logic op must be AND, otherwise the result
13007   // will be wrong.
13008   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13009     return SDValue();
13010 
13011   if (!N0.hasOneUse() || !N1.hasOneUse())
13012     return SDValue();
13013 
13014   SmallVector<SDNode*, 4> SetCCs;
13015   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13016                                ISD::ZERO_EXTEND, SetCCs, TLI))
13017     return SDValue();
13018 
13019   // Actually do the transformation.
13020   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13021                                    Load->getChain(), Load->getBasePtr(),
13022                                    Load->getMemoryVT(), Load->getMemOperand());
13023 
13024   SDLoc DL1(N1);
13025   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13026                               N1.getOperand(1));
13027 
13028   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13029   SDLoc DL0(N0);
13030   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13031                             DAG.getConstant(Mask, DL0, VT));
13032 
13033   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13034   CombineTo(N, And);
13035   if (SDValue(Load, 0).hasOneUse()) {
13036     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13037   } else {
13038     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13039                                 Load->getValueType(0), ExtLoad);
13040     CombineTo(Load, Trunc, ExtLoad.getValue(1));
13041   }
13042 
13043   // N0 is dead at this point.
13044   recursivelyDeleteUnusedNodes(N0.getNode());
13045 
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
13047 }
13048 
13049 /// If we're narrowing or widening the result of a vector select and the final
13050 /// size is the same size as a setcc (compare) feeding the select, then try to
13051 /// apply the cast operation to the select's operands because matching vector
13052 /// sizes for a select condition and other operands should be more efficient.
13053 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13054   unsigned CastOpcode = Cast->getOpcode();
13055   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13056           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13057           CastOpcode == ISD::FP_ROUND) &&
13058          "Unexpected opcode for vector select narrowing/widening");
13059 
13060   // We only do this transform before legal ops because the pattern may be
13061   // obfuscated by target-specific operations after legalization. Do not create
13062   // an illegal select op, however, because that may be difficult to lower.
13063   EVT VT = Cast->getValueType(0);
13064   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13065     return SDValue();
13066 
13067   SDValue VSel = Cast->getOperand(0);
13068   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13069       VSel.getOperand(0).getOpcode() != ISD::SETCC)
13070     return SDValue();
13071 
13072   // Does the setcc have the same vector size as the casted select?
13073   SDValue SetCC = VSel.getOperand(0);
13074   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13075   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13076     return SDValue();
13077 
13078   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13079   SDValue A = VSel.getOperand(1);
13080   SDValue B = VSel.getOperand(2);
13081   SDValue CastA, CastB;
13082   SDLoc DL(Cast);
13083   if (CastOpcode == ISD::FP_ROUND) {
13084     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13085     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13086     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13087   } else {
13088     CastA = DAG.getNode(CastOpcode, DL, VT, A);
13089     CastB = DAG.getNode(CastOpcode, DL, VT, B);
13090   }
13091   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13092 }
13093 
13094 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13095 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13096 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13097                                      const TargetLowering &TLI, EVT VT,
13098                                      bool LegalOperations, SDNode *N,
13099                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
13100   SDNode *N0Node = N0.getNode();
13101   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13102                                                    : ISD::isZEXTLoad(N0Node);
13103   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13104       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13105     return SDValue();
13106 
13107   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13108   EVT MemVT = LN0->getMemoryVT();
  if ((LegalOperations || !LN0->isSimple() || VT.isVector()) &&
13111       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13112     return SDValue();
13113 
13114   SDValue ExtLoad =
13115       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13116                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13117   Combiner.CombineTo(N, ExtLoad);
13118   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13119   if (LN0->use_empty())
13120     Combiner.recursivelyDeleteUnusedNodes(LN0);
13121   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13122 }
13123 
13124 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13125 // Only generate vector extloads when 1) they're legal, and 2) they are
13126 // deemed desirable by the target.
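// For example (illustrative): (sext (load i16 p) to i32) becomes a direct
// i16 -> i32 sextload from p when that extload is legal or deemed desirable.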
13127 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13128                                   const TargetLowering &TLI, EVT VT,
13129                                   bool LegalOperations, SDNode *N, SDValue N0,
13130                                   ISD::LoadExtType ExtLoadType,
13131                                   ISD::NodeType ExtOpc) {
  // TODO: isFixedLengthVector() should be removed, with any negative effects
  // on code generation being handled through the target's implementation of
  // isVectorLoadExtDesirable().
13135   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
13136       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
13137       ((LegalOperations || VT.isFixedLengthVector() ||
13138         !cast<LoadSDNode>(N0)->isSimple()) &&
13139        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
13140     return {};
13141 
13142   bool DoXform = true;
13143   SmallVector<SDNode *, 4> SetCCs;
13144   if (!N0.hasOneUse())
13145     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13146   if (VT.isVector())
13147     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13148   if (!DoXform)
13149     return {};
13150 
13151   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13152   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13153                                    LN0->getBasePtr(), N0.getValueType(),
13154                                    LN0->getMemOperand());
13155   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it with N via CombineTo.
13157   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13158   Combiner.CombineTo(N, ExtLoad);
13159   if (NoReplaceTrunc) {
13160     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13161     Combiner.recursivelyDeleteUnusedNodes(LN0);
13162   } else {
13163     SDValue Trunc =
13164         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13165     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13166   }
13167   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13168 }
13169 
13170 static SDValue
13171 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13172                          bool LegalOperations, SDNode *N, SDValue N0,
13173                          ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13174   if (!N0.hasOneUse())
13175     return SDValue();
13176 
13177   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13178   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13179     return SDValue();
13180 
13181   if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) &&
13182       !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13183     return SDValue();
13184 
13185   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13186     return SDValue();
13187 
13188   SDLoc dl(Ld);
13189   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13190   SDValue NewLoad = DAG.getMaskedLoad(
13191       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13192       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13193       ExtLoadType, Ld->isExpandingLoad());
13194   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13195   return NewLoad;
13196 }
13197 
13198 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13199                                        bool LegalOperations) {
13200   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13201           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13202 
13203   SDValue SetCC = N->getOperand(0);
13204   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13205       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13206     return SDValue();
13207 
13208   SDValue X = SetCC.getOperand(0);
13209   SDValue Ones = SetCC.getOperand(1);
13210   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13211   EVT VT = N->getValueType(0);
13212   EVT XVT = X.getValueType();
13213   // setge X, C is canonicalized to setgt, so we do not need to match that
13214   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13215   // not require the 'not' op.
13216   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13217     // Invert and smear/shift the sign bit:
13218     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13219     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
13220     SDLoc DL(N);
13221     unsigned ShCt = VT.getSizeInBits() - 1;
13222     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13223     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13224       SDValue NotX = DAG.getNOT(DL, X, VT);
13225       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13226       auto ShiftOpcode =
13227         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13228       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13229     }
13230   }
13231   return SDValue();
13232 }
13233 
13234 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13235   SDValue N0 = N->getOperand(0);
13236   if (N0.getOpcode() != ISD::SETCC)
13237     return SDValue();
13238 
13239   SDValue N00 = N0.getOperand(0);
13240   SDValue N01 = N0.getOperand(1);
13241   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13242   EVT VT = N->getValueType(0);
13243   EVT N00VT = N00.getValueType();
13244   SDLoc DL(N);
13245 
13246   // Propagate fast-math-flags.
13247   SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13248 
13249   // On some architectures (such as SSE/NEON/etc) the SETCC result type is
13250   // the same size as the compared operands. Try to optimize sext(setcc())
13251   // if this is the case.
13252   if (VT.isVector() && !LegalOperations &&
13253       TLI.getBooleanContents(N00VT) ==
13254           TargetLowering::ZeroOrNegativeOneBooleanContent) {
13255     EVT SVT = getSetCCResultType(N00VT);
13256 
13257     // If we already have the desired type, don't change it.
13258     if (SVT != N0.getValueType()) {
      // We know that the number of elements of the result is the same as the
      // number of elements of the compare (and the number of elements of the
      // compare result, for that matter). Check that they are the same size;
      // if so, the element size of the sext'd result matches the element
      // size of the compare operands.
13264       if (VT.getSizeInBits() == SVT.getSizeInBits())
13265         return DAG.getSetCC(DL, VT, N00, N01, CC);
13266 
13267       // If the desired elements are smaller or larger than the source
13268       // elements, we can use a matching integer vector type and then
13269       // truncate/sign extend.
13270       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13271       if (SVT == MatchingVecType) {
13272         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13273         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13274       }
13275     }
13276 
13277     // Try to eliminate the sext of a setcc by zexting the compare operands.
13278     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13279         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13280       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13281       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13282       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13283 
13284       // We have an unsupported narrow vector compare op that would be legal
13285       // if extended to the destination type. See if the compare operands
13286       // can be freely extended to the destination type.
13287       auto IsFreeToExtend = [&](SDValue V) {
13288         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13289           return true;
13290         // Match a simple, non-extended load that can be converted to a
13291         // legal {z/s}ext-load.
13292         // TODO: Allow widening of an existing {z/s}ext-load?
13293         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13294               ISD::isUNINDEXEDLoad(V.getNode()) &&
13295               cast<LoadSDNode>(V)->isSimple() &&
13296               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13297           return false;
13298 
13299         // Non-chain users of this value must either be the setcc in this
13300         // sequence or extends that can be folded into the new {z/s}ext-load.
13301         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13302              UI != UE; ++UI) {
13303           // Skip uses of the chain and the setcc.
13304           SDNode *User = *UI;
13305           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13306             continue;
13307           // Extra users must have exactly the same cast we are about to create.
13308           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13309           //       is enhanced similarly.
13310           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13311             return false;
13312         }
13313         return true;
13314       };
13315 
13316       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13317         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13318         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13319         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13320       }
13321     }
13322   }
13323 
13324   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13325   // Here, T can be 1 or -1, depending on the type of the setcc and
13326   // getBooleanContents().
13327   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13328 
13329   // To determine the "true" side of the select, we need to know the high bit
13330   // of the value returned by the setcc if it evaluates to true.
13331   // If the type of the setcc is i1, then the true case of the select is just
13332   // sext(i1 1), that is, -1.
13333   // If the type of the setcc is larger (say, i8) then the value of the high
13334   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13335   // of the appropriate width.
13336   SDValue ExtTrueVal = (SetCCWidth == 1)
13337                            ? DAG.getAllOnesConstant(DL, VT)
13338                            : DAG.getBoolConstant(true, DL, VT, N00VT);
13339   SDValue Zero = DAG.getConstant(0, DL, VT);
13340   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13341     return SCC;
13342 
13343   if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13344     EVT SetCCVT = getSetCCResultType(N00VT);
13345     // Don't do this transform for i1 because there's a select transform
13346     // that would reverse it.
13347     // TODO: We should not do this transform at all without a target hook
13348     // because a sext is likely cheaper than a select?
13349     if (SetCCVT.getScalarSizeInBits() != 1 &&
13350         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13351       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13352       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13353     }
13354   }
13355 
13356   return SDValue();
13357 }
13358 
13359 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13360   SDValue N0 = N->getOperand(0);
13361   EVT VT = N->getValueType(0);
13362   SDLoc DL(N);
13363 
13364   if (VT.isVector())
13365     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13366       return FoldedVOp;
13367 
  // sext(undef) = 0 because the top bits will all be the same.
13369   if (N0.isUndef())
13370     return DAG.getConstant(0, DL, VT);
13371 
13372   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13373     return Res;
13374 
13375   // fold (sext (sext x)) -> (sext x)
13376   // fold (sext (aext x)) -> (sext x)
13377   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13378     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13379 
  // fold (sext (aext_vector_inreg x)) -> (sext_vector_inreg x)
  // fold (sext (sext_vector_inreg x)) -> (sext_vector_inreg x)
  if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
      N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
    return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT,
                       N0.getOperand(0));
13386 
13387   // fold (sext (sext_inreg x)) -> (sext (trunc x))
13388   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13389     SDValue N00 = N0.getOperand(0);
13390     EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13391     if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13392         (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13393       SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13394       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13395     }
13396   }
13397 
13398   if (N0.getOpcode() == ISD::TRUNCATE) {
13399     // fold (sext (truncate (load x))) -> (sext (smaller load x))
13400     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13401     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13402       SDNode *oye = N0.getOperand(0).getNode();
13403       if (NarrowLoad.getNode() != N0.getNode()) {
13404         CombineTo(N0.getNode(), NarrowLoad);
13405         // CombineTo deleted the truncate, if needed, but not what's under it.
13406         AddToWorklist(oye);
13407       }
13408       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13409     }
13410 
13411     // See if the value being truncated is already sign extended.  If so, just
13412     // eliminate the trunc/sext pair.
13413     SDValue Op = N0.getOperand(0);
13414     unsigned OpBits   = Op.getScalarValueSizeInBits();
13415     unsigned MidBits  = N0.getScalarValueSizeInBits();
13416     unsigned DestBits = VT.getScalarSizeInBits();
13417     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13418 
13419     if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, Op is already the sign-extended result; use it directly.
13422       if (NumSignBits > DestBits-MidBits)
13423         return Op;
13424     } else if (OpBits < DestBits) {
13425       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
13426       // bits, just sext from i32.
13427       if (NumSignBits > OpBits-MidBits)
13428         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13429     } else {
13430       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
13431       // bits, just truncate to i32.
13432       if (NumSignBits > OpBits-MidBits)
13433         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13434     }
13435 
13436     // fold (sext (truncate x)) -> (sextinreg x).
13437     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13438                                                  N0.getValueType())) {
13439       if (OpBits < DestBits)
13440         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13441       else if (OpBits > DestBits)
13442         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13443       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13444                          DAG.getValueType(N0.getValueType()));
13445     }
13446   }
13447 
13448   // Try to simplify (sext (load x)).
13449   if (SDValue foldedExt =
13450           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13451                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13452     return foldedExt;
13453 
13454   if (SDValue foldedExt =
13455           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13456                                    ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13457     return foldedExt;
13458 
13459   // fold (sext (load x)) to multiple smaller sextloads.
13460   // Only on illegal but splittable vectors.
13461   if (SDValue ExtLoad = CombineExtLoad(N))
13462     return ExtLoad;
13463 
13464   // Try to simplify (sext (sextload x)).
13465   if (SDValue foldedExt = tryToFoldExtOfExtload(
13466           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13467     return foldedExt;
13468 
13469   // fold (sext (and/or/xor (load x), cst)) ->
13470   //      (and/or/xor (sextload x), (sext cst))
13471   if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13472       isa<LoadSDNode>(N0.getOperand(0)) &&
13473       N0.getOperand(1).getOpcode() == ISD::Constant &&
13474       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13475     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13476     EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13479       SmallVector<SDNode*, 4> SetCCs;
13480       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13481                                              ISD::SIGN_EXTEND, SetCCs, TLI);
13482       if (DoXform) {
13483         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13484                                          LN00->getChain(), LN00->getBasePtr(),
13485                                          LN00->getMemoryVT(),
13486                                          LN00->getMemOperand());
13487         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13488         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13489                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
13490         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13491         bool NoReplaceTruncAnd = !N0.hasOneUse();
13492         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13493         CombineTo(N, And);
13494         // If N0 has multiple uses, change other uses as well.
13495         if (NoReplaceTruncAnd) {
13496           SDValue TruncAnd =
13497               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13498           CombineTo(N0.getNode(), TruncAnd);
13499         }
13500         if (NoReplaceTrunc) {
13501           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13502         } else {
13503           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13504                                       LN00->getValueType(0), ExtLoad);
13505           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13506         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
13508       }
13509     }
13510   }
13511 
13512   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13513     return V;
13514 
13515   if (SDValue V = foldSextSetcc(N))
13516     return V;
13517 
13518   // fold (sext x) -> (zext x) if the sign bit is known zero.
13519   if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13520       (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13521       DAG.SignBitIsZero(N0)) {
13522     SDNodeFlags Flags;
13523     Flags.setNonNeg(true);
13524     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13525   }
13526 
13527   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13528     return NewVSel;
13529 
13530   // Eliminate this sign extend by doing a negation in the destination type:
13531   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13532   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13533       isNullOrNullSplat(N0.getOperand(0)) &&
13534       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
13535       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
13536     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13537     return DAG.getNegative(Zext, DL, VT);
13538   }
13539   // Eliminate this sign extend by doing a decrement in the destination type:
13540   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13541   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13542       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
13543       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13544       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
13545     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13546     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13547   }
13548 
13549   // fold sext (not i1 X) -> add (zext i1 X), -1
13550   // TODO: This could be extended to handle bool vectors.
13551   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13552       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13553                             TLI.isOperationLegal(ISD::ADD, VT)))) {
13554     // If we can eliminate the 'not', the sext form should be better
13555     if (SDValue NewXor = visitXOR(N0.getNode())) {
13556       // Returning N0 is a form of in-visit replacement that may have
13557       // invalidated N0.
13558       if (NewXor.getNode() == N0.getNode()) {
13559         // Return SDValue here as the xor should have already been replaced in
13560         // this sext.
13561         return SDValue();
13562       }
13563 
13564       // Return a new sext with the new xor.
13565       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13566     }
13567 
13568     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13569     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13570   }
13571 
13572   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13573     return Res;
13574 
13575   return SDValue();
13576 }
13577 
13578 /// Given an extending node with a pop-count operand, if the target does not
13579 /// support a pop-count in the narrow source type but does support it in the
13580 /// destination type, widen the pop-count to the destination type.
13581 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13582   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13583           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13584 
13585   SDValue CtPop = Extend->getOperand(0);
13586   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13587     return SDValue();
13588 
13589   EVT VT = Extend->getValueType(0);
13590   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13591   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13592       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13593     return SDValue();
13594 
13595   // zext (ctpop X) --> ctpop (zext X)
13596   SDLoc DL(Extend);
13597   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13598   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13599 }
13600 
13601 // If we have (zext (abs X)) where X is a type that will be promoted by type
13602 // legalization, convert to (abs (sext X)). But don't extend past a legal type.
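// For example (illustrative): with X = -5 in i8, (zext (abs X)) and
// (abs (sext X to i32)) both produce 5, whereas zero-extending first would
// compute abs(251) = 251, which is wrong.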
13603 static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13604   assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13605 
13606   EVT VT = Extend->getValueType(0);
13607   if (VT.isVector())
13608     return SDValue();
13609 
13610   SDValue Abs = Extend->getOperand(0);
13611   if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13612     return SDValue();
13613 
13614   EVT AbsVT = Abs.getValueType();
13615   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13616   if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13617       TargetLowering::TypePromoteInteger)
13618     return SDValue();
13619 
13620   EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13621 
13622   SDValue SExt =
13623       DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13624   SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13625   return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13626 }
13627 
13628 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13629   SDValue N0 = N->getOperand(0);
13630   EVT VT = N->getValueType(0);
13631   SDLoc DL(N);
13632 
13633   if (VT.isVector())
13634     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13635       return FoldedVOp;
13636 
13637   // zext(undef) = 0
13638   if (N0.isUndef())
13639     return DAG.getConstant(0, DL, VT);
13640 
13641   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13642     return Res;
13643 
13644   // fold (zext (zext x)) -> (zext x)
13645   // fold (zext (aext x)) -> (zext x)
13646   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13647     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13648 
  // fold (zext (aext_vector_inreg x)) -> (zext_vector_inreg x)
  // fold (zext (zext_vector_inreg x)) -> (zext_vector_inreg x)
  if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
      N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
    return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT,
                       N0.getOperand(0));
13655 
13656   // fold (zext (truncate x)) -> (zext x) or
13657   //      (zext (truncate x)) -> (truncate x)
13658   // This is valid when the truncated bits of x are already zero.
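  // For example (illustrative): if x is an i32 whose bits 8-15 are known to
  // be zero, then (zext (trunc x to i8) to i16) is simply (trunc x to i16).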
13659   SDValue Op;
13660   KnownBits Known;
13661   if (isTruncateOf(DAG, N0, Op, Known)) {
13662     APInt TruncatedBits =
13663       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13664       APInt(Op.getScalarValueSizeInBits(), 0) :
13665       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13666                         N0.getScalarValueSizeInBits(),
13667                         std::min(Op.getScalarValueSizeInBits(),
13668                                  VT.getScalarSizeInBits()));
13669     if (TruncatedBits.isSubsetOf(Known.Zero)) {
13670       SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
      DAG.salvageDebugInfo(*N0.getNode());
      return ZExtOrTrunc;
13674     }
13675   }
13676 
13677   // fold (zext (truncate x)) -> (and x, mask)
13678   if (N0.getOpcode() == ISD::TRUNCATE) {
13679     // fold (zext (truncate (load x))) -> (zext (smaller load x))
13680     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13681     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13682       SDNode *oye = N0.getOperand(0).getNode();
13683       if (NarrowLoad.getNode() != N0.getNode()) {
13684         CombineTo(N0.getNode(), NarrowLoad);
13685         // CombineTo deleted the truncate, if needed, but not what's under it.
13686         AddToWorklist(oye);
13687       }
13688       return SDValue(N, 0); // Return N so it doesn't get rechecked!
13689     }
13690 
13691     EVT SrcVT = N0.getOperand(0).getValueType();
13692     EVT MinVT = N0.getValueType();
13693 
    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
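    // For example (illustrative): if a zext from v16i8 to v16i32 is lowered
    // as several sub-vector extends, masking once in v16i8 takes a single
    // narrow AND, whereas masking after the extension may take one wide AND
    // per sub-vector.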
13696     if (SrcVT.bitsLT(VT) && VT.isVector()) {
13697       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13698                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13699         SDValue Op = N0.getOperand(0);
13700         Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13701         AddToWorklist(Op.getNode());
13702         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13703         // Transfer the debug info; the new node is equivalent to N0.
13704         DAG.transferDbgValues(N0, ZExtOrTrunc);
13705         return ZExtOrTrunc;
13706       }
13707     }
13708 
13709     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13710       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13711       AddToWorklist(Op.getNode());
13712       SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13713       // We may safely transfer the debug info describing the truncate node over
13714       // to the equivalent and operation.
13715       DAG.transferDbgValues(N0, And);
13716       return And;
13717     }
13718   }
13719 
13720   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13721   // if either of the casts is not free.
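  // For example (illustrative): (zext (and (trunc i64 x to i32), 255) to i64)
  // becomes (and x, 255), saving both casts.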
13722   if (N0.getOpcode() == ISD::AND &&
13723       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13724       N0.getOperand(1).getOpcode() == ISD::Constant &&
13725       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13726        !TLI.isZExtFree(N0.getValueType(), VT))) {
13727     SDValue X = N0.getOperand(0).getOperand(0);
13728     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13729     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13730     return DAG.getNode(ISD::AND, DL, VT,
13731                        X, DAG.getConstant(Mask, DL, VT));
13732   }
13733 
13734   // Try to simplify (zext (load x)).
13735   if (SDValue foldedExt =
13736           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13737                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13738     return foldedExt;
13739 
13740   if (SDValue foldedExt =
13741           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13742                                    ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13743     return foldedExt;
13744 
13745   // fold (zext (load x)) to multiple smaller zextloads.
13746   // Only on illegal but splittable vectors.
13747   if (SDValue ExtLoad = CombineExtLoad(N))
13748     return ExtLoad;
13749 
13750   // fold (zext (and/or/xor (load x), cst)) ->
13751   //      (and/or/xor (zextload x), (zext cst))
13752   // Unless (and (load x) cst) will match as a zextload already and has
13753   // additional users, or the zext is already free.
13754   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13755       isa<LoadSDNode>(N0.getOperand(0)) &&
13756       N0.getOperand(1).getOpcode() == ISD::Constant &&
13757       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13758     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13759     EVT MemVT = LN00->getMemoryVT();
13760     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13761         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13762       bool DoXform = true;
13763       SmallVector<SDNode*, 4> SetCCs;
13764       if (!N0.hasOneUse()) {
13765         if (N0.getOpcode() == ISD::AND) {
13766           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13767           EVT LoadResultTy = AndC->getValueType(0);
13768           EVT ExtVT;
13769           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13770             DoXform = false;
13771         }
13772       }
13773       if (DoXform)
13774         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13775                                           ISD::ZERO_EXTEND, SetCCs, TLI);
13776       if (DoXform) {
13777         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13778                                          LN00->getChain(), LN00->getBasePtr(),
13779                                          LN00->getMemoryVT(),
13780                                          LN00->getMemOperand());
13781         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13782         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13783                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
13784         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13785         bool NoReplaceTruncAnd = !N0.hasOneUse();
13786         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13787         CombineTo(N, And);
13788         // If N0 has multiple uses, change other uses as well.
13789         if (NoReplaceTruncAnd) {
13790           SDValue TruncAnd =
13791               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13792           CombineTo(N0.getNode(), TruncAnd);
13793         }
13794         if (NoReplaceTrunc) {
13795           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13796         } else {
13797           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13798                                       LN00->getValueType(0), ExtLoad);
13799           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13800         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
13802       }
13803     }
13804   }
13805 
13806   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13807   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13808   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13809     return ZExtLoad;
13810 
13811   // Try to simplify (zext (zextload x)).
13812   if (SDValue foldedExt = tryToFoldExtOfExtload(
13813           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13814     return foldedExt;
13815 
13816   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13817     return V;
13818 
13819   if (N0.getOpcode() == ISD::SETCC) {
13820     // Propagate fast-math-flags.
13821     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13822 
13823     // Only do this before legalize for now.
13824     if (!LegalOperations && VT.isVector() &&
13825         N0.getValueType().getVectorElementType() == MVT::i1) {
13826       EVT N00VT = N0.getOperand(0).getValueType();
13827       if (getSetCCResultType(N00VT) == N0.getValueType())
13828         return SDValue();
13829 
      // We know that the number of elements of the result is the same as the
      // number of elements of the compare (and the number of elements of the
      // compare result, for that matter). Check that they are the same size;
      // if so, the element size of the zext'd result matches the element
      // size of the compare operands.
13835       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13836         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13837         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13838                                      N0.getOperand(1), N0.getOperand(2));
13839         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13840       }
13841 
13842       // If the desired elements are smaller or larger than the source
13843       // elements we can use a matching integer vector type and then
13844       // truncate/any extend followed by zext_in_reg.
13845       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13846       SDValue VsetCC =
13847           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13848                       N0.getOperand(1), N0.getOperand(2));
13849       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13850                                     N0.getValueType());
13851     }
13852 
    // zext(setcc x,y,cc) -> zext(select_cc x, y, true, false, cc)
13854     EVT N0VT = N0.getValueType();
13855     EVT N00VT = N0.getOperand(0).getValueType();
13856     if (SDValue SCC = SimplifySelectCC(
13857             DL, N0.getOperand(0), N0.getOperand(1),
13858             DAG.getBoolConstant(true, DL, N0VT, N00VT),
13859             DAG.getBoolConstant(false, DL, N0VT, N00VT),
13860             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13861       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13862   }
13863 
13864   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
13865   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13866       !TLI.isZExtFree(N0, VT)) {
13867     SDValue ShVal = N0.getOperand(0);
13868     SDValue ShAmt = N0.getOperand(1);
13869     if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13870       if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13871         if (N0.getOpcode() == ISD::SHL) {
13872           // If the original shl may be shifting out bits, do not perform this
13873           // transformation.
13874           // TODO: Add MaskedValueIsZero check.
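          // For example (illustrative): if ShVal is (zext i8 X to i16), its
          // top 8 bits are known zero, so a shl by at most 8 cannot shift
          // set bits out of the i16, while a larger shift amount could.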
13875           unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13876                                    ShVal.getOperand(0).getValueSizeInBits();
13877           if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
13878             return SDValue();
13879         }
13880 
13881         // Ensure that the shift amount is wide enough for the shifted value.
13882         if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13883           ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13884 
13885         return DAG.getNode(N0.getOpcode(), DL, VT,
13886                            DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13887       }
13888     }
13889   }
13890 
13891   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13892     return NewVSel;
13893 
13894   if (SDValue NewCtPop = widenCtPop(N, DAG))
13895     return NewCtPop;
13896 
13897   if (SDValue V = widenAbs(N, DAG))
13898     return V;
13899 
13900   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13901     return Res;
13902 
13903   return SDValue();
13904 }
13905 
13906 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13907   SDValue N0 = N->getOperand(0);
13908   EVT VT = N->getValueType(0);
13909 
13910   // aext(undef) = undef
13911   if (N0.isUndef())
13912     return DAG.getUNDEF(VT);
13913 
13914   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13915     return Res;
13916 
13917   // fold (aext (aext x)) -> (aext x)
13918   // fold (aext (zext x)) -> (zext x)
13919   // fold (aext (sext x)) -> (sext x)
13920   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
13921       N0.getOpcode() == ISD::ZERO_EXTEND ||
13922       N0.getOpcode() == ISD::SIGN_EXTEND)
13923     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13924 
  // fold (aext (aext_vector_inreg x)) -> (aext_vector_inreg x)
  // fold (aext (zext_vector_inreg x)) -> (zext_vector_inreg x)
  // fold (aext (sext_vector_inreg x)) -> (sext_vector_inreg x)
13928   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13929       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13930       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13931     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13932 
13933   // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
13935   if (N0.getOpcode() == ISD::TRUNCATE) {
13936     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13937       SDNode *oye = N0.getOperand(0).getNode();
13938       if (NarrowLoad.getNode() != N0.getNode()) {
13939         CombineTo(N0.getNode(), NarrowLoad);
13940         // CombineTo deleted the truncate, if needed, but not what's under it.
13941         AddToWorklist(oye);
13942       }
13943       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13944     }
13945   }
13946 
13947   // fold (aext (truncate x))
13948   if (N0.getOpcode() == ISD::TRUNCATE)
13949     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
13950 
13951   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
13952   // if the trunc is not free.
13953   if (N0.getOpcode() == ISD::AND &&
13954       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13955       N0.getOperand(1).getOpcode() == ISD::Constant &&
13956       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
13957     SDLoc DL(N);
13958     SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13959     SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
13960     assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
13961     return DAG.getNode(ISD::AND, DL, VT, X, Y);
13962   }
13963 
13964   // fold (aext (load x)) -> (aext (truncate (extload x)))
13965   // None of the supported targets knows how to perform load and any_ext
13966   // on vectors in one instruction, so attempt to fold to zext instead.
13967   if (VT.isVector()) {
13968     // Try to simplify (zext (load x)).
13969     if (SDValue foldedExt =
13970             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13971                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13972       return foldedExt;
13973   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
13974              ISD::isUNINDEXEDLoad(N0.getNode()) &&
13975              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13976     bool DoXform = true;
13977     SmallVector<SDNode *, 4> SetCCs;
13978     if (!N0.hasOneUse())
13979       DoXform =
13980           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
13981     if (DoXform) {
13982       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13983       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13984                                        LN0->getChain(), LN0->getBasePtr(),
13985                                        N0.getValueType(), LN0->getMemOperand());
13986       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
      // If the load value is used only by N, replace it with N via CombineTo.
13988       bool NoReplaceTrunc = N0.hasOneUse();
13989       CombineTo(N, ExtLoad);
13990       if (NoReplaceTrunc) {
13991         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13992         recursivelyDeleteUnusedNodes(LN0);
13993       } else {
13994         SDValue Trunc =
13995             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13996         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13997       }
13998       return SDValue(N, 0); // Return N so it doesn't get rechecked!
13999     }
14000   }
14001 
14002   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14003   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14004   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
14005   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14006       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14007     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14008     ISD::LoadExtType ExtType = LN0->getExtensionType();
14009     EVT MemVT = LN0->getMemoryVT();
14010     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14011       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
14012                                        VT, LN0->getChain(), LN0->getBasePtr(),
14013                                        MemVT, LN0->getMemOperand());
14014       CombineTo(N, ExtLoad);
14015       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14016       recursivelyDeleteUnusedNodes(LN0);
14017       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14018     }
14019   }
14020 
14021   if (N0.getOpcode() == ISD::SETCC) {
14022     // Propagate fast-math-flags.
14023     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14024 
14025     // For vectors:
14026     // aext(setcc) -> vsetcc
14027     // aext(setcc) -> truncate(vsetcc)
14028     // aext(setcc) -> aext(vsetcc)
14029     // Only do this before legalize for now.
14030     if (VT.isVector() && !LegalOperations) {
14031       EVT N00VT = N0.getOperand(0).getValueType();
14032       if (getSetCCResultType(N00VT) == N0.getValueType())
14033         return SDValue();
14034 
      // We know that the number of elements of the result is the same as the
      // number of elements of the compare (and the number of elements of the
      // compare result, for that matter). Check that they are the same size;
      // if so, the element size of the aext'd result matches the element
      // size of the compare operands.
14040       if (VT.getSizeInBits() == N00VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
14044 
14045       // If the desired elements are smaller or larger than the source
14046       // elements we can use a matching integer vector type and then
14047       // truncate/any extend
14048       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
      SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
14053       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
14054     }
14055 
14056     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14057     SDLoc DL(N);
14058     if (SDValue SCC = SimplifySelectCC(
14059             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14060             DAG.getConstant(0, DL, VT),
14061             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14062       return SCC;
14063   }
14064 
14065   if (SDValue NewCtPop = widenCtPop(N, DAG))
14066     return NewCtPop;
14067 
14068   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14069     return Res;
14070 
14071   return SDValue();
14072 }
14073 
14074 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14075   unsigned Opcode = N->getOpcode();
14076   SDValue N0 = N->getOperand(0);
14077   SDValue N1 = N->getOperand(1);
14078   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14079 
14080   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14081   if (N0.getOpcode() == Opcode &&
14082       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14083     return N0;
14084 
14085   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14086       N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting the smaller of the two asserted types on the larger source
    // value. This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14092     SDLoc DL(N);
14093     SDValue BigA = N0.getOperand(0);
14094     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14095     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14096     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14097     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14098                                     BigA.getOperand(0), MinAssertVTVal);
14099     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14100   }
14101 
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
14105   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14106       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14107       Opcode == ISD::AssertZext) {
14108     SDValue BigA = N0.getOperand(0);
14109     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14110     if (AssertVT.bitsLT(BigA_AssertVT)) {
14111       SDLoc DL(N);
14112       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14113                                       BigA.getOperand(0), N1);
14114       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14115     }
14116   }
14117 
14118   return SDValue();
14119 }
14120 
14121 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14122   SDLoc DL(N);
14123 
14124   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14125   SDValue N0 = N->getOperand(0);
14126 
14127   // Fold (assertalign (assertalign x, AL0), AL1) ->
14128   // (assertalign x, max(AL0, AL1))
14129   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14130     return DAG.getAssertAlign(DL, N0.getOperand(0),
14131                               std::max(AL, AAN->getAlign()));
14132 
  // In rare cases, there are trivial arithmetic ops in source operands. Sink
  // this assert down to the source operands so that those arithmetic ops are
  // exposed to DAG combining.
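  // For example (illustrative): in (assertalign (add x, 32), align 16) the
  // constant 32 is already 16-byte aligned, so the alignment claim can be
  // pushed onto x, giving (add (assertalign x, align 16), 32).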
14136   switch (N0.getOpcode()) {
14137   default:
14138     break;
14139   case ISD::ADD:
14140   case ISD::SUB: {
14141     unsigned AlignShift = Log2(AL);
14142     SDValue LHS = N0.getOperand(0);
14143     SDValue RHS = N0.getOperand(1);
14144     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14145     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14146     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14147       if (LHSAlignShift < AlignShift)
14148         LHS = DAG.getAssertAlign(DL, LHS, AL);
14149       if (RHSAlignShift < AlignShift)
14150         RHS = DAG.getAssertAlign(DL, RHS, AL);
14151       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14152     }
14153     break;
14154   }
14155   }
14156 
14157   return SDValue();
14158 }
14159 
14160 /// If the result of a load is shifted/masked/truncated to an effectively
14161 /// narrower type, try to transform the load to a narrower type and/or
14162 /// use an extending load.
14163 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14164   unsigned Opc = N->getOpcode();
14165 
14166   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14167   SDValue N0 = N->getOperand(0);
14168   EVT VT = N->getValueType(0);
14169   EVT ExtVT = VT;
14170 
14171   // This transformation isn't valid for vector loads.
14172   if (VT.isVector())
14173     return SDValue();
14174 
  // The ShAmt variable is used to indicate that we've consumed a right
  // shift, i.e. we want to narrow the width of the load by skipping the
  // ShAmt least significant bits.
14178   unsigned ShAmt = 0;
14179   // A special case is when the least significant bits from the load are masked
14180   // away, but using an AND rather than a right shift. HasShiftedOffset is used
14181   // to indicate that the narrowed load should be left-shifted ShAmt bits to get
14182   // the result.
14183   bool HasShiftedOffset = false;
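  // For example (illustrative, little-endian): (srl (load i32 p), 16) uses
  // only the upper half of the loaded value, so it can be narrowed to a
  // zextload of the i16 located two bytes past p (ShAmt = 16).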
14184   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14185   // extended to VT.
14186   if (Opc == ISD::SIGN_EXTEND_INREG) {
14187     ExtType = ISD::SEXTLOAD;
14188     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14189   } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
14190     // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
14191     // value, or it may be shifting a higher subword, half or byte into the
14192     // lowest bits.
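    // For example, on a little-endian target, (srl (i32 load p), 16) can be
    // folded into (i32 zextload i16 [p+2]), and (sra (i32 load p), 16) into
    // (i32 sextload i16 [p+2]).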
14193 
14194     // Only handle shift with constant shift amount, and the shiftee must be a
14195     // load.
14196     auto *LN = dyn_cast<LoadSDNode>(N0);
14197     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14198     if (!N1C || !LN)
14199       return SDValue();
14200     // If the shift amount is larger than the memory type then we're not
14201     // accessing any of the loaded bytes.
14202     ShAmt = N1C->getZExtValue();
14203     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14204     if (MemoryWidth <= ShAmt)
14205       return SDValue();
14206     // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14207     ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14208     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    // If the original load is a SEXTLOAD then we can't simply replace it by a
    // ZEXTLOAD (we could potentially replace it with a narrower SEXTLOAD
    // followed by a ZEXT, but that is not handled at the moment). Similarly if
    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14213     if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14214          LN->getExtensionType() == ISD::ZEXTLOAD) &&
14215         LN->getExtensionType() != ExtType)
14216       return SDValue();
14217   } else if (Opc == ISD::AND) {
14218     // An AND with a constant mask is the same as a truncate + zero-extend.
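    // For example, (and (i32 load p), 255) is equivalent to
    // (i32 zextload i8 [p]) on a little-endian target.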
14219     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14220     if (!AndC)
14221       return SDValue();
14222 
14223     const APInt &Mask = AndC->getAPIntValue();
14224     unsigned ActiveBits = 0;
14225     if (Mask.isMask()) {
14226       ActiveBits = Mask.countr_one();
14227     } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14228       HasShiftedOffset = true;
14229     } else {
14230       return SDValue();
14231     }
14232 
14233     ExtType = ISD::ZEXTLOAD;
14234     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14235   }
14236 
  // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
  // a right shift. Here we redo some of those checks, to possibly adjust ExtVT
  // even further based on a masking AND. We could also end up here for other
  // reasons (e.g. based on Opc==TRUNCATE), which is why some checks need to be
  // repeated here as well.
14242   if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14243     SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
    // Bail out when the SRL has more than one use. This is done for historical
    // (undocumented) reasons. Maybe the intent was to guard the AND-masking
    // check below? And maybe it could be unprofitable to do the transform when
    // the SRL has multiple uses and we get here with Opc!=ISD::SRL?
    // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14249     if (!SRL.hasOneUse())
14250       return SDValue();
14251 
14252     // Only handle shift with constant shift amount, and the shiftee must be a
14253     // load.
14254     auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14255     auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14256     if (!SRL1C || !LN)
14257       return SDValue();
14258 
14259     // If the shift amount is larger than the input type then we're not
14260     // accessing any of the loaded bytes.  If the load was a zextload/extload
14261     // then the result of the shift+trunc is zero/undef (handled elsewhere).
14262     ShAmt = SRL1C->getZExtValue();
14263     uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14264     if (ShAmt >= MemoryWidth)
14265       return SDValue();
14266 
14267     // Because a SRL must be assumed to *need* to zero-extend the high bits
14268     // (as opposed to anyext the high bits), we can't combine the zextload
14269     // lowering of SRL and an sextload.
14270     if (LN->getExtensionType() == ISD::SEXTLOAD)
14271       return SDValue();
14272 
    // Avoid reading outside the memory accessed by the original load (which
    // could happen if we only adjusted the load base pointer by ShAmt).
    // Instead we try to narrow the load even further. The typical scenario
    // here is:
14276     //   (i64 (truncate (i96 (srl (load x), 64)))) ->
14277     //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14278     if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14279       // Don't replace sextload by zextload.
14280       if (ExtType == ISD::SEXTLOAD)
14281         return SDValue();
14282       // Narrow the load.
14283       ExtType = ISD::ZEXTLOAD;
14284       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14285     }
14286 
14287     // If the SRL is only used by a masking AND, we may be able to adjust
14288     // the ExtVT to make the AND redundant.
14289     SDNode *Mask = *(SRL->use_begin());
14290     if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14291         isa<ConstantSDNode>(Mask->getOperand(1))) {
14292       const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14293       if (ShiftMask.isMask()) {
14294         EVT MaskedVT =
14295             EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14296         // If the mask is smaller, recompute the type.
14297         if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14298             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14299           ExtVT = MaskedVT;
14300       }
14301     }
14302 
14303     N0 = SRL.getOperand(0);
14304   }
14305 
14306   // If the load is shifted left (and the result isn't shifted back right), we
14307   // can fold a truncate through the shift. The typical scenario is that N
14308   // points at a TRUNCATE here so the attempted fold is:
  //   (truncate (shl (load x), c)) -> (shl (narrow load x), c)
14310   // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14311   unsigned ShLeftAmt = 0;
14312   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14313       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14314     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14315       ShLeftAmt = N01->getZExtValue();
14316       N0 = N0.getOperand(0);
14317     }
14318   }
14319 
14320   // If we haven't found a load, we can't narrow it.
14321   if (!isa<LoadSDNode>(N0))
14322     return SDValue();
14323 
14324   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14325   // Reducing the width of a volatile load is illegal.  For atomics, we may be
14326   // able to reduce the width provided we never widen again. (see D66309)
14327   if (!LN0->isSimple() ||
14328       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14329     return SDValue();
14330 
14331   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14332     unsigned LVTStoreBits =
14333         LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14334     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14335     return LVTStoreBits - EVTStoreBits - ShAmt;
14336   };
14337 
14338   // We need to adjust the pointer to the load by ShAmt bits in order to load
14339   // the correct bytes.
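  // For example, on a little-endian target, narrowing (srl (load i32 p), 16)
  // to an i16 load reads from p+2, while on a big-endian target the wanted
  // bits live at the lower address, so no byte offset is needed there.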
14340   unsigned PtrAdjustmentInBits =
14341       DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14342 
14343   uint64_t PtrOff = PtrAdjustmentInBits / 8;
14344   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
14345   SDLoc DL(LN0);
14346   // The original load itself didn't wrap, so an offset within it doesn't.
14347   SDNodeFlags Flags;
14348   Flags.setNoUnsignedWrap(true);
14349   SDValue NewPtr = DAG.getMemBasePlusOffset(
14350       LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14351   AddToWorklist(NewPtr.getNode());
14352 
14353   SDValue Load;
14354   if (ExtType == ISD::NON_EXTLOAD)
14355     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14356                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14357                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14358   else
14359     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14360                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14361                           NewAlign, LN0->getMemOperand()->getFlags(),
14362                           LN0->getAAInfo());
14363 
14364   // Replace the old load's chain with the new load's chain.
14365   WorklistRemover DeadNodes(*this);
14366   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14367 
14368   // Shift the result left, if we've swallowed a left shift.
14369   SDValue Result = Load;
14370   if (ShLeftAmt != 0) {
14371     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14372     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14373       ShImmTy = VT;
14374     // If the shift amount is as large as the result size (but, presumably,
14375     // no larger than the source) then the useful bits of the result are
14376     // zero; we can't simply return the shortened shift, because the result
14377     // of that operation is undefined.
14378     if (ShLeftAmt >= VT.getScalarSizeInBits())
14379       Result = DAG.getConstant(0, DL, VT);
14380     else
14381       Result = DAG.getNode(ISD::SHL, DL, VT,
14382                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14383   }
14384 
14385   if (HasShiftedOffset) {
    // We're using a shifted mask, so the load now has an offset. This means
    // the data has been loaded into lower bits of the value than it occupied
    // originally, so we need to shl the loaded data into the correct position
    // in the register.
14390     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
14391     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14392     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14393   }
14394 
14395   // Return the new loaded value.
14396   return Result;
14397 }
14398 
14399 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14400   SDValue N0 = N->getOperand(0);
14401   SDValue N1 = N->getOperand(1);
14402   EVT VT = N->getValueType(0);
14403   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14404   unsigned VTBits = VT.getScalarSizeInBits();
14405   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14406 
  // sext_in_reg(undef) = 0 because the top bits will all be the same.
14408   if (N0.isUndef())
14409     return DAG.getConstant(0, SDLoc(N), VT);
14410 
14411   // fold (sext_in_reg c1) -> c1
14412   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14413     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14414 
14415   // If the input is already sign extended, just drop the extension.
14416   if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14417     return N0;
14418 
  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT)
14420   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14421       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14422     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14423                        N1);
14424 
14425   // fold (sext_in_reg (sext x)) -> (sext x)
14426   // fold (sext_in_reg (aext x)) -> (sext x)
14427   // if x is small enough or if we know that x has more than 1 sign bit and the
14428   // sign_extend_inreg is extending from one of them.
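  // For example, (sext_in_reg (aext i8:x to i32), i16) -> (sext i8:x to i32):
  // x has at most 8 significant bits, so the anyext's undefined high bits can
  // be assumed to already match the sign extension.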
14429   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14430     SDValue N00 = N0.getOperand(0);
14431     unsigned N00Bits = N00.getScalarValueSizeInBits();
14432     if ((N00Bits <= ExtVTBits ||
14433          DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14434         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14435       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14436   }
14437 
14438   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14439   // if x is small enough or if we know that x has more than 1 sign bit and the
14440   // sign_extend_inreg is extending from one of them.
14441   if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14442     SDValue N00 = N0.getOperand(0);
14443     unsigned N00Bits = N00.getScalarValueSizeInBits();
14444     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14445     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14446     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14447     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14448     if ((N00Bits == ExtVTBits ||
14449          (!IsZext && (N00Bits < ExtVTBits ||
14450                       DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14451         (!LegalOperations ||
14452          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14453       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14454   }
14455 
14456   // fold (sext_in_reg (zext x)) -> (sext x)
14457   // iff we are extending the source sign bit.
14458   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14459     SDValue N00 = N0.getOperand(0);
14460     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14461         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14462       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14463   }
14464 
14465   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14466   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14467     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14468 
14469   // fold operands of sext_in_reg based on knowledge that the top bits are not
14470   // demanded.
14471   if (SimplifyDemandedBits(SDValue(N, 0)))
14472     return SDValue(N, 0);
14473 
14474   // fold (sext_in_reg (load x)) -> (smaller sextload x)
14475   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14476   if (SDValue NarrowLoad = reduceLoadWidth(N))
14477     return NarrowLoad;
14478 
14479   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14480   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14481   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
14482   if (N0.getOpcode() == ISD::SRL) {
14483     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14484       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14485         // We can turn this into an SRA iff the input to the SRL is already sign
14486         // extended enough.
14487         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14488         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14489           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14490                              N0.getOperand(1));
14491       }
14492   }
14493 
14494   // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by the target, we can only do the combine
  // when the load has one use. Doing otherwise can block folding the extload
  // with other extends that the target does support.
14498   if (ISD::isEXTLoad(N0.getNode()) &&
14499       ISD::isUNINDEXEDLoad(N0.getNode()) &&
14500       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14501       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14502         N0.hasOneUse()) ||
14503        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14504     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14505     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14506                                      LN0->getChain(),
14507                                      LN0->getBasePtr(), ExtVT,
14508                                      LN0->getMemOperand());
14509     CombineTo(N, ExtLoad);
14510     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14511     AddToWorklist(ExtLoad.getNode());
14512     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14513   }
14514 
14515   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14516   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14517       N0.hasOneUse() &&
14518       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14519       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14520        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14521     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14522     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14523                                      LN0->getChain(),
14524                                      LN0->getBasePtr(), ExtVT,
14525                                      LN0->getMemOperand());
14526     CombineTo(N, ExtLoad);
14527     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14528     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14529   }
14530 
14531   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14532   // ignore it if the masked load is already sign extended
14533   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14534     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14535         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14536         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14537       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14538           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14539           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14540           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14541       CombineTo(N, ExtMaskedLoad);
14542       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14543       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14544     }
14545   }
14546 
14547   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14548   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
14549     if (SDValue(GN0, 0).hasOneUse() &&
14550         ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
14552       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
14553                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
14554 
14555       SDValue ExtLoad = DAG.getMaskedGather(
14556           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14557           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14558 
14559       CombineTo(N, ExtLoad);
14560       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14561       AddToWorklist(ExtLoad.getNode());
14562       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14563     }
14564   }
14565 
14566   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14567   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14568     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14569                                            N0.getOperand(1), false))
14570       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14571   }
14572 
14573   // Fold (iM_signext_inreg
14574   //        (extract_subvector (zext|anyext|sext iN_v to _) _)
14575   //        from iN)
14576   //      -> (extract_subvector (signext iN_v to iM))
14577   if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14578       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14579     SDValue InnerExt = N0.getOperand(0);
14580     EVT InnerExtVT = InnerExt->getValueType(0);
14581     SDValue Extendee = InnerExt->getOperand(0);
14582 
14583     if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14584         (!LegalOperations ||
14585          TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14586       SDValue SignExtExtendee =
14587           DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14588       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14589                          N0.getOperand(1));
14590     }
14591   }
14592 
14593   return SDValue();
14594 }
14595 
14596 static SDValue
14597 foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
14598                                          SelectionDAG &DAG,
14599                                          bool LegalOperations) {
14600   unsigned InregOpcode = N->getOpcode();
14601   unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14602 
14603   SDValue Src = N->getOperand(0);
14604   EVT VT = N->getValueType(0);
14605   EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14606                                Src.getValueType().getVectorElementType(),
14607                                VT.getVectorElementCount());
14608 
14609   assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14610          "Expected EXTEND_VECTOR_INREG dag node in input!");
14611 
  // Profitability check: our operand must be a one-use CONCAT_VECTORS.
  // FIXME: the one-use check may be overly restrictive.
14614   if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14615     return SDValue();
14616 
  // Profitability check: we must be extending exactly one of its operands.
  // FIXME: this is probably overly restrictive.
14619   Src = Src.getOperand(0);
14620   if (Src.getValueType() != SrcVT)
14621     return SDValue();
14622 
14623   if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14624     return SDValue();
14625 
14626   return DAG.getNode(Opcode, SDLoc(N), VT, Src);
14627 }
14628 
14629 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14630   SDValue N0 = N->getOperand(0);
14631   EVT VT = N->getValueType(0);
14632 
14633   if (N0.isUndef()) {
14634     // aext_vector_inreg(undef) = undef because the top bits are undefined.
14635     // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14636     return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14637                ? DAG.getUNDEF(VT)
14638                : DAG.getConstant(0, SDLoc(N), VT);
14639   }
14640 
14641   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
14642     return Res;
14643 
14644   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14645     return SDValue(N, 0);
14646 
14647   if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
14648                                                            LegalOperations))
14649     return R;
14650 
14651   return SDValue();
14652 }
14653 
14654 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14655   SDValue N0 = N->getOperand(0);
14656   EVT VT = N->getValueType(0);
14657   EVT SrcVT = N0.getValueType();
14658   bool isLE = DAG.getDataLayout().isLittleEndian();
14659 
14660   // trunc(undef) = undef
14661   if (N0.isUndef())
14662     return DAG.getUNDEF(VT);
14663 
14664   // fold (truncate (truncate x)) -> (truncate x)
14665   if (N0.getOpcode() == ISD::TRUNCATE)
14666     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
14667 
14668   // fold (truncate c1) -> c1
14669   if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
14670     return C;
14671 
14672   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14673   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14674       N0.getOpcode() == ISD::SIGN_EXTEND ||
14675       N0.getOpcode() == ISD::ANY_EXTEND) {
14676     // if the source is smaller than the dest, we still need an extend.
14677     if (N0.getOperand(0).getValueType().bitsLT(VT))
14678       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
14680     if (N0.getOperand(0).getValueType().bitsGT(VT))
14681       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
14682     // if the source and dest are the same type, we can drop both the extend
14683     // and the truncate.
14684     return N0.getOperand(0);
14685   }
14686 
14687   // Try to narrow a truncate-of-sext_in_reg to the destination type:
14688   // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
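  //   e.g. trunc (sign_ext_inreg i64:X, i8) to i32
  //          --> sign_ext_inreg (trunc X to i32), i8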
14689   if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14690       N0.hasOneUse()) {
14691     SDValue X = N0.getOperand(0);
14692     SDValue ExtVal = N0.getOperand(1);
14693     EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14694     if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14695       SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
14696       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
14697     }
14698   }
14699 
14700   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
14701   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14702     return SDValue();
14703 
14704   // Fold extract-and-trunc into a narrow extract. For example:
14705   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14706   //   i32 y = TRUNCATE(i64 x)
14707   //        -- becomes --
14708   //   v16i8 b = BITCAST (v2i64 val)
14709   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14710   //
14711   // Note: We only run this optimization after type legalization (which often
14712   // creates this pattern) and before operation legalization after which
14713   // we need to be more careful about the vector instructions that we generate.
14714   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14715       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14716     EVT VecTy = N0.getOperand(0).getValueType();
14717     EVT ExTy = N0.getValueType();
14718     EVT TrTy = N->getValueType(0);
14719 
14720     auto EltCnt = VecTy.getVectorElementCount();
14721     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14722     auto NewEltCnt = EltCnt * SizeRatio;
14723 
14724     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14725     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14726 
14727     SDValue EltNo = N0->getOperand(1);
14728     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14729       int Elt = EltNo->getAsZExtVal();
14730       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14731 
14732       SDLoc DL(N);
14733       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14734                          DAG.getBitcast(NVT, N0.getOperand(0)),
14735                          DAG.getVectorIdxConstant(Index, DL));
14736     }
14737   }
14738 
14739   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14740   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14741     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14742         TLI.isTruncateFree(SrcVT, VT)) {
14743       SDLoc SL(N0);
14744       SDValue Cond = N0.getOperand(0);
14745       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14746       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14747       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
14748     }
14749   }
14750 
14751   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
14752   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14753       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14754       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14755     SDValue Amt = N0.getOperand(1);
14756     KnownBits Known = DAG.computeKnownBits(Amt);
14757     unsigned Size = VT.getScalarSizeInBits();
14758     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14759       SDLoc SL(N);
14760       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14761 
14762       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
14763       if (AmtVT != Amt.getValueType()) {
14764         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
14765         AddToWorklist(Amt.getNode());
14766       }
14767       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
14768     }
14769   }
14770 
14771   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
14772     return V;
14773 
14774   if (SDValue ABD = foldABSToABD(N))
14775     return ABD;
14776 
14777   // Attempt to pre-truncate BUILD_VECTOR sources.
14778   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14779       N0.hasOneUse() &&
14780       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14781       // Avoid creating illegal types if running after type legalizer.
14782       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14783     SDLoc DL(N);
14784     EVT SVT = VT.getScalarType();
14785     SmallVector<SDValue, 8> TruncOps;
14786     for (const SDValue &Op : N0->op_values()) {
14787       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14788       TruncOps.push_back(TruncOp);
14789     }
14790     return DAG.getBuildVector(VT, DL, TruncOps);
14791   }
14792 
14793   // trunc (splat_vector x) -> splat_vector (trunc x)
14794   if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14795       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14796       (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14797     SDLoc DL(N);
14798     EVT SVT = VT.getScalarType();
14799     return DAG.getSplatVector(
14800         VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14801   }
14802 
14803   // Fold a series of buildvector, bitcast, and truncate if possible.
14804   // For example fold
14805   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
14806   //   (2xi32 (buildvector x, y)).
14807   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14808       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14809       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14810       N0.getOperand(0).hasOneUse()) {
14811     SDValue BuildVect = N0.getOperand(0);
14812     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14813     EVT TruncVecEltTy = VT.getVectorElementType();
14814 
14815     // Check that the element types match.
14816     if (BuildVectEltTy == TruncVecEltTy) {
14817       // Now we only need to compute the offset of the truncated elements.
14818       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
14819       unsigned TruncVecNumElts = VT.getVectorNumElements();
14820       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14821 
14822       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14823              "Invalid number of elements");
14824 
14825       SmallVector<SDValue, 8> Opnds;
14826       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14827         Opnds.push_back(BuildVect.getOperand(i));
14828 
14829       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14830     }
14831   }
14832 
14833   // fold (truncate (load x)) -> (smaller load x)
14834   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14835   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14836     if (SDValue Reduced = reduceLoadWidth(N))
14837       return Reduced;
14838 
14839     // Handle the case where the truncated result is at least as wide as the
14840     // loaded type.
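    // For example, (i32 trunc (i64 zextload i16 [p])) can be rebuilt as
    // (i32 zextload i16 [p]).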
14841     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14842       auto *LN0 = cast<LoadSDNode>(N0);
14843       if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14844         SDValue NewLoad = DAG.getExtLoad(
14845             LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14846             LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14847         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14848         return NewLoad;
14849       }
14850     }
14851   }
14852 
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
14854   // where ... are all 'undef'.
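  // For example:
  //   (v4i16 trunc (v4i32 concat_vectors v2i32:undef, v2i32:X))
  //     -> (v4i16 concat_vectors v2i16:undef, (v2i16 trunc X))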
14855   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14856     SmallVector<EVT, 8> VTs;
14857     SDValue V;
14858     unsigned Idx = 0;
14859     unsigned NumDefs = 0;
14860 
14861     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14862       SDValue X = N0.getOperand(i);
14863       if (!X.isUndef()) {
14864         V = X;
14865         Idx = i;
14866         NumDefs++;
14867       }
      // Stop if more than one member is non-undef.
14869       if (NumDefs > 1)
14870         break;
14871 
14872       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14873                                      VT.getVectorElementType(),
14874                                      X.getValueType().getVectorElementCount()));
14875     }
14876 
14877     if (NumDefs == 0)
14878       return DAG.getUNDEF(VT);
14879 
14880     if (NumDefs == 1) {
14881       assert(V.getNode() && "The single defined operand is empty!");
14882       SmallVector<SDValue, 8> Opnds;
14883       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14884         if (i != Idx) {
14885           Opnds.push_back(DAG.getUNDEF(VTs[i]));
14886           continue;
14887         }
14888         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14889         AddToWorklist(NV.getNode());
14890         Opnds.push_back(NV);
14891       }
14892       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
14893     }
14894   }
14895 
14896   // Fold truncate of a bitcast of a vector to an extract of the low vector
14897   // element.
14898   //
14899   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14900   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14901     SDValue VecSrc = N0.getOperand(0);
14902     EVT VecSrcVT = VecSrc.getValueType();
14903     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14904         (!LegalOperations ||
14905          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14906       SDLoc SL(N);
14907 
14908       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14909       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
14910                          DAG.getVectorIdxConstant(Idx, SL));
14911     }
14912   }
14913 
14914   // Simplify the operands using demanded-bits information.
14915   if (SimplifyDemandedBits(SDValue(N, 0)))
14916     return SDValue(N, 0);
14917 
14918   // fold (truncate (extract_subvector(ext x))) ->
14919   //      (extract_subvector x)
14920   // TODO: This can be generalized to cover cases where the truncate and extract
14921   // do not fully cancel each other out.
14922   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14923     SDValue N00 = N0.getOperand(0);
14924     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14925         N00.getOpcode() == ISD::ZERO_EXTEND ||
14926         N00.getOpcode() == ISD::ANY_EXTEND) {
14927       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14928           VT.getVectorElementType())
14929         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14930                            N00.getOperand(0), N0.getOperand(1));
14931     }
14932   }
14933 
14934   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14935     return NewVSel;
14936 
14937   // Narrow a suitable binary operation with a non-opaque constant operand by
14938   // moving it ahead of the truncate. This is limited to pre-legalization
14939   // because targets may prefer a wider type during later combines and invert
14940   // this transform.
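  // For example (pre-legalization, scalar integers):
  //   (i16 (trunc (add i32:X, C))) -> (add (i16 (trunc X)), (i16 (trunc C)))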
14941   switch (N0.getOpcode()) {
14942   case ISD::ADD:
14943   case ISD::SUB:
14944   case ISD::MUL:
14945   case ISD::AND:
14946   case ISD::OR:
14947   case ISD::XOR:
14948     if (!LegalOperations && N0.hasOneUse() &&
14949         (isConstantOrConstantVector(N0.getOperand(0), true) ||
14950          isConstantOrConstantVector(N0.getOperand(1), true))) {
14951       // TODO: We already restricted this to pre-legalization, but for vectors
14952       // we are extra cautious to not create an unsupported operation.
14953       // Target-specific changes are likely needed to avoid regressions here.
14954       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
14955         SDLoc DL(N);
14956         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14957         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14958         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
14959       }
14960     }
14961     break;
14962   case ISD::ADDE:
14963   case ISD::UADDO_CARRY:
14964     // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
14965     // (trunc uaddo_carry(X, Y, Carry)) ->
14966     //     (uaddo_carry trunc(X), trunc(Y), Carry)
14967     // When the adde's carry is not used.
14968     // We only do for uaddo_carry before legalize operation
14969     if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
14970          TLI.isOperationLegal(N0.getOpcode(), VT)) &&
14971         N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
14972       SDLoc DL(N);
14973       SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14974       SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14975       SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
14976       return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
14977     }
14978     break;
14979   case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must also ensure that
    // we don't introduce an extra truncate.
14983     if (!LegalOperations && N0.hasOneUse() &&
14984         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14985         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
14986             VT.getScalarSizeInBits() &&
14987         hasOperation(N0.getOpcode(), VT)) {
14988       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
14989                                  DAG, SDLoc(N));
14990     }
14991     break;
14992   }
14993 
14994   return SDValue();
14995 }
14996 
14997 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
14998   SDValue Elt = N->getOperand(i);
14999   if (Elt.getOpcode() != ISD::MERGE_VALUES)
15000     return Elt.getNode();
15001   return Elt.getOperand(Elt.getResNo()).getNode();
15002 }
15003 
15004 /// build_pair (load, load) -> load
15005 /// if load locations are consecutive.
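/// For example, (i64 build_pair (i32 load [p]), (i32 load [p+4])) can become
/// a single (i64 load [p]) on a little-endian target.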
15006 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15007   assert(N->getOpcode() == ISD::BUILD_PAIR);
15008 
15009   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15010   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15011 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to consider the endianness.
15015   if (DAG.getDataLayout().isBigEndian())
15016     std::swap(LD1, LD2);
15017 
15018   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15019       !LD1->hasOneUse() || !LD2->hasOneUse() ||
15020       LD1->getAddressSpace() != LD2->getAddressSpace())
15021     return SDValue();
15022 
15023   unsigned LD1Fast = 0;
15024   EVT LD1VT = LD1->getValueType(0);
15025   unsigned LD1Bytes = LD1VT.getStoreSize();
15026   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15027       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15028       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15029                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15030     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15031                        LD1->getPointerInfo(), LD1->getAlign());
15032 
15033   return SDValue();
15034 }
15035 
15036 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15037   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15038   // and Lo parts; on big-endian machines it doesn't.
15039   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15040 }
15041 
15042 SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15043                                           const TargetLowering &TLI) {
15044   // If this is not a bitcast to an FP type or if the target doesn't have
15045   // IEEE754-compliant FP logic, we're done.
15046   EVT VT = N->getValueType(0);
15047   SDValue N0 = N->getOperand(0);
15048   EVT SourceVT = N0.getValueType();
15049 
15050   if (!VT.isFloatingPoint())
15051     return SDValue();
15052 
15053   // TODO: Handle cases where the integer constant is a different scalar
15054   // bitwidth to the FP.
15055   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15056     return SDValue();
15057 
15058   unsigned FPOpcode;
15059   APInt SignMask;
15060   switch (N0.getOpcode()) {
15061   case ISD::AND:
15062     FPOpcode = ISD::FABS;
15063     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15064     break;
15065   case ISD::XOR:
15066     FPOpcode = ISD::FNEG;
15067     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15068     break;
15069   case ISD::OR:
15070     FPOpcode = ISD::FABS;
15071     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15072     break;
15073   default:
15074     return SDValue();
15075   }
15076 
15077   if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15078     return SDValue();
15079 
15080   // This needs to be the inverse of logic in foldSignChangeInBitcast.
15081   // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15082   // removing this would require more changes.
15083   auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15084     if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15085       return true;
15086 
15087     return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15088   };
15089 
15090   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15091   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15092   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15093   //   fneg (fabs X)
15094   SDValue LogicOp0 = N0.getOperand(0);
15095   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15096   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15097       IsBitCastOrFree(LogicOp0, VT)) {
15098     SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15099     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15100     NumFPLogicOpsConv++;
15101     if (N0.getOpcode() == ISD::OR)
15102       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15103     return FPOp;
15104   }
15105 
15106   return SDValue();
15107 }
15108 
15109 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15110   SDValue N0 = N->getOperand(0);
15111   EVT VT = N->getValueType(0);
15112 
15113   if (N0.isUndef())
15114     return DAG.getUNDEF(VT);
15115 
15116   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15117   // Only do this before legalize types, unless both types are integer and the
15118   // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
15120   // First check to see if this is all constant.
15121   // TODO: Support FP bitcasts after legalize types.
15122   if (VT.isVector() &&
15123       (!LegalTypes ||
15124        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15125         TLI.isTypeLegal(VT.getVectorElementType()))) &&
15126       N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15127       cast<BuildVectorSDNode>(N0)->isConstant())
15128     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15129                                              VT.getVectorElementType());
15130 
15131   // If the input is a constant, let getNode fold it.
15132   if (isIntOrFPConstant(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
15136     if (!LegalOperations ||
15137         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15138          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15139         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15140          TLI.isOperationLegal(ISD::Constant, VT))) {
15141       SDValue C = DAG.getBitcast(VT, N0);
15142       if (C.getNode() != N)
15143         return C;
15144     }
15145   }
15146 
15147   // (conv (conv x, t1), t2) -> (conv x, t2)
15148   if (N0.getOpcode() == ISD::BITCAST)
15149     return DAG.getBitcast(VT, N0.getOperand(0));
15150 
15151   // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15152   // iff the current bitwise logicop type isn't legal
15153   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15154       !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15155     auto IsFreeBitcast = [VT](SDValue V) {
15156       return (V.getOpcode() == ISD::BITCAST &&
15157               V.getOperand(0).getValueType() == VT) ||
15158              (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15159               V->hasOneUse());
15160     };
15161     if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15162       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15163                          DAG.getBitcast(VT, N0.getOperand(0)),
15164                          DAG.getBitcast(VT, N0.getOperand(1)));
15165   }
15166 
15167   // fold (conv (load x)) -> (load (conv*)x)
15168   // If the resultant load doesn't need a higher alignment than the original!
15169   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15170       // Do not remove the cast if the types differ in endian layout.
15171       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15172           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15173       // If the load is volatile, we only want to change the load type if the
15174       // resulting load is legal. Otherwise we might increase the number of
15175       // memory accesses. We don't care if the original type was legal or not
15176       // as we assume software couldn't rely on the number of accesses of an
15177       // illegal type.
15178       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15179        TLI.isOperationLegal(ISD::LOAD, VT))) {
15180     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15181 
15182     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15183                                     *LN0->getMemOperand())) {
15184       SDValue Load =
15185           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15186                       LN0->getMemOperand());
15187       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15188       return Load;
15189     }
15190   }
15191 
15192   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15193     return V;
15194 
15195   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15196   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15197   //
15198   // For ppc_fp128:
15199   // fold (bitcast (fneg x)) ->
15200   //     flipbit = signbit
15201   //     (xor (bitcast x) (build_pair flipbit, flipbit))
15202   //
15203   // fold (bitcast (fabs x)) ->
15204   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
15205   //     (xor (bitcast x) (build_pair flipbit, flipbit))
15206   // This often reduces constant pool loads.
15207   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15208        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15209       N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15210       !N0.getValueType().isVector()) {
15211     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15212     AddToWorklist(NewConv.getNode());
15213 
15214     SDLoc DL(N);
15215     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15216       assert(VT.getSizeInBits() == 128);
15217       SDValue SignBit = DAG.getConstant(
15218           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15219       SDValue FlipBit;
15220       if (N0.getOpcode() == ISD::FNEG) {
15221         FlipBit = SignBit;
15222         AddToWorklist(FlipBit.getNode());
15223       } else {
15224         assert(N0.getOpcode() == ISD::FABS);
15225         SDValue Hi =
15226             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15227                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15228                                               SDLoc(NewConv)));
15229         AddToWorklist(Hi.getNode());
15230         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15231         AddToWorklist(FlipBit.getNode());
15232       }
15233       SDValue FlipBits =
15234           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15235       AddToWorklist(FlipBits.getNode());
15236       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15237     }
15238     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15239     if (N0.getOpcode() == ISD::FNEG)
15240       return DAG.getNode(ISD::XOR, DL, VT,
15241                          NewConv, DAG.getConstant(SignBit, DL, VT));
15242     assert(N0.getOpcode() == ISD::FABS);
15243     return DAG.getNode(ISD::AND, DL, VT,
15244                        NewConv, DAG.getConstant(~SignBit, DL, VT));
15245   }
15246 
15247   // fold (bitconvert (fcopysign cst, x)) ->
15248   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
15249   // Note that we don't handle (copysign x, cst) because this can always be
15250   // folded to an fneg or fabs.
15251   //
15252   // For ppc_fp128:
15253   // fold (bitcast (fcopysign cst, x)) ->
15254   //     flipbit = (and (extract_element
15255   //                     (xor (bitcast cst), (bitcast x)), 0),
15256   //                    signbit)
15257   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
15258   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15259       isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15260       !VT.isVector()) {
15261     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15262     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15263     if (isTypeLegal(IntXVT)) {
15264       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15265       AddToWorklist(X.getNode());
15266 
15267       // If X has a different width than the result/lhs, sext it or truncate it.
15268       unsigned VTWidth = VT.getSizeInBits();
15269       if (OrigXWidth < VTWidth) {
15270         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15271         AddToWorklist(X.getNode());
15272       } else if (OrigXWidth > VTWidth) {
15273         // To get the sign bit in the right place, we have to shift it right
15274         // before truncating.
15275         SDLoc DL(X);
15276         X = DAG.getNode(ISD::SRL, DL,
15277                         X.getValueType(), X,
15278                         DAG.getConstant(OrigXWidth-VTWidth, DL,
15279                                         X.getValueType()));
15280         AddToWorklist(X.getNode());
15281         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15282         AddToWorklist(X.getNode());
15283       }
15284 
15285       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15286         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15287         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15288         AddToWorklist(Cst.getNode());
15289         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15290         AddToWorklist(X.getNode());
15291         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15292         AddToWorklist(XorResult.getNode());
15293         SDValue XorResult64 = DAG.getNode(
15294             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15295             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15296                                   SDLoc(XorResult)));
15297         AddToWorklist(XorResult64.getNode());
15298         SDValue FlipBit =
15299             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15300                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15301         AddToWorklist(FlipBit.getNode());
15302         SDValue FlipBits =
15303             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15304         AddToWorklist(FlipBits.getNode());
15305         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15306       }
15307       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15308       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15309                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
15310       AddToWorklist(X.getNode());
15311 
15312       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15313       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15314                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15315       AddToWorklist(Cst.getNode());
15316 
15317       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15318     }
15319   }
15320 
15321   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15322   if (N0.getOpcode() == ISD::BUILD_PAIR)
15323     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15324       return CombineLD;
15325 
15326   // Remove double bitcasts from shuffles - this is often a legacy of
15327   // XformToShuffleWithZero being used to combine bitmaskings (of
15328   // float vectors bitcast to integer vectors) into shuffles.
15329   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
15330   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15331       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15332       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15333       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15334     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15335 
15336     // If operands are a bitcast, peek through if it casts the original VT.
15337     // If operands are a constant, just bitcast back to original VT.
15338     auto PeekThroughBitcast = [&](SDValue Op) {
15339       if (Op.getOpcode() == ISD::BITCAST &&
15340           Op.getOperand(0).getValueType() == VT)
15341         return SDValue(Op.getOperand(0));
15342       if (Op.isUndef() || isAnyConstantBuildVector(Op))
15343         return DAG.getBitcast(VT, Op);
15344       return SDValue();
15345     };
15346 
15347     // FIXME: If either input vector is bitcast, try to convert the shuffle to
15348     // the result type of this bitcast. This would eliminate at least one
15349     // bitcast. See the transform in InstCombine.
15350     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15351     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15352     if (!(SV0 && SV1))
15353       return SDValue();
15354 
15355     int MaskScale =
15356         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15357     SmallVector<int, 8> NewMask;
15358     for (int M : SVN->getMask())
15359       for (int i = 0; i != MaskScale; ++i)
15360         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15361 
15362     SDValue LegalShuffle =
15363         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15364     if (LegalShuffle)
15365       return LegalShuffle;
15366   }
15367 
15368   return SDValue();
15369 }
15370 
15371 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15372   EVT VT = N->getValueType(0);
15373   return CombineConsecutiveLoads(N, VT);
15374 }
15375 
15376 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15377   SDValue N0 = N->getOperand(0);
15378 
15379   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15380     return N0;
15381 
15382   // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15383   // Try to push freeze through instructions that propagate but don't produce
  // poison as far as possible. If an operand of freeze satisfies three
  // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
  // but one of its operands are guaranteed non-poison (or it is a
  // BUILD_VECTOR or similar), then push the freeze through to the operands
  // that are not guaranteed non-poison.
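  // For example, freeze(add X, C) with a constant C can become
  // (add (freeze X), C): the constant is guaranteed non-poison, so the
  // freeze moves onto the one operand that may be poison.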
15388   // NOTE: we will strip poison-generating flags, so ignore them here.
15389   if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15390                                  /*ConsiderFlags*/ false) ||
15391       N0->getNumValues() != 1 || !N0->hasOneUse())
15392     return SDValue();
15393 
15394   bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15395                                           N0.getOpcode() == ISD::BUILD_PAIR ||
15396                                           N0.getOpcode() == ISD::CONCAT_VECTORS;
15397 
15398   SmallSetVector<SDValue, 8> MaybePoisonOperands;
15399   for (SDValue Op : N0->ops()) {
15400     if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15401                                              /*Depth*/ 1))
15402       continue;
15403     bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15404     bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15405     if (!HadMaybePoisonOperands)
15406       continue;
15407     if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15408       // Multiple maybe-poison ops when not allowed - bail out.
15409       return SDValue();
15410     }
15411   }
  // NOTE: the whole op may not be guaranteed not to be undef or poison
  // because it could create undef or poison due to its poison-generating
  // flags. So not finding any maybe-poison operands is fine.
15415 
15416   for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15417     // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15418     if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15419       continue;
15420     // First, freeze each offending operand.
15421     SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15422     // Then, change all other uses of unfrozen operand to use frozen operand.
15423     DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15424     if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15425         FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
      // But that also updated the use in the freeze we just created, thus
      // creating a cycle in the DAG. Let's undo that by mutating the freeze.
15428       DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15429                              MaybePoisonOperand);
15430     }
15431   }
15432 
15433   // This node has been merged with another.
15434   if (N->getOpcode() == ISD::DELETED_NODE)
15435     return SDValue(N, 0);
15436 
15437   // The whole node may have been updated, so the value we were holding
15438   // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15439   N0 = N->getOperand(0);
15440 
  // Finally, recreate the node. Its operands were updated to use frozen
  // operands, so we just need to use its "original" operands.
  SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
  // Special-case ISD::UNDEF: each one can be its own distinct value.
15445   for (SDValue &Op : Ops) {
15446     if (Op.getOpcode() == ISD::UNDEF)
15447       Op = DAG.getFreeze(Op);
15448   }
15449   // NOTE: this strips poison generating flags.
15450   SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15451   assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15452          "Can't create node that may be undef/poison!");
15453   return R;
15454 }
15455 
15456 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15457 /// operands. DstEltVT indicates the destination element value type.
15458 SDValue DAGCombiner::
15459 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15460   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15461 
15462   // If this is already the right type, we're done.
15463   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15464 
15465   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15466   unsigned DstBitSize = DstEltVT.getSizeInBits();
15467 
15468   // If this is a conversion of N elements of one type to N elements of another
15469   // type, convert each element.  This handles FP<->INT cases.
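  // For example, bitcasting a v2f32 build_vector to v2i32 just bitcasts each
  // f32 constant element to an i32 with the same bits.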
15470   if (SrcBitSize == DstBitSize) {
15471     SmallVector<SDValue, 8> Ops;
15472     for (SDValue Op : BV->op_values()) {
15473       // If the vector element type is not legal, the BUILD_VECTOR operands
15474       // are promoted and implicitly truncated.  Make that explicit here.
15475       if (Op.getValueType() != SrcEltVT)
15476         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15477       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15478       AddToWorklist(Ops.back().getNode());
15479     }
15480     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15481                               BV->getValueType(0).getVectorNumElements());
15482     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15483   }
15484 
15485   // Otherwise, we're growing or shrinking the elements.  To avoid having to
15486   // handle annoying details of growing/shrinking FP values, we convert them to
15487   // int first.
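  // For example, a v2f64 -> v4i32 bitcast is handled as v2f64 -> v2i64
  // (elementwise bitcast) followed by v2i64 -> v4i32 (the integer resize
  // below).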
15488   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector whose elements are the
    // same size.
15491     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15492     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15493     SrcEltVT = IntVT;
15494   }
15495 
15496   // Now we know the input is an integer vector.  If the output is a FP type,
15497   // convert to integer first, then to FP of the right size.
15498   if (DstEltVT.isFloatingPoint()) {
15499     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15500     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15501 
15502     // Next, convert to FP elements of the same size.
15503     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15504   }
15505 
15506   // Okay, we know the src/dst types are both integers of differing types.
15507   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15508 
15509   // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15510   // BuildVectorSDNode?
15511   auto *BVN = cast<BuildVectorSDNode>(BV);
15512 
15513   // Extract the constant raw bit data.
15514   BitVector UndefElements;
15515   SmallVector<APInt> RawBits;
15516   bool IsLE = DAG.getDataLayout().isLittleEndian();
15517   if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15518     return SDValue();
15519 
15520   SDLoc DL(BV);
15521   SmallVector<SDValue, 8> Ops;
15522   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15523     if (UndefElements[I])
15524       Ops.push_back(DAG.getUNDEF(DstEltVT));
15525     else
15526       Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15527   }
15528 
15529   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15530   return DAG.getBuildVector(VT, DL, Ops);
15531 }
15532 
// Returns true if floating-point contraction is allowed on the FMUL-SDValue
// `N`.
15535 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15536   assert(N.getOpcode() == ISD::FMUL);
15537 
15538   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15539          N->getFlags().hasAllowContract();
15540 }
15541 
// Returns true if `N` may assume that no infinities are involved in its
// computation.
15543 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15544   return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15545 }
15546 
15547 /// Try to perform FMA combining on a given FADD node.
15548 template <class MatchContextClass>
15549 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15550   SDValue N0 = N->getOperand(0);
15551   SDValue N1 = N->getOperand(1);
15552   EVT VT = N->getValueType(0);
15553   SDLoc SL(N);
15554   MatchContextClass matcher(DAG, TLI, N);
15555   const TargetOptions &Options = DAG.getTarget().Options;
15556 
15557   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15558 
15559   // Floating-point multiply-add with intermediate rounding.
15560   // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15561   // FIXME: Add VP_FMAD opcode.
15562   bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15563 
15564   // Floating-point multiply-add without intermediate rounding.
15565   bool HasFMA =
15566       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15567       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15568 
15569   // No valid opcode, do not combine.
15570   if (!HasFMAD && !HasFMA)
15571     return SDValue();
15572 
15573   bool CanReassociate =
15574       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15575   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15576                               Options.UnsafeFPMath || HasFMAD);
15577   // If the addition is not contractable, do not combine.
15578   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15579     return SDValue();
15580 
15581   // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15582   // beneficial. It does not reduce latency. It increases register pressure. It
15583   // replaces an fadd with an fma which is a more complex instruction, so is
15584   // likely to have a larger encoding, use more functional units, etc.
15585   if (N0 == N1)
15586     return SDValue();
15587 
15588   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15589     return SDValue();
15590 
15591   // Always prefer FMAD to FMA for precision.
15592   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15593   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15594 
15595   auto isFusedOp = [&](SDValue N) {
15596     return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15597   };
15598 
15599   // Is the node an FMUL and contractable either due to global flags or
15600   // SDNodeFlags.
15601   auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15602     if (!matcher.match(N, ISD::FMUL))
15603       return false;
15604     return AllowFusionGlobally || N->getFlags().hasAllowContract();
15605   };
15606   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15607   // prefer to fold the multiply with fewer uses.
15608   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
15609     if (N0->use_size() > N1->use_size())
15610       std::swap(N0, N1);
15611   }
15612 
15613   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15614   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15615     return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15616                            N0.getOperand(1), N1);
15617   }
15618 
15619   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15620   // Note: Commutes FADD operands.
15621   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15622     return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15623                            N1.getOperand(1), N0);
15624   }
15625 
15626   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15627   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15628   // This also works with nested fma instructions:
  // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
  //   fma A, B, (fma C, D, (fma E, F, G))
  // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
  //   fma A, B, (fma C, D, (fma E, F, G))
15633   // This requires reassociation because it changes the order of operations.
15634   if (CanReassociate) {
15635     SDValue FMA, E;
15636     if (isFusedOp(N0) && N0.hasOneUse()) {
15637       FMA = N0;
15638       E = N1;
15639     } else if (isFusedOp(N1) && N1.hasOneUse()) {
15640       FMA = N1;
15641       E = N0;
15642     }
15643 
15644     SDValue TmpFMA = FMA;
15645     while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15646       SDValue FMul = TmpFMA->getOperand(2);
15647       if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15648         SDValue C = FMul.getOperand(0);
15649         SDValue D = FMul.getOperand(1);
15650         SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15651         DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15652         // Replacing the inner FMul could cause the outer FMA to be simplified
15653         // away.
15654         return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15655       }
15656 
15657       TmpFMA = TmpFMA->getOperand(2);
15658     }
15659   }
15660 
15661   // Look through FP_EXTEND nodes to do more combining.
15662 
15663   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15664   if (matcher.match(N0, ISD::FP_EXTEND)) {
15665     SDValue N00 = N0.getOperand(0);
15666     if (isContractableFMUL(N00) &&
15667         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15668                             N00.getValueType())) {
15669       return matcher.getNode(
15670           PreferredFusedOpcode, SL, VT,
15671           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15672           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15673     }
15674   }
15675 
15676   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15677   // Note: Commutes FADD operands.
15678   if (matcher.match(N1, ISD::FP_EXTEND)) {
15679     SDValue N10 = N1.getOperand(0);
15680     if (isContractableFMUL(N10) &&
15681         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15682                             N10.getValueType())) {
15683       return matcher.getNode(
15684           PreferredFusedOpcode, SL, VT,
15685           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15686           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15687     }
15688   }
15689 
15690   // More folding opportunities when target permits.
15691   if (Aggressive) {
15692     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15693     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
15694     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15695                                     SDValue Z) {
15696       return matcher.getNode(
15697           PreferredFusedOpcode, SL, VT, X, Y,
15698           matcher.getNode(PreferredFusedOpcode, SL, VT,
15699                           matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15700                           matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15701     };
15702     if (isFusedOp(N0)) {
15703       SDValue N02 = N0.getOperand(2);
15704       if (matcher.match(N02, ISD::FP_EXTEND)) {
15705         SDValue N020 = N02.getOperand(0);
15706         if (isContractableFMUL(N020) &&
15707             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15708                                 N020.getValueType())) {
15709           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15710                                       N020.getOperand(0), N020.getOperand(1),
15711                                       N1);
15712         }
15713       }
15714     }
15715 
15716     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15717     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15718     // FIXME: This turns two single-precision and one double-precision
15719     // operation into two double-precision operations, which might not be
15720     // interesting for all targets, especially GPUs.
15721     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15722                                     SDValue Z) {
15723       return matcher.getNode(
15724           PreferredFusedOpcode, SL, VT,
15725           matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15726           matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15727           matcher.getNode(PreferredFusedOpcode, SL, VT,
15728                           matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15729                           matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15730     };
15731     if (N0.getOpcode() == ISD::FP_EXTEND) {
15732       SDValue N00 = N0.getOperand(0);
15733       if (isFusedOp(N00)) {
15734         SDValue N002 = N00.getOperand(2);
15735         if (isContractableFMUL(N002) &&
15736             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15737                                 N00.getValueType())) {
15738           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15739                                       N002.getOperand(0), N002.getOperand(1),
15740                                       N1);
15741         }
15742       }
15743     }
15744 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
15746     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
15747     if (isFusedOp(N1)) {
15748       SDValue N12 = N1.getOperand(2);
15749       if (N12.getOpcode() == ISD::FP_EXTEND) {
15750         SDValue N120 = N12.getOperand(0);
15751         if (isContractableFMUL(N120) &&
15752             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15753                                 N120.getValueType())) {
15754           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15755                                       N120.getOperand(0), N120.getOperand(1),
15756                                       N0);
15757         }
15758       }
15759     }
15760 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
15762     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15763     // FIXME: This turns two single-precision and one double-precision
15764     // operation into two double-precision operations, which might not be
15765     // interesting for all targets, especially GPUs.
15766     if (N1.getOpcode() == ISD::FP_EXTEND) {
15767       SDValue N10 = N1.getOperand(0);
15768       if (isFusedOp(N10)) {
15769         SDValue N102 = N10.getOperand(2);
15770         if (isContractableFMUL(N102) &&
15771             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15772                                 N10.getValueType())) {
15773           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15774                                       N102.getOperand(0), N102.getOperand(1),
15775                                       N0);
15776         }
15777       }
15778     }
15779   }
15780 
15781   return SDValue();
15782 }
15783 
15784 /// Try to perform FMA combining on a given FSUB node.
15785 template <class MatchContextClass>
15786 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15787   SDValue N0 = N->getOperand(0);
15788   SDValue N1 = N->getOperand(1);
15789   EVT VT = N->getValueType(0);
15790   SDLoc SL(N);
15791   MatchContextClass matcher(DAG, TLI, N);
15792   const TargetOptions &Options = DAG.getTarget().Options;
15793 
15794   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15795 
15796   // Floating-point multiply-add with intermediate rounding.
15797   // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15798   // FIXME: Add VP_FMAD opcode.
15799   bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15800 
15801   // Floating-point multiply-add without intermediate rounding.
15802   bool HasFMA =
15803       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15804       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15805 
15806   // No valid opcode, do not combine.
15807   if (!HasFMAD && !HasFMA)
15808     return SDValue();
15809 
15810   const SDNodeFlags Flags = N->getFlags();
15811   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15812                               Options.UnsafeFPMath || HasFMAD);
15813 
15814   // If the subtraction is not contractable, do not combine.
15815   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15816     return SDValue();
15817 
15818   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15819     return SDValue();
15820 
15821   // Always prefer FMAD to FMA for precision.
15822   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15823   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15824   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15825 
15826   // Is the node an FMUL and contractable either due to global flags or
15827   // SDNodeFlags.
15828   auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15829     if (!matcher.match(N, ISD::FMUL))
15830       return false;
15831     return AllowFusionGlobally || N->getFlags().hasAllowContract();
15832   };
15833 
15834   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15835   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15836     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15837       return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15838                              XY.getOperand(1),
15839                              matcher.getNode(ISD::FNEG, SL, VT, Z));
15840     }
15841     return SDValue();
15842   };
15843 
15844   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15845   // Note: Commutes FSUB operands.
15846   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15847     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15848       return matcher.getNode(
15849           PreferredFusedOpcode, SL, VT,
15850           matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15851           YZ.getOperand(1), X);
15852     }
15853     return SDValue();
15854   };
15855 
15856   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15857   // prefer to fold the multiply with fewer uses.
15858   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15859       (N0->use_size() > N1->use_size())) {
15860     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15861     if (SDValue V = tryToFoldXSubYZ(N0, N1))
15862       return V;
15863     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15864     if (SDValue V = tryToFoldXYSubZ(N0, N1))
15865       return V;
15866   } else {
15867     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15868     if (SDValue V = tryToFoldXYSubZ(N0, N1))
15869       return V;
15870     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15871     if (SDValue V = tryToFoldXSubYZ(N0, N1))
15872       return V;
15873   }
15874 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
15876   if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15877       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15878     SDValue N00 = N0.getOperand(0).getOperand(0);
15879     SDValue N01 = N0.getOperand(0).getOperand(1);
15880     return matcher.getNode(PreferredFusedOpcode, SL, VT,
15881                            matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15882                            matcher.getNode(ISD::FNEG, SL, VT, N1));
15883   }
15884 
15885   // Look through FP_EXTEND nodes to do more combining.
15886 
15887   // fold (fsub (fpext (fmul x, y)), z)
15888   //   -> (fma (fpext x), (fpext y), (fneg z))
15889   if (matcher.match(N0, ISD::FP_EXTEND)) {
15890     SDValue N00 = N0.getOperand(0);
15891     if (isContractableFMUL(N00) &&
15892         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15893                             N00.getValueType())) {
15894       return matcher.getNode(
15895           PreferredFusedOpcode, SL, VT,
15896           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15897           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15898           matcher.getNode(ISD::FNEG, SL, VT, N1));
15899     }
15900   }
15901 
15902   // fold (fsub x, (fpext (fmul y, z)))
15903   //   -> (fma (fneg (fpext y)), (fpext z), x)
15904   // Note: Commutes FSUB operands.
15905   if (matcher.match(N1, ISD::FP_EXTEND)) {
15906     SDValue N10 = N1.getOperand(0);
15907     if (isContractableFMUL(N10) &&
15908         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15909                             N10.getValueType())) {
15910       return matcher.getNode(
15911           PreferredFusedOpcode, SL, VT,
15912           matcher.getNode(
15913               ISD::FNEG, SL, VT,
15914               matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
15915           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15916     }
15917   }
15918 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
15925   if (matcher.match(N0, ISD::FP_EXTEND)) {
15926     SDValue N00 = N0.getOperand(0);
15927     if (matcher.match(N00, ISD::FNEG)) {
15928       SDValue N000 = N00.getOperand(0);
15929       if (isContractableFMUL(N000) &&
15930           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15931                               N00.getValueType())) {
15932         return matcher.getNode(
15933             ISD::FNEG, SL, VT,
15934             matcher.getNode(
15935                 PreferredFusedOpcode, SL, VT,
15936                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15937                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15938                 N1));
15939       }
15940     }
15941   }
15942 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
15949   if (matcher.match(N0, ISD::FNEG)) {
15950     SDValue N00 = N0.getOperand(0);
15951     if (matcher.match(N00, ISD::FP_EXTEND)) {
15952       SDValue N000 = N00.getOperand(0);
15953       if (isContractableFMUL(N000) &&
15954           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15955                               N000.getValueType())) {
15956         return matcher.getNode(
15957             ISD::FNEG, SL, VT,
15958             matcher.getNode(
15959                 PreferredFusedOpcode, SL, VT,
15960                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15961                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15962                 N1));
15963       }
15964     }
15965   }
15966 
15967   auto isReassociable = [&Options](SDNode *N) {
15968     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15969   };
15970 
15971   auto isContractableAndReassociableFMUL = [&isContractableFMUL,
15972                                             &isReassociable](SDValue N) {
15973     return isContractableFMUL(N) && isReassociable(N.getNode());
15974   };
15975 
15976   auto isFusedOp = [&](SDValue N) {
15977     return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15978   };
15979 
15980   // More folding opportunities when target permits.
15981   if (Aggressive && isReassociable(N)) {
15982     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
15983     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
15985     if (CanFuse && isFusedOp(N0) &&
15986         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
15987         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
15988       return matcher.getNode(
15989           PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
15990           matcher.getNode(PreferredFusedOpcode, SL, VT,
15991                           N0.getOperand(2).getOperand(0),
15992                           N0.getOperand(2).getOperand(1),
15993                           matcher.getNode(ISD::FNEG, SL, VT, N1)));
15994     }
15995 
15996     // fold (fsub x, (fma y, z, (fmul u, v)))
15997     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
15998     if (CanFuse && isFusedOp(N1) &&
15999         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16000         N1->hasOneUse() && NoSignedZero) {
16001       SDValue N20 = N1.getOperand(2).getOperand(0);
16002       SDValue N21 = N1.getOperand(2).getOperand(1);
16003       return matcher.getNode(
16004           PreferredFusedOpcode, SL, VT,
16005           matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16006           N1.getOperand(1),
16007           matcher.getNode(PreferredFusedOpcode, SL, VT,
16008                           matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16009     }
16010 
16011     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16013     if (isFusedOp(N0) && N0->hasOneUse()) {
16014       SDValue N02 = N0.getOperand(2);
16015       if (matcher.match(N02, ISD::FP_EXTEND)) {
16016         SDValue N020 = N02.getOperand(0);
16017         if (isContractableAndReassociableFMUL(N020) &&
16018             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16019                                 N020.getValueType())) {
16020           return matcher.getNode(
16021               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16022               matcher.getNode(
16023                   PreferredFusedOpcode, SL, VT,
16024                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16025                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16026                   matcher.getNode(ISD::FNEG, SL, VT, N1)));
16027         }
16028       }
16029     }
16030 
16031     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16032     //   -> (fma (fpext x), (fpext y),
16033     //           (fma (fpext u), (fpext v), (fneg z)))
16034     // FIXME: This turns two single-precision and one double-precision
16035     // operation into two double-precision operations, which might not be
16036     // interesting for all targets, especially GPUs.
16037     if (matcher.match(N0, ISD::FP_EXTEND)) {
16038       SDValue N00 = N0.getOperand(0);
16039       if (isFusedOp(N00)) {
16040         SDValue N002 = N00.getOperand(2);
16041         if (isContractableAndReassociableFMUL(N002) &&
16042             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16043                                 N00.getValueType())) {
16044           return matcher.getNode(
16045               PreferredFusedOpcode, SL, VT,
16046               matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16047               matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16048               matcher.getNode(
16049                   PreferredFusedOpcode, SL, VT,
16050                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16051                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16052                   matcher.getNode(ISD::FNEG, SL, VT, N1)));
16053         }
16054       }
16055     }
16056 
16057     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16058     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16059     if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16060         N1->hasOneUse()) {
16061       SDValue N120 = N1.getOperand(2).getOperand(0);
16062       if (isContractableAndReassociableFMUL(N120) &&
16063           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16064                               N120.getValueType())) {
16065         SDValue N1200 = N120.getOperand(0);
16066         SDValue N1201 = N120.getOperand(1);
16067         return matcher.getNode(
16068             PreferredFusedOpcode, SL, VT,
16069             matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16070             N1.getOperand(1),
16071             matcher.getNode(
16072                 PreferredFusedOpcode, SL, VT,
16073                 matcher.getNode(ISD::FNEG, SL, VT,
16074                                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16075                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16076       }
16077     }
16078 
16079     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16080     //   -> (fma (fneg (fpext y)), (fpext z),
16081     //           (fma (fneg (fpext u)), (fpext v), x))
16082     // FIXME: This turns two single-precision and one double-precision
16083     // operation into two double-precision operations, which might not be
16084     // interesting for all targets, especially GPUs.
16085     if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16086       SDValue CvtSrc = N1.getOperand(0);
16087       SDValue N100 = CvtSrc.getOperand(0);
16088       SDValue N101 = CvtSrc.getOperand(1);
16089       SDValue N102 = CvtSrc.getOperand(2);
16090       if (isContractableAndReassociableFMUL(N102) &&
16091           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16092                               CvtSrc.getValueType())) {
16093         SDValue N1020 = N102.getOperand(0);
16094         SDValue N1021 = N102.getOperand(1);
16095         return matcher.getNode(
16096             PreferredFusedOpcode, SL, VT,
16097             matcher.getNode(ISD::FNEG, SL, VT,
16098                             matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16099             matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16100             matcher.getNode(
16101                 PreferredFusedOpcode, SL, VT,
16102                 matcher.getNode(ISD::FNEG, SL, VT,
16103                                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16104                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16105       }
16106     }
16107   }
16108 
16109   return SDValue();
16110 }
16111 
16112 /// Try to perform FMA combining on a given FMUL node based on the distributive
16113 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16114 /// subtraction instead of addition).
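/// For example, (fmul (fadd x, 1.0), y) becomes (fma x, y, y).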
16115 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16116   SDValue N0 = N->getOperand(0);
16117   SDValue N1 = N->getOperand(1);
16118   EVT VT = N->getValueType(0);
16119   SDLoc SL(N);
16120 
16121   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16122 
16123   const TargetOptions &Options = DAG.getTarget().Options;
16124 
16125   // The transforms below are incorrect when x == 0 and y == inf, because the
16126   // intermediate multiplication produces a nan.
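  // For example, with x == 0.0 and y == inf, (fmul (fadd x, 1.0), y)
  // evaluates to inf, but the fused (fma x, y, y) first computes
  // 0.0 * inf == nan.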
16127   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16128   if (!hasNoInfs(Options, FAdd))
16129     return SDValue();
16130 
16131   // Floating-point multiply-add without intermediate rounding.
16132   bool HasFMA =
16133       isContractableFMUL(Options, SDValue(N, 0)) &&
16134       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16135       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16136 
16137   // Floating-point multiply-add with intermediate rounding. This can result
16138   // in a less precise result due to the changed rounding order.
16139   bool HasFMAD = Options.UnsafeFPMath &&
16140                  (LegalOperations && TLI.isFMADLegal(DAG, N));
16141 
16142   // No valid opcode, do not combine.
16143   if (!HasFMAD && !HasFMA)
16144     return SDValue();
16145 
16146   // Always prefer FMAD to FMA for precision.
16147   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16148   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16149 
16150   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16151   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16152   auto FuseFADD = [&](SDValue X, SDValue Y) {
16153     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16154       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16155         if (C->isExactlyValue(+1.0))
16156           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16157                              Y);
16158         if (C->isExactlyValue(-1.0))
16159           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16160                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16161       }
16162     }
16163     return SDValue();
16164   };
16165 
16166   if (SDValue FMA = FuseFADD(N0, N1))
16167     return FMA;
16168   if (SDValue FMA = FuseFADD(N1, N0))
16169     return FMA;
16170 
16171   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16172   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16173   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16174   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16175   auto FuseFSUB = [&](SDValue X, SDValue Y) {
16176     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16177       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16178         if (C0->isExactlyValue(+1.0))
16179           return DAG.getNode(PreferredFusedOpcode, SL, VT,
16180                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16181                              Y);
16182         if (C0->isExactlyValue(-1.0))
16183           return DAG.getNode(PreferredFusedOpcode, SL, VT,
16184                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16185                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16186       }
16187       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16188         if (C1->isExactlyValue(+1.0))
16189           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16190                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16191         if (C1->isExactlyValue(-1.0))
16192           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16193                              Y);
16194       }
16195     }
16196     return SDValue();
16197   };
16198 
16199   if (SDValue FMA = FuseFSUB(N0, N1))
16200     return FMA;
16201   if (SDValue FMA = FuseFSUB(N1, N0))
16202     return FMA;
16203 
16204   return SDValue();
16205 }
16206 
16207 SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16208   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16209 
16210   // FADD -> FMA combines:
16211   if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16212     if (Fused.getOpcode() != ISD::DELETED_NODE)
16213       AddToWorklist(Fused.getNode());
16214     return Fused;
16215   }
16216   return SDValue();
16217 }
16218 
16219 SDValue DAGCombiner::visitFADD(SDNode *N) {
16220   SDValue N0 = N->getOperand(0);
16221   SDValue N1 = N->getOperand(1);
16222   SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16223   SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16224   EVT VT = N->getValueType(0);
16225   SDLoc DL(N);
16226   const TargetOptions &Options = DAG.getTarget().Options;
16227   SDNodeFlags Flags = N->getFlags();
16228   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16229 
16230   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16231     return R;
16232 
16233   // fold (fadd c1, c2) -> c1 + c2
16234   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16235     return C;
16236 
16237   // canonicalize constant to RHS
16238   if (N0CFP && !N1CFP)
16239     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16240 
16241   // fold vector ops
16242   if (VT.isVector())
16243     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16244       return FoldedVOp;
16245 
16246   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16247   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16248   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
16250       return N0;
16251 
16252   if (SDValue NewSel = foldBinOpIntoSelect(N))
16253     return NewSel;
16254 
16255   // fold (fadd A, (fneg B)) -> (fsub A, B)
16256   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16257     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16258             N1, DAG, LegalOperations, ForCodeSize))
16259       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16260 
16261   // fold (fadd (fneg A), B) -> (fsub B, A)
16262   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16263     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16264             N0, DAG, LegalOperations, ForCodeSize))
16265       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16266 
16267   auto isFMulNegTwo = [](SDValue FMul) {
16268     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16269       return false;
16270     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16271     return C && C->isExactlyValue(-2.0);
16272   };
16273 
16274   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16275   if (isFMulNegTwo(N0)) {
16276     SDValue B = N0.getOperand(0);
16277     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16278     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16279   }
16280   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16281   if (isFMulNegTwo(N1)) {
16282     SDValue B = N1.getOperand(0);
16283     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16284     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16285   }
16286 
  // No FP constant should be created after legalization as the Instruction
  // Selection pass has a hard time dealing with FP constants.
16289   bool AllowNewConst = (Level < AfterLegalizeDAG);
16290 
16291   // If nnan is enabled, fold lots of things.
16292   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16293     // If allowed, fold (fadd (fneg x), x) -> 0.0
16294     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16295       return DAG.getConstantFP(0.0, DL, VT);
16296 
16297     // If allowed, fold (fadd x, (fneg x)) -> 0.0
16298     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16299       return DAG.getConstantFP(0.0, DL, VT);
16300   }
16301 
16302   // If 'unsafe math' or reassoc and nsz, fold lots of things.
16303   // TODO: break out portions of the transformations below for which Unsafe is
16304   //       considered and which do not require both nsz and reassoc
16305   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16306        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16307       AllowNewConst) {
16308     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16309     if (N1CFP && N0.getOpcode() == ISD::FADD &&
16310         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16311       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16312       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16313     }
16314 
16315     // We can fold chains of FADD's of the same value into multiplications.
16316     // This transform is not safe in general because we are reducing the number
16317     // of rounding steps.
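    // For example, (x * c) + x rounds twice (after the multiply and after
    // the add) while the combined x * (c + 1.0) rounds only once.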
16318     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16319       if (N0.getOpcode() == ISD::FMUL) {
16320         SDNode *CFP00 =
16321             DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16322         SDNode *CFP01 =
16323             DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16324 
16325         // (fadd (fmul x, c), x) -> (fmul x, c+1)
16326         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16327           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16328                                        DAG.getConstantFP(1.0, DL, VT));
16329           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16330         }
16331 
16332         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16333         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16334             N1.getOperand(0) == N1.getOperand(1) &&
16335             N0.getOperand(0) == N1.getOperand(0)) {
16336           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16337                                        DAG.getConstantFP(2.0, DL, VT));
16338           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16339         }
16340       }
16341 
16342       if (N1.getOpcode() == ISD::FMUL) {
16343         SDNode *CFP10 =
16344             DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16345         SDNode *CFP11 =
16346             DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16347 
16348         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16349         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16350           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16351                                        DAG.getConstantFP(1.0, DL, VT));
16352           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16353         }
16354 
16355         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16356         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16357             N0.getOperand(0) == N0.getOperand(1) &&
16358             N1.getOperand(0) == N0.getOperand(0)) {
16359           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16360                                        DAG.getConstantFP(2.0, DL, VT));
16361           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16362         }
16363       }
16364 
16365       if (N0.getOpcode() == ISD::FADD) {
16366         SDNode *CFP00 =
16367             DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16368         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16369         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16370             (N0.getOperand(0) == N1)) {
16371           return DAG.getNode(ISD::FMUL, DL, VT, N1,
16372                              DAG.getConstantFP(3.0, DL, VT));
16373         }
16374       }
16375 
16376       if (N1.getOpcode() == ISD::FADD) {
16377         SDNode *CFP10 =
16378             DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16379         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16380         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16381             N1.getOperand(0) == N0) {
16382           return DAG.getNode(ISD::FMUL, DL, VT, N0,
16383                              DAG.getConstantFP(3.0, DL, VT));
16384         }
16385       }
16386 
16387       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16388       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16389           N0.getOperand(0) == N0.getOperand(1) &&
16390           N1.getOperand(0) == N1.getOperand(1) &&
16391           N0.getOperand(0) == N1.getOperand(0)) {
16392         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16393                            DAG.getConstantFP(4.0, DL, VT));
16394       }
16395     }
16396 
16397     // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16398     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16399                                           VT, N0, N1, Flags))
16400       return SD;
16401   } // enable-unsafe-fp-math
16402 
16403   // FADD -> FMA combines:
16404   if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16405     if (Fused.getOpcode() != ISD::DELETED_NODE)
16406       AddToWorklist(Fused.getNode());
16407     return Fused;
16408   }
16409   return SDValue();
16410 }
16411 
16412 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16413   SDValue Chain = N->getOperand(0);
16414   SDValue N0 = N->getOperand(1);
16415   SDValue N1 = N->getOperand(2);
16416   EVT VT = N->getValueType(0);
16417   EVT ChainVT = N->getValueType(1);
16418   SDLoc DL(N);
16419   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16420 
16421   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16422   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16423     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16424             N1, DAG, LegalOperations, ForCodeSize)) {
16425       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16426                          {Chain, N0, NegN1});
16427     }
16428 
16429   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16430   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16431     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16432             N0, DAG, LegalOperations, ForCodeSize)) {
16433       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16434                          {Chain, N1, NegN0});
16435     }
16436   return SDValue();
16437 }
16438 
16439 SDValue DAGCombiner::visitFSUB(SDNode *N) {
16440   SDValue N0 = N->getOperand(0);
16441   SDValue N1 = N->getOperand(1);
16442   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16443   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16444   EVT VT = N->getValueType(0);
16445   SDLoc DL(N);
16446   const TargetOptions &Options = DAG.getTarget().Options;
16447   const SDNodeFlags Flags = N->getFlags();
16448   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16449 
16450   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16451     return R;
16452 
16453   // fold (fsub c1, c2) -> c1-c2
16454   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16455     return C;
16456 
16457   // fold vector ops
16458   if (VT.isVector())
16459     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16460       return FoldedVOp;
16461 
16462   if (SDValue NewSel = foldBinOpIntoSelect(N))
16463     return NewSel;
16464 
16465   // (fsub A, 0) -> A
16466   if (N1CFP && N1CFP->isZero()) {
16467     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16468         Flags.hasNoSignedZeros()) {
16469       return N0;
16470     }
16471   }
16472 
16473   if (N0 == N1) {
16474     // (fsub x, x) -> 0.0
16475     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16476       return DAG.getConstantFP(0.0f, DL, VT);
16477   }
16478 
16479   // (fsub -0.0, N1) -> -N1
16480   if (N0CFP && N0CFP->isZero()) {
16481     if (N0CFP->isNegative() ||
16482         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16483       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16484       // flushed to zero, unless all users treat denorms as zero (DAZ).
16485       // FIXME: This transform will change the sign of a NaN and the behavior
16486       // of a signaling NaN. It is only valid when a NoNaN flag is present.
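      // For example, if X is a positive denormal and denormals are flushed
      // to zero, (fsub -0.0, X) flushes to -0.0 while (fneg X) merely flips
      // the sign bit and yields a nonzero negative denormal.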
16487       DenormalMode DenormMode = DAG.getDenormalMode(VT);
16488       if (DenormMode == DenormalMode::getIEEE()) {
16489         if (SDValue NegN1 =
16490                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16491           return NegN1;
16492         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16493           return DAG.getNode(ISD::FNEG, DL, VT, N1);
16494       }
16495     }
16496   }
16497 
16498   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16499        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16500       N1.getOpcode() == ISD::FADD) {
16501     // X - (X + Y) -> -Y
16502     if (N0 == N1->getOperand(0))
16503       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16504     // X - (Y + X) -> -Y
16505     if (N0 == N1->getOperand(1))
16506       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16507   }
16508 
16509   // fold (fsub A, (fneg B)) -> (fadd A, B)
16510   if (SDValue NegN1 =
16511           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16512     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16513 
16514   // FSUB -> FMA combines:
16515   if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16516     AddToWorklist(Fused.getNode());
16517     return Fused;
16518   }
16519 
16520   return SDValue();
16521 }
16522 
16523 // Transform IEEE Floats:
16524 //      (fmul C, (uitofp Pow2))
16525 //          -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16526 //      (fdiv C, (uitofp Pow2))
16527 //          -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16528 //
// The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
// so there is no need for more than an add/sub.
16531 //
16532 // This is valid under the following circumstances:
16533 // 1) We are dealing with IEEE floats
16534 // 2) C is normal
16535 // 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
// TODO: Much of this could also be used for generating `ldexp` on targets
// that prefer it.
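// For example (an illustrative f32 case, 23-bit mantissa): C = 3.0
// (0x40400000) times Pow2 = 4 (Log2 = 2) becomes
// 0x40400000 + (2 << 23) == 0x41400000 == 12.0.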
16538 SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16539   EVT VT = N->getValueType(0);
16540   SDValue ConstOp, Pow2Op;
16541 
16542   std::optional<int> Mantissa;
16543   auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16544     if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16545       return false;
16546 
16547     ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16548     Pow2Op = N->getOperand(1 - ConstOpIdx);
16549     if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16550         (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16551          !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16552       return false;
16553 
16554     Pow2Op = Pow2Op.getOperand(0);
16555 
16556     // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16557     // TODO: We could use knownbits to make this bound more precise.
16558     int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16559 
16560     auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16561       if (CFP == nullptr)
16562         return false;
16563 
16564       const APFloat &APF = CFP->getValueAPF();
16565 
      // Make sure we have a normal/IEEE constant.
16567       if (!APF.isNormal() || !APF.isIEEE())
16568         return false;
16569 
      // Make sure the float's exponent is within the bounds where this
      // transform produces a bitwise-equal value.
16572       int CurExp = ilogb(APF);
16573       // FMul by pow2 will only increase exponent.
16574       int MinExp =
16575           N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16576       // FDiv by pow2 will only decrease exponent.
16577       int MaxExp =
16578           N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16579       if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16580           MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16581         return false;
16582 
16583       // Finally make sure we actually know the mantissa for the float type.
16584       int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16585       if (!Mantissa)
16586         Mantissa = ThisMantissa;
16587 
16588       return *Mantissa == ThisMantissa && ThisMantissa > 0;
16589     };
16590 
16591     // TODO: We may be able to include undefs.
16592     return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16593   };
16594 
16595   if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16596     return SDValue();
16597 
16598   if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16599     return SDValue();
16600 
16601   // Get log2 after all other checks have taken place. This is because
16602   // BuildLogBase2 may create a new node.
16603   SDLoc DL(N);
16604   // Get Log2 type with same bitwidth as the float type (VT).
16605   EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16606   if (VT.isVector())
16607     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16608                                 VT.getVectorElementCount());
16609 
16610   SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16611                                /*InexpensiveOnly*/ true, NewIntVT);
16612   if (!Log2)
16613     return SDValue();
16614 
16615   // Perform actual transform.
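  // In IEEE formats the biased exponent sits immediately above the mantissa
  // bits, so adding (Log2 << Mantissa) to the constant's bit pattern scales it
  // by 2^Log2 (and subtracting scales by 2^-Log2), provided the result stays
  // normal; the exponent-range checks above guarantee that.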
16616   SDValue MantissaShiftCnt =
16617       DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
  // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should fold
  // to `(X << C1) + (C << C1)`, but that isn't always the case because of the
  // cast. We could handle the casts here to enable that fold.
16621   SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16622   SDValue ResAsInt =
16623       DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16624                   NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16625   SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16626   return ResAsFP;
16627 }
16628 
16629 SDValue DAGCombiner::visitFMUL(SDNode *N) {
16630   SDValue N0 = N->getOperand(0);
16631   SDValue N1 = N->getOperand(1);
16632   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16633   EVT VT = N->getValueType(0);
16634   SDLoc DL(N);
16635   const TargetOptions &Options = DAG.getTarget().Options;
16636   const SDNodeFlags Flags = N->getFlags();
16637   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16638 
16639   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16640     return R;
16641 
16642   // fold (fmul c1, c2) -> c1*c2
16643   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16644     return C;
16645 
16646   // canonicalize constant to RHS
16647   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16648      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16649     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16650 
16651   // fold vector ops
16652   if (VT.isVector())
16653     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16654       return FoldedVOp;
16655 
16656   if (SDValue NewSel = foldBinOpIntoSelect(N))
16657     return NewSel;
16658 
16659   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16660     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16661     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16662         N0.getOpcode() == ISD::FMUL) {
16663       SDValue N00 = N0.getOperand(0);
16664       SDValue N01 = N0.getOperand(1);
16665       // Avoid an infinite loop by making sure that N00 is not a constant
16666       // (the inner multiply has not been constant folded yet).
16667       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16668           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16669         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16670         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16671       }
16672     }
16673 
    // Match a special case: (fadd X, X) is X * 2.0 (see the fold below), so:
    // fmul (fadd X, X), C -> fmul X, 2.0 * C
16676     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16677         N0.getOperand(0) == N0.getOperand(1)) {
16678       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16679       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16680       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16681     }
16682 
16683     // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16684     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16685                                           VT, N0, N1, Flags))
16686       return SD;
16687   }
16688 
16689   // fold (fmul X, 2.0) -> (fadd X, X)
16690   if (N1CFP && N1CFP->isExactlyValue(+2.0))
16691     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16692 
16693   // fold (fmul X, -1.0) -> (fsub -0.0, X)
16694   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16695     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16696       return DAG.getNode(ISD::FSUB, DL, VT,
16697                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16698     }
16699   }
16700 
16701   // -N0 * -N1 --> N0 * N1
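  // Only profitable when at least one negation is strictly cheaper than the
  // original value; with two merely cost-neutral negations the rewrite would
  // make no progress.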
16702   TargetLowering::NegatibleCost CostN0 =
16703       TargetLowering::NegatibleCost::Expensive;
16704   TargetLowering::NegatibleCost CostN1 =
16705       TargetLowering::NegatibleCost::Expensive;
16706   SDValue NegN0 =
16707       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16708   if (NegN0) {
16709     HandleSDNode NegN0Handle(NegN0);
16710     SDValue NegN1 =
16711         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16712     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16713                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
16714       return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16715   }
16716 
16717   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16718   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16719   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16720       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16721       TLI.isOperationLegal(ISD::FABS, VT)) {
16722     SDValue Select = N0, X = N1;
16723     if (Select.getOpcode() != ISD::SELECT)
16724       std::swap(Select, X);
16725 
16726     SDValue Cond = Select.getOperand(0);
16727     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16728     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16729 
16730     if (TrueOpnd && FalseOpnd &&
16731         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16732         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16733         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16734       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16735       switch (CC) {
16736       default: break;
16737       case ISD::SETOLT:
16738       case ISD::SETULT:
16739       case ISD::SETOLE:
16740       case ISD::SETULE:
16741       case ISD::SETLT:
16742       case ISD::SETLE:
16743         std::swap(TrueOpnd, FalseOpnd);
16744         [[fallthrough]];
16745       case ISD::SETOGT:
16746       case ISD::SETUGT:
16747       case ISD::SETOGE:
16748       case ISD::SETUGE:
16749       case ISD::SETGT:
16750       case ISD::SETGE:
16751         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16752             TLI.isOperationLegal(ISD::FNEG, VT))
16753           return DAG.getNode(ISD::FNEG, DL, VT,
16754                    DAG.getNode(ISD::FABS, DL, VT, X));
16755         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16756           return DAG.getNode(ISD::FABS, DL, VT, X);
16757 
16758         break;
16759       }
16760     }
16761   }
16762 
16763   // FMUL -> FMA combines:
16764   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16765     AddToWorklist(Fused.getNode());
16766     return Fused;
16767   }
16768 
  // Don't run `combineFMulOrFDivWithIntPow2` until the FMUL -> FMA combines
  // have had a chance to run.
16771   if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16772     return R;
16773 
16774   return SDValue();
16775 }
16776 
16777 template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16778   SDValue N0 = N->getOperand(0);
16779   SDValue N1 = N->getOperand(1);
16780   SDValue N2 = N->getOperand(2);
16781   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16782   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16783   EVT VT = N->getValueType(0);
16784   SDLoc DL(N);
16785   const TargetOptions &Options = DAG.getTarget().Options;
16786   // FMA nodes have flags that propagate to the created nodes.
16787   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16788   MatchContextClass matcher(DAG, TLI, N);
16789 
16790   bool CanReassociate =
16791       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16792 
16793   // Constant fold FMA.
16794   if (isa<ConstantFPSDNode>(N0) &&
16795       isa<ConstantFPSDNode>(N1) &&
16796       isa<ConstantFPSDNode>(N2)) {
16797     return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16798   }
16799 
16800   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16801   TargetLowering::NegatibleCost CostN0 =
16802       TargetLowering::NegatibleCost::Expensive;
16803   TargetLowering::NegatibleCost CostN1 =
16804       TargetLowering::NegatibleCost::Expensive;
16805   SDValue NegN0 =
16806       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16807   if (NegN0) {
16808     HandleSDNode NegN0Handle(NegN0);
16809     SDValue NegN1 =
16810         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16811     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16812                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
16813       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16814   }
16815 
16816   // FIXME: use fast math flags instead of Options.UnsafeFPMath
16817   if (Options.UnsafeFPMath) {
16818     if (N0CFP && N0CFP->isZero())
16819       return N2;
16820     if (N1CFP && N1CFP->isZero())
16821       return N2;
16822   }
16823 
16824   // FIXME: Support splat of constant.
16825   if (N0CFP && N0CFP->isExactlyValue(1.0))
16826     return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16827   if (N1CFP && N1CFP->isExactlyValue(1.0))
16828     return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16829 
16830   // Canonicalize (fma c, x, y) -> (fma x, c, y)
16831   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16832      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16833     return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16834 
16835   if (CanReassociate) {
16836     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16837     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16838         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16839         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16840       return matcher.getNode(
16841           ISD::FMUL, DL, VT, N0,
16842           matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16843     }
16844 
16845     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16846     if (matcher.match(N0, ISD::FMUL) &&
16847         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16848         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16849       return matcher.getNode(
16850           ISD::FMA, DL, VT, N0.getOperand(0),
16851           matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16852     }
16853   }
16854 
  // (fma x, 1, y)  -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
16856   // FIXME: Support splat of constant.
16857   if (N1CFP) {
16858     if (N1CFP->isExactlyValue(1.0))
16859       return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16860 
16861     if (N1CFP->isExactlyValue(-1.0) &&
16862         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16863       SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16864       AddToWorklist(RHSNeg.getNode());
16865       return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16866     }
16867 
    // fma (fneg x), K, y -> fma x, -K, y
16869     if (matcher.match(N0, ISD::FNEG) &&
16870         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16871          (N1.hasOneUse() &&
16872           !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16873       return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16874                              matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16875     }
16876   }
16877 
16878   // FIXME: Support splat of constant.
16879   if (CanReassociate) {
16880     // (fma x, c, x) -> (fmul x, (c+1))
16881     if (N1CFP && N0 == N2) {
16882       return matcher.getNode(ISD::FMUL, DL, VT, N0,
16883                              matcher.getNode(ISD::FADD, DL, VT, N1,
16884                                              DAG.getConstantFP(1.0, DL, VT)));
16885     }
16886 
16887     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16888     if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16889       return matcher.getNode(ISD::FMUL, DL, VT, N0,
16890                              matcher.getNode(ISD::FADD, DL, VT, N1,
16891                                              DAG.getConstantFP(-1.0, DL, VT)));
16892     }
16893   }
16894 
16895   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16896   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16897   if (!TLI.isFNegFree(VT))
16898     if (SDValue Neg = TLI.getCheaperNegatedExpression(
16899             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
16900       return matcher.getNode(ISD::FNEG, DL, VT, Neg);
16901   return SDValue();
16902 }
16903 
16904 SDValue DAGCombiner::visitFMAD(SDNode *N) {
16905   SDValue N0 = N->getOperand(0);
16906   SDValue N1 = N->getOperand(1);
16907   SDValue N2 = N->getOperand(2);
16908   EVT VT = N->getValueType(0);
16909   SDLoc DL(N);
16910 
16911   // Constant fold FMAD.
16912   if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
16913       isa<ConstantFPSDNode>(N2))
16914     return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
16915 
16916   return SDValue();
16917 }
16918 
16919 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
16920 // reciprocal.
16921 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
16922 // Notice that this is not always beneficial. One reason is different targets
16923 // may have different costs for FDIV and FMUL, so sometimes the cost of two
16924 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
16925 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
16926 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
16927   // TODO: Limit this transform based on optsize/minsize - it always creates at
16928   //       least 1 extra instruction. But the perf win may be substantial enough
16929   //       that only minsize should restrict this.
16930   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
16931   const SDNodeFlags Flags = N->getFlags();
16932   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
16933     return SDValue();
16934 
16935   // Skip if current node is a reciprocal/fneg-reciprocal.
16936   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
16937   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
16938   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
16939     return SDValue();
16940 
16941   // Exit early if the target does not want this transform or if there can't
16942   // possibly be enough uses of the divisor to make the transform worthwhile.
16943   unsigned MinUses = TLI.combineRepeatedFPDivisors();
16944 
16945   // For splat vectors, scale the number of uses by the splat factor. If we can
16946   // convert the division into a scalar op, that will likely be much faster.
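  // E.g., with a target minimum of 2 uses, a single FDIV whose divisor is a
  // splatted <4 x float> counts as 4 potential scalar uses and still
  // qualifies.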
16947   unsigned NumElts = 1;
16948   EVT VT = N->getValueType(0);
16949   if (VT.isVector() && DAG.isSplatValue(N1))
16950     NumElts = VT.getVectorMinNumElements();
16951 
16952   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
16953     return SDValue();
16954 
16955   // Find all FDIV users of the same divisor.
16956   // Use a set because duplicates may be present in the user list.
16957   SetVector<SDNode *> Users;
16958   for (auto *U : N1->uses()) {
16959     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
16960       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
16961       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
16962           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
16963           U->getFlags().hasAllowReassociation() &&
16964           U->getFlags().hasNoSignedZeros())
16965         continue;
16966 
16967       // This division is eligible for optimization only if global unsafe math
16968       // is enabled or if this division allows reciprocal formation.
16969       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
16970         Users.insert(U);
16971     }
16972   }
16973 
16974   // Now that we have the actual number of divisor uses, make sure it meets
16975   // the minimum threshold specified by the target.
16976   if ((Users.size() * NumElts) < MinUses)
16977     return SDValue();
16978 
16979   SDLoc DL(N);
16980   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16981   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
16982 
16983   // Dividend / Divisor -> Dividend * Reciprocal
16984   for (auto *U : Users) {
16985     SDValue Dividend = U->getOperand(0);
16986     if (Dividend != FPOne) {
16987       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
16988                                     Reciprocal, Flags);
16989       CombineTo(U, NewNode);
16990     } else if (U != Reciprocal.getNode()) {
16991       // In the absence of fast-math-flags, this user node is always the
16992       // same node as Reciprocal, but with FMF they may be different nodes.
16993       CombineTo(U, Reciprocal);
16994     }
16995   }
16996   return SDValue(N, 0);  // N was replaced.
16997 }
16998 
16999 SDValue DAGCombiner::visitFDIV(SDNode *N) {
17000   SDValue N0 = N->getOperand(0);
17001   SDValue N1 = N->getOperand(1);
17002   EVT VT = N->getValueType(0);
17003   SDLoc DL(N);
17004   const TargetOptions &Options = DAG.getTarget().Options;
17005   SDNodeFlags Flags = N->getFlags();
17006   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17007 
17008   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17009     return R;
17010 
17011   // fold (fdiv c1, c2) -> c1/c2
17012   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17013     return C;
17014 
17015   // fold vector ops
17016   if (VT.isVector())
17017     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17018       return FoldedVOp;
17019 
17020   if (SDValue NewSel = foldBinOpIntoSelect(N))
17021     return NewSel;
17022 
17023   if (SDValue V = combineRepeatedFPDivisors(N))
17024     return V;
17025 
17026   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17027     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
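    // E.g., (fdiv X, 4.0) -> (fmul X, 0.25) is exact (opOK), while
    // (fdiv X, 3.0) -> (fmul X, 0.333...) is merely inexact (opInexact); both
    // statuses are accepted below.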
17028     if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17029       // Compute the reciprocal 1.0 / c2.
17030       const APFloat &N1APF = N1CFP->getValueAPF();
17031       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17032       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17033       // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (e.g. NaN, denormal, ...).
17035       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17036           (!LegalOperations ||
17037            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17038            // backend)... we should handle this gracefully after Legalize.
17039            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17040            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17041            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17042         return DAG.getNode(ISD::FMUL, DL, VT, N0,
17043                            DAG.getConstantFP(Recip, DL, VT));
17044     }
17045 
17046     // If this FDIV is part of a reciprocal square root, it may be folded
17047     // into a target-specific square root estimate instruction.
17048     if (N1.getOpcode() == ISD::FSQRT) {
17049       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17050         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17051     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17052                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17053       if (SDValue RV =
17054               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17055         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17056         AddToWorklist(RV.getNode());
17057         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17058       }
17059     } else if (N1.getOpcode() == ISD::FP_ROUND &&
17060                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17061       if (SDValue RV =
17062               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17063         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17064         AddToWorklist(RV.getNode());
17065         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17066       }
17067     } else if (N1.getOpcode() == ISD::FMUL) {
17068       // Look through an FMUL. Even though this won't remove the FDIV directly,
17069       // it's still worthwhile to get rid of the FSQRT if possible.
17070       SDValue Sqrt, Y;
17071       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17072         Sqrt = N1.getOperand(0);
17073         Y = N1.getOperand(1);
17074       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17075         Sqrt = N1.getOperand(1);
17076         Y = N1.getOperand(0);
17077       }
17078       if (Sqrt.getNode()) {
17079         // If the other multiply operand is known positive, pull it into the
17080         // sqrt. That will eliminate the division if we convert to an estimate.
17081         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17082             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17083           SDValue A;
17084           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17085             A = Y.getOperand(0);
17086           else if (Y == Sqrt.getOperand(0))
17087             A = Y;
17088           if (A) {
17089             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17090             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17091             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17092             SDValue AAZ =
17093                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17094             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17095               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17096 
17097             // Estimate creation failed. Clean up speculatively created nodes.
17098             recursivelyDeleteUnusedNodes(AAZ.getNode());
17099           }
17100         }
17101 
17102         // We found a FSQRT, so try to make this fold:
17103         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17104         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17105           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17106           AddToWorklist(Div.getNode());
17107           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17108         }
17109       }
17110     }
17111 
17112     // Fold into a reciprocal estimate and multiply instead of a real divide.
17113     if (Options.NoInfsFPMath || Flags.hasNoInfs())
17114       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17115         return RV;
17116   }
17117 
17118   // Fold X/Sqrt(X) -> Sqrt(X)
17119   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17120       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17121     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17122       return N1;
17123 
17124   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17125   TargetLowering::NegatibleCost CostN0 =
17126       TargetLowering::NegatibleCost::Expensive;
17127   TargetLowering::NegatibleCost CostN1 =
17128       TargetLowering::NegatibleCost::Expensive;
17129   SDValue NegN0 =
17130       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17131   if (NegN0) {
17132     HandleSDNode NegN0Handle(NegN0);
17133     SDValue NegN1 =
17134         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17135     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17136                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17137       return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17138   }
17139 
17140   if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17141     return R;
17142 
17143   return SDValue();
17144 }
17145 
17146 SDValue DAGCombiner::visitFREM(SDNode *N) {
17147   SDValue N0 = N->getOperand(0);
17148   SDValue N1 = N->getOperand(1);
17149   EVT VT = N->getValueType(0);
17150   SDNodeFlags Flags = N->getFlags();
17151   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17152 
17153   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17154     return R;
17155 
17156   // fold (frem c1, c2) -> fmod(c1,c2)
17157   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17158     return C;
17159 
17160   if (SDValue NewSel = foldBinOpIntoSelect(N))
17161     return NewSel;
17162 
17163   return SDValue();
17164 }
17165 
17166 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17167   SDNodeFlags Flags = N->getFlags();
17168   const TargetOptions &Options = DAG.getTarget().Options;
17169 
17170   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17171   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17172   if (!Flags.hasApproximateFuncs() ||
17173       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17174     return SDValue();
17175 
17176   SDValue N0 = N->getOperand(0);
17177   if (TLI.isFsqrtCheap(N0, DAG))
17178     return SDValue();
17179 
17180   // FSQRT nodes have flags that propagate to the created nodes.
17181   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17182   //       transform the fdiv, we may produce a sub-optimal estimate sequence
17183   //       because the reciprocal calculation may not have to filter out a
17184   //       0.0 input.
17185   return buildSqrtEstimate(N0, Flags);
17186 }
17187 
17188 /// copysign(x, fp_extend(y)) -> copysign(x, y)
17189 /// copysign(x, fp_round(y)) -> copysign(x, y)
/// Operands to the functions are the types of X and Y respectively.
17191 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17192   // Always fold no-op FP casts.
17193   if (XTy == YTy)
17194     return true;
17195 
17196   // Do not optimize out type conversion of f128 type yet.
17197   // For some targets like x86_64, configuration is changed to keep one f128
17198   // value in one SSE register, but instruction selection cannot handle
17199   // FCOPYSIGN on SSE registers yet.
17200   if (YTy == MVT::f128)
17201     return false;
17202 
17203   return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17204 }
17205 
17206 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17207   SDValue N1 = N->getOperand(1);
17208   if (N1.getOpcode() != ISD::FP_EXTEND &&
17209       N1.getOpcode() != ISD::FP_ROUND)
17210     return false;
17211   EVT N1VT = N1->getValueType(0);
17212   EVT N1Op0VT = N1->getOperand(0).getValueType();
17213   return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17214 }
17215 
17216 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17217   SDValue N0 = N->getOperand(0);
17218   SDValue N1 = N->getOperand(1);
17219   EVT VT = N->getValueType(0);
17220 
17221   // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17222   if (SDValue C =
17223           DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17224     return C;
17225 
17226   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17227     const APFloat &V = N1C->getValueAPF();
17228     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
17229     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17230     if (!V.isNegative()) {
17231       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17232         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17233     } else {
17234       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17235         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17236                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17237     }
17238   }
17239 
17240   // copysign(fabs(x), y) -> copysign(x, y)
17241   // copysign(fneg(x), y) -> copysign(x, y)
17242   // copysign(copysign(x,z), y) -> copysign(x, y)
17243   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17244       N0.getOpcode() == ISD::FCOPYSIGN)
17245     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17246 
17247   // copysign(x, abs(y)) -> abs(x)
17248   if (N1.getOpcode() == ISD::FABS)
17249     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17250 
17251   // copysign(x, copysign(y,z)) -> copysign(x, z)
17252   if (N1.getOpcode() == ISD::FCOPYSIGN)
17253     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17254 
17255   // copysign(x, fp_extend(y)) -> copysign(x, y)
17256   // copysign(x, fp_round(y)) -> copysign(x, y)
17257   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17258     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17259 
17260   return SDValue();
17261 }
17262 
17263 SDValue DAGCombiner::visitFPOW(SDNode *N) {
17264   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17265   if (!ExponentC)
17266     return SDValue();
17267   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17268 
17269   // Try to convert x ** (1/3) into cube root.
17270   // TODO: Handle the various flavors of long double.
17271   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17272   //       Some range near 1/3 should be fine.
17273   EVT VT = N->getValueType(0);
17274   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17275       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17276     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17277     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
17279     // For regular numbers, rounding may cause the results to differ.
17280     // Therefore, we require { nsz ninf nnan afn } for this transform.
17281     // TODO: We could select out the special cases if we don't have nsz/ninf.
17282     SDNodeFlags Flags = N->getFlags();
17283     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17284         !Flags.hasApproximateFuncs())
17285       return SDValue();
17286 
17287     // Do not create a cbrt() libcall if the target does not have it, and do not
17288     // turn a pow that has lowering support into a cbrt() libcall.
17289     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17290         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17291          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17292       return SDValue();
17293 
17294     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17295   }
17296 
17297   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17298   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17299   // TODO: This could be extended (using a target hook) to handle smaller
17300   // power-of-2 fractional exponents.
17301   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17302   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17303   if (ExponentIs025 || ExponentIs075) {
17304     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17305     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
17306     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17307     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
17308     // For regular numbers, rounding may cause the results to differ.
17309     // Therefore, we require { nsz ninf afn } for this transform.
17310     // TODO: We could select out the special cases if we don't have nsz/ninf.
17311     SDNodeFlags Flags = N->getFlags();
17312 
17313     // We only need no signed zeros for the 0.25 case.
17314     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17315         !Flags.hasApproximateFuncs())
17316       return SDValue();
17317 
17318     // Don't double the number of libcalls. We are trying to inline fast code.
17319     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17320       return SDValue();
17321 
17322     // Assume that libcalls are the smallest code.
17323     // TODO: This restriction should probably be lifted for vectors.
17324     if (ForCodeSize)
17325       return SDValue();
17326 
17327     // pow(X, 0.25) --> sqrt(sqrt(X))
17328     SDLoc DL(N);
17329     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17330     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17331     if (ExponentIs025)
17332       return SqrtSqrt;
17333     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17334     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17335   }
17336 
17337   return SDValue();
17338 }
17339 
17340 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17341                                const TargetLowering &TLI) {
17342   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17343   // replacing casts with a libcall. We also must be allowed to ignore -0.0
17344   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17345   // conversions would return +0.0.
17346   // FIXME: We should be able to use node-level FMF here.
17347   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
17348   EVT VT = N->getValueType(0);
17349   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17350       !DAG.getTarget().Options.NoSignedZerosFPMath)
17351     return SDValue();
17352 
17353   // fptosi/fptoui round towards zero, so converting from FP to integer and
17354   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
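  // E.g., sitofp (fptosi -1.5f) == -1.0f == ftrunc -1.5f. For inputs in
  // (-1.0, -0.0) the round trip yields +0.0 while ftrunc yields -0.0, hence
  // the NoSignedZerosFPMath requirement above.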
17355   SDValue N0 = N->getOperand(0);
17356   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17357       N0.getOperand(0).getValueType() == VT)
17358     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17359 
17360   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17361       N0.getOperand(0).getValueType() == VT)
17362     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17363 
17364   return SDValue();
17365 }
17366 
17367 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17368   SDValue N0 = N->getOperand(0);
17369   EVT VT = N->getValueType(0);
17370   EVT OpVT = N0.getValueType();
17371 
17372   // [us]itofp(undef) = 0, because the result value is bounded.
17373   if (N0.isUndef())
17374     return DAG.getConstantFP(0.0, SDLoc(N), VT);
17375 
17376   // fold (sint_to_fp c1) -> c1fp
17377   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17378       // ...but only if the target supports immediate floating-point values
17379       (!LegalOperations ||
17380        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17381     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17382 
17383   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17384   // but UINT_TO_FP is legal on this target, try to convert.
17385   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17386       hasOperation(ISD::UINT_TO_FP, OpVT)) {
17387     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17388     if (DAG.SignBitIsZero(N0))
17389       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17390   }
17391 
17392   // The next optimizations are desirable only if SELECT_CC can be lowered.
17393   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
17394   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17395       !VT.isVector() &&
17396       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17397     SDLoc DL(N);
17398     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17399                          DAG.getConstantFP(0.0, DL, VT));
17400   }
17401 
17402   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17403   //      (select (setcc x, y, cc), 1.0, 0.0)
17404   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17405       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17406       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17407     SDLoc DL(N);
17408     return DAG.getSelect(DL, VT, N0.getOperand(0),
17409                          DAG.getConstantFP(1.0, DL, VT),
17410                          DAG.getConstantFP(0.0, DL, VT));
17411   }
17412 
17413   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17414     return FTrunc;
17415 
17416   return SDValue();
17417 }
17418 
17419 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17420   SDValue N0 = N->getOperand(0);
17421   EVT VT = N->getValueType(0);
17422   EVT OpVT = N0.getValueType();
17423 
17424   // [us]itofp(undef) = 0, because the result value is bounded.
17425   if (N0.isUndef())
17426     return DAG.getConstantFP(0.0, SDLoc(N), VT);
17427 
17428   // fold (uint_to_fp c1) -> c1fp
17429   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17430       // ...but only if the target supports immediate floating-point values
17431       (!LegalOperations ||
17432        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17433     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17434 
17435   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17436   // but SINT_TO_FP is legal on this target, try to convert.
17437   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17438       hasOperation(ISD::SINT_TO_FP, OpVT)) {
17439     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17440     if (DAG.SignBitIsZero(N0))
17441       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17442   }
17443 
17444   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17445   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17446       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17447     SDLoc DL(N);
17448     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17449                          DAG.getConstantFP(0.0, DL, VT));
17450   }
17451 
17452   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17453     return FTrunc;
17454 
17455   return SDValue();
17456 }
17457 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17459 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17460   SDValue N0 = N->getOperand(0);
17461   EVT VT = N->getValueType(0);
17462 
17463   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17464     return SDValue();
17465 
17466   SDValue Src = N0.getOperand(0);
17467   EVT SrcVT = Src.getValueType();
17468   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17469   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17470 
17471   // We can safely assume the conversion won't overflow the output range,
17472   // because (for example) (uint8_t)18293.f is undefined behavior.
17473 
17474   // Since we can assume the conversion won't overflow, our decision as to
17475   // whether the input will fit in the float should depend on the minimum
17476   // of the input range and output range.
17477 
17478   // This means this is also safe for a signed input and unsigned output, since
17479   // a negative input would lead to undefined behavior.
17480   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17481   unsigned OutputSize = (int)VT.getScalarSizeInBits();
17482   unsigned ActualSize = std::min(InputSize, OutputSize);
17483   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17484 
17485   // We can only fold away the float conversion if the input range can be
17486   // represented exactly in the float range.
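  // E.g., i16 -> f32 -> i16 folds: f32 carries 24 bits of precision, so every
  // i16 value round-trips exactly, and the cast pair reduces to a bitcast,
  // extend, or truncate of the original integer.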
17487   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17488     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17489       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17490                                                        : ISD::ZERO_EXTEND;
17491       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17492     }
17493     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17494       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17495     return DAG.getBitcast(VT, Src);
17496   }
17497   return SDValue();
17498 }
17499 
17500 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17501   SDValue N0 = N->getOperand(0);
17502   EVT VT = N->getValueType(0);
17503 
17504   // fold (fp_to_sint undef) -> undef
17505   if (N0.isUndef())
17506     return DAG.getUNDEF(VT);
17507 
17508   // fold (fp_to_sint c1fp) -> c1
17509   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17510     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17511 
17512   return FoldIntToFPToInt(N, DAG);
17513 }
17514 
17515 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17516   SDValue N0 = N->getOperand(0);
17517   EVT VT = N->getValueType(0);
17518 
17519   // fold (fp_to_uint undef) -> undef
17520   if (N0.isUndef())
17521     return DAG.getUNDEF(VT);
17522 
17523   // fold (fp_to_uint c1fp) -> c1
17524   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17525     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17526 
17527   return FoldIntToFPToInt(N, DAG);
17528 }
17529 
17530 SDValue DAGCombiner::visitXRINT(SDNode *N) {
17531   SDValue N0 = N->getOperand(0);
17532   EVT VT = N->getValueType(0);
17533 
17534   // fold (lrint|llrint undef) -> undef
17535   if (N0.isUndef())
17536     return DAG.getUNDEF(VT);
17537 
17538   // fold (lrint|llrint c1fp) -> c1
17539   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17540     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17541 
17542   return SDValue();
17543 }
17544 
17545 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17546   SDValue N0 = N->getOperand(0);
17547   SDValue N1 = N->getOperand(1);
17548   EVT VT = N->getValueType(0);
17549 
17550   // fold (fp_round c1fp) -> c1fp
17551   if (SDValue C =
17552           DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17553     return C;
17554 
17555   // fold (fp_round (fp_extend x)) -> x
17556   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17557     return N0.getOperand(0);
17558 
17559   // fold (fp_round (fp_round x)) -> (fp_round x)
17560   if (N0.getOpcode() == ISD::FP_ROUND) {
17561     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17562     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17563 
17564     // Avoid folding legal fp_rounds into non-legal ones.
17565     if (!hasOperation(ISD::FP_ROUND, VT))
17566       return SDValue();
17567 
17568     // Skip this folding if it results in an fp_round from f80 to f16.
17569     //
17570     // f80 to f16 always generates an expensive (and as yet, unimplemented)
17571     // libcall to __truncxfhf2 instead of selecting native f16 conversion
17572     // instructions from f32 or f64.  Moreover, the first (value-preserving)
17573     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
17574     // x86.
17575     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17576       return SDValue();
17577 
17578     // If the first fp_round isn't a value preserving truncation, it might
17579     // introduce a tie in the second fp_round, that wouldn't occur in the
17580     // single-step fp_round we want to fold to.
17581     // In other words, double rounding isn't the same as rounding.
17582     // Also, this is a value preserving truncation iff both fp_round's are.
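    // E.g., an f64 value just above an f32 rounding midpoint can be pulled
    // down onto that midpoint by the first (non-truncating) fp_round, and then
    // round the other way in the second, ending up one ulp away from the
    // directly rounded result.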
17583     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17584       SDLoc DL(N);
17585       return DAG.getNode(
17586           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17587           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17588     }
17589   }
17590 
17591   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Note: From a legality perspective, this is a two-step transform.  First,
17593   // we duplicate the fp_round to the arguments of the copysign, then we
17594   // eliminate the fp_round on Y.  The second step requires an additional
17595   // predicate to match the implementation above.
17596   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17597       CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17598                                        N0.getValueType())) {
17599     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17600                               N0.getOperand(0), N1);
17601     AddToWorklist(Tmp.getNode());
17602     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17603                        Tmp, N0.getOperand(1));
17604   }
17605 
17606   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17607     return NewVSel;
17608 
17609   return SDValue();
17610 }
17611 
17612 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17613   SDValue N0 = N->getOperand(0);
17614   EVT VT = N->getValueType(0);
17615 
17616   if (VT.isVector())
17617     if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17618       return FoldedVOp;
17619 
  // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
17621   if (N->hasOneUse() &&
17622       N->use_begin()->getOpcode() == ISD::FP_ROUND)
17623     return SDValue();
17624 
17625   // fold (fp_extend c1fp) -> c1fp
17626   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17627     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17628 
17629   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17630   if (N0.getOpcode() == ISD::FP16_TO_FP &&
17631       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17632     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17633 
17634   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
17635   // value of X.
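  // (A second operand of 1 on FP_ROUND asserts that the rounding did not
  // change the value, so extending back loses nothing.)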
17636   if (N0.getOpcode() == ISD::FP_ROUND
17637       && N0.getConstantOperandVal(1) == 1) {
17638     SDValue In = N0.getOperand(0);
17639     if (In.getValueType() == VT) return In;
17640     if (VT.bitsLT(In.getValueType()))
17641       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17642                          In, N0.getOperand(1));
17643     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17644   }
17645 
17646   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17647   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17648       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17649     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17650     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17651                                      LN0->getChain(),
17652                                      LN0->getBasePtr(), N0.getValueType(),
17653                                      LN0->getMemOperand());
17654     CombineTo(N, ExtLoad);
17655     CombineTo(
17656         N0.getNode(),
17657         DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17658                     DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17659         ExtLoad.getValue(1));
17660     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
17661   }
17662 
17663   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17664     return NewVSel;
17665 
17666   return SDValue();
17667 }
17668 
17669 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17670   SDValue N0 = N->getOperand(0);
17671   EVT VT = N->getValueType(0);
17672 
17673   // fold (fceil c1) -> fceil(c1)
17674   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17675     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17676 
17677   return SDValue();
17678 }
17679 
17680 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17681   SDValue N0 = N->getOperand(0);
17682   EVT VT = N->getValueType(0);
17683 
17684   // fold (ftrunc c1) -> ftrunc(c1)
17685   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17686     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17687 
  // fold ftrunc (known rounded int x) -> x
  // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
  // likely to be generated when extracting an integer from an already-rounded
  // floating-point value.
17691   switch (N0.getOpcode()) {
17692   default: break;
17693   case ISD::FRINT:
17694   case ISD::FTRUNC:
17695   case ISD::FNEARBYINT:
17696   case ISD::FROUNDEVEN:
17697   case ISD::FFLOOR:
17698   case ISD::FCEIL:
17699     return N0;
17700   }
17701 
17702   return SDValue();
17703 }
17704 
17705 SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17706   SDValue N0 = N->getOperand(0);
17707 
17708   // fold (ffrexp c1) -> ffrexp(c1)
17709   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17710     return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17711   return SDValue();
17712 }
17713 
17714 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17715   SDValue N0 = N->getOperand(0);
17716   EVT VT = N->getValueType(0);
17717 
17718   // fold (ffloor c1) -> ffloor(c1)
17719   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17720     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17721 
17722   return SDValue();
17723 }
17724 
17725 SDValue DAGCombiner::visitFNEG(SDNode *N) {
17726   SDValue N0 = N->getOperand(0);
17727   EVT VT = N->getValueType(0);
17728   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17729 
17730   // Constant fold FNEG.
17731   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17732     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17733 
17734   if (SDValue NegN0 =
17735           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17736     return NegN0;
17737 
17738   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17739   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17740   // know it was called from a context with a nsz flag if the input fsub does
17741   // not.
17742   if (N0.getOpcode() == ISD::FSUB &&
17743       (DAG.getTarget().Options.NoSignedZerosFPMath ||
17744        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17745     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17746                        N0.getOperand(0));
17747   }
17748 
17749   if (SDValue Cast = foldSignChangeInBitcast(N))
17750     return Cast;
17751 
17752   return SDValue();
17753 }
17754 
17755 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17756   SDValue N0 = N->getOperand(0);
17757   SDValue N1 = N->getOperand(1);
17758   EVT VT = N->getValueType(0);
17759   const SDNodeFlags Flags = N->getFlags();
17760   unsigned Opc = N->getOpcode();
17761   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17762   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17763   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17764 
17765   // Constant fold.
17766   if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17767     return C;
17768 
17769   // Canonicalize to constant on RHS.
17770   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17771       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17772     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17773 
17774   if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17775     const APFloat &AF = N1CFP->getValueAPF();
17776 
17777     // minnum(X, nan) -> X
17778     // maxnum(X, nan) -> X
17779     // minimum(X, nan) -> nan
17780     // maximum(X, nan) -> nan
17781     if (AF.isNaN())
17782       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17783 
17784     // In the following folds, inf can be replaced with the largest finite
17785     // float, if the ninf flag is set.
17786     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17787       // minnum(X, -inf) -> -inf
17788       // maxnum(X, +inf) -> +inf
17789       // minimum(X, -inf) -> -inf if nnan
17790       // maximum(X, +inf) -> +inf if nnan
17791       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17792         return N->getOperand(1);
17793 
17794       // minnum(X, +inf) -> X if nnan
17795       // maxnum(X, -inf) -> X if nnan
17796       // minimum(X, +inf) -> X
17797       // maximum(X, -inf) -> X
17798       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17799         return N->getOperand(0);
17800     }
17801   }
17802 
17803   if (SDValue SD = reassociateReduction(
17804           PropagatesNaN
17805               ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17806               : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17807           Opc, SDLoc(N), VT, N0, N1, Flags))
17808     return SD;
17809 
17810   return SDValue();
17811 }
17812 
17813 SDValue DAGCombiner::visitFABS(SDNode *N) {
17814   SDValue N0 = N->getOperand(0);
17815   EVT VT = N->getValueType(0);
17816 
17817   // fold (fabs c1) -> fabs(c1)
17818   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17819     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17820 
17821   // fold (fabs (fabs x)) -> (fabs x)
17822   if (N0.getOpcode() == ISD::FABS)
17823     return N->getOperand(0);
17824 
17825   // fold (fabs (fneg x)) -> (fabs x)
17826   // fold (fabs (fcopysign x, y)) -> (fabs x)
17827   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17828     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17829 
17830   if (SDValue Cast = foldSignChangeInBitcast(N))
17831     return Cast;
17832 
17833   return SDValue();
17834 }
17835 
17836 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17837   SDValue Chain = N->getOperand(0);
17838   SDValue N1 = N->getOperand(1);
17839   SDValue N2 = N->getOperand(2);
17840 
17841   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17842   // nondeterministic jumps).
17843   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17844     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17845                        N1->getOperand(0), N2);
17846   }
17847 
17848   // Variant of the previous fold where there is a SETCC in between:
17849   //   BRCOND(SETCC(FREEZE(X), CONST, Cond))
17850   // =>
17851   //   BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17852   // =>
17853   //   BRCOND(SETCC(X, CONST, Cond))
17854   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17855   // isn't equivalent to true or false.
17856   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17857   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17858   if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17859     SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17860     ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17861     ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17862     ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17863     bool Updated = false;
17864 
17865     // Is 'X Cond C' always true or false?
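    // E.g., 'X u< 0' is always false and 'X u>= 0' is always true.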
17866     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17867       bool False = (Cond == ISD::SETULT && C->isZero()) ||
17868                    (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17869                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
17870                    (Cond == ISD::SETGT && C->isMaxSignedValue());
17871       bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17872                   (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17873                   (Cond == ISD::SETUGE && C->isZero()) ||
17874                   (Cond == ISD::SETGE && C->isMinSignedValue());
17875       return True || False;
17876     };
17877 
17878     if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17879       if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17880         S0 = S0->getOperand(0);
17881         Updated = true;
17882       }
17883     }
17884     if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17885       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17886         S1 = S1->getOperand(0);
17887         Updated = true;
17888       }
17889     }
17890 
17891     if (Updated)
17892       return DAG.getNode(
17893           ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17894           DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17895   }
17896 
17897   // If N is a constant we could fold this into a fallthrough or unconditional
17898   // branch. However that doesn't happen very often in normal code, because
17899   // Instcombine/SimplifyCFG should have handled the available opportunities.
17900   // If we did this folding here, it would be necessary to update the
17901   // MachineBasicBlock CFG, which is awkward.
17902 
17903   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
17904   // on the target.
17905   if (N1.getOpcode() == ISD::SETCC &&
17906       TLI.isOperationLegalOrCustom(ISD::BR_CC,
17907                                    N1.getOperand(0).getValueType())) {
17908     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
17909                        Chain, N1.getOperand(2),
17910                        N1.getOperand(0), N1.getOperand(1), N2);
17911   }
17912 
17913   if (N1.hasOneUse()) {
17914     // rebuildSetCC calls visitXor which may change the Chain when there is a
17915     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
17916     HandleSDNode ChainHandle(Chain);
17917     if (SDValue NewN1 = rebuildSetCC(N1))
17918       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
17919                          ChainHandle.getValue(), NewN1, N2);
17920   }
17921 
17922   return SDValue();
17923 }
17924 
17925 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
17926   if (N.getOpcode() == ISD::SRL ||
17927       (N.getOpcode() == ISD::TRUNCATE &&
17928        (N.getOperand(0).hasOneUse() &&
17929         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
17931     if (N.getOpcode() == ISD::TRUNCATE)
17932       N = N.getOperand(0);
17933 
17934     // Match this pattern so that we can generate simpler code:
17935     //
17936     //   %a = ...
17937     //   %b = and i32 %a, 2
17938     //   %c = srl i32 %b, 1
17939     //   brcond i32 %c ...
17940     //
17941     // into
17942     //
17943     //   %a = ...
17944     //   %b = and i32 %a, 2
17945     //   %c = setcc eq %b, 0
17946     //   brcond %c ...
17947     //
17948     // This applies only when the AND constant value has one bit set and the
17949     // SRL constant is equal to the log2 of the AND constant. The back-end is
17950     // smart enough to convert the result into a TEST/JMP sequence.
17951     SDValue Op0 = N.getOperand(0);
17952     SDValue Op1 = N.getOperand(1);
17953 
17954     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
17955       SDValue AndOp1 = Op0.getOperand(1);
17956 
17957       if (AndOp1.getOpcode() == ISD::Constant) {
17958         const APInt &AndConst = AndOp1->getAsAPIntVal();
17959 
17960         if (AndConst.isPowerOf2() &&
17961             Op1->getAsAPIntVal() == AndConst.logBase2()) {
17962           SDLoc DL(N);
17963           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
17964                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
17965                               ISD::SETNE);
17966         }
17967       }
17968     }
17969   }
17970 
17971   // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne))
17972   // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq))
17973   if (N.getOpcode() == ISD::XOR) {
    // Because we may call this on a speculatively constructed
    // SimplifiedSetCC node, we need to simplify this node first.
    // Ideally this should be folded into SimplifySetCC and not
    // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
17979     HandleSDNode XORHandle(N);
17980     while (N.getOpcode() == ISD::XOR) {
17981       SDValue Tmp = visitXOR(N.getNode());
17982       // No simplification done.
17983       if (!Tmp.getNode())
17984         break;
      // Returning N signals an in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
17987       if (Tmp.getNode() == N.getNode())
17988         N = XORHandle.getValue();
17989       else // Node simplified. Try simplifying again.
17990         N = Tmp;
17991     }
17992 
17993     if (N.getOpcode() != ISD::XOR)
17994       return N;
17995 
17996     SDValue Op0 = N->getOperand(0);
17997     SDValue Op1 = N->getOperand(1);
17998 
17999     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18000       bool Equal = false;
18001       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18002       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18003           Op0.getValueType() == MVT::i1) {
18004         N = Op0;
18005         Op0 = N->getOperand(0);
18006         Op1 = N->getOperand(1);
18007         Equal = true;
18008       }
18009 
18010       EVT SetCCVT = N.getValueType();
18011       if (LegalTypes)
18012         SetCCVT = getSetCCResultType(SetCCVT);
18013       // Replace the uses of XOR with SETCC
18014       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18015                           Equal ? ISD::SETEQ : ISD::SETNE);
18016     }
18017   }
18018 
18019   return SDValue();
18020 }
18021 
18022 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18023 //
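// For example (illustrative), (br_cc Chain, seteq, X, 0, DestBB) branches to
// DestBB when X == 0 and otherwise falls through.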
18024 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18025   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18026   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18027 
  // If the condition is a constant we could fold this into a fallthrough or an
  // unconditional branch. However, that doesn't happen very often in normal
  // code, because Instcombine/SimplifyCFG should have handled the available
  // opportunities.
18031   // If we did this folding here, it would be necessary to update the
18032   // MachineBasicBlock CFG, which is awkward.
18033 
18034   // Use SimplifySetCC to simplify SETCC's.
18035   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18036                                CondLHS, CondRHS, CC->get(), SDLoc(N),
18037                                false);
18038   if (Simp.getNode()) AddToWorklist(Simp.getNode());
18039 
18040   // fold to a simpler setcc
18041   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18042     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18043                        N->getOperand(0), Simp.getOperand(2),
18044                        Simp.getOperand(0), Simp.getOperand(1),
18045                        N->getOperand(4));
18046 
18047   return SDValue();
18048 }
18049 
18050 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18051                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18052                                      const TargetLowering &TLI) {
18053   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18054     if (LD->isIndexed())
18055       return false;
18056     EVT VT = LD->getMemoryVT();
18057     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18058       return false;
18059     Ptr = LD->getBasePtr();
18060   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18061     if (ST->isIndexed())
18062       return false;
18063     EVT VT = ST->getMemoryVT();
18064     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18065       return false;
18066     Ptr = ST->getBasePtr();
18067     IsLoad = false;
18068   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18069     if (LD->isIndexed())
18070       return false;
18071     EVT VT = LD->getMemoryVT();
18072     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18073         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18074       return false;
18075     Ptr = LD->getBasePtr();
18076     IsMasked = true;
18077   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18078     if (ST->isIndexed())
18079       return false;
18080     EVT VT = ST->getMemoryVT();
18081     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18082         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18083       return false;
18084     Ptr = ST->getBasePtr();
18085     IsLoad = false;
18086     IsMasked = true;
18087   } else {
18088     return false;
18089   }
18090   return true;
18091 }
18092 
18093 /// Try turning a load/store into a pre-indexed load/store when the base
18094 /// pointer is an add or subtract and it has other uses besides the load/store.
18095 /// After the transformation, the new indexed load/store has effectively folded
18096 /// the add/subtract in and all of its other uses are redirected to the
18097 /// new load/store.
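/// For example (illustrative, not exact DAG syntax):
///   t1 = add BasePtr, Offset
///   t2 = load t1
///   ... other uses of t1 ...
/// becomes a pre-indexed load that produces both the loaded value and the
/// updated pointer:
///   t2, t3 = pre_inc_load BasePtr, Offset   ; t3 == BasePtr + Offset
/// with the other uses of t1 redirected to t3.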
18098 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18099   if (Level < AfterLegalizeDAG)
18100     return false;
18101 
18102   bool IsLoad = true;
18103   bool IsMasked = false;
18104   SDValue Ptr;
18105   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18106                                 Ptr, TLI))
18107     return false;
18108 
18109   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18110   // out.  There is no reason to make this a preinc/predec.
18111   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18112       Ptr->hasOneUse())
18113     return false;
18114 
18115   // Ask the target to do addressing mode selection.
18116   SDValue BasePtr;
18117   SDValue Offset;
18118   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18119   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18120     return false;
18121 
18122   // Backends without true r+i pre-indexed forms may need to pass a
18123   // constant base with a variable offset so that constant coercion
18124   // will work with the patterns in canonical form.
18125   bool Swapped = false;
18126   if (isa<ConstantSDNode>(BasePtr)) {
18127     std::swap(BasePtr, Offset);
18128     Swapped = true;
18129   }
18130 
  // Don't create an indexed load / store with a zero offset.
18132   if (isNullConstant(Offset))
18133     return false;
18134 
18135   // Try turning it into a pre-indexed load / store except when:
18136   // 1) The new base ptr is a frame index.
18137   // 2) If N is a store and the new base ptr is either the same as or is a
18138   //    predecessor of the value being stored.
18139   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
18140   //    that would create a cycle.
18141   // 4) All uses are load / store ops that use it as old base ptr.
18142 
18143   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
18144   // (plus the implicit offset) to a register to preinc anyway.
18145   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18146     return false;
18147 
18148   // Check #2.
18149   if (!IsLoad) {
18150     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18151                            : cast<StoreSDNode>(N)->getValue();
18152 
18153     // Would require a copy.
18154     if (Val == BasePtr)
18155       return false;
18156 
18157     // Would create a cycle.
18158     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18159       return false;
18160   }
18161 
18162   // Caches for hasPredecessorHelper.
18163   SmallPtrSet<const SDNode *, 32> Visited;
18164   SmallVector<const SDNode *, 16> Worklist;
18165   Worklist.push_back(N);
18166 
18167   // If the offset is a constant, there may be other adds of constants that
18168   // can be folded with this one. We should do this to avoid having to keep
18169   // a copy of the original base pointer.
18170   SmallVector<SDNode *, 16> OtherUses;
18171   constexpr unsigned int MaxSteps = 8192;
18172   if (isa<ConstantSDNode>(Offset))
18173     for (SDNode::use_iterator UI = BasePtr->use_begin(),
18174                               UE = BasePtr->use_end();
18175          UI != UE; ++UI) {
18176       SDUse &Use = UI.getUse();
18177       // Skip the use that is Ptr and uses of other results from BasePtr's
18178       // node (important for nodes that return multiple results).
18179       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18180         continue;
18181 
18182       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18183                                        MaxSteps))
18184         continue;
18185 
18186       if (Use.getUser()->getOpcode() != ISD::ADD &&
18187           Use.getUser()->getOpcode() != ISD::SUB) {
18188         OtherUses.clear();
18189         break;
18190       }
18191 
18192       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18193       if (!isa<ConstantSDNode>(Op1)) {
18194         OtherUses.clear();
18195         break;
18196       }
18197 
18198       // FIXME: In some cases, we can be smarter about this.
18199       if (Op1.getValueType() != Offset.getValueType()) {
18200         OtherUses.clear();
18201         break;
18202       }
18203 
18204       OtherUses.push_back(Use.getUser());
18205     }
18206 
18207   if (Swapped)
18208     std::swap(BasePtr, Offset);
18209 
18210   // Now check for #3 and #4.
18211   bool RealUse = false;
18212 
18213   for (SDNode *Use : Ptr->uses()) {
18214     if (Use == N)
18215       continue;
18216     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18217       return false;
18218 
    // If Ptr may be folded into the addressing mode of another use, then it's
    // not profitable to do this transformation.
18221     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18222       RealUse = true;
18223   }
18224 
18225   if (!RealUse)
18226     return false;
18227 
18228   SDValue Result;
18229   if (!IsMasked) {
18230     if (IsLoad)
18231       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18232     else
18233       Result =
18234           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18235   } else {
18236     if (IsLoad)
18237       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18238                                         Offset, AM);
18239     else
18240       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18241                                          Offset, AM);
18242   }
18243   ++PreIndexedNodes;
18244   ++NodesCombined;
18245   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18246              Result.dump(&DAG); dbgs() << '\n');
18247   WorklistRemover DeadNodes(*this);
18248   if (IsLoad) {
18249     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18250     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18251   } else {
18252     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18253   }
18254 
18255   // Finally, since the node is now dead, remove it from the graph.
18256   deleteAndRecombine(N);
18257 
18258   if (Swapped)
18259     std::swap(BasePtr, Offset);
18260 
18261   // Replace other uses of BasePtr that can be updated to use Ptr
18262   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18263     unsigned OffsetIdx = 1;
18264     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18265       OffsetIdx = 0;
18266     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18267            BasePtr.getNode() && "Expected BasePtr operand");
18268 
18269     // We need to replace ptr0 in the following expression:
18270     //   x0 * offset0 + y0 * ptr0 = t0
18271     // knowing that
18272     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18273     //
18274     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18275     // indexed load/store and the expression that needs to be re-written.
18276     //
18277     // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
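    //
    // For example (illustrative): if the indexed load/store computed
    //   t1 = ptr0 + 4   (x1 = 1, y1 = 1, offset1 = 4)
    // and another use computed
    //   t0 = ptr0 - 8   (x0 = -1, y0 = 1, offset0 = 8)
    // then t0 = (-8 - 4) + t1 = t1 - 12.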
18279 
18280     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18281     const APInt &Offset0 = CN->getAPIntValue();
18282     const APInt &Offset1 = Offset->getAsAPIntVal();
18283     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18284     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18285     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18286     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18287 
18288     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18289 
18290     APInt CNV = Offset0;
18291     if (X0 < 0) CNV = -CNV;
18292     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18293     else CNV = CNV - Offset1;
18294 
18295     SDLoc DL(OtherUses[i]);
18296 
18297     // We can now generate the new expression.
18298     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18299     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18300 
18301     SDValue NewUse = DAG.getNode(Opcode,
18302                                  DL,
18303                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18304     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18305     deleteAndRecombine(OtherUses[i]);
18306   }
18307 
18308   // Replace the uses of Ptr with uses of the updated base value.
18309   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18310   deleteAndRecombine(Ptr.getNode());
18311   AddToWorklist(Result.getNode());
18312 
18313   return true;
18314 }
18315 
18316 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18317                                    SDValue &BasePtr, SDValue &Offset,
18318                                    ISD::MemIndexedMode &AM,
18319                                    SelectionDAG &DAG,
18320                                    const TargetLowering &TLI) {
18321   if (PtrUse == N ||
18322       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18323     return false;
18324 
18325   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18326     return false;
18327 
  // Don't create an indexed load / store with a zero offset.
18329   if (isNullConstant(Offset))
18330     return false;
18331 
18332   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18333     return false;
18334 
18335   SmallPtrSet<const SDNode *, 32> Visited;
18336   for (SDNode *Use : BasePtr->uses()) {
18337     if (Use == Ptr.getNode())
18338       continue;
18339 
    // Don't do this if there's a later user which could perform the index
    // instead.
18341     if (isa<MemSDNode>(Use)) {
18342       bool IsLoad = true;
18343       bool IsMasked = false;
18344       SDValue OtherPtr;
18345       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18346                                    IsMasked, OtherPtr, TLI)) {
18347         SmallVector<const SDNode *, 2> Worklist;
18348         Worklist.push_back(Use);
18349         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18350           return false;
18351       }
18352     }
18353 
18354     // If all the uses are load / store addresses, then don't do the
18355     // transformation.
18356     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18357       for (SDNode *UseUse : Use->uses())
18358         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18359           return false;
18360     }
18361   }
18362   return true;
18363 }
18364 
18365 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18366                                          bool &IsMasked, SDValue &Ptr,
18367                                          SDValue &BasePtr, SDValue &Offset,
18368                                          ISD::MemIndexedMode &AM,
18369                                          SelectionDAG &DAG,
18370                                          const TargetLowering &TLI) {
18371   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18372                                 IsMasked, Ptr, TLI) ||
18373       Ptr->hasOneUse())
18374     return nullptr;
18375 
18376   // Try turning it into a post-indexed load / store except when
18377   // 1) All uses are load / store ops that use it as base ptr (and
  //    it may be folded as an addressing mode).
18379   // 2) Op must be independent of N, i.e. Op is neither a predecessor
18380   //    nor a successor of N. Otherwise, if Op is folded that would
18381   //    create a cycle.
18382   for (SDNode *Op : Ptr->uses()) {
18383     // Check for #1.
18384     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18385       continue;
18386 
18387     // Check for #2.
18388     SmallPtrSet<const SDNode *, 32> Visited;
18389     SmallVector<const SDNode *, 8> Worklist;
18390     constexpr unsigned int MaxSteps = 8192;
18391     // Ptr is predecessor to both N and Op.
18392     Visited.insert(Ptr.getNode());
18393     Worklist.push_back(N);
18394     Worklist.push_back(Op);
18395     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18396         !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18397       return Op;
18398   }
18399   return nullptr;
18400 }
18401 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of its uses are
/// redirected to the new load/store.
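/// For example (illustrative, not exact DAG syntax):
///   t1 = load Ptr
///   t2 = add Ptr, 4
/// becomes a post-indexed load that produces both the loaded value and the
/// incremented pointer:
///   t1, t3 = post_inc_load Ptr, 4   ; loads from Ptr, t3 == Ptr + 4
/// with the uses of t2 redirected to t3.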
18406 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18407   if (Level < AfterLegalizeDAG)
18408     return false;
18409 
18410   bool IsLoad = true;
18411   bool IsMasked = false;
18412   SDValue Ptr;
18413   SDValue BasePtr;
18414   SDValue Offset;
18415   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18416   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18417                                          Offset, AM, DAG, TLI);
18418   if (!Op)
18419     return false;
18420 
18421   SDValue Result;
18422   if (!IsMasked)
18423     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18424                                          Offset, AM)
18425                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18426                                           BasePtr, Offset, AM);
18427   else
18428     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18429                                                BasePtr, Offset, AM)
18430                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18431                                                 BasePtr, Offset, AM);
18432   ++PostIndexedNodes;
18433   ++NodesCombined;
18434   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18435              Result.dump(&DAG); dbgs() << '\n');
18436   WorklistRemover DeadNodes(*this);
18437   if (IsLoad) {
18438     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18439     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18440   } else {
18441     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18442   }
18443 
18444   // Finally, since the node is now dead, remove it from the graph.
18445   deleteAndRecombine(N);
18446 
  // Replace the uses of Op with uses of the updated base value.
18448   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18449                                 Result.getValue(IsLoad ? 1 : 0));
18450   deleteAndRecombine(Op);
18451   return true;
18452 }
18453 
18454 /// Return the base-pointer arithmetic from an indexed \p LD.
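/// E.g., for a pre- or post-incremented load this returns (add BasePtr, Inc);
/// for the decremented forms it returns (sub BasePtr, Inc).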
18455 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18456   ISD::MemIndexedMode AM = LD->getAddressingMode();
18457   assert(AM != ISD::UNINDEXED);
18458   SDValue BP = LD->getOperand(1);
18459   SDValue Inc = LD->getOperand(2);
18460 
18461   // Some backends use TargetConstants for load offsets, but don't expect
18462   // TargetConstants in general ADD nodes. We can convert these constants into
18463   // regular Constants (if the constant is not opaque).
18464   assert((Inc.getOpcode() != ISD::TargetConstant ||
18465           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18466          "Cannot split out indexing using opaque target constants");
18467   if (Inc.getOpcode() == ISD::TargetConstant) {
18468     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18469     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18470                           ConstInc->getValueType(0));
18471   }
18472 
18473   unsigned Opc =
18474       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18475   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18476 }
18477 
18478 static inline ElementCount numVectorEltsOrZero(EVT T) {
18479   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18480 }
18481 
18482 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18483   EVT STType = Val.getValueType();
18484   EVT STMemType = ST->getMemoryVT();
18485   if (STType == STMemType)
18486     return true;
18487   if (isTypeLegal(STMemType))
18488     return false; // fail.
18489   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18490       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18491     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18492     return true;
18493   }
18494   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18495       STType.isInteger() && STMemType.isInteger()) {
18496     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18497     return true;
18498   }
18499   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18500     Val = DAG.getBitcast(STMemType, Val);
18501     return true;
18502   }
18503   return false; // fail.
18504 }
18505 
18506 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18507   EVT LDMemType = LD->getMemoryVT();
18508   EVT LDType = LD->getValueType(0);
18509   assert(Val.getValueType() == LDMemType &&
18510          "Attempting to extend value of non-matching type");
18511   if (LDType == LDMemType)
18512     return true;
18513   if (LDMemType.isInteger() && LDType.isInteger()) {
18514     switch (LD->getExtensionType()) {
18515     case ISD::NON_EXTLOAD:
18516       Val = DAG.getBitcast(LDType, Val);
18517       return true;
18518     case ISD::EXTLOAD:
18519       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18520       return true;
18521     case ISD::SEXTLOAD:
18522       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18523       return true;
18524     case ISD::ZEXTLOAD:
18525       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18526       return true;
18527     }
18528   }
18529   return false;
18530 }
18531 
18532 StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18533                                                 int64_t &Offset) {
18534   SDValue Chain = LD->getOperand(0);
18535 
18536   // Look through CALLSEQ_START.
18537   if (Chain.getOpcode() == ISD::CALLSEQ_START)
18538     Chain = Chain->getOperand(0);
18539 
18540   StoreSDNode *ST = nullptr;
18541   SmallVector<SDValue, 8> Aliases;
18542   if (Chain.getOpcode() == ISD::TokenFactor) {
18543     // Look for unique store within the TokenFactor.
18544     for (SDValue Op : Chain->ops()) {
18545       StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18546       if (!Store)
18547         continue;
18548       BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18549       BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18550       if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18551         continue;
18552       // Make sure the store is not aliased with any nodes in TokenFactor.
18553       GatherAllAliases(Store, Chain, Aliases);
18554       if (Aliases.empty() ||
18555           (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18556         ST = Store;
18557       break;
18558     }
18559   } else {
18560     StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18561     if (Store) {
18562       BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18563       BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18564       if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18565         ST = Store;
18566     }
18567   }
18568 
18569   return ST;
18570 }
18571 
18572 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18573   if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18574     return SDValue();
18575   SDValue Chain = LD->getOperand(0);
18576   int64_t Offset;
18577 
18578   StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18579   // TODO: Relax this restriction for unordered atomics (see D66309)
18580   if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18581     return SDValue();
18582 
18583   EVT LDType = LD->getValueType(0);
18584   EVT LDMemType = LD->getMemoryVT();
18585   EVT STMemType = ST->getMemoryVT();
18586   EVT STType = ST->getValue().getValueType();
18587 
18588   // There are two cases to consider here:
18589   //  1. The store is fixed width and the load is scalable. In this case we
18590   //     don't know at compile time if the store completely envelops the load
18591   //     so we abandon the optimisation.
18592   //  2. The store is scalable and the load is fixed width. We could
18593   //     potentially support a limited number of cases here, but there has been
18594   //     no cost-benefit analysis to prove it's worth it.
18595   bool LdStScalable = LDMemType.isScalableVT();
18596   if (LdStScalable != STMemType.isScalableVT())
18597     return SDValue();
18598 
18599   // If we are dealing with scalable vectors on a big endian platform the
18600   // calculation of offsets below becomes trickier, since we do not know at
18601   // compile time the absolute size of the vector. Until we've done more
18602   // analysis on big-endian platforms it seems better to bail out for now.
18603   if (LdStScalable && DAG.getDataLayout().isBigEndian())
18604     return SDValue();
18605 
  // Normalize for endianness. After this, Offset=0 will denote that the least
  // significant bit in the loaded value maps to the least significant bit in
  // the stored value. With Offset=n (for n > 0) the loaded value starts at the
  // n:th least significant byte of the stored value.
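  // For example (illustrative, little-endian): if an i32 store of 0xAABBCCDD
  // feeds an i8 load with Offset=1, the loaded value is 0xCC, the 1:th least
  // significant byte of the stored value.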
18610   int64_t OrigOffset = Offset;
18611   if (DAG.getDataLayout().isBigEndian())
18612     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18613               (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18614                  8 -
18615              Offset;
18616 
  // Check that the stored value covers all bits that are loaded.
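  // E.g., for fixed-width types, an i32 store covers an i16 load at
  // (normalized) byte offsets 0 through 2, since Offset * 8 + 16 <= 32 there.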
18618   bool STCoversLD;
18619 
18620   TypeSize LdMemSize = LDMemType.getSizeInBits();
18621   TypeSize StMemSize = STMemType.getSizeInBits();
18622   if (LdStScalable)
18623     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18624   else
18625     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18626                                    StMemSize.getFixedValue());
18627 
18628   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18629     if (LD->isIndexed()) {
18630       // Cannot handle opaque target constants and we must respect the user's
18631       // request not to split indexes from loads.
18632       if (!canSplitIdx(LD))
18633         return SDValue();
18634       SDValue Idx = SplitIndexingFromLoad(LD);
18635       SDValue Ops[] = {Val, Idx, Chain};
18636       return CombineTo(LD, Ops, 3);
18637     }
18638     return CombineTo(LD, Val, Chain);
18639   };
18640 
18641   if (!STCoversLD)
18642     return SDValue();
18643 
18644   // Memory as copy space (potentially masked).
18645   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18646     // Simple case: Direct non-truncating forwarding
18647     if (LDType.getSizeInBits() == LdMemSize)
18648       return ReplaceLd(LD, ST->getValue(), Chain);
18649     // Can we model the truncate and extension with an and mask?
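    // E.g., an i8 truncating store followed by a zero-extending load of the
    // same location can be modeled as (and StoredValue, 0xFF).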
18650     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18651         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18652       // Mask to size of LDMemType
18653       auto Mask =
18654           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18655                                                StMemSize.getFixedValue()),
18656                           SDLoc(ST), STType);
18657       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18658       return ReplaceLd(LD, Val, Chain);
18659     }
18660   }
18661 
  // Handle some cases for big-endian that would have Offset 0 (and thus be
  // handled above) on little-endian.
18664   SDValue Val = ST->getValue();
18665   if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18666     if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18667         !LDType.isVector() && isTypeLegal(STType) &&
18668         TLI.isOperationLegal(ISD::SRL, STType)) {
18669       Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18670                         DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18671       Offset = 0;
18672     }
18673   }
18674 
18675   // TODO: Deal with nonzero offset.
18676   if (LD->getBasePtr().isUndef() || Offset != 0)
18677     return SDValue();
  // Model necessary truncations / extensions.
  // Truncate the value to the stored memory size.
18680   do {
18681     if (!getTruncatedStoreValue(ST, Val))
18682       continue;
18683     if (!isTypeLegal(LDMemType))
18684       continue;
18685     if (STMemType != LDMemType) {
18686       // TODO: Support vectors? This requires extract_subvector/bitcast.
18687       if (!STMemType.isVector() && !LDMemType.isVector() &&
18688           STMemType.isInteger() && LDMemType.isInteger())
18689         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18690       else
18691         continue;
18692     }
18693     if (!extendLoadedValueToExtension(LD, Val))
18694       continue;
18695     return ReplaceLd(LD, Val, Chain);
18696   } while (false);
18697 
18698   // On failure, cleanup dead nodes we may have created.
18699   if (Val->use_empty())
18700     deleteAndRecombine(Val.getNode());
18701   return SDValue();
18702 }
18703 
18704 SDValue DAGCombiner::visitLOAD(SDNode *N) {
18705   LoadSDNode *LD  = cast<LoadSDNode>(N);
18706   SDValue Chain = LD->getChain();
18707   SDValue Ptr   = LD->getBasePtr();
18708 
18709   // If load is not volatile and there are no uses of the loaded value (and
18710   // the updated indexed value in case of indexed loads), change uses of the
18711   // chain value into uses of the chain input (i.e. delete the dead load).
18712   // TODO: Allow this for unordered atomics (see D66309)
18713   if (LD->isSimple()) {
18714     if (N->getValueType(1) == MVT::Other) {
18715       // Unindexed loads.
18716       if (!N->hasAnyUseOfValue(0)) {
18717         // It's not safe to use the two value CombineTo variant here. e.g.
18718         // v1, chain2 = load chain1, loc
18719         // v2, chain3 = load chain2, loc
18720         // v3         = add v2, c
18721         // Now we replace use of chain2 with chain1.  This makes the second load
18722         // isomorphic to the one we are deleting, and thus makes this load live.
18723         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18724                    dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18725                    dbgs() << "\n");
18726         WorklistRemover DeadNodes(*this);
18727         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18728         AddUsersToWorklist(Chain.getNode());
18729         if (N->use_empty())
18730           deleteAndRecombine(N);
18731 
18732         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
18733       }
18734     } else {
18735       // Indexed loads.
18736       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18737 
18738       // If this load has an opaque TargetConstant offset, then we cannot split
18739       // the indexing into an add/sub directly (that TargetConstant may not be
18740       // valid for a different type of node, and we cannot convert an opaque
18741       // target constant into a regular constant).
18742       bool CanSplitIdx = canSplitIdx(LD);
18743 
18744       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18745         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18746         SDValue Index;
18747         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18748           Index = SplitIndexingFromLoad(LD);
18749           // Try to fold the base pointer arithmetic into subsequent loads and
18750           // stores.
18751           AddUsersToWorklist(N);
18752         } else
18753           Index = DAG.getUNDEF(N->getValueType(1));
18754         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18755                    dbgs() << "\nWith: "; Undef.dump(&DAG);
18756                    dbgs() << " and 2 other values\n");
18757         WorklistRemover DeadNodes(*this);
18758         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18759         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18760         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18761         deleteAndRecombine(N);
18762         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
18763       }
18764     }
18765   }
18766 
18767   // If this load is directly stored, replace the load value with the stored
18768   // value.
18769   if (auto V = ForwardStoreValueToDirectLoad(LD))
18770     return V;
18771 
18772   // Try to infer better alignment information than the load already has.
18773   if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18774       !LD->isAtomic()) {
18775     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18776       if (*Alignment > LD->getAlign() &&
18777           isAligned(*Alignment, LD->getSrcValueOffset())) {
18778         SDValue NewLoad = DAG.getExtLoad(
18779             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18780             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18781             LD->getMemOperand()->getFlags(), LD->getAAInfo());
18782         // NewLoad will always be N as we are only refining the alignment
18783         assert(NewLoad.getNode() == N);
18784         (void)NewLoad;
18785       }
18786     }
18787   }
18788 
18789   if (LD->isUnindexed()) {
18790     // Walk up chain skipping non-aliasing memory nodes.
18791     SDValue BetterChain = FindBetterChain(LD, Chain);
18792 
18793     // If there is a better chain.
18794     if (Chain != BetterChain) {
18795       SDValue ReplLoad;
18796 
      // Replace the chain to avoid a dependency.
18798       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18799         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18800                                BetterChain, Ptr, LD->getMemOperand());
18801       } else {
18802         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18803                                   LD->getValueType(0),
18804                                   BetterChain, Ptr, LD->getMemoryVT(),
18805                                   LD->getMemOperand());
18806       }
18807 
18808       // Create token factor to keep old chain connected.
18809       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18810                                   MVT::Other, Chain, ReplLoad.getValue(1));
18811 
18812       // Replace uses with load result and token factor
18813       return CombineTo(N, ReplLoad.getValue(0), Token);
18814     }
18815   }
18816 
18817   // Try transforming N to an indexed load.
18818   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18819     return SDValue(N, 0);
18820 
18821   // Try to slice up N to more direct loads if the slices are mapped to
18822   // different register banks or pairing can take place.
18823   if (SliceUpLoad(N))
18824     return SDValue(N, 0);
18825 
18826   return SDValue();
18827 }
18828 
18829 namespace {
18830 
/// Helper structure used to slice a load into smaller loads.
18832 /// Basically a slice is obtained from the following sequence:
18833 /// Origin = load Ty1, Base
18834 /// Shift = srl Ty1 Origin, CstTy Amount
18835 /// Inst = trunc Shift to Ty2
18836 ///
18837 /// Then, it will be rewritten into:
18838 /// Slice = load SliceTy, Base + SliceOffset
18839 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
18840 ///
18841 /// SliceTy is deduced from the number of bits that are actually used to
18842 /// build Inst.
18843 struct LoadedSlice {
18844   /// Helper structure used to compute the cost of a slice.
18845   struct Cost {
18846     /// Are we optimizing for code size.
18847     bool ForCodeSize = false;
18848 
    /// Various costs.
18850     unsigned Loads = 0;
18851     unsigned Truncates = 0;
18852     unsigned CrossRegisterBanksCopies = 0;
18853     unsigned ZExts = 0;
18854     unsigned Shift = 0;
18855 
18856     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18857 
18858     /// Get the cost of one isolated slice.
18859     Cost(const LoadedSlice &LS, bool ForCodeSize)
18860         : ForCodeSize(ForCodeSize), Loads(1) {
18861       EVT TruncType = LS.Inst->getValueType(0);
18862       EVT LoadedType = LS.getLoadedType();
18863       if (TruncType != LoadedType &&
18864           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18865         ZExts = 1;
18866     }
18867 
18868     /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows growing the cost of the original
    /// load by the gain from this slice.
18872     void addSliceGain(const LoadedSlice &LS) {
18873       // Each slice saves a truncate.
18874       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18875       if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18876         ++Truncates;
18877       // If there is a shift amount, this slice gets rid of it.
18878       if (LS.Shift)
18879         ++Shift;
18880       // If this slice can merge a cross register bank copy, account for it.
18881       if (LS.canMergeExpensiveCrossRegisterBankCopy())
18882         ++CrossRegisterBanksCopies;
18883     }
18884 
18885     Cost &operator+=(const Cost &RHS) {
18886       Loads += RHS.Loads;
18887       Truncates += RHS.Truncates;
18888       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18889       ZExts += RHS.ZExts;
18890       Shift += RHS.Shift;
18891       return *this;
18892     }
18893 
18894     bool operator==(const Cost &RHS) const {
18895       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18896              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18897              ZExts == RHS.ZExts && Shift == RHS.Shift;
18898     }
18899 
18900     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
18901 
18902     bool operator<(const Cost &RHS) const {
18903       // Assume cross register banks copies are as expensive as loads.
18904       // FIXME: Do we want some more target hooks?
18905       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
18906       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
18907       // Unless we are optimizing for code size, consider the
18908       // expensive operation first.
18909       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
18910         return ExpensiveOpsLHS < ExpensiveOpsRHS;
18911       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
18912              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
18913     }
18914 
18915     bool operator>(const Cost &RHS) const { return RHS < *this; }
18916 
18917     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
18918 
18919     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
18920   };
18921 
  // The last instruction that represents the slice. This should be a
18923   // truncate instruction.
18924   SDNode *Inst;
18925 
18926   // The original load instruction.
18927   LoadSDNode *Origin;
18928 
18929   // The right shift amount in bits from the original load.
18930   unsigned Shift;
18931 
  // The DAG from which Origin came.
18933   // This is used to get some contextual information about legal types, etc.
18934   SelectionDAG *DAG;
18935 
18936   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
18937               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
18938       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
18939 
  /// Get the bits used within a chunk as wide as the original loaded value.
  /// \return Result is as wide as the original loaded value, with used bits
  ///         set to 1 and unused bits set to 0.
18943   APInt getUsedBits() const {
18944     // Reproduce the trunc(lshr) sequence:
18945     // - Start from the truncated value.
18946     // - Zero extend to the desired bit width.
18947     // - Shift left.
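    // E.g., an i8 truncate of (i32 load >> 8) uses bits 0x0000FF00 of the
    // original 32-bit loaded value.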
18948     assert(Origin && "No original load to compare against.");
18949     unsigned BitWidth = Origin->getValueSizeInBits(0);
18950     assert(Inst && "This slice is not bound to an instruction");
18951     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
18952            "Extracted slice is bigger than the whole type!");
18953     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
18954     UsedBits.setAllBits();
18955     UsedBits = UsedBits.zext(BitWidth);
18956     UsedBits <<= Shift;
18957     return UsedBits;
18958   }
18959 
18960   /// Get the size of the slice to be loaded in bytes.
18961   unsigned getLoadedSize() const {
18962     unsigned SliceSize = getUsedBits().popcount();
18963     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
18964     return SliceSize / 8;
18965   }
18966 
18967   /// Get the type that will be loaded for this slice.
18968   /// Note: This may not be the final type for the slice.
18969   EVT getLoadedType() const {
18970     assert(DAG && "Missing context");
18971     LLVMContext &Ctxt = *DAG->getContext();
18972     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
18973   }
18974 
18975   /// Get the alignment of the load used for this slice.
18976   Align getAlign() const {
18977     Align Alignment = Origin->getAlign();
18978     uint64_t Offset = getOffsetFromBase();
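    // E.g., a base alignment of 8 with Offset == 2 yields
    // commonAlignment(8, 8 + 2) == 2.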
18979     if (Offset != 0)
18980       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
18981     return Alignment;
18982   }
18983 
18984   /// Check if this slice can be rewritten with legal operations.
18985   bool isLegal() const {
18986     // An invalid slice is not legal.
18987     if (!Origin || !Inst || !DAG)
18988       return false;
18989 
    // Offsets are for indexed loads only; we do not handle that.
18991     if (!Origin->getOffset().isUndef())
18992       return false;
18993 
18994     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
18995 
18996     // Check that the type is legal.
18997     EVT SliceType = getLoadedType();
18998     if (!TLI.isTypeLegal(SliceType))
18999       return false;
19000 
19001     // Check that the load is legal for this type.
19002     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19003       return false;
19004 
19005     // Check that the offset can be computed.
19006     // 1. Check its type.
19007     EVT PtrType = Origin->getBasePtr().getValueType();
19008     if (PtrType == MVT::Untyped || PtrType.isExtended())
19009       return false;
19010 
19011     // 2. Check that it fits in the immediate.
19012     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19013       return false;
19014 
19015     // 3. Check that the computation is legal.
19016     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19017       return false;
19018 
19019     // Check that the zext is legal if it needs one.
19020     EVT TruncateType = Inst->getValueType(0);
19021     if (TruncateType != SliceType &&
19022         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19023       return false;
19024 
19025     return true;
19026   }
19027 
19028   /// Get the offset in bytes of this slice in the original chunk of
19029   /// bits.
19030   /// \pre DAG != nullptr.
19031   uint64_t getOffsetFromBase() const {
19032     assert(DAG && "Missing context.");
19033     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on bytes are not supported.");
19035     uint64_t Offset = Shift / 8;
19036     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19037     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19038            "The size of the original loaded type is not a multiple of a"
19039            " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized away earlier in the process.
19042     assert(TySizeInBytes > Offset &&
19043            "Invalid shift amount for given loaded size");
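    // E.g., an i8 slice of an i32 load with Shift == 8 is at Offset 1 on
    // little-endian, and at 4 - 1 - 1 == 2 on big-endian.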
19044     if (IsBigEndian)
19045       Offset = TySizeInBytes - Offset - getLoadedSize();
19046     return Offset;
19047   }
19048 
19049   /// Generate the sequence of instructions to load the slice
19050   /// represented by this object and redirect the uses of this slice to
19051   /// this new sequence of instructions.
19052   /// \pre this->Inst && this->Origin are valid Instructions and this
19053   /// object passed the legal check: LoadedSlice::isLegal returned true.
19054   /// \return The last instruction of the sequence used to load the slice.
19055   SDValue loadSlice() const {
19056     assert(Inst && Origin && "Unable to replace a non-existing slice.");
19057     const SDValue &OldBaseAddr = Origin->getBasePtr();
19058     SDValue BaseAddr = OldBaseAddr;
19059     // Get the offset in that chunk of bytes w.r.t. the endianness.
19060     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19061     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19062     if (Offset) {
19063       // BaseAddr = BaseAddr + Offset.
19064       EVT ArithType = BaseAddr.getValueType();
19065       SDLoc DL(Origin);
19066       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19067                               DAG->getConstant(Offset, DL, ArithType));
19068     }
19069 
19070     // Create the type of the loaded slice according to its size.
19071     EVT SliceType = getLoadedType();
19072 
19073     // Create the load for the slice.
19074     SDValue LastInst =
19075         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19076                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19077                      Origin->getMemOperand()->getFlags());
19078     // If the final type is not the same as the loaded type, this means that
19079     // we have to pad with zero. Create a zero extend for that.
19080     EVT FinalType = Inst->getValueType(0);
19081     if (SliceType != FinalType)
19082       LastInst =
19083           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19084     return LastInst;
19085   }
19086 
19087   /// Check if this slice can be merged with an expensive cross register
19088   /// bank copy. E.g.,
19089   /// i = load i32
19090   /// f = bitcast i32 i to float
19091   bool canMergeExpensiveCrossRegisterBankCopy() const {
19092     if (!Inst || !Inst->hasOneUse())
19093       return false;
19094     SDNode *Use = *Inst->use_begin();
19095     if (Use->getOpcode() != ISD::BITCAST)
19096       return false;
19097     assert(DAG && "Missing context");
19098     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19099     EVT ResVT = Use->getValueType(0);
19100     const TargetRegisterClass *ResRC =
19101         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19102     const TargetRegisterClass *ArgRC =
19103         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19104                            Use->getOperand(0)->isDivergent());
19105     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19106       return false;
19107 
19108     // At this point, we know that we perform a cross-register-bank copy.
19109     // Check if it is expensive.
19110     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap unless the two register classes do not
    // explicitly share a common subclass.
19113     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19114       return false;
19115 
19116     // Check if it will be merged with the load.
19117     // 1. Check the alignment / fast memory access constraint.
19118     unsigned IsFast = 0;
19119     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19120                                 Origin->getAddressSpace(), getAlign(),
19121                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
19122         !IsFast)
19123       return false;
19124 
19125     // 2. Check that the load is a legal operation for that type.
19126     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19127       return false;
19128 
19129     // 3. Check that we do not have a zext in the way.
19130     if (Inst->getValueType(0) != getLoadedType())
19131       return false;
19132 
19133     return true;
19134   }
19135 };
19136 
19137 } // end anonymous namespace
19138 
19139 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
19140 /// \p UsedBits looks like 0..0 1..1 0..0.
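/// For example, 0b00111100 is dense, while 0b00101100 is not.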
19141 static bool areUsedBitsDense(const APInt &UsedBits) {
19142   // If all the bits are one, this is dense!
19143   if (UsedBits.isAllOnes())
19144     return true;
19145 
19146   // Get rid of the unused bits on the right.
19147   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19148   // Get rid of the unused bits on the left.
19149   if (NarrowedUsedBits.countl_zero())
19150     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19151   // Check that the chunk of bits is completely used.
19152   return NarrowedUsedBits.isAllOnes();
19153 }
19154 
19155 /// Check whether or not \p First and \p Second are next to each other
19156 /// in memory. This means that there is no hole between the bits loaded
19157 /// by \p First and the bits loaded by \p Second.
19158 static bool areSlicesNextToEachOther(const LoadedSlice &First,
19159                                      const LoadedSlice &Second) {
19160   assert(First.Origin == Second.Origin && First.Origin &&
19161          "Unable to match different memory origins.");
19162   APInt UsedBits = First.getUsedBits();
19163   assert((UsedBits & Second.getUsedBits()) == 0 &&
19164          "Slices are not supposed to overlap.");
19165   UsedBits |= Second.getUsedBits();
19166   return areUsedBitsDense(UsedBits);
19167 }
19168 
19169 /// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
19171 /// \pre \p GlobalLSCost should account for at least as many loads as
19172 /// there is in the slices in \p LoadedSlices.
19173 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19174                                  LoadedSlice::Cost &GlobalLSCost) {
19175   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than 2 elements, no pairing is possible.
19177   if (NumberOfSlices < 2)
19178     return;
19179 
19180   // Sort the slices so that elements that are likely to be next to each
19181   // other in memory are next to each other in the list.
19182   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19183     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19184     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19185   });
19186   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
19189   const LoadedSlice *First = nullptr;
19190   const LoadedSlice *Second = nullptr;
19191   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19192                 // Set the beginning of the pair.
19193                                                            First = Second) {
19194     Second = &LoadedSlices[CurrSlice];
19195 
19196     // If First is NULL, it means we start a new pair.
19197     // Get to the next slice.
19198     if (!First)
19199       continue;
19200 
19201     EVT LoadedType = First->getLoadedType();
19202 
19203     // If the types of the slices are different, we cannot pair them.
19204     if (LoadedType != Second->getLoadedType())
19205       continue;
19206 
19207     // Check if the target supplies paired loads for this type.
19208     Align RequiredAlignment;
19209     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
19210       // Move to the next pair; this type is hopeless.
19211       Second = nullptr;
19212       continue;
19213     }
19214     // Check if we meet the alignment requirement.
19215     if (First->getAlign() < RequiredAlignment)
19216       continue;
19217 
19218     // Check that both loads are next to each other in memory.
19219     if (!areSlicesNextToEachOther(*First, *Second))
19220       continue;
19221 
19222     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19223     --GlobalLSCost.Loads;
19224     // Move to the next pair.
19225     Second = nullptr;
19226   }
19227 }
19228 
19229 /// Check the profitability of all involved LoadedSlice.
19230 /// Currently, it is considered profitable if there are exactly two
19231 /// involved slices (1) which are (2) next to each other in memory, and
19232 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19233 ///
19234 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
19235 /// the elements themselves.
19236 ///
19237 /// FIXME: When the cost model is mature enough, we can relax
19238 /// constraints (1) and (2).
19239 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19240                                 const APInt &UsedBits, bool ForCodeSize) {
19241   unsigned NumberOfSlices = LoadedSlices.size();
19242   if (StressLoadSlicing)
19243     return NumberOfSlices > 1;
19244 
19245   // Check (1).
19246   if (NumberOfSlices != 2)
19247     return false;
19248 
19249   // Check (2).
19250   if (!areUsedBitsDense(UsedBits))
19251     return false;
19252 
19253   // Check (3).
19254   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19255   // The original code has one big load.
19256   OrigCost.Loads = 1;
19257   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19258     const LoadedSlice &LS = LoadedSlices[CurrSlice];
19259     // Accumulate the cost of all the slices.
19260     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19261     GlobalSlicingCost += SliceCost;
19262 
19263     // Account, in the cost of the original configuration, for the gain
19264     // obtained with the current slice.
19265     OrigCost.addSliceGain(LS);
19266   }
19267 
19268   // If the target supports paired loads, adjust the cost accordingly.
19269   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19270   return OrigCost > GlobalSlicingCost;
19271 }
19272 
19273 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
19274 /// operations, split it in the various pieces being extracted.
19275 ///
19276 /// This sort of thing is introduced by SROA.
19277 /// This slicing takes care not to insert overlapping loads.
19278 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
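/// For example (an illustrative sketch, not a guaranteed transformation):
///   %val = load i32, ptr %p
///   %lo  = trunc i32 %val to i16
///   %sh  = lshr i32 %val, 16
///   %hi  = trunc i32 %sh to i16
/// may become two independent i16 loads at byte offsets 0 and 2 on a
/// little-endian target, when legal and profitable.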
19279 bool DAGCombiner::SliceUpLoad(SDNode *N) {
19280   if (Level < AfterLegalizeDAG)
19281     return false;
19282 
19283   LoadSDNode *LD = cast<LoadSDNode>(N);
19284   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19285       !LD->getValueType(0).isInteger())
19286     return false;
19287 
19288   // The algorithm to split up a load of a scalable vector into individual
19289   // elements currently requires knowing the length of the loaded type,
19290   // so it will need adjusting to work on scalable vectors.
19291   if (LD->getValueType(0).isScalableVector())
19292     return false;
19293 
19294   // Keep track of already used bits to detect overlapping values.
19295   // In that case, we will just abort the transformation.
19296   APInt UsedBits(LD->getValueSizeInBits(0), 0);
19297 
19298   SmallVector<LoadedSlice, 4> LoadedSlices;
19299 
19300   // Check if this load is used as several smaller chunks of bits.
19301   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19302   // of computation for each trunc.
19303   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19304        UI != UIEnd; ++UI) {
19305     // Skip the uses of the chain.
19306     if (UI.getUse().getResNo() != 0)
19307       continue;
19308 
19309     SDNode *User = *UI;
19310     unsigned Shift = 0;
19311 
19312     // Check if this is a trunc(lshr).
19313     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19314         isa<ConstantSDNode>(User->getOperand(1))) {
19315       Shift = User->getConstantOperandVal(1);
19316       User = *User->use_begin();
19317     }
19318 
19319     // At this point, User is a TRUNCATE iff we encountered a trunc or a
19320     // trunc(lshr).
19321     if (User->getOpcode() != ISD::TRUNCATE)
19322       return false;
19323 
19324     // The width of the type must be a power of 2 and at least 8 bits.
19325     // Otherwise the load cannot be represented in LLVM IR.
19326     // Moreover, if we shifted by an amount that is not a multiple of 8
19327     // bits, the slice would span several bytes. We do not support that.
19328     unsigned Width = User->getValueSizeInBits(0);
19329     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19330       return false;
19331 
19332     // Build the slice for this chain of computations.
19333     LoadedSlice LS(User, LD, Shift, &DAG);
19334     APInt CurrentUsedBits = LS.getUsedBits();
19335 
19336     // Check if this slice overlaps with another.
19337     if ((CurrentUsedBits & UsedBits) != 0)
19338       return false;
19339     // Update the bits used globally.
19340     UsedBits |= CurrentUsedBits;
19341 
19342     // Check if the new slice would be legal.
19343     if (!LS.isLegal())
19344       return false;
19345 
19346     // Record the slice.
19347     LoadedSlices.push_back(LS);
19348   }
19349 
19350   // Abort slicing if it does not seem to be profitable.
19351   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19352     return false;
19353 
19354   ++SlicedLoads;
19355 
19356   // Rewrite each chain to use an independent load.
19357   // By construction, each chain can be represented by a unique load.
19358 
19359   // Prepare the argument for the new token factor for all the slices.
19360   SmallVector<SDValue, 8> ArgChains;
19361   for (const LoadedSlice &LS : LoadedSlices) {
19362     SDValue SliceInst = LS.loadSlice();
19363     CombineTo(LS.Inst, SliceInst, true);
19364     if (SliceInst.getOpcode() != ISD::LOAD)
19365       SliceInst = SliceInst.getOperand(0);
19366     assert(SliceInst->getOpcode() == ISD::LOAD &&
19367            "It takes more than a zext to get to the loaded slice!!");
19368     ArgChains.push_back(SliceInst.getValue(1));
19369   }
19370 
19371   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19372                               ArgChains);
19373   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19374   AddToWorklist(Chain.getNode());
19375   return true;
19376 }
19377 
19378 /// Check to see if V is (and (load ptr), imm), where the load has
19379 /// specific bytes cleared out.  If so, return the byte size being masked out
19380 /// and the shift amount.
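/// For example (illustrative): for i32 V = (and (load Ptr), 0xFFFF00FF),
/// the inverted mask is 0x0000FF00, so exactly one byte at byte offset 1 is
/// being masked out, and this returns {1, 1}.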
19381 static std::pair<unsigned, unsigned>
19382 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19383   std::pair<unsigned, unsigned> Result(0, 0);
19384 
19385   // Check for the structure we're looking for.
19386   if (V->getOpcode() != ISD::AND ||
19387       !isa<ConstantSDNode>(V->getOperand(1)) ||
19388       !ISD::isNormalLoad(V->getOperand(0).getNode()))
19389     return Result;
19390 
19391   // Check the chain and pointer.
19392   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19393   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
19394 
19395   // This only handles simple types.
19396   if (V.getValueType() != MVT::i16 &&
19397       V.getValueType() != MVT::i32 &&
19398       V.getValueType() != MVT::i64)
19399     return Result;
19400 
19401   // Check the constant mask.  Invert it so that the bits being masked out are
19402   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
19403   // follow the sign bit for uniformity.
19404   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19405   unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19406   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
19407   unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19408   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
19409   if (NotMaskLZ == 64) return Result;  // All zero mask.
19410 
19411   // See if we have a contiguous run of bits.  If so, we have 0*1+0*
19412   if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19413     return Result;
19414 
19415   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19416   if (V.getValueType() != MVT::i64 && NotMaskLZ)
19417     NotMaskLZ -= 64-V.getValueSizeInBits();
19418 
19419   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19420   switch (MaskedBytes) {
19421   case 1:
19422   case 2:
19423   case 4: break;
19424   default: return Result; // All-one mask, or unsupported size (e.g. 3 or 5 bytes).
19425   }
19426 
19427   // Verify that the masked region starts at a byte offset that is a multiple
19428   // of the mask width, so the access stays aligned relative to its width.
19429   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19430 
19431   // For narrowing to be valid, it must be the case that the load is the
19432   // memory operation immediately preceding the store.
19433   if (LD == Chain.getNode())
19434     ; // ok.
19435   else if (Chain->getOpcode() == ISD::TokenFactor &&
19436            SDValue(LD, 1).hasOneUse()) {
19437     // LD has only 1 chain use, so there are no indirect dependencies.
19438     if (!LD->isOperandOf(Chain.getNode()))
19439       return Result;
19440   } else
19441     return Result; // Fail.
19442 
19443   Result.first = MaskedBytes;
19444   Result.second = NotMaskTZ/8;
19445   return Result;
19446 }
19447 
19448 /// Check to see if IVal is something that provides a value as specified by
19449 /// MaskInfo. If so, replace the specified store with a narrower store of
19450 /// truncated IVal.
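/// For example (illustrative): with MaskInfo = {1, 1} and an i32 IVal whose
/// bits outside byte 1 are known to be zero, the store can be replaced by a
/// single i8 store of (trunc (srl IVal, 8)) at base + 1 on a little-endian
/// target.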
19451 static SDValue
19452 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19453                                 SDValue IVal, StoreSDNode *St,
19454                                 DAGCombiner *DC) {
19455   unsigned NumBytes = MaskInfo.first;
19456   unsigned ByteShift = MaskInfo.second;
19457   SelectionDAG &DAG = DC->getDAG();
19458 
19459   // Check to see if IVal is all zeros in the part being masked in by the 'or'
19460   // that uses this.  If not, this is not a replacement.
19461   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19462                                   ByteShift*8, (ByteShift+NumBytes)*8);
19463   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19464 
19465   // Check that it is legal on the target to do this.  It is legal if the new
19466   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19467   // legalization. If the source type is legal, but the store type isn't, see
19468   // if we can use a truncating store.
19469   MVT VT = MVT::getIntegerVT(NumBytes * 8);
19470   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19471   bool UseTruncStore;
19472   if (DC->isTypeLegal(VT))
19473     UseTruncStore = false;
19474   else if (TLI.isTypeLegal(IVal.getValueType()) &&
19475            TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19476     UseTruncStore = true;
19477   else
19478     return SDValue();
19479 
19480   // Can't do this for indexed stores.
19481   if (St->isIndexed())
19482     return SDValue();
19483 
19484   // Check that the target doesn't think this is a bad idea.
19485   if (St->getMemOperand() &&
19486       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19487                               *St->getMemOperand()))
19488     return SDValue();
19489 
19490   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
19491   // shifted by ByteShift and truncated down to NumBytes.
19492   if (ByteShift) {
19493     SDLoc DL(IVal);
19494     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19495                        DAG.getConstant(ByteShift*8, DL,
19496                                     DC->getShiftAmountTy(IVal.getValueType())));
19497   }
19498 
19499   // Figure out the offset for the store and the alignment of the access.
19500   unsigned StOffset;
19501   if (DAG.getDataLayout().isLittleEndian())
19502     StOffset = ByteShift;
19503   else
19504     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19505 
19506   SDValue Ptr = St->getBasePtr();
19507   if (StOffset) {
19508     SDLoc DL(IVal);
19509     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19510   }
19511 
19512   ++OpsNarrowed;
19513   if (UseTruncStore)
19514     return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19515                              St->getPointerInfo().getWithOffset(StOffset),
19516                              VT, St->getOriginalAlign());
19517 
19518   // Truncate down to the new size.
19519   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19520 
19521   return DAG
19522       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19523                 St->getPointerInfo().getWithOffset(StOffset),
19524                 St->getOriginalAlign());
19525 }
19526 
19527 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19528 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19529 /// narrowing the load and store if it would end up being a win for performance
19530 /// or code size.
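/// For example (illustrative, little-endian): in
///   (store (or (load i32 p), 0x0000FF00), p)
/// only byte 1 of the value changes, so this may be narrowed to an i8 load
/// from p+1, an i8 'or' with 0xFF, and an i8 store back to p+1, provided the
/// target deems the narrower access legal and fast.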
19531 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19532   StoreSDNode *ST  = cast<StoreSDNode>(N);
19533   if (!ST->isSimple())
19534     return SDValue();
19535 
19536   SDValue Chain = ST->getChain();
19537   SDValue Value = ST->getValue();
19538   SDValue Ptr   = ST->getBasePtr();
19539   EVT VT = Value.getValueType();
19540 
19541   if (ST->isTruncatingStore() || VT.isVector())
19542     return SDValue();
19543 
19544   unsigned Opc = Value.getOpcode();
19545 
19546   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19547       !Value.hasOneUse())
19548     return SDValue();
19549 
19550   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19551   // is a byte mask indicating a consecutive number of bytes, check to see if
19552   // Y is known to provide just those bytes.  If so, we try to replace the
19553   // load / 'or' / store sequence with a single (narrower) store, which makes
19554   // the load dead.
19555   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19556     std::pair<unsigned, unsigned> MaskedLoad;
19557     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19558     if (MaskedLoad.first)
19559       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19560                                                   Value.getOperand(1), ST,this))
19561         return NewST;
19562 
19563     // Or is commutative, so try swapping X and Y.
19564     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19565     if (MaskedLoad.first)
19566       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
19567                                                   Value.getOperand(0), ST,this))
19568         return NewST;
19569   }
19570 
19571   if (!EnableReduceLoadOpStoreWidth)
19572     return SDValue();
19573 
19574   if (Value.getOperand(1).getOpcode() != ISD::Constant)
19575     return SDValue();
19576 
19577   SDValue N0 = Value.getOperand(0);
19578   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19579       Chain == SDValue(N0.getNode(), 1)) {
19580     LoadSDNode *LD = cast<LoadSDNode>(N0);
19581     if (LD->getBasePtr() != Ptr ||
19582         LD->getPointerInfo().getAddrSpace() !=
19583         ST->getPointerInfo().getAddrSpace())
19584       return SDValue();
19585 
19586     // Find the type to which to narrow the load / op / store.
19587     SDValue N1 = Value.getOperand(1);
19588     unsigned BitWidth = N1.getValueSizeInBits();
19589     APInt Imm = N1->getAsAPIntVal();
19590     if (Opc == ISD::AND)
19591       Imm ^= APInt::getAllOnes(BitWidth);
19592     if (Imm == 0 || Imm.isAllOnes())
19593       return SDValue();
19594     unsigned ShAmt = Imm.countr_zero();
19595     unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19596     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19597     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19598     // The narrowing should be profitable, the load/store operation should be
19599     // legal (or custom) and the store size should be equal to the NewVT width.
19600     while (NewBW < BitWidth &&
19601            (NewVT.getStoreSizeInBits() != NewBW ||
19602             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19603             !TLI.isNarrowingProfitable(VT, NewVT))) {
19604       NewBW = NextPowerOf2(NewBW);
19605       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19606     }
19607     if (NewBW >= BitWidth)
19608       return SDValue();
19609 
19610     // If the changed lsb does not start at a NewBW-sized bit boundary,
19611     // start at the previous boundary.
19612     if (ShAmt % NewBW)
19613       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19614     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19615                                    std::min(BitWidth, ShAmt + NewBW));
19616     if ((Imm & Mask) == Imm) {
19617       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19618       if (Opc == ISD::AND)
19619         NewImm ^= APInt::getAllOnes(NewBW);
19620       uint64_t PtrOff = ShAmt / 8;
19621       // For big endian targets, we need to adjust the offset to the pointer to
19622       // load the correct bytes.
19623       if (DAG.getDataLayout().isBigEndian())
19624         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19625 
19626       unsigned IsFast = 0;
19627       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19628       if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19629                                   LD->getAddressSpace(), NewAlign,
19630                                   LD->getMemOperand()->getFlags(), &IsFast) ||
19631           !IsFast)
19632         return SDValue();
19633 
19634       SDValue NewPtr =
19635           DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19636       SDValue NewLD =
19637           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19638                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19639                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
19640       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19641                                    DAG.getConstant(NewImm, SDLoc(Value),
19642                                                    NewVT));
19643       SDValue NewST =
19644           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19645                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19646 
19647       AddToWorklist(NewPtr.getNode());
19648       AddToWorklist(NewLD.getNode());
19649       AddToWorklist(NewVal.getNode());
19650       WorklistRemover DeadNodes(*this);
19651       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19652       ++OpsNarrowed;
19653       return NewST;
19654     }
19655   }
19656 
19657   return SDValue();
19658 }
19659 
19660 /// For a given floating point load / store pair, if the load value isn't used
19661 /// by any other operations, then consider transforming the pair to integer
19662 /// load / store operations if the target deems the transformation profitable.
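/// For example (illustrative): (store (f32 load p), q) may become
/// (store (i32 load p), q) when the target reports the integer pair as legal
/// and desirable, and the other safety checks pass.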
19663 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19664   StoreSDNode *ST  = cast<StoreSDNode>(N);
19665   SDValue Value = ST->getValue();
19666   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19667       Value.hasOneUse()) {
19668     LoadSDNode *LD = cast<LoadSDNode>(Value);
19669     EVT VT = LD->getMemoryVT();
19670     if (!VT.isFloatingPoint() ||
19671         VT != ST->getMemoryVT() ||
19672         LD->isNonTemporal() ||
19673         ST->isNonTemporal() ||
19674         LD->getPointerInfo().getAddrSpace() != 0 ||
19675         ST->getPointerInfo().getAddrSpace() != 0)
19676       return SDValue();
19677 
19678     TypeSize VTSize = VT.getSizeInBits();
19679 
19680     // We don't know the size of scalable types at compile time so we cannot
19681     // create an integer of the equivalent size.
19682     if (VTSize.isScalable())
19683       return SDValue();
19684 
19685     unsigned FastLD = 0, FastST = 0;
19686     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19687     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19688         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19689         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19690         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19691         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19692                                 *LD->getMemOperand(), &FastLD) ||
19693         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19694                                 *ST->getMemOperand(), &FastST) ||
19695         !FastLD || !FastST)
19696       return SDValue();
19697 
19698     SDValue NewLD =
19699         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19700                     LD->getPointerInfo(), LD->getAlign());
19701 
19702     SDValue NewST =
19703         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19704                      ST->getPointerInfo(), ST->getAlign());
19705 
19706     AddToWorklist(NewLD.getNode());
19707     AddToWorklist(NewST.getNode());
19708     WorklistRemover DeadNodes(*this);
19709     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19710     ++LdStFP2Int;
19711     return NewST;
19712   }
19713 
19714   return SDValue();
19715 }
19716 
19717 // This is a helper function for visitMUL to check the profitability
19718 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19719 // MulNode is the original multiply, AddNode is (add x, c1),
19720 // and ConstNode is c2.
19721 //
19722 // If the (add x, c1) has multiple uses, we could increase
19723 // the number of adds if we make this transformation.
19724 // It would only be worth doing this if we can remove a
19725 // multiply in the process. Check for that here.
19726 // To illustrate:
19727 //     (A + c1) * c3
19728 //     (A + c2) * c3
19729 // We're checking for cases where we have common "c3 * A" expressions.
19730 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19731                                               SDValue ConstNode) {
19732   APInt Val;
19733 
19734   // If the add only has one use, and the target thinks the folding is
19735   // profitable or does not lead to worse code, this would be OK to do.
19736   if (AddNode->hasOneUse() &&
19737       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19738     return true;
19739 
19740   // Walk all the users of the constant with which we're multiplying.
19741   for (SDNode *Use : ConstNode->uses()) {
19742     if (Use == MulNode) // This use is the one we're on right now. Skip it.
19743       continue;
19744 
19745     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19746       SDNode *OtherOp;
19747       SDNode *MulVar = AddNode.getOperand(0).getNode();
19748 
19749       // OtherOp is what we're multiplying against the constant.
19750       if (Use->getOperand(0) == ConstNode)
19751         OtherOp = Use->getOperand(1).getNode();
19752       else
19753         OtherOp = Use->getOperand(0).getNode();
19754 
19755       // Check to see if multiply is with the same operand of our "add".
19756       //
19757       //     ConstNode  = CONST
19758       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
19759       //     ...
19760       //     AddNode  = (A + c1)  <-- MulVar is A.
19761       //         = AddNode * ConstNode   <-- current visiting instruction.
19762       //
19763       // If we make this transformation, we will have a common
19764       // multiply (ConstNode * A) that we can save.
19765       if (OtherOp == MulVar)
19766         return true;
19767 
19768       // Now check to see if a future expansion will give us a common
19769       // multiply.
19770       //
19771       //     ConstNode  = CONST
19772       //     AddNode    = (A + c1)
19773       //     ...   = AddNode * ConstNode <-- current visiting instruction.
19774       //     ...
19775       //     OtherOp = (A + c2)
19776       //     Use     = OtherOp * ConstNode <-- visiting Use.
19777       //
19778       // If we make this transformation, we will have a common
19779       // multiply (CONST * A) after we also do the same transformation
19780       // to the "t2" instruction.
19781       if (OtherOp->getOpcode() == ISD::ADD &&
19782           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19783           OtherOp->getOperand(0).getNode() == MulVar)
19784         return true;
19785     }
19786   }
19787 
19788   // Didn't find a case where this would be profitable.
19789   return false;
19790 }
19791 
19792 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19793                                          unsigned NumStores) {
19794   SmallVector<SDValue, 8> Chains;
19795   SmallPtrSet<const SDNode *, 8> Visited;
19796   SDLoc StoreDL(StoreNodes[0].MemNode);
19797 
19798   for (unsigned i = 0; i < NumStores; ++i) {
19799     Visited.insert(StoreNodes[i].MemNode);
19800   }
19801 
19802   // Don't include chains that are candidate stores themselves or repeats.
19803   for (unsigned i = 0; i < NumStores; ++i) {
19804     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19805       Chains.push_back(StoreNodes[i].MemNode->getChain());
19806   }
19807 
19808   assert(!Chains.empty() && "Chain should have generated a chain");
19809   return DAG.getTokenFactor(StoreDL, Chains);
19810 }
19811 
19812 bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19813   const Value *UnderlyingObj = nullptr;
19814   for (const auto &MemOp : StoreNodes) {
19815     const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
19816     // A pseudo value such as a stack frame has its own frame index and size;
19817     // do not reuse the first store's frame index for other frames.
19818     if (MMO->getPseudoValue())
19819       return false;
19820 
19821     if (!MMO->getValue())
19822       return false;
19823 
19824     const Value *Obj = getUnderlyingObject(MMO->getValue());
19825 
19826     if (UnderlyingObj && UnderlyingObj != Obj)
19827       return false;
19828 
19829     if (!UnderlyingObj)
19830       UnderlyingObj = Obj;
19831   }
19832 
19833   return true;
19834 }
19835 
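// Example (illustrative, little-endian): four consecutive i8 stores of the
// constants 1, 2, 3 and 4 can be merged into a single i32 store of
// 0x04030201, subject to the legality and profitability checks below.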
19836 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19837     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19838     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19839   // Make sure we have something to merge.
19840   if (NumStores < 2)
19841     return false;
19842 
19843   assert((!UseTrunc || !UseVector) &&
19844          "This optimization cannot emit a vector truncating store");
19845 
19846   // Use the location of the first store as the location for merged nodes.
19847   SDLoc DL(StoreNodes[0].MemNode);
19848 
19849   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19850   unsigned SizeInBits = NumStores * ElementSizeBits;
19851   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19852 
19853   std::optional<MachineMemOperand::Flags> Flags;
19854   AAMDNodes AAInfo;
19855   for (unsigned I = 0; I != NumStores; ++I) {
19856     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19857     if (!Flags) {
19858       Flags = St->getMemOperand()->getFlags();
19859       AAInfo = St->getAAInfo();
19860       continue;
19861     }
19862     // Skip merging if there's an inconsistent flag.
19863     if (Flags != St->getMemOperand()->getFlags())
19864       return false;
19865     // Concatenate AA metadata.
19866     AAInfo = AAInfo.concat(St->getAAInfo());
19867   }
19868 
19869   EVT StoreTy;
19870   if (UseVector) {
19871     unsigned Elts = NumStores * NumMemElts;
19872     // Get the type for the merged vector store.
19873     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19874   } else
19875     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19876 
19877   SDValue StoredVal;
19878   if (UseVector) {
19879     if (IsConstantSrc) {
19880       SmallVector<SDValue, 8> BuildVector;
19881       for (unsigned I = 0; I != NumStores; ++I) {
19882         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19883         SDValue Val = St->getValue();
19884         // If the constant is of the wrong type, convert it now.  This comes up
19885         // when one of our stores was truncating.
19886         if (MemVT != Val.getValueType()) {
19887           Val = peekThroughBitcasts(Val);
19888           // Deal with constants of wrong size.
19889           if (ElementSizeBits != Val.getValueSizeInBits()) {
19890             auto *C = dyn_cast<ConstantSDNode>(Val);
19891             if (!C)
19892               // Not clear how to truncate FP values.
19893               // TODO: Handle truncation of build_vector constants
19894               return false;
19895 
19896             EVT IntMemVT =
19897                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19898             Val = DAG.getConstant(C->getAPIntValue()
19899                                       .zextOrTrunc(Val.getValueSizeInBits())
19900                                       .zextOrTrunc(ElementSizeBits),
19901                                   SDLoc(C), IntMemVT);
19902           }
19903           // Bitcast the correctly sized value to the memory type.
19904           Val = DAG.getBitcast(MemVT, Val);
19905         }
19906         BuildVector.push_back(Val);
19907       }
19908       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19909                                                : ISD::BUILD_VECTOR,
19910                               DL, StoreTy, BuildVector);
19911     } else {
19912       SmallVector<SDValue, 8> Ops;
19913       for (unsigned i = 0; i < NumStores; ++i) {
19914         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
19915         SDValue Val = peekThroughBitcasts(St->getValue());
19916         // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
19917         // type MemVT. If the underlying value is not the correct
19918         // type, but it is an extraction of an appropriate vector we
19919         // can recast Val to be of the correct type. This may require
19920         // converting between EXTRACT_VECTOR_ELT and
19921         // EXTRACT_SUBVECTOR.
19922         if ((MemVT != Val.getValueType()) &&
19923             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
19924              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
19925           EVT MemVTScalarTy = MemVT.getScalarType();
19926           // We may need to add a bitcast here to get types to line up.
19927           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
19928             Val = DAG.getBitcast(MemVT, Val);
19929           } else if (MemVT.isVector() &&
19930                      Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19931             Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
19932           } else {
19933             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
19934                                             : ISD::EXTRACT_VECTOR_ELT;
19935             SDValue Vec = Val.getOperand(0);
19936             SDValue Idx = Val.getOperand(1);
19937             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
19938           }
19939         }
19940         Ops.push_back(Val);
19941       }
19942 
19943       // Build the extracted vector elements back into a vector.
19944       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19945                                                : ISD::BUILD_VECTOR,
19946                               DL, StoreTy, Ops);
19947     }
19948   } else {
19949     // We should always use a vector store when merging extracted vector
19950     // elements, so this path implies a store of constants.
19951     assert(IsConstantSrc && "Merged vector elements should use vector store");
19952 
19953     APInt StoreInt(SizeInBits, 0);
19954 
19955     // Construct a single integer constant which is made of the smaller
19956     // constant inputs.
19957     bool IsLE = DAG.getDataLayout().isLittleEndian();
19958     for (unsigned i = 0; i < NumStores; ++i) {
19959       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
19960       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
19961 
19962       SDValue Val = St->getValue();
19963       Val = peekThroughBitcasts(Val);
19964       StoreInt <<= ElementSizeBits;
19965       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
19966         StoreInt |= C->getAPIntValue()
19967                         .zextOrTrunc(ElementSizeBits)
19968                         .zextOrTrunc(SizeInBits);
19969       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
19970         StoreInt |= C->getValueAPF()
19971                         .bitcastToAPInt()
19972                         .zextOrTrunc(ElementSizeBits)
19973                         .zextOrTrunc(SizeInBits);
19974         // If FP truncation is necessary, give up for now.
19975         if (MemVT.getSizeInBits() != ElementSizeBits)
19976           return false;
19977       } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
19978                  ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
19979         // Not yet handled
19980         return false;
19981       } else {
19982         llvm_unreachable("Invalid constant element type");
19983       }
19984     }
19985 
19986     // Create the single merged constant to store.
19987     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
19988   }
19989 
19990   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
19991   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
19992   bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
19993 
19994   // Make sure we use a truncating store if that is necessary for legality.
19995   // When generating the new widened store, if the first store's pointer info
19996   // cannot be reused, discard it except for the address space, because the
19997   // widened store can no longer be represented by the original pointer info,
19998   // which describes only the narrower memory object.
19999   SDValue NewStore;
20000   if (!UseTrunc) {
20001     NewStore = DAG.getStore(
20002         NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20003         CanReusePtrInfo
20004             ? FirstInChain->getPointerInfo()
20005             : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20006         FirstInChain->getAlign(), *Flags, AAInfo);
20007   } else { // Must be realized as a trunc store
20008     EVT LegalizedStoredValTy =
20009         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20010     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20011     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20012     SDValue ExtendedStoreVal =
20013         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20014                         LegalizedStoredValTy);
20015     NewStore = DAG.getTruncStore(
20016         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20017         CanReusePtrInfo
20018             ? FirstInChain->getPointerInfo()
20019             : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20020         StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20021         AAInfo);
20022   }
20023 
20024   // Replace all merged stores with the new store.
20025   for (unsigned i = 0; i < NumStores; ++i)
20026     CombineTo(StoreNodes[i].MemNode, NewStore);
20027 
20028   AddToWorklist(NewChain.getNode());
20029   return true;
20030 }
20031 
20032 void DAGCombiner::getStoreMergeCandidates(
20033     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20034     SDNode *&RootNode) {
20035   // This holds the base pointer, index, and the offset in bytes from the base
20036   // pointer. We must have a base and an offset. Do not handle stores to undef
20037   // base pointers.
20038   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20039   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20040     return;
20041 
20042   SDValue Val = peekThroughBitcasts(St->getValue());
20043   StoreSource StoreSrc = getStoreSource(Val);
20044   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20045 
20046   // Match on loadbaseptr if relevant.
20047   EVT MemVT = St->getMemoryVT();
20048   BaseIndexOffset LBasePtr;
20049   EVT LoadVT;
20050   if (StoreSrc == StoreSource::Load) {
20051     auto *Ld = cast<LoadSDNode>(Val);
20052     LBasePtr = BaseIndexOffset::match(Ld, DAG);
20053     LoadVT = Ld->getMemoryVT();
20054     // Load and store should be the same type.
20055     if (MemVT != LoadVT)
20056       return;
20057     // Loads must only have one use.
20058     if (!Ld->hasNUsesOfValue(1, 0))
20059       return;
20060     // The memory operands must not be volatile/indexed/atomic.
20061     // TODO: May be able to relax for unordered atomics (see D66309)
20062     if (!Ld->isSimple() || Ld->isIndexed())
20063       return;
20064   }
20065   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20066                             int64_t &Offset) -> bool {
20067     // The memory operands must not be volatile/indexed/atomic.
20068     // TODO: May be able to relax for unordered atomics (see D66309)
20069     if (!Other->isSimple() || Other->isIndexed())
20070       return false;
20071     // Don't mix temporal stores with non-temporal stores.
20072     if (St->isNonTemporal() != Other->isNonTemporal())
20073       return false;
20074     if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20075       return false;
20076     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20077     // Allow merging constants of different types as integers.
20078     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20079                                            : Other->getMemoryVT() != MemVT;
20080     switch (StoreSrc) {
20081     case StoreSource::Load: {
20082       if (NoTypeMatch)
20083         return false;
20084       // The Load's Base Ptr must also match.
20085       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20086       if (!OtherLd)
20087         return false;
20088       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20089       if (LoadVT != OtherLd->getMemoryVT())
20090         return false;
20091       // Loads must only have one use.
20092       if (!OtherLd->hasNUsesOfValue(1, 0))
20093         return false;
20094       // The memory operands must not be volatile/indexed/atomic.
20095       // TODO: May be able to relax for unordered atomics (see D66309)
20096       if (!OtherLd->isSimple() || OtherLd->isIndexed())
20097         return false;
20098       // Don't mix temporal loads with non-temporal loads.
20099       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20100         return false;
20101       if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20102                                                    *OtherLd))
20103         return false;
20104       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20105         return false;
20106       break;
20107     }
20108     case StoreSource::Constant:
20109       if (NoTypeMatch)
20110         return false;
20111       if (getStoreSource(OtherBC) != StoreSource::Constant)
20112         return false;
20113       break;
20114     case StoreSource::Extract:
20115       // Do not merge truncated stores here.
20116       if (Other->isTruncatingStore())
20117         return false;
20118       if (!MemVT.bitsEq(OtherBC.getValueType()))
20119         return false;
20120       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20121           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20122         return false;
20123       break;
20124     default:
20125       llvm_unreachable("Unhandled store source for merging");
20126     }
20127     Ptr = BaseIndexOffset::match(Other, DAG);
20128     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20129   };
20130 
20131   // Check whether this pair of StoreNode and RootNode has already bailed out
20132   // of the dependence check more times than the limit allows.
20133   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20134                                         SDNode *RootNode) -> bool {
20135     auto RootCount = StoreRootCountMap.find(StoreNode);
20136     return RootCount != StoreRootCountMap.end() &&
20137            RootCount->second.first == RootNode &&
20138            RootCount->second.second > StoreMergeDependenceLimit;
20139   };
20140 
20141   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20142     // This must be a chain use.
20143     if (UseIter.getOperandNo() != 0)
20144       return;
20145     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20146       BaseIndexOffset Ptr;
20147       int64_t PtrDiff;
20148       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20149           !OverLimitInDependenceCheck(OtherStore, RootNode))
20150         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20151     }
20152   };
20153 
20154   // We are looking for a root node that is an ancestor to all mergeable
20155   // stores. We search up through a load, to our root, and then down
20156   // through all children. For instance, we will find Store{1,2,3} if
20157   // St is Store1, Store2, or Store3 where the root is not a load,
20158   // which is always true for non-volatile ops. TODO: Expand
20159   // the search to find all valid candidates through multiple layers of loads.
20160   //
20161   // Root
20162   // |-------|-------|
20163   // Load    Load    Store3
20164   // |       |
20165   // Store1   Store2
20166   //
20167   // FIXME: We should be able to climb and
20168   // descend TokenFactors to find candidates as well.
20169 
20170   RootNode = St->getChain().getNode();
20171 
20172   unsigned NumNodesExplored = 0;
20173   const unsigned MaxSearchNodes = 1024;
20174   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20175     RootNode = Ldn->getChain().getNode();
20176     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20177          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20178       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20179         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20180           TryToAddCandidate(I2);
20181       }
20182       // Check stores that depend on the root (e.g. Store 3 in the chart above).
20183       if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20184         TryToAddCandidate(I);
20185       }
20186     }
20187   } else {
20188     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20189          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20190       TryToAddCandidate(I);
20191   }
20192 }
20193 
20194 // We need to check that merging these stores does not cause a loop in the
20195 // DAG. Any store candidate may depend on another candidate indirectly through
20196 // its operands. Check in parallel by searching up from operands of candidates.
20197 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20198     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20199     SDNode *RootNode) {
20200   // FIXME: We should be able to truncate a full search of
20201   // predecessors by doing a BFS and keeping tabs on the originating
20202   // stores from which worklist nodes come, in a similar way to
20203   // TokenFactor simplification.
20204 
20205   SmallPtrSet<const SDNode *, 32> Visited;
20206   SmallVector<const SDNode *, 8> Worklist;
20207 
20208   // RootNode is a predecessor to all candidates, so we need not search
20209   // past it. Add RootNode (peeking through TokenFactors). Do not count
20210   // these toward the size check.
20211 
20212   Worklist.push_back(RootNode);
20213   while (!Worklist.empty()) {
20214     auto N = Worklist.pop_back_val();
20215     if (!Visited.insert(N).second)
20216       continue; // Already present in Visited.
20217     if (N->getOpcode() == ISD::TokenFactor) {
20218       for (SDValue Op : N->ops())
20219         Worklist.push_back(Op.getNode());
20220     }
20221   }
20222 
20223   // Don't count pruning nodes towards max.
20224   unsigned int Max = 1024 + Visited.size();
20225   // Search Ops of store candidates.
20226   for (unsigned i = 0; i < NumStores; ++i) {
20227     SDNode *N = StoreNodes[i].MemNode;
20228     // Of the 4 Store Operands:
20229     //   * Chain (Op 0) -> We have already considered these
20230     //                     in candidate selection, but only by following the
20231     //                     chain dependencies. We could still have a chain
20232     //                     dependency to a load, that has a non-chain dep to
20233     //                     another load, that depends on a store, etc. So it is
20234     //                     possible to have dependencies that consist of a mix
20235     //                     of chain and non-chain deps, and we need to include
20236     //                     chain operands in the analysis here.
20237     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20238     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
20239     //                       but aren't necessarily from the same base node, so
20240     //                       cycles are possible (e.g. via an indexed store).
20241     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20242     //               non-indexed stores). Not constant on all targets (e.g. ARM)
20243     //               and so can participate in a cycle.
20244     for (unsigned j = 0; j < N->getNumOperands(); ++j)
20245       Worklist.push_back(N->getOperand(j).getNode());
20246   }
20247   // Search through DAG. We can stop early if we find a store node.
20248   for (unsigned i = 0; i < NumStores; ++i)
20249     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20250                                      Max)) {
20251       // If the search bails out, record the StoreNode and RootNode in the
20252       // StoreRootCountMap. If we have seen the pair more times than the
20253       // limit allows, we won't add the StoreNode into the StoreNodes set again.
20254       if (Visited.size() >= Max) {
20255         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20256         if (RootCount.first == RootNode)
20257           RootCount.second++;
20258         else
20259           RootCount = {RootNode, 1};
20260       }
20261       return false;
20262     }
20263   return true;
20264 }
20265 
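// Example (illustrative): given candidate stores of 4-byte elements at
// offsets {0, 4, 8, 20} from the same base, this returns 3 for the
// consecutive run {0, 4, 8}; the store at offset 20 is left for a later
// iteration.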
20266 unsigned
20267 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20268                                   int64_t ElementSizeBytes) const {
20269   while (true) {
20270     // Find a store past the width of the first store.
20271     size_t StartIdx = 0;
20272     while ((StartIdx + 1 < StoreNodes.size()) &&
20273            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20274               StoreNodes[StartIdx + 1].OffsetFromBase)
20275       ++StartIdx;
20276 
20277     // Bail if we don't have enough candidates to merge.
20278     if (StartIdx + 1 >= StoreNodes.size())
20279       return 0;
20280 
20281     // Trim stores that overlapped with the first store.
20282     if (StartIdx)
20283       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20284 
20285     // Scan the memory operations on the chain and find the first
20286     // non-consecutive store memory address.
20287     unsigned NumConsecutiveStores = 1;
20288     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20289     // Check that the addresses are consecutive starting from the second
20290     // element in the list of stores.
20291     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20292       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20293       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20294         break;
20295       NumConsecutiveStores = i + 1;
20296     }
20297     if (NumConsecutiveStores > 1)
20298       return NumConsecutiveStores;
20299 
20300     // There are no consecutive stores at the start of the list.
20301     // Remove the first store and try again.
20302     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20303   }
20304 }
20305 
20306 bool DAGCombiner::tryStoreMergeOfConstants(
20307     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20308     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20309   LLVMContext &Context = *DAG.getContext();
20310   const DataLayout &DL = DAG.getDataLayout();
20311   int64_t ElementSizeBytes = MemVT.getStoreSize();
20312   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20313   bool MadeChange = false;
20314 
20315   // Store the constants into memory as one consecutive store.
20316   while (NumConsecutiveStores >= 2) {
20317     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20318     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20319     Align FirstStoreAlign = FirstInChain->getAlign();
20320     unsigned LastLegalType = 1;
20321     unsigned LastLegalVectorType = 1;
20322     bool LastIntegerTrunc = false;
20323     bool NonZero = false;
20324     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20325     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20326       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20327       SDValue StoredVal = ST->getValue();
20328       bool IsElementZero = false;
20329       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20330         IsElementZero = C->isZero();
20331       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20332         IsElementZero = C->getConstantFPValue()->isNullValue();
20333       else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20334         IsElementZero = true;
20335       if (IsElementZero) {
20336         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20337           FirstZeroAfterNonZero = i;
20338       }
20339       NonZero |= !IsElementZero;
20340 
20341       // Find a legal type for the constant store.
20342       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20343       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20344       unsigned IsFast = 0;
20345 
20346       // Break early when size is too large to be legal.
20347       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20348         break;
20349 
20350       if (TLI.isTypeLegal(StoreTy) &&
20351           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20352                                DAG.getMachineFunction()) &&
20353           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20354                                  *FirstInChain->getMemOperand(), &IsFast) &&
20355           IsFast) {
20356         LastIntegerTrunc = false;
20357         LastLegalType = i + 1;
20358         // Or check whether a truncstore is legal.
20359       } else if (TLI.getTypeAction(Context, StoreTy) ==
20360                  TargetLowering::TypePromoteInteger) {
20361         EVT LegalizedStoredValTy =
20362             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20363         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20364             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20365                                  DAG.getMachineFunction()) &&
20366             TLI.allowsMemoryAccess(Context, DL, StoreTy,
20367                                    *FirstInChain->getMemOperand(), &IsFast) &&
20368             IsFast) {
20369           LastIntegerTrunc = true;
20370           LastLegalType = i + 1;
20371         }
20372       }
20373 
20374       // We only use vectors if the target allows it and the function is not
20375       // marked with the noimplicitfloat attribute.
20376       if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
20377           AllowVectors) {
20378         // Find a legal type for the vector store.
20379         unsigned Elts = (i + 1) * NumMemElts;
20380         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20381         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20382             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20383             TLI.allowsMemoryAccess(Context, DL, Ty,
20384                                    *FirstInChain->getMemOperand(), &IsFast) &&
20385             IsFast)
20386           LastLegalVectorType = i + 1;
20387       }
20388     }
20389 
20390     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20391     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20392     bool UseTrunc = LastIntegerTrunc && !UseVector;
20393 
20394     // Check if we found a legal integer type that creates a meaningful
20395     // merge.
20396     if (NumElem < 2) {
20397       // We know that candidate stores are in order and of correct
20398       // shape. While there is no mergeable sequence from the
20399       // beginning, one may start later in the sequence. The only
20400       // reason a merge of size N could have failed where another of
20401       // the same size would not have, is if the alignment has
20402       // improved or we've dropped a non-zero value. Drop as many
20403       // candidates as we can here.
20404       unsigned NumSkip = 1;
20405       while ((NumSkip < NumConsecutiveStores) &&
20406              (NumSkip < FirstZeroAfterNonZero) &&
20407              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20408         NumSkip++;
20409 
20410       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20411       NumConsecutiveStores -= NumSkip;
20412       continue;
20413     }
20414 
20415     // Check that we can merge these candidates without causing a cycle.
20416     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20417                                                   RootNode)) {
20418       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20419       NumConsecutiveStores -= NumElem;
20420       continue;
20421     }
20422 
20423     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20424                                                   /*IsConstantSrc*/ true,
20425                                                   UseVector, UseTrunc);
20426 
20427     // Remove merged stores for next iteration.
20428     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20429     NumConsecutiveStores -= NumElem;
20430   }
20431   return MadeChange;
20432 }
20433 
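// Example (illustrative): two consecutive stores of elements 0 and 1
// extracted from the same v4i32 vector may be merged into a single v2i32
// vector store when the target supports it.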
20434 bool DAGCombiner::tryStoreMergeOfExtracts(
20435     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20436     EVT MemVT, SDNode *RootNode) {
20437   LLVMContext &Context = *DAG.getContext();
20438   const DataLayout &DL = DAG.getDataLayout();
20439   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20440   bool MadeChange = false;
20441 
20442   // Loop over the consecutive stores while merging succeeds.
20443   while (NumConsecutiveStores >= 2) {
20444     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20445     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20446     Align FirstStoreAlign = FirstInChain->getAlign();
20447     unsigned NumStoresToMerge = 1;
20448     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20449       // Find a legal type for the vector store.
20450       unsigned Elts = (i + 1) * NumMemElts;
20451       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
20452       unsigned IsFast = 0;
20453 
20454       // Break early when size is too large to be legal.
20455       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20456         break;
20457 
20458       if (TLI.isTypeLegal(Ty) &&
20459           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20460           TLI.allowsMemoryAccess(Context, DL, Ty,
20461                                  *FirstInChain->getMemOperand(), &IsFast) &&
20462           IsFast)
20463         NumStoresToMerge = i + 1;
20464     }
20465 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
20468     if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the beginning,
      // one may start later in the sequence. The only reason a merge of
      // size N could have failed where another of the same size would
      // not have is if the alignment has improved. Drop as many
      // candidates as we can here.
20475       unsigned NumSkip = 1;
20476       while ((NumSkip < NumConsecutiveStores) &&
20477              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20478         NumSkip++;
20479 
20480       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20481       NumConsecutiveStores -= NumSkip;
20482       continue;
20483     }
20484 
20485     // Check that we can merge these candidates without causing a cycle.
20486     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20487                                                   RootNode)) {
20488       StoreNodes.erase(StoreNodes.begin(),
20489                        StoreNodes.begin() + NumStoresToMerge);
20490       NumConsecutiveStores -= NumStoresToMerge;
20491       continue;
20492     }
20493 
20494     MadeChange |= mergeStoresOfConstantsOrVecElts(
20495         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20496         /*UseVector*/ true, /*UseTrunc*/ false);
20497 
20498     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20499     NumConsecutiveStores -= NumStoresToMerge;
20500   }
20501   return MadeChange;
20502 }
20503 
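// Illustrative sketch of the transform attempted below (i32 elements):
//   store (load p),   q
//   store (load p+4), q+4
// can become a single wide load followed by a single wide store:
//   store (load i64 p), q
// provided the wide type is legal and both merged accesses are fast.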
20504 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20505                                        unsigned NumConsecutiveStores, EVT MemVT,
20506                                        SDNode *RootNode, bool AllowVectors,
20507                                        bool IsNonTemporalStore,
20508                                        bool IsNonTemporalLoad) {
20509   LLVMContext &Context = *DAG.getContext();
20510   const DataLayout &DL = DAG.getDataLayout();
20511   int64_t ElementSizeBytes = MemVT.getStoreSize();
20512   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20513   bool MadeChange = false;
20514 
20515   // Look for load nodes which are used by the stored values.
20516   SmallVector<MemOpLink, 8> LoadNodes;
20517 
  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, or indexed, and they must be consecutive.
20520   BaseIndexOffset LdBasePtr;
20521 
20522   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20523     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20524     SDValue Val = peekThroughBitcasts(St->getValue());
20525     LoadSDNode *Ld = cast<LoadSDNode>(Val);
20526 
20527     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
    int64_t LdOffset = 0;
    if (LdBasePtr.getBase().getNode()) {
      // If this is not the first pointer that we check, its base must match
      // the base of all the others.
      if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
        break;
    } else {
      // Remember the first base pointer; later pointers are checked against
      // it.
      LdBasePtr = LdPtr;
    }
20538 
20539     // We found a potential memory operand to merge.
20540     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20541   }
20542 
20543   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20544     Align RequiredAlignment;
20545     bool NeedRotate = false;
20546     if (LoadNodes.size() == 2) {
20547       // If we have load/store pair instructions and we only have two values,
20548       // don't bother merging.
20549       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20550           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20551         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20552         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20553         break;
20554       }
20555       // If the loads are reversed, see if we can rotate the halves into place.
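      // For example (i32 elements, little-endian):
      //   store (load p+4), q
      //   store (load p),   q+4
      // can become (store (rotl (load i64 p), 32), q).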
20556       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20557       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20558       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20559       if (Offset0 - Offset1 == ElementSizeBytes &&
20560           (hasOperation(ISD::ROTL, PairVT) ||
20561            hasOperation(ISD::ROTR, PairVT))) {
20562         std::swap(LoadNodes[0], LoadNodes[1]);
20563         NeedRotate = true;
20564       }
20565     }
20566     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20567     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20568     Align FirstStoreAlign = FirstInChain->getAlign();
20569     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20570 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. This variable holds the index
    // into the load node array.
20574 
20575     unsigned LastConsecutiveLoad = 1;
20576 
20577     // This variable refers to the size and not index in the array.
20578     unsigned LastLegalVectorType = 1;
20579     unsigned LastLegalIntegerType = 1;
20580     bool isDereferenceable = true;
20581     bool DoIntegerTruncate = false;
20582     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20583     SDValue LoadChain = FirstLoad->getChain();
20584     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20585       // All loads must share the same chain.
20586       if (LoadNodes[i].MemNode->getChain() != LoadChain)
20587         break;
20588 
20589       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20590       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20591         break;
20592       LastConsecutiveLoad = i;
20593 
20594       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20595         isDereferenceable = false;
20596 
20597       // Find a legal type for the vector store.
20598       unsigned Elts = (i + 1) * NumMemElts;
20599       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20600 
20601       // Break early when size is too large to be legal.
20602       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20603         break;
20604 
20605       unsigned IsFastSt = 0;
20606       unsigned IsFastLd = 0;
20607       // Don't try vector types if we need a rotate. We may still fail the
20608       // legality checks for the integer type, but we can't handle the rotate
20609       // case with vectors.
20610       // FIXME: We could use a shuffle in place of the rotate.
20611       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20612           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20613                                DAG.getMachineFunction()) &&
20614           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20615                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
20616           IsFastSt &&
20617           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20618                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
20619           IsFastLd) {
20620         LastLegalVectorType = i + 1;
20621       }
20622 
20623       // Find a legal type for the integer store.
20624       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20625       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20626       if (TLI.isTypeLegal(StoreTy) &&
20627           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20628                                DAG.getMachineFunction()) &&
20629           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20630                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
20631           IsFastSt &&
20632           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20633                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
20634           IsFastLd) {
20635         LastLegalIntegerType = i + 1;
20636         DoIntegerTruncate = false;
20637         // Or check whether a truncstore and extload is legal.
20638       } else if (TLI.getTypeAction(Context, StoreTy) ==
20639                  TargetLowering::TypePromoteInteger) {
20640         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20641         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20642             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20643                                  DAG.getMachineFunction()) &&
20644             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20645             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20646             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20647             TLI.allowsMemoryAccess(Context, DL, StoreTy,
20648                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
20649             IsFastSt &&
20650             TLI.allowsMemoryAccess(Context, DL, StoreTy,
20651                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
20652             IsFastLd) {
20653           LastLegalIntegerType = i + 1;
20654           DoIntegerTruncate = true;
20655         }
20656       }
20657     }
20658 
20659     // Only use vector types if the vector type is larger than the integer
20660     // type. If they are the same, use integers.
20661     bool UseVectorTy =
20662         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20663     unsigned LastLegalType =
20664         std::max(LastLegalVectorType, LastLegalIntegerType);
20665 
    // We add +1 here because LastConsecutiveLoad is a zero-based index,
    // while NumElem is a count of elements.
20668     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20669     NumElem = std::min(LastLegalType, NumElem);
20670     Align FirstLoadAlign = FirstLoad->getAlign();
20671 
20672     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the beginning,
      // one may start later in the sequence. The only reason a merge of
      // size N could have failed where another of the same size would
      // not have is if the alignment of either the load or the store
      // has improved. Drop as many candidates as we can here.
20680       unsigned NumSkip = 1;
20681       while ((NumSkip < LoadNodes.size()) &&
20682              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20683              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20684         NumSkip++;
20685       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20686       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20687       NumConsecutiveStores -= NumSkip;
20688       continue;
20689     }
20690 
20691     // Check that we can merge these candidates without causing a cycle.
20692     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20693                                                   RootNode)) {
20694       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20695       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20696       NumConsecutiveStores -= NumElem;
20697       continue;
20698     }
20699 
20700     // Find if it is better to use vectors or integers to load and store
20701     // to memory.
20702     EVT JointMemOpVT;
20703     if (UseVectorTy) {
20704       // Find a legal type for the vector store.
20705       unsigned Elts = NumElem * NumMemElts;
20706       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20707     } else {
20708       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20709       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20710     }
20711 
20712     SDLoc LoadDL(LoadNodes[0].MemNode);
20713     SDLoc StoreDL(StoreNodes[0].MemNode);
20714 
20715     // The merged loads are required to have the same incoming chain, so
20716     // using the first's chain is acceptable.
20717 
20718     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20719     bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20720     AddToWorklist(NewStoreChain.getNode());
20721 
20722     MachineMemOperand::Flags LdMMOFlags =
20723         isDereferenceable ? MachineMemOperand::MODereferenceable
20724                           : MachineMemOperand::MONone;
20725     if (IsNonTemporalLoad)
20726       LdMMOFlags |= MachineMemOperand::MONonTemporal;
20727 
20728     LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20729 
20730     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20731                                               ? MachineMemOperand::MONonTemporal
20732                                               : MachineMemOperand::MONone;
20733 
20734     StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20735 
20736     SDValue NewLoad, NewStore;
20737     if (UseVectorTy || !DoIntegerTruncate) {
20738       NewLoad = DAG.getLoad(
20739           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20740           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20741       SDValue StoreOp = NewLoad;
20742       if (NeedRotate) {
20743         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20744         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20745                "Unexpected type for rotate-able load pair");
20746         SDValue RotAmt =
20747             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
        // Rotating by half the width, ROTL and ROTR are identical, so the
        // target can lower this as ROTR if it does not have ROTL.
20749         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20750       }
20751       NewStore = DAG.getStore(
20752           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20753           CanReusePtrInfo ? FirstInChain->getPointerInfo()
20754                           : MachinePointerInfo(FirstStoreAS),
20755           FirstStoreAlign, StMMOFlags);
20756     } else { // This must be the truncstore/extload case
20757       EVT ExtendedTy =
20758           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20759       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20760                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
20761                                FirstLoad->getPointerInfo(), JointMemOpVT,
20762                                FirstLoadAlign, LdMMOFlags);
20763       NewStore = DAG.getTruncStore(
20764           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20765           CanReusePtrInfo ? FirstInChain->getPointerInfo()
20766                           : MachinePointerInfo(FirstStoreAS),
20767           JointMemOpVT, FirstInChain->getAlign(),
20768           FirstInChain->getMemOperand()->getFlags());
20769     }
20770 
20771     // Transfer chain users from old loads to the new load.
20772     for (unsigned i = 0; i < NumElem; ++i) {
20773       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20774       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20775                                     SDValue(NewLoad.getNode(), 1));
20776     }
20777 
20778     // Replace all stores with the new store. Recursively remove corresponding
20779     // values if they are no longer used.
20780     for (unsigned i = 0; i < NumElem; ++i) {
20781       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20782       CombineTo(StoreNodes[i].MemNode, NewStore);
20783       if (Val->use_empty())
20784         recursivelyDeleteUnusedNodes(Val.getNode());
20785     }
20786 
20787     MadeChange = true;
20788     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20789     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20790     NumConsecutiveStores -= NumElem;
20791   }
20792   return MadeChange;
20793 }
20794 
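// For example (an illustrative sketch, little-endian), two adjacent i32
// constant stores:
//   store i32 C0, p
//   store i32 C1, p+4
// can be merged into a single i64 store of ((i64)C1 << 32) | C0.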
20795 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20796   if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20797     return false;
20798 
20799   // TODO: Extend this function to merge stores of scalable vectors.
20800   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20801   // store since we know <vscale x 16 x i8> is exactly twice as large as
20802   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20803   EVT MemVT = St->getMemoryVT();
20804   if (MemVT.isScalableVT())
20805     return false;
20806   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20807     return false;
20808 
20809   // This function cannot currently deal with non-byte-sized memory sizes.
20810   int64_t ElementSizeBytes = MemVT.getStoreSize();
20811   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20812     return false;
20813 
20814   // Do not bother looking at stored values that are not constants, loads, or
20815   // extracted vector elements.
20816   SDValue StoredVal = peekThroughBitcasts(St->getValue());
20817   const StoreSource StoreSrc = getStoreSource(StoredVal);
20818   if (StoreSrc == StoreSource::Unknown)
20819     return false;
20820 
20821   SmallVector<MemOpLink, 8> StoreNodes;
20822   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
20824   getStoreMergeCandidates(St, StoreNodes, RootNode);
20825 
20826   // Check if there is anything to merge.
20827   if (StoreNodes.size() < 2)
20828     return false;
20829 
20830   // Sort the memory operands according to their distance from the
20831   // base pointer.
20832   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20833     return LHS.OffsetFromBase < RHS.OffsetFromBase;
20834   });
20835 
20836   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20837       Attribute::NoImplicitFloat);
20838   bool IsNonTemporalStore = St->isNonTemporal();
20839   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20840                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
20841 
  // Store merging attempts to merge the lowest-addressed stores first. This
  // generally works out, as on success the remaining stores are checked
  // after the first collection of stores is merged. However, when a
  // non-mergeable store is found first, e.g., {p[-2], p[0], p[1], p[2],
  // p[3]}, we would fail and miss the subsequent mergeable cases. To
  // prevent this, we prune such stores from the front of StoreNodes here.
20849   bool MadeChange = false;
20850   while (StoreNodes.size() > 1) {
20851     unsigned NumConsecutiveStores =
20852         getConsecutiveStores(StoreNodes, ElementSizeBytes);
20853     // There are no more stores in the list to examine.
20854     if (NumConsecutiveStores == 0)
20855       return MadeChange;
20856 
20857     // We have at least 2 consecutive stores. Try to merge them.
20858     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20859     switch (StoreSrc) {
20860     case StoreSource::Constant:
20861       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20862                                              MemVT, RootNode, AllowVectors);
20863       break;
20864 
20865     case StoreSource::Extract:
20866       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20867                                             MemVT, RootNode);
20868       break;
20869 
20870     case StoreSource::Load:
20871       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20872                                          MemVT, RootNode, AllowVectors,
20873                                          IsNonTemporalStore, IsNonTemporalLoad);
20874       break;
20875 
20876     default:
20877       llvm_unreachable("Unhandled store source type");
20878     }
20879   }
20880   return MadeChange;
20881 }
20882 
20883 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20884   SDLoc SL(ST);
20885   SDValue ReplStore;
20886 
20887   // Replace the chain to avoid dependency.
20888   if (ST->isTruncatingStore()) {
20889     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20890                                   ST->getBasePtr(), ST->getMemoryVT(),
20891                                   ST->getMemOperand());
20892   } else {
20893     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20894                              ST->getMemOperand());
20895   }
20896 
20897   // Create token to keep both nodes around.
20898   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
20899                               MVT::Other, ST->getChain(), ReplStore);
20900 
20901   // Make sure the new and old chains are cleaned up.
20902   AddToWorklist(Token.getNode());
20903 
20904   // Don't add users to work list.
20905   return CombineTo(ST, Token, false);
20906 }
20907 
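// Replace a store of an FP constant with a store of the equivalent integer
// bit pattern, e.g. 'store f32 1.0, p' -> 'store i32 0x3F800000, p', when
// the equivalent integer store is legal (or before legalization).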
20908 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
20909   SDValue Value = ST->getValue();
20910   if (Value.getOpcode() == ISD::TargetConstantFP)
20911     return SDValue();
20912 
20913   if (!ISD::isNormalStore(ST))
20914     return SDValue();
20915 
20916   SDLoc DL(ST);
20917 
20918   SDValue Chain = ST->getChain();
20919   SDValue Ptr = ST->getBasePtr();
20920 
20921   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
20922 
20923   // NOTE: If the original store is volatile, this transform must not increase
20924   // the number of stores.  For example, on x86-32 an f64 can be stored in one
20925   // processor operation but an i64 (which is not legal) requires two.  So the
20926   // transform should not be done in this case.
20927 
20928   SDValue Tmp;
20929   switch (CFP->getSimpleValueType(0).SimpleTy) {
20930   default:
20931     llvm_unreachable("Unknown FP type");
20932   case MVT::f16:    // We don't do this for these yet.
20933   case MVT::bf16:
20934   case MVT::f80:
20935   case MVT::f128:
20936   case MVT::ppcf128:
20937     return SDValue();
20938   case MVT::f32:
20939     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
20940         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
20941       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
20942                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
20943                             MVT::i32);
20944       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
20945     }
20946 
20947     return SDValue();
20948   case MVT::f64:
20949     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
20950          ST->isSimple()) ||
20951         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
20952       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
20953                             getZExtValue(), SDLoc(CFP), MVT::i64);
20954       return DAG.getStore(Chain, DL, Tmp,
20955                           Ptr, ST->getMemOperand());
20956     }
20957 
20958     if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
20959         !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
20960       // Many FP stores are not made apparent until after legalize, e.g. for
20961       // argument passing.  Since this is so common, custom legalize the
20962       // 64-bit integer store into two 32-bit stores.
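      // For example (illustrative), storing f64 1.0 on a little-endian
      // target becomes a store of i32 0x00000000 at Ptr and a store of
      // i32 0x3FF00000 at Ptr+4.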
20963       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
20964       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
20965       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
20966       if (DAG.getDataLayout().isBigEndian())
20967         std::swap(Lo, Hi);
20968 
20969       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
20970       AAMDNodes AAInfo = ST->getAAInfo();
20971 
20972       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
20973                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
20974       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
20975       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
20976                                  ST->getPointerInfo().getWithOffset(4),
20977                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
20978       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
20979                          St0, St1);
20980     }
20981 
20982     return SDValue();
20983   }
20984 }
20985 
20986 // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
20987 //
// If a store of a load with an element inserted into it has no other
// uses of the load in between on the chain, then we can consider the
// vector store dead and replace it with just the single scalar-element
// store.
20991 SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
20992   SDLoc DL(ST);
20993   SDValue Value = ST->getValue();
20994   SDValue Ptr = ST->getBasePtr();
20995   SDValue Chain = ST->getChain();
20996   if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
20997     return SDValue();
20998 
20999   SDValue Elt = Value.getOperand(1);
21000   SDValue Idx = Value.getOperand(2);
21001 
  // If the element isn't byte-sized or is implicitly truncated, then we
  // can't compute an offset.
21004   EVT EltVT = Elt.getValueType();
21005   if (!EltVT.isByteSized() ||
21006       EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21007     return SDValue();
21008 
21009   auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21010   if (!Ld || Ld->getBasePtr() != Ptr ||
21011       ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21012       !ISD::isNormalStore(ST) ||
21013       Ld->getAddressSpace() != ST->getAddressSpace() ||
21014       !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21015     return SDValue();
21016 
  unsigned IsFast = 0;
21018   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21019                               Elt.getValueType(), ST->getAddressSpace(),
21020                               ST->getAlign(), ST->getMemOperand()->getFlags(),
21021                               &IsFast) ||
21022       !IsFast)
21023     return SDValue();
21024 
21025   MachinePointerInfo PointerInfo(ST->getAddressSpace());
21026 
  // If the offset is a known constant, then try to recover the pointer
  // info.
21029   SDValue NewPtr;
21030   if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21031     unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21032     NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21033     PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21034   } else {
21035     NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21036   }
21037 
21038   return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21039                       ST->getMemOperand()->getFlags());
21040 }
21041 
21042 SDValue DAGCombiner::visitSTORE(SDNode *N) {
21043   StoreSDNode *ST  = cast<StoreSDNode>(N);
21044   SDValue Chain = ST->getChain();
21045   SDValue Value = ST->getValue();
21046   SDValue Ptr   = ST->getBasePtr();
21047 
21048   // If this is a store of a bit convert, store the input value if the
21049   // resultant store does not need a higher alignment than the original.
21050   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21051       ST->isUnindexed()) {
21052     EVT SVT = Value.getOperand(0).getValueType();
21053     // If the store is volatile, we only want to change the store type if the
21054     // resulting store is legal. Otherwise we might increase the number of
21055     // memory accesses. We don't care if the original type was legal or not
21056     // as we assume software couldn't rely on the number of accesses of an
21057     // illegal type.
21058     // TODO: May be able to relax for unordered atomics (see D66309)
21059     if (((!LegalOperations && ST->isSimple()) ||
21060          TLI.isOperationLegal(ISD::STORE, SVT)) &&
21061         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21062                                      DAG, *ST->getMemOperand())) {
21063       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21064                           ST->getMemOperand());
21065     }
21066   }
21067 
21068   // Turn 'store undef, Ptr' -> nothing.
21069   if (Value.isUndef() && ST->isUnindexed())
21070     return Chain;
21071 
21072   // Try to infer better alignment information than the store already has.
21073   if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21074       !ST->isAtomic()) {
21075     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21076       if (*Alignment > ST->getAlign() &&
21077           isAligned(*Alignment, ST->getSrcValueOffset())) {
21078         SDValue NewStore =
21079             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21080                               ST->getMemoryVT(), *Alignment,
21081                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N, as we are only refining the alignment.
21083         assert(NewStore.getNode() == N);
21084         (void)NewStore;
21085       }
21086     }
21087   }
21088 
  // Try transforming a pair of floating-point load / store ops to integer
  // load / store ops.
21091   if (SDValue NewST = TransformFPLoadStorePair(N))
21092     return NewST;
21093 
21094   // Try transforming several stores into STORE (BSWAP).
21095   if (SDValue Store = mergeTruncStores(ST))
21096     return Store;
21097 
21098   if (ST->isUnindexed()) {
21099     // Walk up chain skipping non-aliasing memory nodes, on this store and any
21100     // adjacent stores.
21101     if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node to not do anything else.
21104       return SDValue(ST, 0);
21105     }
21106     Chain = ST->getChain();
21107   }
21108 
21109   // FIXME: is there such a thing as a truncating indexed store?
21110   if (ST->isTruncatingStore() && ST->isUnindexed() &&
21111       Value.getValueType().isInteger() &&
21112       (!isa<ConstantSDNode>(Value) ||
21113        !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Convert a truncating store of an extension into a standard store.
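    // For example: (truncstore (zext x:i16 to i32), p, i16) -> (store x, p).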
21115     if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21116          Value.getOpcode() == ISD::SIGN_EXTEND ||
21117          Value.getOpcode() == ISD::ANY_EXTEND) &&
21118         Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21119         TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21120       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21121                           ST->getMemOperand());
21122 
21123     APInt TruncDemandedBits =
21124         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21125                              ST->getMemoryVT().getScalarSizeInBits());
21126 
21127     // See if we can simplify the operation with SimplifyDemandedBits, which
21128     // only works if the value has a single use.
21129     AddToWorklist(Value.getNode());
21130     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (in which case N is deleted).
      // SimplifyDemandedBits will add Value's node back to the worklist if
      // necessary, but we also need to re-visit the store node itself.
21135       if (N->getOpcode() != ISD::DELETED_NODE)
21136         AddToWorklist(N);
21137       return SDValue(N, 0);
21138     }
21139 
21140     // Otherwise, see if we can simplify the input to this truncstore with
21141     // knowledge that only the low bits are being used.  For example:
21142     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
21143     if (SDValue Shorter =
21144             TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21145       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21146                                ST->getMemOperand());
21147 
21148     // If we're storing a truncated constant, see if we can simplify it.
21149     // TODO: Move this to targetShrinkDemandedConstant?
21150     if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21151       if (!Cst->isOpaque()) {
21152         const APInt &CValue = Cst->getAPIntValue();
21153         APInt NewVal = CValue & TruncDemandedBits;
21154         if (NewVal != CValue) {
21155           SDValue Shorter =
21156               DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21157           return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21158                                    ST->getMemoryVT(), ST->getMemOperand());
21159         }
21160       }
21161   }
21162 
21163   // If this is a load followed by a store to the same location, then the store
21164   // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21165   // TODO: Add big-endian truncate support with test coverage.
21166   // TODO: Can relax for unordered atomics (see D66309)
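  // For example, in (store (load p), p) with no intervening side effects,
  // the store is a no-op and can be removed.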
21167   SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21168                          ? peekThroughTruncates(Value)
21169                          : Value;
21170   if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21171     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21172         ST->isUnindexed() && ST->isSimple() &&
21173         Ld->getAddressSpace() == ST->getAddressSpace() &&
21174         // There can't be any side effects between the load and store, such as
21175         // a call or store.
21176         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21177       // The store is dead, remove it.
21178       return Chain;
21179     }
21180   }
21181 
21182   // Try scalarizing vector stores of loads where we only change one element
21183   if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21184     return NewST;
21185 
21186   // TODO: Can relax for unordered atomics (see D66309)
21187   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21188     if (ST->isUnindexed() && ST->isSimple() &&
21189         ST1->isUnindexed() && ST1->isSimple()) {
21190       if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21191           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21192           ST->getAddressSpace() == ST1->getAddressSpace()) {
21193         // If this is a store followed by a store with the same value to the
21194         // same location, then the store is dead/noop.
21195         return Chain;
21196       }
21197 
21198       if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21199           !ST1->getBasePtr().isUndef() &&
21200           ST->getAddressSpace() == ST1->getAddressSpace()) {
        // If either store has a scalable vector type, its final size is
        // unknown at compile time, so we can only remove the earlier store
        // when the base pointers match and its store size is known not to
        // exceed the later store's.
21205         if (ST->getMemoryVT().isScalableVector() ||
21206             ST1->getMemoryVT().isScalableVector()) {
21207           if (ST1->getBasePtr() == Ptr &&
21208               TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21209                                   ST->getMemoryVT().getStoreSize())) {
21210             CombineTo(ST1, ST1->getChain());
21211             return SDValue(N, 0);
21212           }
21213         } else {
21214           const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21215           const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
          // If the preceding store writes to a subset of the current store's
          // location and no other node is chained to that store, we can
          // effectively drop it. Do not remove stores to undef as they may
          // be used as data sinks.
21220           if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21221                               ChainBase,
21222                               ST1->getMemoryVT().getFixedSizeInBits())) {
21223             CombineTo(ST1, ST1->getChain());
21224             return SDValue(N, 0);
21225           }
21226         }
21227       }
21228     }
21229   }
21230 
21231   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21232   // truncating store.  We can do this even if this is already a truncstore.
21233   if ((Value.getOpcode() == ISD::FP_ROUND ||
21234        Value.getOpcode() == ISD::TRUNCATE) &&
21235       Value->hasOneUse() && ST->isUnindexed() &&
21236       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21237                                ST->getMemoryVT(), LegalOperations)) {
21238     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21239                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
21240   }
21241 
21242   // Always perform this optimization before types are legal. If the target
21243   // prefers, also try this after legalization to catch stores that were created
21244   // by intrinsics or other nodes.
21245   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21246     while (true) {
21247       // There can be multiple store sequences on the same chain.
21248       // Keep trying to merge store sequences until we are unable to do so
21249       // or until we merge the last store on the chain.
21250       bool Changed = mergeConsecutiveStores(ST);
21251       if (!Changed) break;
      // Return N, as merging only uses CombineTo and no worklist cleanup
      // is necessary.
21254       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21255         return SDValue(N, 0);
21256     }
21257   }
21258 
21259   // Try transforming N to an indexed store.
21260   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21261     return SDValue(N, 0);
21262 
21263   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
21264   //
21265   // Make sure to do this only after attempting to merge stores in order to
21266   //  avoid changing the types of some subset of stores due to visit order,
21267   //  preventing their merging.
21268   if (isa<ConstantFPSDNode>(ST->getValue())) {
21269     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21270       return NewSt;
21271   }
21272 
21273   if (SDValue NewSt = splitMergedValStore(ST))
21274     return NewSt;
21275 
21276   return ReduceLoadOpStoreWidth(N);
21277 }
21278 
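// Remove stores that are provably dead because the stored-to object's
// lifetime ends immediately afterwards. Illustrative sketch:
//   store x, %obj
//   LIFETIME_END %obj   (chained after the store, with no intervening reads)
// The store can be deleted, since %obj may not be read after its lifetime
// ends.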
21279 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21280   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21281   if (!LifetimeEnd->hasOffset())
21282     return SDValue();
21283 
21284   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21285                                         LifetimeEnd->getOffset(), false);
21286 
21287   // We walk up the chains to find stores.
21288   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21289   while (!Chains.empty()) {
21290     SDValue Chain = Chains.pop_back_val();
21291     if (!Chain.hasOneUse())
21292       continue;
21293     switch (Chain.getOpcode()) {
21294     case ISD::TokenFactor:
21295       for (unsigned Nops = Chain.getNumOperands(); Nops;)
21296         Chains.push_back(Chain.getOperand(--Nops));
21297       break;
21298     case ISD::LIFETIME_START:
21299     case ISD::LIFETIME_END:
21300       // We can forward past any lifetime start/end that can be proven not to
21301       // alias the node.
21302       if (!mayAlias(Chain.getNode(), N))
21303         Chains.push_back(Chain.getOperand(0));
21304       break;
21305     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
21307       // TODO: Can relax for unordered atomics (see D66309)
21308       if (!ST->isSimple() || ST->isIndexed())
21309         continue;
21310       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21311       // The bounds of a scalable store are not known until runtime, so this
21312       // store cannot be elided.
21313       if (StoreSize.isScalable())
21314         continue;
21315       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21316       // If we store purely within object bounds just before its lifetime ends,
21317       // we can remove the store.
21318       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21319                                    StoreSize.getFixedValue() * 8)) {
21320         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21321                    dbgs() << "\nwithin LIFETIME_END of : ";
21322                    LifetimeEndBase.dump(); dbgs() << "\n");
21323         CombineTo(ST, ST->getChain());
21324         return SDValue(N, 0);
21325       }
21326     }
21327     }
21328   }
21329   return SDValue();
21330 }
21331 
/// For the store instruction sequence below, the F and I values are
/// bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
21335 /// which can remove the bitwise instructions or sink them to colder places.
21336 ///
21337 ///   (store (or (zext (bitcast F to i32) to i64),
21338 ///              (shl (zext I to i64), 32)), addr)  -->
21339 ///   (store F, addr) and (store I, addr+4)
21340 ///
21341 /// Similarly, splitting for other merged store can also be beneficial, like:
21342 /// For pair of {i32, i32}, i64 store --> two i32 stores.
21343 /// For pair of {i32, i16}, i64 store --> two i32 stores.
21344 /// For pair of {i16, i16}, i32 store --> two i16 stores.
21345 /// For pair of {i16, i8},  i32 store --> two i16 stores.
21346 /// For pair of {i8, i8},   i16 store --> two i8 stores.
21347 ///
21348 /// We allow each target to determine specifically which kind of splitting is
21349 /// supported.
21350 ///
/// The store patterns commonly arise from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined
/// into hoo().
21353 ///   void goo(const std::pair<int, float> &);
21354 ///   hoo() {
21355 ///     ...
21356 ///     goo(std::make_pair(tmp, ftmp));
21357 ///     ...
21358 ///   }
21359 ///
21360 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21361   if (OptLevel == CodeGenOptLevel::None)
21362     return SDValue();
21363 
21364   // Can't change the number of memory accesses for a volatile store or break
21365   // atomicity for an atomic one.
21366   if (!ST->isSimple())
21367     return SDValue();
21368 
21369   SDValue Val = ST->getValue();
21370   SDLoc DL(ST);
21371 
21372   // Match OR operand.
21373   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21374     return SDValue();
21375 
21376   // Match SHL operand and get Lower and Higher parts of Val.
21377   SDValue Op1 = Val.getOperand(0);
21378   SDValue Op2 = Val.getOperand(1);
21379   SDValue Lo, Hi;
21380   if (Op1.getOpcode() != ISD::SHL) {
21381     std::swap(Op1, Op2);
21382     if (Op1.getOpcode() != ISD::SHL)
21383       return SDValue();
21384   }
21385   Lo = Op2;
21386   Hi = Op1.getOperand(0);
21387   if (!Op1.hasOneUse())
21388     return SDValue();
21389 
21390   // Match shift amount to HalfValBitSize.
21391   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21392   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21393   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21394     return SDValue();
21395 
  // Lo and Hi are zero-extended from integer types no wider than
  // HalfValBitSize.
21398   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21399       !Lo.getOperand(0).getValueType().isScalarInteger() ||
21400       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21401       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21402       !Hi.getOperand(0).getValueType().isScalarInteger() ||
21403       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21404     return SDValue();
21405 
21406   // Use the EVT of low and high parts before bitcast as the input
21407   // of target query.
21408   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21409                   ? Lo.getOperand(0).getValueType()
21410                   : Lo.getValueType();
21411   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21412                    ? Hi.getOperand(0).getValueType()
21413                    : Hi.getValueType();
21414   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21415     return SDValue();
21416 
21417   // Start to split store.
21418   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21419   AAMDNodes AAInfo = ST->getAAInfo();
21420 
21421   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21422   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21423   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21424   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21425 
21426   SDValue Chain = ST->getChain();
21427   SDValue Ptr = ST->getBasePtr();
21428   // Lower value store.
21429   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21430                              ST->getOriginalAlign(), MMOFlags, AAInfo);
21431   Ptr =
21432       DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21433   // Higher value store.
21434   SDValue St1 = DAG.getStore(
21435       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21436       ST->getOriginalAlign(), MMOFlags, AAInfo);
21437   return St1;
21438 }
21439 
21440 // Merge an insertion into an existing shuffle:
21441 // (insert_vector_elt (vector_shuffle X, Y, Mask),
//                    (extract_vector_elt X, N), InsIndex)
21443 //   --> (vector_shuffle X, Y, NewMask)
21444 //  and variations where shuffle operands may be CONCAT_VECTORS.
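// For example (illustrative): with 4-element shuffle operands X and Y and
// Mask = <0,1,2,3>, inserting (extract_vector_elt Y, 1) at index 2 yields
// NewMask = <0,1,5,3>, since Y's elements are numbered from Mask.size().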
21445 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21446                                 SmallVectorImpl<int> &NewMask, SDValue Elt,
21447                                 unsigned InsIndex) {
21448   if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21449       !isa<ConstantSDNode>(Elt.getOperand(1)))
21450     return false;
21451 
21452   // Vec's operand 0 is using indices from 0 to N-1 and
21453   // operand 1 from N to 2N - 1, where N is the number of
21454   // elements in the vectors.
21455   SDValue InsertVal0 = Elt.getOperand(0);
21456   int ElementOffset = -1;
21457 
21458   // We explore the inputs of the shuffle in order to see if we find the
21459   // source of the extract_vector_elt. If so, we can use it to modify the
21460   // shuffle rather than perform an insert_vector_elt.
21461   SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
21462   ArgWorkList.emplace_back(Mask.size(), Y);
21463   ArgWorkList.emplace_back(0, X);
21464 
21465   while (!ArgWorkList.empty()) {
21466     int ArgOffset;
21467     SDValue ArgVal;
21468     std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21469 
21470     if (ArgVal == InsertVal0) {
21471       ElementOffset = ArgOffset;
21472       break;
21473     }
21474 
21475     // Peek through concat_vector.
21476     if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21477       int CurrentArgOffset =
21478           ArgOffset + ArgVal.getValueType().getVectorNumElements();
21479       int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21480       for (SDValue Op : reverse(ArgVal->ops())) {
21481         CurrentArgOffset -= Step;
21482         ArgWorkList.emplace_back(CurrentArgOffset, Op);
21483       }
21484 
21485       // Make sure we went through all the elements and did not screw up index
21486       // computation.
21487       assert(CurrentArgOffset == ArgOffset);
21488     }
21489   }
21490 
21491   // If we failed to find a match, see if we can replace an UNDEF shuffle
21492   // operand.
21493   if (ElementOffset == -1) {
21494     if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21495       return false;
21496     ElementOffset = Mask.size();
21497     Y = InsertVal0;
21498   }
21499 
21500   NewMask.assign(Mask.begin(), Mask.end());
21501   NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21502   assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21503          "NewMask[InsIndex] is out of bound");
21504   return true;
21505 }
21506 
21507 // Merge an insertion into an existing shuffle:
21508 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21509 // InsIndex)
21510 //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
21511 //   CONCAT_VECTORS.
21512 SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
21513   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
21515   SDValue InsertVal = N->getOperand(1);
21516   SDValue Vec = N->getOperand(0);
21517 
21518   auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21519   if (!SVN || !Vec.hasOneUse())
21520     return SDValue();
21521 
21522   ArrayRef<int> Mask = SVN->getMask();
21523   SDValue X = Vec.getOperand(0);
21524   SDValue Y = Vec.getOperand(1);
21525 
21526   SmallVector<int, 16> NewMask(Mask);
21527   if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21528     SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21529         Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21530     if (LegalShuffle)
21531       return LegalShuffle;
21532   }
21533 
21534   return SDValue();
21535 }
21536 
21537 // Convert a disguised subvector insertion into a shuffle:
21538 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
21539 // bitcast(shuffle (bitcast V), (extended X), Mask)
21540 // Note: We do not use an insert_subvector node because that requires a
21541 // legal subvector type.
21542 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
21543   assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
21545   SDValue InsertVal = N->getOperand(1);
21546 
21547   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21548       !InsertVal.getOperand(0).getValueType().isVector())
21549     return SDValue();
21550 
21551   SDValue SubVec = InsertVal.getOperand(0);
21552   SDValue DestVec = N->getOperand(0);
21553   EVT SubVecVT = SubVec.getValueType();
21554   EVT VT = DestVec.getValueType();
21555   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source has only a single vector element, the cost of creating
  // the wide vector and shuffling is likely to exceed the cost of an
  // insert_vector_elt.
21558   if (NumSrcElts == 1)
21559     return SDValue();
21560   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21561   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21562 
21563   // Step 1: Create a shuffle mask that implements this insert operation. The
21564   // vector that we are inserting into will be operand 0 of the shuffle, so
21565   // those elements are just 'i'. The inserted subvector is in the first
21566   // positions of operand 1 of the shuffle. Example:
21567   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21568   SmallVector<int, 16> Mask(NumMaskVals);
21569   for (unsigned i = 0; i != NumMaskVals; ++i) {
21570     if (i / NumSrcElts == InsIndex)
21571       Mask[i] = (i % NumSrcElts) + NumMaskVals;
21572     else
21573       Mask[i] = i;
21574   }
21575 
21576   // Bail out if the target can not handle the shuffle we want to create.
21577   EVT SubVecEltVT = SubVecVT.getVectorElementType();
21578   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21579   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21580     return SDValue();
21581 
21582   // Step 2: Create a wide vector from the inserted source vector by appending
21583   // undefined elements. This is the same size as our destination vector.
21584   SDLoc DL(N);
21585   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21586   ConcatOps[0] = SubVec;
21587   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21588 
21589   // Step 3: Shuffle in the padded subvector.
21590   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21591   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21592   AddToWorklist(PaddedSubV.getNode());
21593   AddToWorklist(DestVecBC.getNode());
21594   AddToWorklist(Shuf.getNode());
21595   return DAG.getBitcast(VT, Shuf);
21596 }
21597 
// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
// possible and the new load will be fast. We use more loads but fewer
// shuffles and inserts.
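// For example (illustrative, InsIndex == 0, i32 elements):
//   insert (shuffle (load <4 x i32> p), <u,0,1,2>), (load p-4), 0
// becomes a single <4 x i32> load from p-4.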
21601 SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21602   EVT VT = N->getValueType(0);
21603 
  // InsIndex is expected to be the first or last lane.
21605   if (!VT.isFixedLengthVector() ||
21606       (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21607     return SDValue();
21608 
21609   // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21610   // depending on the InsIndex.
21611   auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21612   SDValue Scalar = N->getOperand(1);
21613   if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21614         return InsIndex == P.index() || P.value() < 0 ||
21615                (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21616                (InsIndex == VT.getVectorNumElements() - 1 &&
21617                 P.value() == (int)P.index() + 1);
21618       }))
21619     return SDValue();
21620 
21621   // We optionally skip over an extend so long as both loads are extended in the
21622   // same way from the same type.
21623   unsigned Extend = 0;
21624   if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21625       Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21626       Scalar.getOpcode() == ISD::ANY_EXTEND) {
21627     Extend = Scalar.getOpcode();
21628     Scalar = Scalar.getOperand(0);
21629   }
21630 
21631   auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21632   if (!ScalarLoad)
21633     return SDValue();
21634 
21635   SDValue Vec = Shuffle->getOperand(0);
21636   if (Extend) {
21637     if (Vec.getOpcode() != Extend)
21638       return SDValue();
21639     Vec = Vec.getOperand(0);
21640   }
21641   auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21642   if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21643     return SDValue();
21644 
21645   int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21646   if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21647       !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21648       ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21649       ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21650     return SDValue();
21651 
  // Check the offset between the pointers, so that together they produce a
  // single contiguous load.
21654   if (InsIndex == 0) {
21655     if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21656                                             -1))
21657       return SDValue();
21658   } else {
21659     if (!DAG.areNonVolatileConsecutiveLoads(
21660             VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21661       return SDValue();
21662   }
21663 
21664   // And that the new unaligned load will be fast.
21665   unsigned IsFast = 0;
21666   Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21667   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21668                               Vec.getValueType(), VecLoad->getAddressSpace(),
21669                               NewAlign, VecLoad->getMemOperand()->getFlags(),
21670                               &IsFast) ||
21671       !IsFast)
21672     return SDValue();
21673 
21674   // Calculate the new Ptr and create the new load.
21675   SDLoc DL(N);
21676   SDValue Ptr = ScalarLoad->getBasePtr();
21677   if (InsIndex != 0)
21678     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21679                       DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21680   MachinePointerInfo PtrInfo =
21681       InsIndex == 0 ? ScalarLoad->getPointerInfo()
21682                     : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21683 
21684   SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21685                              ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21686   DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21687   DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21688   return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21689 }
21690 
21691 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21692   SDValue InVec = N->getOperand(0);
21693   SDValue InVal = N->getOperand(1);
21694   SDValue EltNo = N->getOperand(2);
21695   SDLoc DL(N);
21696 
21697   EVT VT = InVec.getValueType();
21698   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21699 
  // Inserting into an out-of-bounds element is undefined.
21701   if (IndexC && VT.isFixedLengthVector() &&
21702       IndexC->getZExtValue() >= VT.getVectorNumElements())
21703     return DAG.getUNDEF(VT);
21704 
21705   // Remove redundant insertions:
21706   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21707   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21708       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21709     return InVec;
21710 
21711   if (!IndexC) {
    // If this is a variable insert into an undef vector, it may be better to
    // splat:
21713     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21714     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21715       return DAG.getSplat(VT, DL, InVal);
21716     return SDValue();
21717   }
21718 
21719   if (VT.isScalableVector())
21720     return SDValue();
21721 
21722   unsigned NumElts = VT.getVectorNumElements();
21723 
21724   // We must know which element is being inserted for folds below here.
21725   unsigned Elt = IndexC->getZExtValue();
21726 
21727   // Handle <1 x ???> vector insertion special cases.
21728   if (NumElts == 1) {
21729     // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21730     if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21731         InVal.getOperand(0).getValueType() == VT &&
21732         isNullConstant(InVal.getOperand(1)))
21733       return InVal.getOperand(0);
21734   }
21735 
21736   // Canonicalize insert_vector_elt dag nodes.
21737   // Example:
21738   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21739   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21740   //
  // Do this only if the child insert_vector_elt node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
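  // For example:
  //   (insert_vector_elt (insert_vector_elt A, x, 3), y, 1)
  //   -> (insert_vector_elt (insert_vector_elt A, y, 1), x, 3)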
21743   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21744       && isa<ConstantSDNode>(InVec.getOperand(2))) {
21745     unsigned OtherElt = InVec.getConstantOperandVal(2);
21746     if (Elt < OtherElt) {
21747       // Swap nodes.
21748       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21749                                   InVec.getOperand(0), InVal, EltNo);
21750       AddToWorklist(NewOp.getNode());
21751       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21752                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21753     }
21754   }
21755 
21756   if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21757     return Shuf;
21758 
21759   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21760     return Shuf;
21761 
21762   if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21763     return Shuf;
21764 
21765   // Attempt to convert an insert_vector_elt chain into a legal build_vector.
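  // For example, for a v4i32 result:
  //   insert (insert (insert undef, a, 0), b, 1), c, 2
  //     --> build_vector a, b, c, undef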
21766   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
    // Single-element vector - we don't need to recurse.
21768     if (NumElts == 1)
21769       return DAG.getBuildVector(VT, DL, {InVal});
21770 
21771     // If we haven't already collected the element, insert into the op list.
21772     EVT MaxEltVT = InVal.getValueType();
21773     auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21774                                 unsigned Idx) {
21775       if (!Ops[Idx]) {
21776         Ops[Idx] = Elt;
21777         if (VT.isInteger()) {
21778           EVT EltVT = Elt.getValueType();
21779           MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21780         }
21781       }
21782     };
21783 
21784     // Ensure all the operands are the same value type, fill any missing
21785     // operands with UNDEF and create the BUILD_VECTOR.
21786     auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21787       assert(Ops.size() == NumElts && "Unexpected vector size");
21788       for (SDValue &Op : Ops) {
21789         if (Op)
21790           Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21791         else
21792           Op = DAG.getUNDEF(MaxEltVT);
21793       }
21794       return DAG.getBuildVector(VT, DL, Ops);
21795     };
21796 
21797     SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21798     Ops[Elt] = InVal;
21799 
    // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
21801     for (SDValue CurVec = InVec; CurVec;) {
21802       // UNDEF - build new BUILD_VECTOR from already inserted operands.
21803       if (CurVec.isUndef())
21804         return CanonicalizeBuildVector(Ops);
21805 
21806       // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21807       if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21808         for (unsigned I = 0; I != NumElts; ++I)
21809           AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21810         return CanonicalizeBuildVector(Ops);
21811       }
21812 
21813       // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21814       if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21815         AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21816         return CanonicalizeBuildVector(Ops);
21817       }
21818 
21819       // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21820       if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21821         if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21822           if (CurIdx->getAPIntValue().ult(NumElts)) {
21823             unsigned Idx = CurIdx->getZExtValue();
21824             AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21825 
21826             // Found entire BUILD_VECTOR.
21827             if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21828               return CanonicalizeBuildVector(Ops);
21829 
21830             CurVec = CurVec->getOperand(0);
21831             continue;
21832           }
21833 
21834       // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21835       // update the shuffle mask (and second operand if we started with unary
21836       // shuffle) and create a new legal shuffle.
21837       if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21838         auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21839         SDValue LHS = SVN->getOperand(0);
21840         SDValue RHS = SVN->getOperand(1);
21841         SmallVector<int, 16> Mask(SVN->getMask());
21842         bool Merged = true;
21843         for (auto I : enumerate(Ops)) {
21844           SDValue &Op = I.value();
21845           if (Op) {
21846             SmallVector<int, 16> NewMask;
21847             if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21848               Merged = false;
21849               break;
21850             }
21851             Mask = std::move(NewMask);
21852           }
21853         }
21854         if (Merged)
21855           if (SDValue NewShuffle =
21856                   TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21857             return NewShuffle;
21858       }
21859 
21860       // If all insertions are zero value, try to convert to AND mask.
21861       // TODO: Do this for -1 with OR mask?
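      // For example, if zeros were inserted into lanes 1 and 3 of a v4i32:
      //   --> (and CurVec, (build_vector -1, 0, -1, 0))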
21862       if (!LegalOperations && llvm::isNullConstant(InVal) &&
21863           all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21864           count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21865         SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21866         SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21867         SmallVector<SDValue, 8> Mask(NumElts);
21868         for (unsigned I = 0; I != NumElts; ++I)
21869           Mask[I] = Ops[I] ? Zero : AllOnes;
21870         return DAG.getNode(ISD::AND, DL, VT, CurVec,
21871                            DAG.getBuildVector(VT, DL, Mask));
21872       }
21873 
21874       // Failed to find a match in the chain - bail.
21875       break;
21876     }
21877 
21878     // See if we can fill in the missing constant elements as zeros.
21879     // TODO: Should we do this for any constant?
21880     APInt DemandedZeroElts = APInt::getZero(NumElts);
21881     for (unsigned I = 0; I != NumElts; ++I)
21882       if (!Ops[I])
21883         DemandedZeroElts.setBit(I);
21884 
21885     if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
21886       SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
21887                                     : DAG.getConstantFP(0, DL, MaxEltVT);
21888       for (unsigned I = 0; I != NumElts; ++I)
21889         if (!Ops[I])
21890           Ops[I] = Zero;
21891 
21892       return CanonicalizeBuildVector(Ops);
21893     }
21894   }
21895 
21896   return SDValue();
21897 }
21898 
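/// Replace (extract_vector_elt (load $addr), idx) with a narrow scalar load
/// from $addr plus the byte offset of the extracted element, keeping the new
/// load's memory ordering equivalent to the original vector load's.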
21899 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
21900                                                   SDValue EltNo,
21901                                                   LoadSDNode *OriginalLoad) {
21902   assert(OriginalLoad->isSimple());
21903 
21904   EVT ResultVT = EVE->getValueType(0);
21905   EVT VecEltVT = InVecVT.getVectorElementType();
21906 
21907   // If the vector element type is not a multiple of a byte then we are unable
21908   // to correctly compute an address to load only the extracted element as a
21909   // scalar.
21910   if (!VecEltVT.isByteSized())
21911     return SDValue();
21912 
21913   ISD::LoadExtType ExtTy =
21914       ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD;
21915   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
21916       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
21917     return SDValue();
21918 
21919   Align Alignment = OriginalLoad->getAlign();
21920   MachinePointerInfo MPI;
21921   SDLoc DL(EVE);
21922   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
21923     int Elt = ConstEltNo->getZExtValue();
21924     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
21925     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
21926     Alignment = commonAlignment(Alignment, PtrOff);
21927   } else {
21928     // Discard the pointer info except the address space because the memory
21929     // operand can't represent this new access since the offset is variable.
21930     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
21931     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
21932   }
21933 
21934   unsigned IsFast = 0;
21935   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
21936                               OriginalLoad->getAddressSpace(), Alignment,
21937                               OriginalLoad->getMemOperand()->getFlags(),
21938                               &IsFast) ||
21939       !IsFast)
21940     return SDValue();
21941 
21942   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
21943                                                InVecVT, EltNo);
21944 
21945   // We are replacing a vector load with a scalar load. The new load must have
21946   // identical memory op ordering to the original.
21947   SDValue Load;
21948   if (ResultVT.bitsGT(VecEltVT)) {
21949     // If the result type of vextract is wider than the load, then issue an
21950     // extending load instead.
21951     ISD::LoadExtType ExtType =
21952         TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
21953                                                               : ISD::EXTLOAD;
21954     Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
21955                           NewPtr, MPI, VecEltVT, Alignment,
21956                           OriginalLoad->getMemOperand()->getFlags(),
21957                           OriginalLoad->getAAInfo());
21958     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
21959   } else {
21960     // The result type is narrower or the same width as the vector element
21961     Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
21962                        Alignment, OriginalLoad->getMemOperand()->getFlags(),
21963                        OriginalLoad->getAAInfo());
21964     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
21965     if (ResultVT.bitsLT(VecEltVT))
21966       Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
21967     else
21968       Load = DAG.getBitcast(ResultVT, Load);
21969   }
21970   ++OpsNarrowed;
21971   return Load;
21972 }
21973 
21974 /// Transform a vector binary operation into a scalar binary operation by moving
21975 /// the math/logic after an extract element of a vector.
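/// For example:
///   (extract_vector_elt (add X, (build_vector 1, 2, 3, 4)), 2)
///     --> (add (extract_vector_elt X, 2), 3)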
21976 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
21977                                        bool LegalOperations) {
21978   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21979   SDValue Vec = ExtElt->getOperand(0);
21980   SDValue Index = ExtElt->getOperand(1);
21981   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
21982   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
21983       Vec->getNumValues() != 1)
21984     return SDValue();
21985 
21986   // Targets may want to avoid this to prevent an expensive register transfer.
21987   if (!TLI.shouldScalarizeBinop(Vec))
21988     return SDValue();
21989 
21990   // Extracting an element of a vector constant is constant-folded, so this
21991   // transform is just replacing a vector op with a scalar op while moving the
21992   // extract.
21993   SDValue Op0 = Vec.getOperand(0);
21994   SDValue Op1 = Vec.getOperand(1);
21995   APInt SplatVal;
21996   if (isAnyConstantBuildVector(Op0, true) ||
21997       ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
21998       isAnyConstantBuildVector(Op1, true) ||
21999       ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22000     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22001     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22002     SDLoc DL(ExtElt);
22003     EVT VT = ExtElt->getValueType(0);
22004     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22005     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22006     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22007   }
22008 
22009   return SDValue();
22010 }
22011 
// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
// recursively analyse all of its users, and try to model them as bit sequence
// extractions. If all of them agree on the new, narrower element type, and all
// of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that new element
// type, do so now.
22017 // This is mainly useful to recover from legalization that scalarized
22018 // the vector as wide elements, but tries to rebuild it with narrower elements.
22019 //
22020 // Some more nodes could be modelled if that helps cover interesting patterns.
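//
// For example (little-endian), where the truncated values feed
// BUILD_VECTOR's:
//   t = extract_vector_elt v2i64:x, 0
//   a = truncate t to i32
//   b = truncate (srl t, 32) to i32
// can be rebuilt as:
//   y = bitcast x to v4i32
//   a = extract_vector_elt y, 0
//   b = extract_vector_elt y, 1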
22021 bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22022     SDNode *N) {
22023   // We perform this optimization post type-legalization because
22024   // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may cause legalization cycles.
22026   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22027     return false;
22028 
22029   // TODO: Add support for big-endian.
22030   if (DAG.getDataLayout().isBigEndian())
22031     return false;
22032 
22033   SDValue VecOp = N->getOperand(0);
22034   EVT VecVT = VecOp.getValueType();
22035   assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22036 
22037   // We must start with a constant extraction index.
22038   auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22039   if (!IndexC)
22040     return false;
22041 
22042   assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22043          "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22044 
22045   // TODO: deal with the case of implicit anyext of the extraction.
22046   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22047   EVT ScalarVT = N->getValueType(0);
22048   if (VecVT.getScalarType() != ScalarVT)
22049     return false;
22050 
22051   // TODO: deal with the cases other than everything being integer-typed.
22052   if (!ScalarVT.isScalarInteger())
22053     return false;
22054 
22055   struct Entry {
22056     SDNode *Producer;
22057 
22058     // Which bits of VecOp does it contain?
22059     unsigned BitPos;
22060     int NumBits;
22061     // NOTE: the actual width of \p Producer may be wider than NumBits!
22062 
22063     Entry(Entry &&) = default;
22064     Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22065         : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22066 
22067     Entry() = delete;
22068     Entry(const Entry &) = delete;
22069     Entry &operator=(const Entry &) = delete;
22070     Entry &operator=(Entry &&) = delete;
22071   };
22072   SmallVector<Entry, 32> Worklist;
22073   SmallVector<Entry, 32> Leafs;
22074 
22075   // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22076   Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22077                         /*NumBits=*/VecEltBitWidth);
22078 
22079   while (!Worklist.empty()) {
22080     Entry E = Worklist.pop_back_val();
    // Bail if the node's bit range is empty or does not lie within VecOp.
22082     if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22083           E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
      return false; // Let the other combines clean this up first.
22085     // Did we fail to model any of the users of the Producer?
22086     bool ProducerIsLeaf = false;
22087     // Look at each user of this Producer.
22088     for (SDNode *User : E.Producer->uses()) {
22089       switch (User->getOpcode()) {
22090       // TODO: support ISD::BITCAST
22091       // TODO: support ISD::ANY_EXTEND
22092       // TODO: support ISD::ZERO_EXTEND
22093       // TODO: support ISD::SIGN_EXTEND
22094       case ISD::TRUNCATE:
        // Truncation keeps the position but extracts fewer bits.
22096         Worklist.emplace_back(User, E.BitPos,
22097                               /*NumBits=*/User->getValueSizeInBits(0));
22098         break;
22099       // TODO: support ISD::SRA
22100       // TODO: support ISD::SHL
22101       case ISD::SRL:
22102         // We should be shifting the Producer by a constant amount.
22103         if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22104             User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22105           // Logical right-shift means that we start extraction later,
22106           // but stop it at the same position we did previously.
22107           unsigned ShAmt = ShAmtC->getZExtValue();
22108           Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22109           break;
22110         }
22111         [[fallthrough]];
22112       default:
        // We cannot model this user of the Producer, which means the current
        // Producer will remain an ISD::EXTRACT_VECTOR_ELT.
        ProducerIsLeaf = true;
        // Profitability check: all users that we cannot model
        //                      must be ISD::BUILD_VECTOR's.
22118         if (User->getOpcode() != ISD::BUILD_VECTOR)
22119           return false;
22120         break;
22121       }
22122     }
22123     if (ProducerIsLeaf)
22124       Leafs.emplace_back(std::move(E));
22125   }
22126 
22127   unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22128 
  // If we are still at the same element granularity, give up.
22130   if (NewVecEltBitWidth == VecEltBitWidth)
22131     return false;
22132 
22133   // The vector width must be a multiple of the new element width.
22134   if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22135     return false;
22136 
  // All leafs must agree on the new element width.
  // All leafs must not expect any "padding" bits on top of that width.
  // All leafs must start extraction from a multiple of that width.
22140   if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22141         return (unsigned)E.NumBits == NewVecEltBitWidth &&
22142                E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22143                E.BitPos % NewVecEltBitWidth == 0;
22144       }))
22145     return false;
22146 
22147   EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22148   EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22149                                   VecVT.getSizeInBits() / NewVecEltBitWidth);
22150 
22151   if (LegalTypes &&
22152       !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22153     return false;
22154 
22155   if (LegalOperations &&
22156       !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22157         TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22158     return false;
22159 
22160   SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22161   for (const Entry &E : Leafs) {
22162     SDLoc DL(E.Producer);
22163     unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22164     assert(NewIndex < NewVecVT.getVectorNumElements() &&
22165            "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22166     SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22167                             DAG.getVectorIdxConstant(NewIndex, DL));
22168     CombineTo(E.Producer, V);
22169   }
22170 
22171   return true;
22172 }
22173 
22174 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22175   SDValue VecOp = N->getOperand(0);
22176   SDValue Index = N->getOperand(1);
22177   EVT ScalarVT = N->getValueType(0);
22178   EVT VecVT = VecOp.getValueType();
22179   if (VecOp.isUndef())
22180     return DAG.getUNDEF(ScalarVT);
22181 
  // (extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22183   //
22184   // This only really matters if the index is non-constant since other combines
22185   // on the constant elements already work.
22186   SDLoc DL(N);
22187   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22188       Index == VecOp.getOperand(2)) {
22189     SDValue Elt = VecOp.getOperand(1);
22190     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22191   }
22192 
  // (vextract (scalar_to_vector val), 0) -> val
22194   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22195     // Only 0'th element of SCALAR_TO_VECTOR is defined.
22196     if (DAG.isKnownNeverZero(Index))
22197       return DAG.getUNDEF(ScalarVT);
22198 
    // Check if the result type doesn't match the inserted element type.
    // The inserted element and extracted element may have mismatched bitwidth.
    // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted
    // element.
22202     SDValue InOp = VecOp.getOperand(0);
22203     if (InOp.getValueType() != ScalarVT) {
22204       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22205       if (InOp.getValueType().bitsGT(ScalarVT))
22206         return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22207       return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22208     }
22209     return InOp;
22210   }
22211 
22212   // extract_vector_elt of out-of-bounds element -> UNDEF
22213   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22214   if (IndexC && VecVT.isFixedLengthVector() &&
22215       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22216     return DAG.getUNDEF(ScalarVT);
22217 
22218   // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
22219   if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
22220     return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
22221                                      VecOp.getOperand(0), Index));
22222   }
22223 
22224   // extract_vector_elt (build_vector x, y), 1 -> y
22225   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22226        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22227       TLI.isTypeLegal(VecVT)) {
22228     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22229             VecVT.isFixedLengthVector()) &&
22230            "BUILD_VECTOR used for scalable vectors");
22231     unsigned IndexVal =
22232         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22233     SDValue Elt = VecOp.getOperand(IndexVal);
22234     EVT InEltVT = Elt.getValueType();
22235 
22236     if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22237         isNullConstant(Elt)) {
22238       // Sometimes build_vector's scalar input types do not match result type.
22239       if (ScalarVT == InEltVT)
22240         return Elt;
22241 
      // TODO: It may be useful to truncate here (if the truncate is free) when
      // the build_vector implicitly converts.
22244     }
22245   }
22246 
22247   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22248     return BO;
22249 
22250   if (VecVT.isScalableVector())
22251     return SDValue();
22252 
22253   // All the code from this point onwards assumes fixed width vectors, but it's
22254   // possible that some of the combinations could be made to work for scalable
22255   // vectors too.
22256   unsigned NumElts = VecVT.getVectorNumElements();
22257   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22258 
  // See if the extracted element is constant, in which case fold it if it's
  // a legal fp immediate.
22261   if (IndexC && ScalarVT.isFloatingPoint()) {
22262     APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22263     KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22264     if (KnownElt.isConstant()) {
22265       APFloat CstFP =
22266           APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22267       if (TLI.isFPImmLegal(CstFP, ScalarVT))
22268         return DAG.getConstantFP(CstFP, DL, ScalarVT);
22269     }
22270   }
22271 
22272   // TODO: These transforms should not require the 'hasOneUse' restriction, but
22273   // there are regressions on multiple targets without it. We can end up with a
22274   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22275   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22276       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
22278     bool IsLE = DAG.getDataLayout().isLittleEndian();
22279     unsigned ExtractIndex = IndexC->getZExtValue();
22280     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22281     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22282     SDValue BCSrc = VecOp.getOperand(0);
22283     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22284       return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22285 
22286     if (LegalTypes && BCSrc.getValueType().isInteger() &&
22287         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22288       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22289       // trunc i64 X to i32
22290       SDValue X = BCSrc.getOperand(0);
22291       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22292              "Extract element and scalar to vector can't change element type "
22293              "from FP to integer.");
22294       unsigned XBitWidth = X.getValueSizeInBits();
22295       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22296 
22297       // An extract element return value type can be wider than its vector
22298       // operand element type. In that case, the high bits are undefined, so
22299       // it's possible that we may need to extend rather than truncate.
22300       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22301         assert(XBitWidth % VecEltBitWidth == 0 &&
22302                "Scalar bitwidth must be a multiple of vector element bitwidth");
22303         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22304       }
22305     }
22306   }
22307 
  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example, on AVX, extracting elements from a wide vector may
  // not be possible without using extract_subvector. However, if we can find
  // an underlying scalar value, then we can always use that.
22314   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22315     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22316     // Find the new index to extract from.
22317     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22318 
22319     // Extracting an undef index is undef.
22320     if (OrigElt == -1)
22321       return DAG.getUNDEF(ScalarVT);
22322 
22323     // Select the right vector half to extract from.
22324     SDValue SVInVec;
22325     if (OrigElt < (int)NumElts) {
22326       SVInVec = VecOp.getOperand(0);
22327     } else {
22328       SVInVec = VecOp.getOperand(1);
22329       OrigElt -= NumElts;
22330     }
22331 
22332     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22333       SDValue InOp = SVInVec.getOperand(OrigElt);
22334       if (InOp.getValueType() != ScalarVT) {
22335         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22336         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22337       }
22338 
22339       return InOp;
22340     }
22341 
22342     // FIXME: We should handle recursing on other vector shuffles and
22343     // scalar_to_vector here as well.
22344 
22345     if (!LegalOperations ||
22346         // FIXME: Should really be just isOperationLegalOrCustom.
22347         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22348         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
22349       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22350                          DAG.getVectorIdxConstant(OrigElt, DL));
22351     }
22352   }
22353 
22354   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22355   // simplify it based on the (valid) extraction indices.
22356   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22357         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22358                Use->getOperand(0) == VecOp &&
22359                isa<ConstantSDNode>(Use->getOperand(1));
22360       })) {
22361     APInt DemandedElts = APInt::getZero(NumElts);
22362     for (SDNode *Use : VecOp->uses()) {
22363       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22364       if (CstElt->getAPIntValue().ult(NumElts))
22365         DemandedElts.setBit(CstElt->getZExtValue());
22366     }
22367     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22368       // We simplified the vector operand of this extract element. If this
22369       // extract is not dead, visit it again so it is folded properly.
22370       if (N->getOpcode() != ISD::DELETED_NODE)
22371         AddToWorklist(N);
22372       return SDValue(N, 0);
22373     }
22374     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22375     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22376       // We simplified the vector operand of this extract element. If this
22377       // extract is not dead, visit it again so it is folded properly.
22378       if (N->getOpcode() != ISD::DELETED_NODE)
22379         AddToWorklist(N);
22380       return SDValue(N, 0);
22381     }
22382   }
22383 
22384   if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22385     return SDValue(N, 0);
22386 
22387   // Everything under here is trying to match an extract of a loaded value.
22388   // If the result of load has to be truncated, then it's not necessarily
22389   // profitable.
22390   bool BCNumEltsChanged = false;
22391   EVT ExtVT = VecVT.getVectorElementType();
22392   EVT LVT = ExtVT;
22393   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22394     return SDValue();
22395 
22396   if (VecOp.getOpcode() == ISD::BITCAST) {
22397     // Don't duplicate a load with other uses.
22398     if (!VecOp.hasOneUse())
22399       return SDValue();
22400 
22401     EVT BCVT = VecOp.getOperand(0).getValueType();
22402     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22403       return SDValue();
22404     if (NumElts != BCVT.getVectorNumElements())
22405       BCNumEltsChanged = true;
22406     VecOp = VecOp.getOperand(0);
22407     ExtVT = BCVT.getVectorElementType();
22408   }
22409 
22410   // extract (vector load $addr), i --> load $addr + i * size
22411   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22412       ISD::isNormalLoad(VecOp.getNode()) &&
22413       !Index->hasPredecessor(VecOp.getNode())) {
22414     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22415     if (VecLoad && VecLoad->isSimple())
22416       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22417   }
22418 
22419   // Perform only after legalization to ensure build_vector / vector_shuffle
22420   // optimizations have already been done.
22421   if (!LegalOperations || !IndexC)
22422     return SDValue();
22423 
22424   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22425   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22426   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22427   int Elt = IndexC->getZExtValue();
22428   LoadSDNode *LN0 = nullptr;
22429   if (ISD::isNormalLoad(VecOp.getNode())) {
22430     LN0 = cast<LoadSDNode>(VecOp);
22431   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22432              VecOp.getOperand(0).getValueType() == ExtVT &&
22433              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22434     // Don't duplicate a load with other uses.
22435     if (!VecOp.hasOneUse())
22436       return SDValue();
22437 
22438     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22439   }
22440   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22441     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22442     // =>
22443     // (load $addr+1*size)
22444 
22445     // Don't duplicate a load with other uses.
22446     if (!VecOp.hasOneUse())
22447       return SDValue();
22448 
22449     // If the bit convert changed the number of elements, it is unsafe
22450     // to examine the mask.
22451     if (BCNumEltsChanged)
22452       return SDValue();
22453 
    // Select the input vector, guarding against an out-of-range extract index.
22455     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22456     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22457 
22458     if (VecOp.getOpcode() == ISD::BITCAST) {
22459       // Don't duplicate a load with other uses.
22460       if (!VecOp.hasOneUse())
22461         return SDValue();
22462 
22463       VecOp = VecOp.getOperand(0);
22464     }
22465     if (ISD::isNormalLoad(VecOp.getNode())) {
22466       LN0 = cast<LoadSDNode>(VecOp);
22467       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22468       Index = DAG.getConstant(Elt, DL, Index.getValueType());
22469     }
22470   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22471              VecVT.getVectorElementType() == ScalarVT &&
22472              (!LegalTypes ||
22473               TLI.isTypeLegal(
22474                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22475     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22476     //      -> extract_vector_elt a, 0
22477     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22478     //      -> extract_vector_elt a, 1
22479     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22480     //      -> extract_vector_elt b, 0
22481     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22482     //      -> extract_vector_elt b, 1
22483     SDLoc SL(N);
22484     EVT ConcatVT = VecOp.getOperand(0).getValueType();
22485     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22486     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
22487                                      Index.getValueType());
22488 
22489     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22490     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
22491                               ConcatVT.getVectorElementType(),
22492                               ConcatOp, NewIdx);
22493     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
22494   }
22495 
  // Make sure we found a simple (non-volatile, non-atomic) load and that the
  // extractelement is its only use.
  if (!LN0 || !LN0->hasNUsesOfValue(1, 0) || !LN0->isSimple())
22499     return SDValue();
22500 
22501   // If Idx was -1 above, Elt is going to be -1, so just return undef.
22502   if (Elt == -1)
22503     return DAG.getUNDEF(LVT);
22504 
22505   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22506 }
22507 
22508 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
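// For example (little-endian):
//   (v2i32 build_vector (i32 (zext i16:a)), (i32 (zext i16:b)))
//     --> (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))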
22509 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22510   // We perform this optimization post type-legalization because
22511   // the type-legalizer often scalarizes integer-promoted vectors.
22512   // Performing this optimization before may create bit-casts which
22513   // will be type-legalized to complex code sequences.
22514   // We perform this optimization only before the operation legalizer because we
22515   // may introduce illegal operations.
22516   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22517     return SDValue();
22518 
22519   unsigned NumInScalars = N->getNumOperands();
22520   SDLoc DL(N);
22521   EVT VT = N->getValueType(0);
22522 
22523   // Check to see if this is a BUILD_VECTOR of a bunch of values
22524   // which come from any_extend or zero_extend nodes. If so, we can create
22525   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22526   // optimizations. We do not handle sign-extend because we can't fill the sign
22527   // using shuffles.
22528   EVT SourceType = MVT::Other;
22529   bool AllAnyExt = true;
22530 
22531   for (unsigned i = 0; i != NumInScalars; ++i) {
22532     SDValue In = N->getOperand(i);
22533     // Ignore undef inputs.
22534     if (In.isUndef()) continue;
22535 
22536     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
22537     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22538 
22539     // Abort if the element is not an extension.
22540     if (!ZeroExt && !AnyExt) {
22541       SourceType = MVT::Other;
22542       break;
22543     }
22544 
22545     // The input is a ZeroExt or AnyExt. Check the original type.
22546     EVT InTy = In.getOperand(0).getValueType();
22547 
22548     // Check that all of the widened source types are the same.
22549     if (SourceType == MVT::Other)
22550       // First time.
22551       SourceType = InTy;
22552     else if (InTy != SourceType) {
      // Multiple input types. Abort.
22554       SourceType = MVT::Other;
22555       break;
22556     }
22557 
22558     // Check if all of the extends are ANY_EXTENDs.
22559     AllAnyExt &= AnyExt;
22560   }
22561 
22562   // In order to have valid types, all of the inputs must be extended from the
22563   // same source type and all of the inputs must be any or zero extend.
22564   // Scalar sizes must be a power of two.
22565   EVT OutScalarTy = VT.getScalarType();
22566   bool ValidTypes =
22567       SourceType != MVT::Other &&
22568       llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22569       llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22570 
22571   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22572   // turn into a single shuffle instruction.
22573   if (!ValidTypes)
22574     return SDValue();
22575 
22576   // If we already have a splat buildvector, then don't fold it if it means
22577   // introducing zeros.
22578   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22579     return SDValue();
22580 
22581   bool isLE = DAG.getDataLayout().isLittleEndian();
22582   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22583   assert(ElemRatio > 1 && "Invalid element size ratio");
22584   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22585                                DAG.getConstant(0, DL, SourceType);
22586 
22587   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22588   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22589 
22590   // Populate the new build_vector
22591   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22592     SDValue Cast = N->getOperand(i);
22593     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22594             Cast.getOpcode() == ISD::ZERO_EXTEND ||
22595             Cast.isUndef()) && "Invalid cast opcode");
22596     SDValue In;
22597     if (Cast.isUndef())
22598       In = DAG.getUNDEF(SourceType);
22599     else
22600       In = Cast->getOperand(0);
22601     unsigned Index = isLE ? (i * ElemRatio) :
22602                             (i * ElemRatio + (ElemRatio - 1));
22603 
22604     assert(Index < Ops.size() && "Invalid index");
22605     Ops[Index] = In;
22606   }
22607 
22608   // The type of the new BUILD_VECTOR node.
22609   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22610   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22611          "Invalid vector size");
22612   // Check if the new vector type is legal.
22613   if (!isTypeLegal(VecVT) ||
22614       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22615        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22616     return SDValue();
22617 
22618   // Make the new BUILD_VECTOR.
22619   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22620 
22621   // The new BUILD_VECTOR node has the potential to be further optimized.
22622   AddToWorklist(BV.getNode());
22623   // Bitcast to the desired type.
22624   return DAG.getBitcast(VT, BV);
22625 }
22626 
22627 // Simplify (build_vec (trunc $1)
22628 //                     (trunc (srl $1 half-width))
22629 //                     (trunc (srl $1 (2 * half-width))))
22630 // to (bitcast $1)
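// For example (little-endian):
//   (v4i16 build_vector (trunc i64:x), (trunc (srl x, 16)),
//                       (trunc (srl x, 32)), (trunc (srl x, 48)))
//     --> (v4i16 bitcast i64:x)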
22631 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22632   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22633 
22634   EVT VT = N->getValueType(0);
22635 
22636   // Don't run this before LegalizeTypes if VT is legal.
22637   // Targets may have other preferences.
22638   if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22639     return SDValue();
22640 
  // Only handle little-endian targets.
22642   if (!DAG.getDataLayout().isLittleEndian())
22643     return SDValue();
22644 
22645   SDLoc DL(N);
22646   EVT OutScalarTy = VT.getScalarType();
22647   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22648 
  // Only handle power-of-two types so that the bitcast works well.
22650   if (!isPowerOf2_64(ScalarTypeBitsize))
22651     return SDValue();
22652 
22653   unsigned NumInScalars = N->getNumOperands();
22654 
22655   // Look through bitcasts
22656   auto PeekThroughBitcast = [](SDValue Op) {
22657     if (Op.getOpcode() == ISD::BITCAST)
22658       return Op.getOperand(0);
22659     return Op;
22660   };
22661 
22662   // The source value where all the parts are extracted.
22663   SDValue Src;
22664   for (unsigned i = 0; i != NumInScalars; ++i) {
22665     SDValue In = PeekThroughBitcast(N->getOperand(i));
22666     // Ignore undef inputs.
22667     if (In.isUndef()) continue;
22668 
22669     if (In.getOpcode() != ISD::TRUNCATE)
22670       return SDValue();
22671 
22672     In = PeekThroughBitcast(In.getOperand(0));
22673 
22674     if (In.getOpcode() != ISD::SRL) {
      // For now, handle only build_vectors without shuffling; shifts could be
      // handled here in the future.
22677       if (i != 0)
22678         return SDValue();
22679 
22680       Src = In;
22681     } else {
22682       // In is SRL
      SDValue Part = PeekThroughBitcast(In.getOperand(0));

      if (!Src) {
        Src = Part;
      } else if (Src != Part) {
        // Vector parts do not stem from the same variable.
        return SDValue();
      }
22691 
22692       SDValue ShiftAmtVal = In.getOperand(1);
22693       if (!isa<ConstantSDNode>(ShiftAmtVal))
22694         return SDValue();
22695 
22696       uint64_t ShiftAmt = In.getConstantOperandVal(1);
22697 
22698       // The extracted value is not extracted at the right position
22699       if (ShiftAmt != i * ScalarTypeBitsize)
22700         return SDValue();
22701     }
22702   }
22703 
22704   // Only cast if the size is the same
22705   if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22706     return SDValue();
22707 
22708   return DAG.getBitcast(VT, Src);
22709 }
22710 
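/// Create a shuffle that produces the build vector N from the two source
/// vectors referenced by LeftIdx and LeftIdx + 1 in VectorMask. The inputs
/// are concatenated, padded or split as needed to make the types match, and
/// if the shuffle had to be done at a wider type than N, the low subvector
/// of the result is extracted.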
22711 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22712                                            ArrayRef<int> VectorMask,
22713                                            SDValue VecIn1, SDValue VecIn2,
22714                                            unsigned LeftIdx, bool DidSplitVec) {
22715   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22716 
22717   EVT VT = N->getValueType(0);
22718   EVT InVT1 = VecIn1.getValueType();
22719   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22720 
22721   unsigned NumElems = VT.getVectorNumElements();
22722   unsigned ShuffleNumElems = NumElems;
22723 
22724   // If we artificially split a vector in two already, then the offsets in the
22725   // operands will all be based off of VecIn1, even those in VecIn2.
22726   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22727 
22728   uint64_t VTSize = VT.getFixedSizeInBits();
22729   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22730   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22731 
22732   assert(InVT2Size <= InVT1Size &&
22733          "Inputs must be sorted to be in non-increasing vector size order.");
22734 
22735   // We can't generate a shuffle node with mismatched input and output types.
22736   // Try to make the types match the type of the output.
22737   if (InVT1 != VT || InVT2 != VT) {
22738     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22739       // If the output vector length is a multiple of both input lengths,
22740       // we can concatenate them and pad the rest with undefs.
22741       unsigned NumConcats = VTSize / InVT1Size;
22742       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22743       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22744       ConcatOps[0] = VecIn1;
22745       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22746       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22747       VecIn2 = SDValue();
22748     } else if (InVT1Size == VTSize * 2) {
22749       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22750         return SDValue();
22751 
22752       if (!VecIn2.getNode()) {
22753         // If we only have one input vector, and it's twice the size of the
22754         // output, split it in two.
22755         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22756                              DAG.getVectorIdxConstant(NumElems, DL));
22757         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22758         // Since we now have shorter input vectors, adjust the offset of the
22759         // second vector's start.
22760         Vec2Offset = NumElems;
22761       } else {
22762         assert(InVT2Size <= InVT1Size &&
22763                "Second input is not going to be larger than the first one.");
22764 
22765         // VecIn1 is wider than the output, and we have another, possibly
22766         // smaller input. Pad the smaller input with undefs, shuffle at the
22767         // input vector width, and extract the output.
22768         // The shuffle type is different than VT, so check legality again.
22769         if (LegalOperations &&
22770             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22771           return SDValue();
22772 
22773         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22774         // lower it back into a BUILD_VECTOR. So if the inserted type is
22775         // illegal, don't even try.
22776         if (InVT1 != InVT2) {
22777           if (!TLI.isTypeLegal(InVT2))
22778             return SDValue();
22779           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22780                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22781         }
22782         ShuffleNumElems = NumElems * 2;
22783       }
22784     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22785       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22786       ConcatOps[0] = VecIn2;
22787       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22788     } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22789       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22790           !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22791         return SDValue();
      // If the destination vector has two or fewer elements, using a shuffle
      // and an extract from larger registers will cost even more.
22794       if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22795         return SDValue();
22796       assert(InVT2Size <= InVT1Size &&
22797              "Second input is not going to be larger than the first one.");
22798 
22799       // VecIn1 is wider than the output, and we have another, possibly
22800       // smaller input. Pad the smaller input with undefs, shuffle at the
22801       // input vector width, and extract the output.
22802       // The shuffle type is different than VT, so check legality again.
22803       if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22804         return SDValue();
22805 
22806       if (InVT1 != InVT2) {
22807         VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22808                              DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22809       }
22810       ShuffleNumElems = InVT1Size / VTSize * NumElems;
22811     } else {
22812       // TODO: Support cases where the length mismatch isn't exactly by a
22813       // factor of 2.
22814       // TODO: Move this check upwards, so that if we have bad type
22815       // mismatches, we don't create any DAG nodes.
22816       return SDValue();
22817     }
22818   }
22819 
22820   // Initialize mask to undef.
22821   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22822 
22823   // Only need to run up to the number of elements actually used, not the
22824   // total number of elements in the shuffle - if we are shuffling a wider
22825   // vector, the high lanes should be set to undef.
22826   for (unsigned i = 0; i != NumElems; ++i) {
22827     if (VectorMask[i] <= 0)
22828       continue;
22829 
22830     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22831     if (VectorMask[i] == (int)LeftIdx) {
22832       Mask[i] = ExtIndex;
22833     } else if (VectorMask[i] == (int)LeftIdx + 1) {
22834       Mask[i] = Vec2Offset + ExtIndex;
22835     }
22836   }
22837 
  // The types of the input vectors may have changed above.
22839   InVT1 = VecIn1.getValueType();
22840 
  // If we already have a VecIn2, it should have the same type as VecIn1.
  // If we don't, get an undef vector of the appropriate type.
22843   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22844   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22845 
22846   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22847   if (ShuffleNumElems > NumElems)
22848     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22849 
22850   return Shuffle;
22851 }
22852 
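/// If a build vector is all undef except for one element that is a
/// zero-extended extract from a vector at a constant index, turn it into a
/// shuffle with a zero vector. For example (little-endian):
///   (v2i64 build_vector (i64 (zext (extractelt v4i32:V, 2))), undef)
///     --> (v2i64 bitcast (vector_shuffle<2,4,u,u> V, zero))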
22853 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22854   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22855 
22856   // First, determine where the build vector is not undef.
22857   // TODO: We could extend this to handle zero elements as well as undefs.
22858   int NumBVOps = BV->getNumOperands();
22859   int ZextElt = -1;
22860   for (int i = 0; i != NumBVOps; ++i) {
22861     SDValue Op = BV->getOperand(i);
22862     if (Op.isUndef())
22863       continue;
22864     if (ZextElt == -1)
22865       ZextElt = i;
22866     else
22867       return SDValue();
22868   }
22869   // Bail out if there's no non-undef element.
22870   if (ZextElt == -1)
22871     return SDValue();
22872 
22873   // The build vector contains some number of undef elements and exactly
22874   // one other element. That other element must be a zero-extended scalar
22875   // extracted from a vector at a constant index to turn this into a shuffle.
22876   // Also, require that the build vector does not implicitly truncate/extend
22877   // its elements.
22878   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22879   EVT VT = BV->getValueType(0);
22880   SDValue Zext = BV->getOperand(ZextElt);
22881   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22882       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22883       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22884       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22885     return SDValue();
22886 
  // The zero-extend's destination size must be a multiple of the source size,
  // and we must be building a vector of the same size as the source of the
  // extract element.
22889   SDValue Extract = Zext.getOperand(0);
22890   unsigned DestSize = Zext.getValueSizeInBits();
22891   unsigned SrcSize = Extract.getValueSizeInBits();
22892   if (DestSize % SrcSize != 0 ||
22893       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
22894     return SDValue();
22895 
22896   // Create a shuffle mask that will combine the extracted element with zeros
22897   // and undefs.
22898   int ZextRatio = DestSize / SrcSize;
22899   int NumMaskElts = NumBVOps * ZextRatio;
22900   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
22901   for (int i = 0; i != NumMaskElts; ++i) {
22902     if (i / ZextRatio == ZextElt) {
22903       // The low bits of the (potentially translated) extracted element map to
22904       // the source vector. The high bits map to zero. We will use a zero vector
22905       // as the 2nd source operand of the shuffle, so use the 1st element of
22906       // that vector (mask value is number-of-elements) for the high bits.
22907       int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
22908       ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
22909                                            : NumMaskElts;
22910     }
22911 
22912     // Undef elements of the build vector remain undef because we initialize
22913     // the shuffle mask with -1.
22914   }
22915 
22916   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
22917   // bitcast (shuffle V, ZeroVec, VectorMask)
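  // For example (little-endian, hypothetical values): for
  //   v4i32 buildvec undef, (zext (extractelt v8i16 V, 5)), undef, undef
  // we get ZextElt = 1, ZextRatio = 2, NumMaskElts = 8 and
  // ShufMask = <u,u,5,8,u,u,u,u>: the low half of element 1 comes from V[5]
  // and the high half is element 0 of the zero vector (mask value 8).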
22918   SDLoc DL(BV);
22919   EVT VecVT = Extract.getOperand(0).getValueType();
22920   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
22921   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22922   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
22923                                              ZeroVec, ShufMask, DAG);
22924   if (!Shuf)
22925     return SDValue();
22926   return DAG.getBitcast(VT, Shuf);
22927 }
22928 
22929 // FIXME: promote to STLExtras.
22930 template <typename R, typename T>
22931 static auto getFirstIndexOf(R &&Range, const T &Val) {
22932   auto I = find(Range, Val);
22933   if (I == Range.end())
22934     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
22935   return std::distance(Range.begin(), I);
22936 }
22937 
22938 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
22939 // operations. If the types of the vectors we're extracting from allow it,
22940 // turn this into a vector_shuffle node.
22941 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
22942   SDLoc DL(N);
22943   EVT VT = N->getValueType(0);
22944 
22945   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
22946   if (!isTypeLegal(VT))
22947     return SDValue();
22948 
22949   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
22950     return V;
22951 
22952   // May only combine to shuffle after legalize if shuffle is legal.
22953   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
22954     return SDValue();
22955 
22956   bool UsesZeroVector = false;
22957   unsigned NumElems = N->getNumOperands();
22958 
22959   // Record, for each element of the newly built vector, which input vector
22960   // that element comes from. -1 stands for undef, 0 for the zero vector,
22961   // and positive values for the input vectors.
22962   // VectorMask maps each element to its vector number, and VecIn maps vector
22963   // numbers to their initial SDValues.
22964 
22965   SmallVector<int, 8> VectorMask(NumElems, -1);
22966   SmallVector<SDValue, 8> VecIn;
22967   VecIn.push_back(SDValue());
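  // For example (hypothetical), for
  //   BUILD_VECTOR (extractelt t1, 0), (extractelt t2, 1), 0, undef
  // the loop below produces VectorMask = [1, 2, 0, -1] and
  // VecIn = [SDValue(), t1, t2], where slot 0 stands for the zero vector.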
22968 
22969   for (unsigned i = 0; i != NumElems; ++i) {
22970     SDValue Op = N->getOperand(i);
22971 
22972     if (Op.isUndef())
22973       continue;
22974 
22975     // See if we can use a blend with a zero vector.
22976     // TODO: Should we generalize this to a blend with an arbitrary constant
22977     // vector?
22978     if (isNullConstant(Op) || isNullFPConstant(Op)) {
22979       UsesZeroVector = true;
22980       VectorMask[i] = 0;
22981       continue;
22982     }
22983 
22984     // Not an undef or zero. If the input is something other than an
22985     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
22986     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22987         !isa<ConstantSDNode>(Op.getOperand(1)))
22988       return SDValue();
22989     SDValue ExtractedFromVec = Op.getOperand(0);
22990 
22991     if (ExtractedFromVec.getValueType().isScalableVector())
22992       return SDValue();
22993 
22994     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
22995     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
22996       return SDValue();
22997 
22998     // All inputs must have the same element type as the output.
22999     if (VT.getVectorElementType() !=
23000         ExtractedFromVec.getValueType().getVectorElementType())
23001       return SDValue();
23002 
23003     // Have we seen this input vector before?
23004     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23005     // a map back from SDValues to numbers isn't worth it.
23006     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23007     if (Idx == -1) { // A new source vector?
23008       Idx = VecIn.size();
23009       VecIn.push_back(ExtractedFromVec);
23010     }
23011 
23012     VectorMask[i] = Idx;
23013   }
23014 
23015   // If we didn't find at least one input vector, bail out.
23016   if (VecIn.size() < 2)
23017     return SDValue();
23018 
  // If all the operands of the BUILD_VECTOR extract from the same vector,
  // then split that vector efficiently based on the maximum vector access
  // index and adjust VectorMask and VecIn accordingly.
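  // For example (hypothetical): with NumElems = 4 and extract indices
  // {0, 1, 8, 9} into a v16i32 source, MaxIndex = 9 gives NearestPow2 = 16
  // and SplitSize = 8, so the source is split into two v8i32 halves and the
  // VectorMask entries are remapped to 1 (index < 8) or 2 (index >= 8).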
23023   bool DidSplitVec = false;
23024   if (VecIn.size() == 2) {
23025     unsigned MaxIndex = 0;
23026     unsigned NearestPow2 = 0;
23027     SDValue Vec = VecIn.back();
23028     EVT InVT = Vec.getValueType();
23029     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23030 
23031     for (unsigned i = 0; i < NumElems; i++) {
23032       if (VectorMask[i] <= 0)
23033         continue;
23034       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23035       IndexVec[i] = Index;
23036       MaxIndex = std::max(MaxIndex, Index);
23037     }
23038 
23039     NearestPow2 = PowerOf2Ceil(MaxIndex);
23040     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23041         NumElems * 2 < NearestPow2) {
23042       unsigned SplitSize = NearestPow2 / 2;
23043       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23044                                      InVT.getVectorElementType(), SplitSize);
23045       if (TLI.isTypeLegal(SplitVT) &&
23046           SplitSize + SplitVT.getVectorNumElements() <=
23047               InVT.getVectorNumElements()) {
23048         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23049                                      DAG.getVectorIdxConstant(SplitSize, DL));
23050         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23051                                      DAG.getVectorIdxConstant(0, DL));
23052         VecIn.pop_back();
23053         VecIn.push_back(VecIn1);
23054         VecIn.push_back(VecIn2);
23055         DidSplitVec = true;
23056 
23057         for (unsigned i = 0; i < NumElems; i++) {
23058           if (VectorMask[i] <= 0)
23059             continue;
23060           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23061         }
23062       }
23063     }
23064   }
23065 
23066   // Sort input vectors by decreasing vector element count,
23067   // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first entry, the implicit zero vector, as-is.
23069   SmallVector<SDValue, 8> SortedVecIn(VecIn);
23070   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23071                     [](const SDValue &a, const SDValue &b) {
23072                       return a.getValueType().getVectorNumElements() >
23073                              b.getValueType().getVectorNumElements();
23074                     });
23075 
23076   // We now also need to rebuild the VectorMask, because it referenced element
23077   // order in VecIn, and we just sorted them.
23078   for (int &SourceVectorIndex : VectorMask) {
23079     if (SourceVectorIndex <= 0)
23080       continue;
23081     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23082     assert(Idx > 0 && Idx < SortedVecIn.size() &&
23083            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23084     SourceVectorIndex = Idx;
23085   }
23086 
23087   VecIn = std::move(SortedVecIn);
23088 
  // TODO: Should this fire if some of the input vectors have illegal types
  // (like it does now), or should we let legalization run its course first?
23091 
23092   // Shuffle phase:
23093   // Take pairs of vectors, and shuffle them so that the result has elements
23094   // from these vectors in the correct places.
23095   // For example, given:
23096   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23097   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23098   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23099   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23100   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23101   // We will generate:
23102   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23103   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23104   SmallVector<SDValue, 4> Shuffles;
23105   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23106     unsigned LeftIdx = 2 * In + 1;
23107     SDValue VecLeft = VecIn[LeftIdx];
23108     SDValue VecRight =
23109         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23110 
23111     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23112                                                 VecRight, LeftIdx, DidSplitVec))
23113       Shuffles.push_back(Shuffle);
23114     else
23115       return SDValue();
23116   }
23117 
23118   // If we need the zero vector as an "ingredient" in the blend tree, add it
23119   // to the list of shuffles.
23120   if (UsesZeroVector)
23121     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23122                                       : DAG.getConstantFP(0.0, DL, VT));
23123 
23124   // If we only have one shuffle, we're done.
23125   if (Shuffles.size() == 1)
23126     return Shuffles[0];
23127 
23128   // Update the vector mask to point to the post-shuffle vectors.
23129   for (int &Vec : VectorMask)
23130     if (Vec == 0)
23131       Vec = Shuffles.size() - 1;
23132     else
23133       Vec = (Vec - 1) / 2;
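  // E.g. input vectors 1 and 2 were blended into Shuffles[0], vectors 3 and 4
  // into Shuffles[1], and the zero vector (if used) is the last entry.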
23134 
23135   // More than one shuffle. Generate a binary tree of blends, e.g. if from
23136   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23137   // generate:
23138   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23139   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23140   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23141   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23142   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23143   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23144   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23145 
23146   // Make sure the initial size of the shuffle list is even.
23147   if (Shuffles.size() % 2)
23148     Shuffles.push_back(DAG.getUNDEF(VT));
23149 
23150   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23151     if (CurSize % 2) {
23152       Shuffles[CurSize] = DAG.getUNDEF(VT);
23153       CurSize++;
23154     }
23155     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23156       int Left = 2 * In;
23157       int Right = 2 * In + 1;
23158       SmallVector<int, 8> Mask(NumElems, -1);
23159       SDValue L = Shuffles[Left];
23160       ArrayRef<int> LMask;
23161       bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23162                            L.use_empty() && L.getOperand(1).isUndef() &&
23163                            L.getOperand(0).getValueType() == L.getValueType();
23164       if (IsLeftShuffle) {
23165         LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23166         L = L.getOperand(0);
23167       }
23168       SDValue R = Shuffles[Right];
23169       ArrayRef<int> RMask;
23170       bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23171                             R.use_empty() && R.getOperand(1).isUndef() &&
23172                             R.getOperand(0).getValueType() == R.getValueType();
23173       if (IsRightShuffle) {
23174         RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23175         R = R.getOperand(0);
23176       }
23177       for (unsigned I = 0; I != NumElems; ++I) {
23178         if (VectorMask[I] == Left) {
23179           Mask[I] = I;
23180           if (IsLeftShuffle)
23181             Mask[I] = LMask[I];
23182           VectorMask[I] = In;
23183         } else if (VectorMask[I] == Right) {
23184           Mask[I] = I + NumElems;
23185           if (IsRightShuffle)
23186             Mask[I] = RMask[I] + NumElems;
23187           VectorMask[I] = In;
23188         }
23189       }
23190 
23191       Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23192     }
23193   }
23194   return Shuffles[0];
23195 }
23196 
// Try to turn a build vector of zero extends of extract vector elts into
// a vector zero extend and possibly an extract subvector.
23199 // TODO: Support sign extend?
23200 // TODO: Allow undef elements?
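// For example (hypothetical types):
//   v4i32 BUILD_VECTOR (zext (extractelt v8i16 X, 4)), ...,
//                      (zext (extractelt X, 7))
// becomes (v4i32 zero_extend (v4i16 extract_subvector X, 4)).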
23201 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23202   if (LegalOperations)
23203     return SDValue();
23204 
23205   EVT VT = N->getValueType(0);
23206 
23207   bool FoundZeroExtend = false;
23208   SDValue Op0 = N->getOperand(0);
23209   auto checkElem = [&](SDValue Op) -> int64_t {
23210     unsigned Opc = Op.getOpcode();
23211     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23212     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23213         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23214         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23215       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23216         return C->getZExtValue();
23217     return -1;
23218   };
23219 
23220   // Make sure the first element matches
23221   // (zext (extract_vector_elt X, C))
23222   // Offset must be a constant multiple of the
23223   // known-minimum vector length of the result type.
23224   int64_t Offset = checkElem(Op0);
23225   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23226     return SDValue();
23227 
23228   unsigned NumElems = N->getNumOperands();
23229   SDValue In = Op0.getOperand(0).getOperand(0);
23230   EVT InSVT = In.getValueType().getScalarType();
23231   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23232 
23233   // Don't create an illegal input type after type legalization.
23234   if (LegalTypes && !TLI.isTypeLegal(InVT))
23235     return SDValue();
23236 
23237   // Ensure all the elements come from the same vector and are adjacent.
23238   for (unsigned i = 1; i != NumElems; ++i) {
23239     if ((Offset + i) != checkElem(N->getOperand(i)))
23240       return SDValue();
23241   }
23242 
23243   SDLoc DL(N);
23244   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23245                    Op0.getOperand(0).getOperand(1));
23246   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23247                      VT, In);
23248 }
23249 
// If this is a very simple BUILD_VECTOR with the first element being a
// ZERO_EXTEND, and all other elements being constant zeros, granularize the
// BUILD_VECTOR's element width, absorbing the ZERO_EXTEND and turning it into
// a constant zero op.
// This pattern can appear during legalization.
//
// NOTE: This can be generalized to allow more than a single
//       non-constant-zero op, UNDEF's, and to be KnownBits-based.
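// For example (hypothetical, little-endian):
//   v2i64 BUILD_VECTOR (zext i32 X to i64), 0
// can be rebuilt as a v4i32 BUILD_VECTOR whose first element is the truncated
// source and whose other elements are zero, then bitcast back to v2i64.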
23257 SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23258   // Don't run this after legalization. Targets may have other preferences.
23259   if (Level >= AfterLegalizeDAG)
23260     return SDValue();
23261 
23262   // FIXME: support big-endian.
23263   if (DAG.getDataLayout().isBigEndian())
23264     return SDValue();
23265 
23266   EVT VT = N->getValueType(0);
23267   EVT OpVT = N->getOperand(0).getValueType();
23268   assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23269 
23270   EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23271 
23272   if (!TLI.isTypeLegal(OpIntVT) ||
23273       (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23274     return SDValue();
23275 
23276   unsigned EltBitwidth = VT.getScalarSizeInBits();
23277   // NOTE: the actual width of operands may be wider than that!
23278 
23279   // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23280   // active bits they all have? We'll want to truncate them all to that width.
23281   unsigned ActiveBits = 0;
23282   APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23283   for (auto I : enumerate(N->ops())) {
23284     SDValue Op = I.value();
23285     // FIXME: support UNDEF elements?
23286     if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23287       unsigned OpActiveBits =
23288           Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23289       if (OpActiveBits == 0) {
23290         KnownZeroOps.setBit(I.index());
23291         continue;
23292       }
23293       // Profitability check: don't allow non-zero constant operands.
23294       return SDValue();
23295     }
23296     // Profitability check: there must only be a single non-zero operand,
23297     // and it must be the first operand of the BUILD_VECTOR.
23298     if (I.index() != 0)
23299       return SDValue();
23300     // The operand must be a zero-extension itself.
23301     // FIXME: this could be generalized to known leading zeros check.
23302     if (Op.getOpcode() != ISD::ZERO_EXTEND)
23303       return SDValue();
23304     unsigned CurrActiveBits =
23305         Op.getOperand(0).getValueSizeInBits().getFixedValue();
23306     assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23307     ActiveBits = CurrActiveBits;
23308     // We want to at least halve the element size.
23309     if (2 * ActiveBits > EltBitwidth)
23310       return SDValue();
23311   }
23312 
23313   // This BUILD_VECTOR must have at least one non-constant-zero operand.
23314   if (ActiveBits == 0)
23315     return SDValue();
23316 
  // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
  // into how many chunks can we split our element width?
23319   EVT NewScalarIntVT, NewIntVT;
23320   std::optional<unsigned> Factor;
  // We can split the element into at least two chunks, but not into more
  // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split factor
  // for which the element width is a multiple of it, and for which the
  // resulting types/operations on that chunk width are legal.
23325   assert(2 * ActiveBits <= EltBitwidth &&
23326          "We know that half or less bits of the element are active.");
23327   for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23328     if (EltBitwidth % Scale != 0)
23329       continue;
23330     unsigned ChunkBitwidth = EltBitwidth / Scale;
23331     assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23332     NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23333     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23334                                 Scale * N->getNumOperands());
23335     if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23336         (LegalOperations &&
23337          !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23338            TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23339       continue;
23340     Factor = Scale;
23341     break;
23342   }
23343   if (!Factor)
23344     return SDValue();
23345 
23346   SDLoc DL(N);
23347   SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23348 
23349   // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23350   SmallVector<SDValue, 16> NewOps;
23351   NewOps.reserve(NewIntVT.getVectorNumElements());
23352   for (auto I : enumerate(N->ops())) {
23353     SDValue Op = I.value();
23354     assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23355     unsigned SrcOpIdx = I.index();
23356     if (KnownZeroOps[SrcOpIdx]) {
23357       NewOps.append(*Factor, ZeroOp);
23358       continue;
23359     }
23360     Op = DAG.getBitcast(OpIntVT, Op);
23361     Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23362     NewOps.emplace_back(Op);
23363     NewOps.append(*Factor - 1, ZeroOp);
23364   }
23365   assert(NewOps.size() == NewIntVT.getVectorNumElements());
23366   SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23367   NewBV = DAG.getBitcast(VT, NewBV);
23368   return NewBV;
23369 }
23370 
23371 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23372   EVT VT = N->getValueType(0);
23373 
23374   // A vector built entirely of undefs is undef.
23375   if (ISD::allOperandsUndef(N))
23376     return DAG.getUNDEF(VT);
23377 
23378   // If this is a splat of a bitcast from another vector, change to a
23379   // concat_vector.
23380   // For example:
23381   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23382   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23383   //
23384   // If X is a build_vector itself, the concat can become a larger build_vector.
23385   // TODO: Maybe this is useful for non-splat too?
23386   if (!LegalOperations) {
23387     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23388       Splat = peekThroughBitcasts(Splat);
23389       EVT SrcVT = Splat.getValueType();
23390       if (SrcVT.isVector()) {
23391         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23392         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23393                                      SrcVT.getVectorElementType(), NumElts);
23394         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23395           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23396           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
23397                                        NewVT, Ops);
23398           return DAG.getBitcast(VT, Concat);
23399         }
23400       }
23401     }
23402   }
23403 
  // Check if we can express the BUILD_VECTOR via a subvector extract.
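  // E.g. (hypothetical) v4i32 BUILD_VECTOR (extractelt v8i32 X, 4), ...,
  // (extractelt X, 7) becomes (extract_subvector X, 4); with Offset == 0 and
  // matching types the result is X itself.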
23405   if (!LegalTypes && (N->getNumOperands() > 1)) {
23406     SDValue Op0 = N->getOperand(0);
23407     auto checkElem = [&](SDValue Op) -> uint64_t {
23408       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23409           (Op0.getOperand(0) == Op.getOperand(0)))
23410         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23411           return CNode->getZExtValue();
23412       return -1;
23413     };
23414 
23415     int Offset = checkElem(Op0);
23416     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23417       if (Offset + i != checkElem(N->getOperand(i))) {
23418         Offset = -1;
23419         break;
23420       }
23421     }
23422 
23423     if ((Offset == 0) &&
23424         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23425       return Op0.getOperand(0);
23426     if ((Offset != -1) &&
23427         ((Offset % N->getValueType(0).getVectorNumElements()) ==
23428          0)) // IDX must be multiple of output size.
23429       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23430                          Op0.getOperand(0), Op0.getOperand(1));
23431   }
23432 
23433   if (SDValue V = convertBuildVecZextToZext(N))
23434     return V;
23435 
23436   if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23437     return V;
23438 
23439   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23440     return V;
23441 
23442   if (SDValue V = reduceBuildVecTruncToBitCast(N))
23443     return V;
23444 
23445   if (SDValue V = reduceBuildVecToShuffle(N))
23446     return V;
23447 
23448   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23449   // Do this late as some of the above may replace the splat.
23450   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23451     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23452       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23453       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23454     }
23455 
23456   return SDValue();
23457 }
23458 
23459 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23460   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23461   EVT OpVT = N->getOperand(0).getValueType();
23462 
23463   // If the operands are legal vectors, leave them alone.
23464   if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23465     return SDValue();
23466 
23467   SDLoc DL(N);
23468   EVT VT = N->getValueType(0);
23469   SmallVector<SDValue, 8> Ops;
23470 
23471   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23472   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
23473 
23474   // Keep track of what we encounter.
23475   bool AnyInteger = false;
23476   bool AnyFP = false;
23477   for (const SDValue &Op : N->ops()) {
23478     if (ISD::BITCAST == Op.getOpcode() &&
23479         !Op.getOperand(0).getValueType().isVector())
23480       Ops.push_back(Op.getOperand(0));
23481     else if (ISD::UNDEF == Op.getOpcode())
23482       Ops.push_back(ScalarUndef);
23483     else
23484       return SDValue();
23485 
23486     // Note whether we encounter an integer or floating point scalar.
23487     // If it's neither, bail out, it could be something weird like x86mmx.
23488     EVT LastOpVT = Ops.back().getValueType();
23489     if (LastOpVT.isFloatingPoint())
23490       AnyFP = true;
23491     else if (LastOpVT.isInteger())
23492       AnyInteger = true;
23493     else
23494       return SDValue();
23495   }
23496 
23497   // If any of the operands is a floating point scalar bitcast to a vector,
23498   // use floating point types throughout, and bitcast everything.
23499   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23500   if (AnyFP) {
23501     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23502     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
23503     if (AnyInteger) {
23504       for (SDValue &Op : Ops) {
23505         if (Op.getValueType() == SVT)
23506           continue;
23507         if (Op.isUndef())
23508           Op = ScalarUndef;
23509         else
23510           Op = DAG.getBitcast(SVT, Op);
23511       }
23512     }
23513   }
23514 
23515   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23516                                VT.getSizeInBits() / SVT.getSizeInBits());
23517   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23518 }
23519 
23520 // Attempt to merge nested concat_vectors/undefs.
23521 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23522 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23523 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23524                                                   SelectionDAG &DAG) {
23525   EVT VT = N->getValueType(0);
23526 
23527   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23528   EVT SubVT;
23529   SDValue FirstConcat;
23530   for (const SDValue &Op : N->ops()) {
23531     if (Op.isUndef())
23532       continue;
23533     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23534       return SDValue();
23535     if (!FirstConcat) {
23536       SubVT = Op.getOperand(0).getValueType();
23537       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23538         return SDValue();
23539       FirstConcat = Op;
23540       continue;
23541     }
23542     if (SubVT != Op.getOperand(0).getValueType())
23543       return SDValue();
23544   }
23545   assert(FirstConcat && "Concat of all-undefs found");
23546 
23547   SmallVector<SDValue> ConcatOps;
23548   for (const SDValue &Op : N->ops()) {
23549     if (Op.isUndef()) {
23550       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23551       continue;
23552     }
23553     ConcatOps.append(Op->op_begin(), Op->op_end());
23554   }
23555   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23556 }
23557 
23558 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
23559 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
23560 // most two distinct vectors the same size as the result, attempt to turn this
23561 // into a legal shuffle.
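// E.g. (hypothetical) v4f32 concat_vectors (v2f32 extract_subvector A, 0),
//                                          (v2f32 extract_subvector B, 2)
// with v4f32 A and B becomes vector_shuffle<0,1,6,7> A, B.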
23562 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23563   EVT VT = N->getValueType(0);
23564   EVT OpVT = N->getOperand(0).getValueType();
23565 
23566   // We currently can't generate an appropriate shuffle for a scalable vector.
23567   if (VT.isScalableVector())
23568     return SDValue();
23569 
23570   int NumElts = VT.getVectorNumElements();
23571   int NumOpElts = OpVT.getVectorNumElements();
23572 
23573   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23574   SmallVector<int, 8> Mask;
23575 
23576   for (SDValue Op : N->ops()) {
23577     Op = peekThroughBitcasts(Op);
23578 
23579     // UNDEF nodes convert to UNDEF shuffle mask values.
23580     if (Op.isUndef()) {
23581       Mask.append((unsigned)NumOpElts, -1);
23582       continue;
23583     }
23584 
23585     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23586       return SDValue();
23587 
23588     // What vector are we extracting the subvector from and at what index?
23589     SDValue ExtVec = Op.getOperand(0);
23590     int ExtIdx = Op.getConstantOperandVal(1);
23591 
23592     // We want the EVT of the original extraction to correctly scale the
23593     // extraction index.
23594     EVT ExtVT = ExtVec.getValueType();
23595     ExtVec = peekThroughBitcasts(ExtVec);
23596 
23597     // UNDEF nodes convert to UNDEF shuffle mask values.
23598     if (ExtVec.isUndef()) {
23599       Mask.append((unsigned)NumOpElts, -1);
23600       continue;
23601     }
23602 
23603     // Ensure that we are extracting a subvector from a vector the same
23604     // size as the result.
23605     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23606       return SDValue();
23607 
23608     // Scale the subvector index to account for any bitcast.
23609     int NumExtElts = ExtVT.getVectorNumElements();
23610     if (0 == (NumExtElts % NumElts))
23611       ExtIdx /= (NumExtElts / NumElts);
23612     else if (0 == (NumElts % NumExtElts))
23613       ExtIdx *= (NumElts / NumExtElts);
23614     else
23615       return SDValue();
23616 
23617     // At most we can reference 2 inputs in the final shuffle.
23618     if (SV0.isUndef() || SV0 == ExtVec) {
23619       SV0 = ExtVec;
23620       for (int i = 0; i != NumOpElts; ++i)
23621         Mask.push_back(i + ExtIdx);
23622     } else if (SV1.isUndef() || SV1 == ExtVec) {
23623       SV1 = ExtVec;
23624       for (int i = 0; i != NumOpElts; ++i)
23625         Mask.push_back(i + ExtIdx + NumElts);
23626     } else {
23627       return SDValue();
23628     }
23629   }
23630 
23631   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23632   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23633                                      DAG.getBitcast(VT, SV1), Mask, DAG);
23634 }
23635 
23636 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23637   unsigned CastOpcode = N->getOperand(0).getOpcode();
23638   switch (CastOpcode) {
23639   case ISD::SINT_TO_FP:
23640   case ISD::UINT_TO_FP:
23641   case ISD::FP_TO_SINT:
23642   case ISD::FP_TO_UINT:
23643     // TODO: Allow more opcodes?
23644     //  case ISD::BITCAST:
23645     //  case ISD::TRUNCATE:
23646     //  case ISD::ZERO_EXTEND:
23647     //  case ISD::SIGN_EXTEND:
23648     //  case ISD::FP_EXTEND:
23649     break;
23650   default:
23651     return SDValue();
23652   }
23653 
23654   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23655   if (!SrcVT.isVector())
23656     return SDValue();
23657 
23658   // All operands of the concat must be the same kind of cast from the same
23659   // source type.
23660   SmallVector<SDValue, 4> SrcOps;
23661   for (SDValue Op : N->ops()) {
23662     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23663         Op.getOperand(0).getValueType() != SrcVT)
23664       return SDValue();
23665     SrcOps.push_back(Op.getOperand(0));
23666   }
23667 
23668   // The wider cast must be supported by the target. This is unusual because
23669   // the operation support type parameter depends on the opcode. In addition,
23670   // check the other type in the cast to make sure this is really legal.
23671   EVT VT = N->getValueType(0);
23672   EVT SrcEltVT = SrcVT.getVectorElementType();
23673   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23674   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23675   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23676   switch (CastOpcode) {
23677   case ISD::SINT_TO_FP:
23678   case ISD::UINT_TO_FP:
23679     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23680         !TLI.isTypeLegal(VT))
23681       return SDValue();
23682     break;
23683   case ISD::FP_TO_SINT:
23684   case ISD::FP_TO_UINT:
23685     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23686         !TLI.isTypeLegal(ConcatSrcVT))
23687       return SDValue();
23688     break;
23689   default:
23690     llvm_unreachable("Unexpected cast opcode");
23691   }
23692 
23693   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23694   SDLoc DL(N);
23695   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23696   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23697 }
23698 
23699 // See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
23700 // the operands is a SHUFFLE_VECTOR, and all other operands are also operands
23701 // to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
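// E.g. (hypothetical) v4i32 concat_vectors (vector_shuffle<1,0> X, undef), X
// with v2i32 X becomes
//   vector_shuffle<1,0,0,1> (concat_vectors X, undef), undef.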
23702 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23703     SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23704     bool LegalOperations) {
23705   EVT VT = N->getValueType(0);
23706   EVT OpVT = N->getOperand(0).getValueType();
23707   if (VT.isScalableVector())
23708     return SDValue();
23709 
23710   // For now, only allow simple 2-operand concatenations.
23711   if (N->getNumOperands() != 2)
23712     return SDValue();
23713 
23714   // Don't create illegal types/shuffles when not allowed to.
23715   if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23716       (LegalOperations &&
23717        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23718     return SDValue();
23719 
23720   // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
23721   // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us,
23722   // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR,
23723   // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!).
23724   // (4) and for now, the SHUFFLE_VECTOR must be unary.
23725   ShuffleVectorSDNode *SVN = nullptr;
23726   for (SDValue Op : N->ops()) {
23727     if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23728         CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23729         all_of(N->ops(), [CurSVN](SDValue Op) {
23730           // FIXME: can we allow UNDEF operands?
23731           return !Op.isUndef() &&
23732                  (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23733         })) {
23734       SVN = CurSVN;
23735       break;
23736     }
23737   }
23738   if (!SVN)
23739     return SDValue();
23740 
  // We are going to pad the shuffle operands, so any index that was picking
  // from the second operand must be adjusted.
23743   SmallVector<int, 16> AdjustedMask;
23744   AdjustedMask.reserve(SVN->getMask().size());
23745   assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23746   append_range(AdjustedMask, SVN->getMask());
23747 
23748   // Identity masks for the operands of the (padded) shuffle.
23749   SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23750   MutableArrayRef<int> FirstShufOpIdentityMask =
23751       MutableArrayRef<int>(IdentityMask)
23752           .take_front(OpVT.getVectorNumElements());
23753   MutableArrayRef<int> SecondShufOpIdentityMask =
23754       MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23755   std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23756   std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23757             VT.getVectorNumElements());
23758 
23759   // New combined shuffle mask.
23760   SmallVector<int, 32> Mask;
23761   Mask.reserve(VT.getVectorNumElements());
23762   for (SDValue Op : N->ops()) {
23763     assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23764     if (Op.getNode() == SVN) {
23765       append_range(Mask, AdjustedMask);
23766       continue;
23767     }
23768     if (Op == SVN->getOperand(0)) {
23769       append_range(Mask, FirstShufOpIdentityMask);
23770       continue;
23771     }
23772     if (Op == SVN->getOperand(1)) {
23773       append_range(Mask, SecondShufOpIdentityMask);
23774       continue;
23775     }
23776     llvm_unreachable("Unexpected operand!");
23777   }
23778 
23779   // Don't create illegal shuffle masks.
23780   if (!TLI.isShuffleMaskLegal(Mask, VT))
23781     return SDValue();
23782 
23783   // Pad the shuffle operands with UNDEF.
23784   SDLoc dl(N);
23785   std::array<SDValue, 2> ShufOps;
23786   for (auto I : zip(SVN->ops(), ShufOps)) {
23787     SDValue ShufOp = std::get<0>(I);
23788     SDValue &NewShufOp = std::get<1>(I);
23789     if (ShufOp.isUndef())
23790       NewShufOp = DAG.getUNDEF(VT);
23791     else {
23792       SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23793                                           DAG.getUNDEF(OpVT));
23794       ShufOpParts[0] = ShufOp;
23795       NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23796     }
23797   }
23798   // Finally, create the new wide shuffle.
23799   return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23800 }
23801 
23802 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23803   // If we only have one input vector, we don't need to do any concatenation.
23804   if (N->getNumOperands() == 1)
23805     return N->getOperand(0);
23806 
23807   // Check if all of the operands are undefs.
23808   EVT VT = N->getValueType(0);
23809   if (ISD::allOperandsUndef(N))
23810     return DAG.getUNDEF(VT);
23811 
23812   // Optimize concat_vectors where all but the first of the vectors are undef.
23813   if (all_of(drop_begin(N->ops()),
23814              [](const SDValue &Op) { return Op.isUndef(); })) {
23815     SDValue In = N->getOperand(0);
23816     assert(In.getValueType().isVector() && "Must concat vectors");
23817 
23818     // If the input is a concat_vectors, just make a larger concat by padding
23819     // with smaller undefs.
23820     //
    // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
    // here could cause an infinite loop. That legalization happens when
    // LegalDAG is true and the input to LowerCONCAT_VECTORS() is scalable.
23825     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23826         !(LegalDAG && In.getValueType().isScalableVector())) {
23827       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23828       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23829       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23830       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23831     }
23832 
23833     SDValue Scalar = peekThroughOneUseBitcasts(In);
23834 
23835     // concat_vectors(scalar_to_vector(scalar), undef) ->
23836     //     scalar_to_vector(scalar)
23837     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23838          Scalar.hasOneUse()) {
23839       EVT SVT = Scalar.getValueType().getVectorElementType();
23840       if (SVT == Scalar.getOperand(0).getValueType())
23841         Scalar = Scalar.getOperand(0);
23842     }
23843 
23844     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23845     if (!Scalar.getValueType().isVector()) {
23846       // If the bitcast type isn't legal, it might be a trunc of a legal type;
23847       // look through the trunc so we can still do the transform:
23848       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23849       if (Scalar->getOpcode() == ISD::TRUNCATE &&
23850           !TLI.isTypeLegal(Scalar.getValueType()) &&
23851           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23852         Scalar = Scalar->getOperand(0);
23853 
23854       EVT SclTy = Scalar.getValueType();
23855 
23856       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23857         return SDValue();
23858 
23859       // Bail out if the vector size is not a multiple of the scalar size.
23860       if (VT.getSizeInBits() % SclTy.getSizeInBits())
23861         return SDValue();
23862 
23863       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23864       if (VNTNumElms < 2)
23865         return SDValue();
23866 
23867       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23868       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23869         return SDValue();
23870 
23871       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23872       return DAG.getBitcast(VT, Res);
23873     }
23874   }
23875 
23876   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
23877   // We have already tested above for an UNDEF only concatenation.
23878   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23879   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
23880   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23881     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23882   };
23883   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23884     SmallVector<SDValue, 8> Opnds;
23885     EVT SVT = VT.getScalarType();
23886 
23887     EVT MinVT = SVT;
23888     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
23891       bool FoundMinVT = false;
23892       for (const SDValue &Op : N->ops())
23893         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23894           EVT OpSVT = Op.getOperand(0).getValueType();
23895           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
23896           FoundMinVT = true;
23897         }
23898       assert(FoundMinVT && "Concat vector type mismatch");
23899     }
23900 
23901     for (const SDValue &Op : N->ops()) {
23902       EVT OpVT = Op.getValueType();
23903       unsigned NumElts = OpVT.getVectorNumElements();
23904 
23905       if (ISD::UNDEF == Op.getOpcode())
23906         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
23907 
23908       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23909         if (SVT.isFloatingPoint()) {
23910           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
23911           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
23912         } else {
23913           for (unsigned i = 0; i != NumElts; ++i)
23914             Opnds.push_back(
23915                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
23916         }
23917       }
23918     }
23919 
23920     assert(VT.getVectorNumElements() == Opnds.size() &&
23921            "Concat vector type mismatch");
23922     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
23923   }
23924 
23925   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
23926   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
23927   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
23928     return V;
23929 
23930   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
23931     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
23932     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
23933       return V;
23934 
23935     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
23936     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
23937       return V;
23938   }
23939 
23940   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
23941     return V;
23942 
23943   if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
23944           N, DAG, TLI, LegalTypes, LegalOperations))
23945     return V;
23946 
23947   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
23948   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for CONCAT operations that place the incoming vectors
23950   // at the exact same location.
23951   //
23952   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
23953   SDValue SingleSource = SDValue();
23954   unsigned PartNumElem =
23955       N->getOperand(0).getValueType().getVectorMinNumElements();
23956 
23957   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23958     SDValue Op = N->getOperand(i);
23959 
23960     if (Op.isUndef())
23961       continue;
23962 
23963     // Check if this is the identity extract:
23964     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23965       return SDValue();
23966 
23967     // Find the single incoming vector for the extract_subvector.
23968     if (SingleSource.getNode()) {
23969       if (Op.getOperand(0) != SingleSource)
23970         return SDValue();
23971     } else {
23972       SingleSource = Op.getOperand(0);
23973 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
23977       if (SingleSource.getValueType() != N->getValueType(0))
23978         return SDValue();
23979     }
23980 
23981     // Check that we are reading from the identity index.
23982     unsigned IdentityIndex = i * PartNumElem;
23983     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
23984       return SDValue();
23985   }
23986 
23987   if (SingleSource.getNode())
23988     return SingleSource;
23989 
23990   return SDValue();
23991 }
23992 
23993 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
23994 // if the subvector can be sourced for free.
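// E.g. (hypothetical, v4i32 A, B, X):
//   getSubVectorSrc((insert_subvector ?, X, 4), 4, v4i32) returns X, and
//   getSubVectorSrc((concat_vectors A, B), 4, v4i32) returns B.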
23995 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
23996   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
23997       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
23998     return V.getOperand(1);
23999   }
24000   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24001   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24002       V.getOperand(0).getValueType() == SubVT &&
24003       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24004     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24005     return V.getOperand(SubIdx);
24006   }
24007   return SDValue();
24008 }
24009 
24010 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24011                                               SelectionDAG &DAG,
24012                                               bool LegalOperations) {
24013   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24014   SDValue BinOp = Extract->getOperand(0);
24015   unsigned BinOpcode = BinOp.getOpcode();
24016   if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24017     return SDValue();
24018 
24019   EVT VecVT = BinOp.getValueType();
24020   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24021   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24022     return SDValue();
24023 
24024   SDValue Index = Extract->getOperand(1);
24025   EVT SubVT = Extract->getValueType(0);
24026   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24027     return SDValue();
24028 
24029   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24030   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24031 
24032   // TODO: We could handle the case where only 1 operand is being inserted by
24033   //       creating an extract of the other operand, but that requires checking
24034   //       number of uses and/or costs.
24035   if (!Sub0 || !Sub1)
24036     return SDValue();
24037 
24038   // We are inserting both operands of the wide binop only to extract back
24039   // to the narrow vector size. Eliminate all of the insert/extract:
24040   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24041   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24042                      BinOp->getFlags());
24043 }
24044 
24045 /// If we are extracting a subvector produced by a wide binary operator try
24046 /// to use a narrow binary operator and/or avoid concatenation and extraction.
24047 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24048                                           bool LegalOperations) {
24049   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24050   // some of these bailouts with other transforms.
24051 
24052   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24053     return V;
24054 
24055   // The extract index must be a constant, so we can map it to a concat operand.
24056   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24057   if (!ExtractIndexC)
24058     return SDValue();
24059 
24060   // We are looking for an optionally bitcasted wide vector binary operator
24061   // feeding an extract subvector.
24062   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24063   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24064   unsigned BOpcode = BinOp.getOpcode();
24065   if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24066     return SDValue();
24067 
24068   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24069   // reduced to the unary fneg when it is visited, and we probably want to deal
24070   // with fneg in a target-specific way.
24071   if (BOpcode == ISD::FSUB) {
24072     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24073     if (C && C->getValueAPF().isNegZero())
24074       return SDValue();
24075   }
24076 
24077   // The binop must be a vector type, so we can extract some fraction of it.
24078   EVT WideBVT = BinOp.getValueType();
24079   // The optimisations below currently assume we are dealing with fixed length
24080   // vectors. It is possible to add support for scalable vectors, but at the
24081   // moment we've done no analysis to prove whether they are profitable or not.
24082   if (!WideBVT.isFixedLengthVector())
24083     return SDValue();
24084 
24085   EVT VT = Extract->getValueType(0);
24086   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24087   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24088          "Extract index is not a multiple of the vector length.");
24089 
24090   // Bail out if this is not a proper multiple width extraction.
24091   unsigned WideWidth = WideBVT.getSizeInBits();
24092   unsigned NarrowWidth = VT.getSizeInBits();
24093   if (WideWidth % NarrowWidth != 0)
24094     return SDValue();
24095 
24096   // Bail out if we are extracting a fraction of a single operation. This can
24097   // occur because we potentially looked through a bitcast of the binop.
24098   unsigned NarrowingRatio = WideWidth / NarrowWidth;
24099   unsigned WideNumElts = WideBVT.getVectorNumElements();
24100   if (WideNumElts % NarrowingRatio != 0)
24101     return SDValue();
24102 
24103   // Bail out if the target does not support a narrower version of the binop.
24104   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24105                                    WideNumElts / NarrowingRatio);
24106   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24107                                              LegalOperations))
24108     return SDValue();
24109 
24110   // If extraction is cheap, we don't need to look at the binop operands
24111   // for concat ops. The narrow binop alone makes this transform profitable.
24112   // We can't just reuse the original extract index operand because we may have
24113   // bitcasted.
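  // E.g. (hypothetical): extracting v8i8 at index 16 from a v8i32 binop
  // bitcast to v32i8 gives NarrowingRatio = 4, NarrowBVT = v2i32,
  // ConcatOpNum = 16 / 8 = 2 and ExtBOIdx = 2 * 2 = 4, i.e. the third v2i32
  // quarter of the wide operands.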
24114   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24115   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24116   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24117       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24118     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24119     SDLoc DL(Extract);
24120     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24121     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24122                             BinOp.getOperand(0), NewExtIndex);
24123     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24124                             BinOp.getOperand(1), NewExtIndex);
24125     SDValue NarrowBinOp =
24126         DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24127     return DAG.getBitcast(VT, NarrowBinOp);
24128   }
24129 
24130   // Only handle the case where we are doubling and then halving. A larger ratio
24131   // may require more than two narrow binops to replace the wide binop.
24132   if (NarrowingRatio != 2)
24133     return SDValue();
24134 
24135   // TODO: The motivating case for this transform is an x86 AVX1 target. That
24136   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24137   // flavors, but no other 256-bit integer support. This could be extended to
24138   // handle any binop, but that may require fixing/adding other folds to avoid
24139   // codegen regressions.
24140   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24141     return SDValue();
24142 
24143   // We need at least one concatenation operation of a binop operand to make
24144   // this transform worthwhile. The concat must double the input vector sizes.
24145   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24146     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24147       return V.getOperand(ConcatOpNum);
24148     return SDValue();
24149   };
24150   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24151   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24152 
24153   if (SubVecL || SubVecR) {
24154     // If a binop operand was not the result of a concat, we must extract a
24155     // half-sized operand for our new narrow binop:
24156     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24157     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24158     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24159     SDLoc DL(Extract);
24160     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24161     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24162                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24163                                       BinOp.getOperand(0), IndexC);
24164 
24165     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24166                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24167                                       BinOp.getOperand(1), IndexC);
24168 
24169     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24170     return DAG.getBitcast(VT, NarrowBinOp);
24171   }
24172 
24173   return SDValue();
24174 }
24175 
24176 /// If we are extracting a subvector from a wide vector load, convert to a
24177 /// narrow load to eliminate the extraction:
24178 /// (extract_subvector (load wide vector)) --> (load narrow vector)
24179 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24180   // TODO: Add support for big-endian. The offset calculation must be adjusted.
24181   if (DAG.getDataLayout().isBigEndian())
24182     return SDValue();
24183 
24184   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24185   if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24186     return SDValue();
24187 
  EVT VT = Extract->getValueType(0);
24190 
24191   // We can only create byte sized loads.
24192   if (!VT.isByteSized())
24193     return SDValue();
24194 
24195   unsigned Index = Extract->getConstantOperandVal(1);
24196   unsigned NumElts = VT.getVectorMinNumElements();
24197   // A fixed length vector being extracted from a scalable vector
24198   // may not be any *smaller* than the scalable one.
24199   if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24200     return SDValue();
24201 
24202   // The definition of EXTRACT_SUBVECTOR states that the index must be a
24203   // multiple of the minimum number of elements in the result type.
24204   assert(Index % NumElts == 0 && "The extract subvector index is not a "
24205                                  "multiple of the result's element count");
24206 
24207   // It's fine to use TypeSize here as we know the offset will not be negative.
24208   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24209 
  // Allow targets to opt-out of this narrowing.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24211   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
24212     return SDValue();
24213 
24214   // The narrow load will be offset from the base address of the old load if
24215   // we are extracting from something besides index 0 (little-endian).
24216   SDLoc DL(Extract);
24217 
24218   // TODO: Use "BaseIndexOffset" to make this more effective.
24219   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24220 
24221   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
24222   MachineFunction &MF = DAG.getMachineFunction();
24223   MachineMemOperand *MMO;
24224   if (Offset.isScalable()) {
24225     MachinePointerInfo MPI =
24226         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24227     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24228   } else
24229     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24230                                   StoreSize);
24231 
24232   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24233   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24234   return NewLd;
24235 }
24236 
24237 /// Given  EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
24238 /// try to produce  VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
24239 ///                                EXTRACT_SUBVECTOR(Op?, ?),
24240 ///                                Mask'))
24241 /// iff it is legal and profitable to do so. Notably, the trimmed mask
24242 /// (containing only the elements that are extracted)
24243 /// must reference at most two subvectors.
24244 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24245                                                      SelectionDAG &DAG,
24246                                                      const TargetLowering &TLI,
24247                                                      bool LegalOperations) {
24248   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24249          "Must only be called on EXTRACT_SUBVECTOR's");
24250 
24251   SDValue N0 = N->getOperand(0);
24252 
24253   // Only deal with non-scalable vectors.
24254   EVT NarrowVT = N->getValueType(0);
24255   EVT WideVT = N0.getValueType();
24256   if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24257     return SDValue();
24258 
24259   // The operand must be a shufflevector.
24260   auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24261   if (!WideShuffleVector)
24262     return SDValue();
24263 
  // The old shuffle needs to go away.
24265   if (!WideShuffleVector->hasOneUse())
24266     return SDValue();
24267 
24268   // And the narrow shufflevector that we'll form must be legal.
24269   if (LegalOperations &&
24270       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24271     return SDValue();
24272 
24273   uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24274   int NumEltsExtracted = NarrowVT.getVectorNumElements();
24275   assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24276          "Extract index is not a multiple of the output vector length.");
24277 
24278   int WideNumElts = WideVT.getVectorNumElements();
24279 
24280   SmallVector<int, 16> NewMask;
24281   NewMask.reserve(NumEltsExtracted);
24282   SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24283       DemandedSubvectors;
24284 
  // Try to decode the wide mask into a narrow mask from at most two
  // subvectors.
24286   for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24287                                                   NumEltsExtracted)) {
24288     assert((M >= -1) && (M < (2 * WideNumElts)) &&
24289            "Out-of-bounds shuffle mask?");
24290 
24291     if (M < 0) {
24292       // Does not depend on operands, does not require adjustment.
24293       NewMask.emplace_back(M);
24294       continue;
24295     }
24296 
24297     // From which operand of the shuffle does this shuffle mask element pick?
24298     int WideShufOpIdx = M / WideNumElts;
24299     // Which element of that operand is picked?
24300     int OpEltIdx = M % WideNumElts;
24301 
24302     assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24303            "Shuffle mask vector decomposition failure.");
24304 
24305     // And which NumEltsExtracted-sized subvector of that operand is that?
24306     int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24307     // And which element within that subvector of that operand is that?
24308     int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24309 
24310     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24311            "Shuffle mask subvector decomposition failure.");
24312 
24313     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24314             WideShufOpIdx * WideNumElts) == M &&
24315            "Shuffle mask full decomposition failure.");
24316 
24317     SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24318 
24319     if (Op.isUndef()) {
      // Picking from an undef operand. Let's adjust the mask instead.
24321       NewMask.emplace_back(-1);
24322       continue;
24323     }
24324 
24325     const std::pair<SDValue, int> DemandedSubvector =
24326         std::make_pair(Op, OpSubvecIdx);
24327 
24328     if (DemandedSubvectors.insert(DemandedSubvector)) {
24329       if (DemandedSubvectors.size() > 2)
24330         return SDValue(); // We can't handle more than two subvectors.
24331       // How many elements into the WideVT does this subvector start?
24332       int Index = NumEltsExtracted * OpSubvecIdx;
24333       // Bail out if the extraction isn't going to be cheap.
24334       if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24335         return SDValue();
24336     }
24337 
24338     // Ok, but from which operand of the new shuffle will this element pick?
24339     int NewOpIdx =
24340         getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24341     assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24342 
24343     int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24344     NewMask.emplace_back(AdjM);
24345   }
24346   assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24347   assert(DemandedSubvectors.size() <= 2 &&
24348          "Should have ended up demanding at most two subvectors.");
24349 
24350   // Did we discover that the shuffle does not actually depend on operands?
24351   if (DemandedSubvectors.empty())
24352     return DAG.getUNDEF(NarrowVT);
24353 
24354   // Profitability check: only deal with extractions from the first subvector
24355   // unless the mask becomes an identity mask.
24356   if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24357       any_of(NewMask, [](int M) { return M < 0; }))
24358     for (auto &DemandedSubvector : DemandedSubvectors)
24359       if (DemandedSubvector.second != 0)
24360         return SDValue();
24361 
  // We still perform the exact same EXTRACT_SUBVECTOR, just on different
  // operand[s]/index[es], so there is no point in checking for its legality.
24364 
24365   // Do not turn a legal shuffle into an illegal one.
24366   if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24367       !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24368     return SDValue();
24369 
24370   SDLoc DL(N);
24371 
24372   SmallVector<SDValue, 2> NewOps;
24373   for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24374            &DemandedSubvector : DemandedSubvectors) {
24375     // How many elements into the WideVT does this subvector start?
24376     int Index = NumEltsExtracted * DemandedSubvector.second;
24377     SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24378     NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24379                                     DemandedSubvector.first, IndexC));
24380   }
24381   assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24382          "Should end up with either one or two ops");
24383 
24384   // If we ended up with only one operand, pad with an undef.
24385   if (NewOps.size() == 1)
24386     NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24387 
24388   return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24389 }
24390 
24391 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24392   EVT NVT = N->getValueType(0);
24393   SDValue V = N->getOperand(0);
24394   uint64_t ExtIdx = N->getConstantOperandVal(1);
24395 
24396   // Extract from UNDEF is UNDEF.
24397   if (V.isUndef())
24398     return DAG.getUNDEF(NVT);
24399 
24400   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24401     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24402       return NarrowLoad;
24403 
24404   // Combine an extract of an extract into a single extract_subvector.
24405   // ext (ext X, C), 0 --> ext X, C
24406   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24407     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24408                                     V.getConstantOperandVal(1)) &&
24409         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24410       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
24411                          V.getOperand(1));
24412     }
24413   }
24414 
  // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
24416   if (V.getOpcode() == ISD::SPLAT_VECTOR)
24417     if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24418       if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24419         return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
24420 
24421   // Try to move vector bitcast after extract_subv by scaling extraction index:
24422   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
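  // e.g. v2i64 extract_subv (v4i64 bitcast (v8i32 X)), 2 -->
  //      v2i64 bitcast (v4i32 extract_subv X, 4)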
24423   if (V.getOpcode() == ISD::BITCAST &&
24424       V.getOperand(0).getValueType().isVector() &&
24425       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24426     SDValue SrcOp = V.getOperand(0);
24427     EVT SrcVT = SrcOp.getValueType();
24428     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24429     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24430     if ((SrcNumElts % DestNumElts) == 0) {
24431       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24432       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24433       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
24434                                       NewExtEC);
24435       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24436         SDLoc DL(N);
24437         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24438         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24439                                          V.getOperand(0), NewIndex);
24440         return DAG.getBitcast(NVT, NewExtract);
24441       }
24442     }
24443     if ((DestNumElts % SrcNumElts) == 0) {
24444       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24445       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24446         ElementCount NewExtEC =
24447             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24448         EVT ScalarVT = SrcVT.getScalarType();
24449         if ((ExtIdx % DestSrcRatio) == 0) {
24450           SDLoc DL(N);
24451           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24452           EVT NewExtVT =
24453               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24454           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24455             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24456             SDValue NewExtract =
24457                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24458                             V.getOperand(0), NewIndex);
24459             return DAG.getBitcast(NVT, NewExtract);
24460           }
24461           if (NewExtEC.isScalar() &&
24462               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24463             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24464             SDValue NewExtract =
24465                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24466                             V.getOperand(0), NewIndex);
24467             return DAG.getBitcast(NVT, NewExtract);
24468           }
24469         }
24470       }
24471     }
24472   }
24473 
24474   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24475     unsigned ExtNumElts = NVT.getVectorMinNumElements();
24476     EVT ConcatSrcVT = V.getOperand(0).getValueType();
24477     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24478            "Concat and extract subvector do not change element type");
24479     assert((ExtIdx % ExtNumElts) == 0 &&
24480            "Extract index is not a multiple of the input vector length.");
24481 
24482     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24483     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24484 
24485     // If the concatenated source types match this extract, it's a direct
24486     // simplification:
24487     // extract_subvec (concat V1, V2, ...), i --> Vi
24488     if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24489       return V.getOperand(ConcatOpIdx);
24490 
24491     // If the concatenated source vectors are a multiple length of this extract,
24492     // then extract a fraction of one of those source vectors directly from a
24493     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
24495     //   v2i8 extract_subvec v8i8 Y, 6
24496     if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24497         ConcatSrcNumElts % ExtNumElts == 0) {
24498       SDLoc DL(N);
24499       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24500       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24501              "Trying to extract from >1 concat operand?");
24502       assert(NewExtIdx % ExtNumElts == 0 &&
24503              "Extract index is not a multiple of the input vector length.");
24504       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24505       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24506                          V.getOperand(ConcatOpIdx), NewIndexC);
24507     }
24508   }
24509 
24510   if (SDValue V =
24511           foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24512     return V;
24513 
24514   V = peekThroughBitcasts(V);
24515 
  // If the input is a build vector, try to make a smaller build vector.
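  // e.g. v2i64 extract_subvector (v4i64 build_vector a, b, c, d), 2 -->
  //      v2i64 build_vector c, d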
24517   if (V.getOpcode() == ISD::BUILD_VECTOR) {
24518     EVT InVT = V.getValueType();
24519     unsigned ExtractSize = NVT.getSizeInBits();
24520     unsigned EltSize = InVT.getScalarSizeInBits();
24521     // Only do this if we won't split any elements.
24522     if (ExtractSize % EltSize == 0) {
24523       unsigned NumElems = ExtractSize / EltSize;
24524       EVT EltVT = InVT.getVectorElementType();
24525       EVT ExtractVT =
24526           NumElems == 1 ? EltVT
24527                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24528       if ((Level < AfterLegalizeDAG ||
24529            (NumElems == 1 ||
24530             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24531           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24532         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24533 
24534         if (NumElems == 1) {
24535           SDValue Src = V->getOperand(IdxVal);
24536           if (EltVT != Src.getValueType())
24537             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
24538           return DAG.getBitcast(NVT, Src);
24539         }
24540 
24541         // Extract the pieces from the original build_vector.
24542         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
24543                                               V->ops().slice(IdxVal, NumElems));
24544         return DAG.getBitcast(NVT, BuildVec);
24545       }
24546     }
24547   }
24548 
24549   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are the same size.
24552     EVT SmallVT = V.getOperand(1).getValueType();
24553     if (!NVT.bitsEq(SmallVT))
24554       return SDValue();
24555 
24556     // Combine:
24557     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24558     // Into:
    //    indices are equal or bit offsets are equal => V2
24560     //    otherwise => (extract_subvec V1, ExtIdx)
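    // e.g. v2i64 extract_subvec (v4i64 insert_subvec V1, (v2i64 V2), 2), 2
    //      --> V2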
24561     uint64_t InsIdx = V.getConstantOperandVal(2);
24562     if (InsIdx * SmallVT.getScalarSizeInBits() ==
24563         ExtIdx * NVT.getScalarSizeInBits()) {
24564       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24565         return SDValue();
24566 
24567       return DAG.getBitcast(NVT, V.getOperand(1));
24568     }
24569     return DAG.getNode(
24570         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
24571         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24572         N->getOperand(1));
24573   }
24574 
24575   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24576     return NarrowBOp;
24577 
24578   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24579     return SDValue(N, 0);
24580 
24581   return SDValue();
24582 }
24583 
24584 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24585 /// followed by concatenation. Narrow vector ops may have better performance
24586 /// than wide ops, and this can unlock further narrowing of other vector ops.
24587 /// Targets can invert this transform later if it is not profitable.
24588 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24589                                          SelectionDAG &DAG) {
24590   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24591   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24592       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24593       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24594     return SDValue();
24595 
24596   // Split the wide shuffle mask into halves. Any mask element that is accessing
24597   // operand 1 is offset down to account for narrowing of the vectors.
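  // e.g. for v8i32 with Mask = <0,8,1,9,2,10,3,11>, X and Y below are v4i32,
  // and we form Mask0 = <0,4,1,5> and Mask1 = <2,6,3,7>.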
24598   ArrayRef<int> Mask = Shuf->getMask();
24599   EVT VT = Shuf->getValueType(0);
24600   unsigned NumElts = VT.getVectorNumElements();
24601   unsigned HalfNumElts = NumElts / 2;
24602   SmallVector<int, 16> Mask0(HalfNumElts, -1);
24603   SmallVector<int, 16> Mask1(HalfNumElts, -1);
24604   for (unsigned i = 0; i != NumElts; ++i) {
24605     if (Mask[i] == -1)
24606       continue;
24607     // If we reference the upper (undef) subvector then the element is undef.
24608     if ((Mask[i] % NumElts) >= HalfNumElts)
24609       continue;
24610     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24611     if (i < HalfNumElts)
24612       Mask0[i] = M;
24613     else
24614       Mask1[i - HalfNumElts] = M;
24615   }
24616 
24617   // Ask the target if this is a valid transform.
24618   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24619   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24620                                 HalfNumElts);
24621   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24622       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24623     return SDValue();
24624 
24625   // shuffle (concat X, undef), (concat Y, undef), Mask -->
24626   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24627   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24628   SDLoc DL(Shuf);
24629   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24630   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24631   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24632 }
24633 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
// turn a shuffle of a single concat into a simpler shuffle followed by a
// concat.
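// e.g. shuffle (concat A, B), (concat C, D), <4,5,0,1,6,7,2,3> with v2i32
// concat operands --> concat C, A, D, B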
24636 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24637   EVT VT = N->getValueType(0);
24638   unsigned NumElts = VT.getVectorNumElements();
24639 
24640   SDValue N0 = N->getOperand(0);
24641   SDValue N1 = N->getOperand(1);
24642   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24643   ArrayRef<int> Mask = SVN->getMask();
24644 
24645   SmallVector<SDValue, 4> Ops;
24646   EVT ConcatVT = N0.getOperand(0).getValueType();
24647   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24648   unsigned NumConcats = NumElts / NumElemsPerConcat;
24649 
24650   auto IsUndefMaskElt = [](int i) { return i == -1; };
24651 
24652   // Special case: shuffle(concat(A,B)) can be more efficiently represented
24653   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24654   // half vector elements.
24655   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24656       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24657                    IsUndefMaskElt)) {
24658     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24659                               N0.getOperand(1),
24660                               Mask.slice(0, NumElemsPerConcat));
24661     N1 = DAG.getUNDEF(ConcatVT);
24662     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24663   }
24664 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
24667   for (unsigned I = 0; I != NumConcats; ++I) {
24668     unsigned Begin = I * NumElemsPerConcat;
24669     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24670 
24671     // Make sure we're dealing with a copy.
24672     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24673       Ops.push_back(DAG.getUNDEF(ConcatVT));
24674       continue;
24675     }
24676 
24677     int OpIdx = -1;
24678     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24679       if (IsUndefMaskElt(SubMask[i]))
24680         continue;
24681       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24682         return SDValue();
24683       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24684       if (0 <= OpIdx && EltOpIdx != OpIdx)
24685         return SDValue();
24686       OpIdx = EltOpIdx;
24687     }
24688     assert(0 <= OpIdx && "Unknown concat_vectors op");
24689 
24690     if (OpIdx < (int)N0.getNumOperands())
24691       Ops.push_back(N0.getOperand(OpIdx));
24692     else
24693       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24694   }
24695 
24696   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24697 }
24698 
24699 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24700 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24701 //
24702 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24703 // a simplification in some sense, but it isn't appropriate in general: some
24704 // BUILD_VECTORs are substantially cheaper than others. The general case
24705 // of a BUILD_VECTOR requires inserting each element individually (or
24706 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24707 // all constants is a single constant pool load.  A BUILD_VECTOR where each
24708 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
24709 // are undef lowers to a small number of element insertions.
24710 //
24711 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24712 // We don't fold shuffles where one side is a non-zero constant, and we don't
24713 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24714 // non-constant operands. This seems to work out reasonably well in practice.
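// e.g. shuffle (build_vector a, b, c, d), undef, <0,0,1,1> folds to
// build_vector a, a, b, b only if 'a' and 'b' are constants; duplicating
// non-constant operands in a non-splat result is rejected.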
24715 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24716                                        SelectionDAG &DAG,
24717                                        const TargetLowering &TLI) {
24718   EVT VT = SVN->getValueType(0);
24719   unsigned NumElts = VT.getVectorNumElements();
24720   SDValue N0 = SVN->getOperand(0);
24721   SDValue N1 = SVN->getOperand(1);
24722 
24723   if (!N0->hasOneUse())
24724     return SDValue();
24725 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
24727   // discussed above.
24728   if (!N1.isUndef()) {
24729     if (!N1->hasOneUse())
24730       return SDValue();
24731 
24732     bool N0AnyConst = isAnyConstantBuildVector(N0);
24733     bool N1AnyConst = isAnyConstantBuildVector(N1);
24734     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24735       return SDValue();
24736     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24737       return SDValue();
24738   }
24739 
24740   // If both inputs are splats of the same value then we can safely merge this
24741   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24742   bool IsSplat = false;
24743   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24744   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24745   if (BV0 && BV1)
24746     if (SDValue Splat0 = BV0->getSplatValue())
24747       IsSplat = (Splat0 == BV1->getSplatValue());
24748 
24749   SmallVector<SDValue, 8> Ops;
24750   SmallSet<SDValue, 16> DuplicateOps;
24751   for (int M : SVN->getMask()) {
24752     SDValue Op = DAG.getUNDEF(VT.getScalarType());
24753     if (M >= 0) {
24754       int Idx = M < (int)NumElts ? M : M - NumElts;
24755       SDValue &S = (M < (int)NumElts ? N0 : N1);
24756       if (S.getOpcode() == ISD::BUILD_VECTOR) {
24757         Op = S.getOperand(Idx);
24758       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24759         SDValue Op0 = S.getOperand(0);
24760         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24761       } else {
24762         // Operand can't be combined - bail out.
24763         return SDValue();
24764       }
24765     }
24766 
24767     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24768     // generating a splat; semantically, this is fine, but it's likely to
24769     // generate low-quality code if the target can't reconstruct an appropriate
24770     // shuffle.
24771     if (!Op.isUndef() && !isIntOrFPConstant(Op))
24772       if (!IsSplat && !DuplicateOps.insert(Op).second)
24773         return SDValue();
24774 
24775     Ops.push_back(Op);
24776   }
24777 
  // BUILD_VECTOR requires all inputs to be of the same type; find the
24779   // maximum type and extend them all.
24780   EVT SVT = VT.getScalarType();
24781   if (SVT.isInteger())
24782     for (SDValue &Op : Ops)
24783       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24784   if (SVT != VT.getScalarType())
24785     for (SDValue &Op : Ops)
24786       Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24787                         : (TLI.isZExtFree(Op.getValueType(), SVT)
24788                                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24789                                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24790   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24791 }
24792 
24793 // Match shuffles that can be converted to *_vector_extend_in_reg.
24794 // This is often generated during legalization.
24795 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24796 // and returns the EVT to which the extension should be performed.
24797 // NOTE: this assumes that the src is the first operand of the shuffle.
24798 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24799     unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24800     SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24801     bool LegalOperations) {
24802   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24803 
24804   // TODO Add support for big-endian when we have a test case.
24805   if (!VT.isInteger() || IsBigEndian)
24806     return std::nullopt;
24807 
24808   unsigned NumElts = VT.getVectorNumElements();
24809   unsigned EltSizeInBits = VT.getScalarSizeInBits();
24810 
  // Attempt to match a '*_extend_vector_inreg' shuffle; we just search for
  // power-of-2 extensions as they are the most likely.
  // FIXME: should try the Scale == NumElts case too.
24814   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24815     // The vector width must be a multiple of Scale.
24816     if (NumElts % Scale != 0)
24817       continue;
24818 
24819     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24820     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24821 
24822     if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24823         (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24824       continue;
24825 
24826     if (Match(Scale))
24827       return OutVT;
24828   }
24829 
24830   return std::nullopt;
24831 }
24832 
24833 // Match shuffles that can be converted to any_vector_extend_in_reg.
24834 // This is often generated during legalization.
24835 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24836 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24837                                                     SelectionDAG &DAG,
24838                                                     const TargetLowering &TLI,
24839                                                     bool LegalOperations) {
24840   EVT VT = SVN->getValueType(0);
24841   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24842 
24843   // TODO Add support for big-endian when we have a test case.
24844   if (!VT.isInteger() || IsBigEndian)
24845     return SDValue();
24846 
24847   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
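  // For a given Scale, each mask element must either be undef or sit at a
  // position that is a multiple of Scale and pick source element i / Scale;
  // e.g. with Scale = 2, <0,u,1,u> matches but <0,u,u,1> does not.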
24848   auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24849                       Mask = SVN->getMask()](unsigned Scale) {
24850     for (unsigned i = 0; i != NumElts; ++i) {
24851       if (Mask[i] < 0)
24852         continue;
24853       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24854         continue;
24855       return false;
24856     }
24857     return true;
24858   };
24859 
24860   unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24861   SDValue N0 = SVN->getOperand(0);
24862   // Never create an illegal type. Only create unsupported operations if we
24863   // are pre-legalization.
24864   std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24865       Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24866   if (!OutVT)
24867     return SDValue();
24868   return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24869 }
24870 
24871 // Match shuffles that can be converted to zero_extend_vector_inreg.
24872 // This is often generated during legalization.
24873 // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
24874 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
24875                                                      SelectionDAG &DAG,
24876                                                      const TargetLowering &TLI,
24877                                                      bool LegalOperations) {
24878   bool LegalTypes = true;
24879   EVT VT = SVN->getValueType(0);
24880   assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
24881   unsigned NumElts = VT.getVectorNumElements();
24882   unsigned EltSizeInBits = VT.getScalarSizeInBits();
24883 
24884   // TODO: add support for big-endian when we have a test case.
24885   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24886   if (!VT.isInteger() || IsBigEndian)
24887     return SDValue();
24888 
24889   SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
24890   auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
24891     for (int &Indice : Mask) {
24892       if (Indice < 0)
24893         continue;
24894       int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
24895       int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
24896       Fn(Indice, OpIdx, OpEltIdx);
24897     }
24898   };
24899 
24900   // Which elements of which operand does this shuffle demand?
24901   std::array<APInt, 2> OpsDemandedElts;
24902   for (APInt &OpDemandedElts : OpsDemandedElts)
24903     OpDemandedElts = APInt::getZero(NumElts);
24904   ForEachDecomposedIndice(
24905       [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
24906         OpsDemandedElts[OpIdx].setBit(OpEltIdx);
24907       });
24908 
  // Element-wise(!), which of these demanded elements are known to be zero?
24910   std::array<APInt, 2> OpsKnownZeroElts;
24911   for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
24912     std::get<2>(I) =
24913         DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
24914 
24915   // Manifest zeroable element knowledge in the shuffle mask.
  // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
  //       this is a local invention, but it won't leak into the DAG.
24918   // FIXME: should we not manifest them, but just check when matching?
24919   bool HadZeroableElts = false;
24920   ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
24921                               int &Indice, int OpIdx, int OpEltIdx) {
24922     if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
24923       Indice = -2; // Zeroable element.
24924       HadZeroableElts = true;
24925     }
24926   });
24927 
  // Don't proceed unless we've refined at least one zeroable mask index.
24929   // If we didn't, then we are still trying to match the same shuffle mask
24930   // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
24931   // and evidently failed. Proceeding will lead to endless combine loops.
24932   if (!HadZeroableElts)
24933     return SDValue();
24934 
24935   // The shuffle may be more fine-grained than we want. Widen elements first.
24936   // FIXME: should we do this before manifesting zeroable shuffle mask indices?
24937   SmallVector<int, 16> ScaledMask;
24938   getShuffleMaskWithWidestElts(Mask, ScaledMask);
24939   assert(Mask.size() >= ScaledMask.size() &&
24940          Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
24941   int Prescale = Mask.size() / ScaledMask.size();
24942 
24943   NumElts = ScaledMask.size();
24944   EltSizeInBits *= Prescale;
24945 
24946   EVT PrescaledVT = EVT::getVectorVT(
24947       *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
24948       NumElts);
24949 
24950   if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
24951     return SDValue();
24952 
24953   // For example,
24954   // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
  // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1>! (for the same types)
24956   auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
24957     assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
24958            "Unexpected mask scaling factor.");
24959     ArrayRef<int> Mask = ScaledMask;
24960     for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
24961          SrcElt != NumSrcElts; ++SrcElt) {
24962       // Analyze the shuffle mask in Scale-sized chunks.
24963       ArrayRef<int> MaskChunk = Mask.take_front(Scale);
24964       assert(MaskChunk.size() == Scale && "Unexpected mask size.");
24965       Mask = Mask.drop_front(MaskChunk.size());
      // The first index in this chunk must be SrcElt, but not zero!
24967       // FIXME: undef should be fine, but that results in more-defined result.
24968       if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
24969         return false;
24970       // The rest of the indices in this chunk must be zeros.
24971       // FIXME: undef should be fine, but that results in more-defined result.
24972       if (!all_of(MaskChunk.drop_front(1),
24973                   [](int Indice) { return Indice == -2; }))
24974         return false;
24975     }
24976     assert(Mask.empty() && "Did not process the whole mask?");
24977     return true;
24978   };
24979 
24980   unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
24981   for (bool Commuted : {false, true}) {
24982     SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
24983     if (Commuted)
24984       ShuffleVectorSDNode::commuteMask(ScaledMask);
24985     std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24986         Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
24987         LegalOperations);
24988     if (OutVT)
24989       return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
24990                                             DAG.getBitcast(PrescaledVT, Op)));
24991   }
24992   return SDValue();
24993 }
24994 
24995 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
24996 // each source element of a large type into the lowest elements of a smaller
24997 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we should
// be able to remove it.
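// e.g. shuffle<0,2,-1,-1> (v4i32 bitcast (v2i64 zero_extend_vector_inreg
//      (v4i32 X))) --> v4i32 X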
25000 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25001                                         SelectionDAG &DAG) {
25002   EVT VT = SVN->getValueType(0);
25003   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25004 
25005   // TODO Add support for big-endian when we have a test case.
25006   if (!VT.isInteger() || IsBigEndian)
25007     return SDValue();
25008 
25009   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25010 
25011   unsigned Opcode = N0.getOpcode();
25012   if (!ISD::isExtVecInRegOpcode(Opcode))
25013     return SDValue();
25014 
25015   SDValue N00 = N0.getOperand(0);
25016   ArrayRef<int> Mask = SVN->getMask();
25017   unsigned NumElts = VT.getVectorNumElements();
25018   unsigned EltSizeInBits = VT.getScalarSizeInBits();
25019   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25020   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25021 
25022   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25023     return SDValue();
25024   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25025 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25027   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25028   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25029   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25030     for (unsigned i = 0; i != NumElts; ++i) {
25031       if (Mask[i] < 0)
25032         continue;
25033       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25034         continue;
25035       return false;
25036     }
25037     return true;
25038   };
25039 
25040   // At the moment we just handle the case where we've truncated back to the
25041   // same size as before the extension.
25042   // TODO: handle more extension/truncation cases as cases arise.
25043   if (EltSizeInBits != ExtSrcSizeInBits)
25044     return SDValue();
25045 
25046   // We can remove *extend_vector_inreg only if the truncation happens at
25047   // the same scale as the extension.
25048   if (isTruncate(ExtScale))
25049     return DAG.getBitcast(VT, N00);
25050 
25051   return SDValue();
25052 }
25053 
25054 // Combine shuffles of splat-shuffles of the form:
25055 // shuffle (shuffle V, undef, splat-mask), undef, M
25056 // If splat-mask contains undef elements, we need to be careful about
// introducing undefs in the folded mask which are not the result of composing
25058 // the masks of the shuffles.
25059 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25060                                         SelectionDAG &DAG) {
25061   EVT VT = Shuf->getValueType(0);
25062   unsigned NumElts = VT.getVectorNumElements();
25063 
25064   if (!Shuf->getOperand(1).isUndef())
25065     return SDValue();
25066 
  // See if this unary non-splat shuffle actually *is* a splat shuffle in
  // disguise, with all demanded elements being identical.
25069   // FIXME: this can be done per-operand.
25070   if (!Shuf->isSplat()) {
25071     APInt DemandedElts(NumElts, 0);
25072     for (int Idx : Shuf->getMask()) {
25073       if (Idx < 0)
25074         continue; // Ignore sentinel indices.
      assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
25076       DemandedElts.setBit(Idx);
25077     }
25078     assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25079     APInt UndefElts;
25080     if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25081       // Even if all demanded elements are splat, some of them could be undef.
25082       // Which lowest demanded element is *not* known-undef?
25083       std::optional<unsigned> MinNonUndefIdx;
25084       for (int Idx : Shuf->getMask()) {
25085         if (Idx < 0 || UndefElts[Idx])
25086           continue; // Ignore sentinel indices, and undef elements.
25087         MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25088       }
25089       if (!MinNonUndefIdx)
25090         return DAG.getUNDEF(VT); // All undef - result is undef.
25091       assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25092       SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25093                                     Shuf->getMask().end());
25094       for (int &Idx : SplatMask) {
25095         if (Idx < 0)
25096           continue; // Passthrough sentinel indices.
25097         // Otherwise, just pick the lowest demanded non-undef element.
25098         // Or sentinel undef, if we know we'd pick a known-undef element.
25099         Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25100       }
25101       assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25102       return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25103                                   Shuf->getOperand(1), SplatMask);
25104     }
25105   }
25106 
  // If the inner operand is a known splat with no undefs, just return that
  // directly.
25108   // TODO: Create DemandedElts mask from Shuf's mask.
25109   // TODO: Allow undef elements and merge with the shuffle code below.
25110   if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25111     return Shuf->getOperand(0);
25112 
25113   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25114   if (!Splat || !Splat->isSplat())
25115     return SDValue();
25116 
25117   ArrayRef<int> ShufMask = Shuf->getMask();
25118   ArrayRef<int> SplatMask = Splat->getMask();
25119   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25120 
25121   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25122   // every undef mask element in the splat-shuffle has a corresponding undef
25123   // element in the user-shuffle's mask or if the composition of mask elements
25124   // would result in undef.
25125   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25126   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25127   //   In this case it is not legal to simplify to the splat-shuffle because we
25128   //   may be exposing the users of the shuffle an undef element at index 1
25129   //   which was not there before the combine.
25130   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25131   //   In this case the composition of masks yields SplatMask, so it's ok to
25132   //   simplify to the splat-shuffle.
25133   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25134   //   In this case the composed mask includes all undef elements of SplatMask
25135   //   and in addition sets element zero to undef. It is safe to simplify to
25136   //   the splat-shuffle.
25137   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25138                                        ArrayRef<int> SplatMask) {
25139     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25140       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25141           SplatMask[UserMask[i]] != -1)
25142         return false;
25143     return true;
25144   };
25145   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25146     return Shuf->getOperand(0);
25147 
25148   // Create a new shuffle with a mask that is composed of the two shuffles'
25149   // masks.
25150   SmallVector<int, 32> NewMask;
25151   for (int Idx : ShufMask)
25152     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25153 
25154   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25155                               Splat->getOperand(0), Splat->getOperand(1),
25156                               NewMask);
25157 }
25158 
// Combine shuffles of bitcasts into a shuffle of the bitcast type, provided
// the mask can be treated as operating on the larger element type.
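// e.g. shuffle (v4i32 bitcast (v2i64 X)), (v4i32 bitcast (v2i64 Y)),
//      <2,3,6,7> --> v4i32 bitcast (shuffle X, Y, <1,3>)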
25161 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25162                                        SelectionDAG &DAG,
25163                                        const TargetLowering &TLI,
25164                                        bool LegalOperations) {
25165   SDValue Op0 = SVN->getOperand(0);
25166   SDValue Op1 = SVN->getOperand(1);
25167   EVT VT = SVN->getValueType(0);
25168   if (Op0.getOpcode() != ISD::BITCAST)
25169     return SDValue();
25170   EVT InVT = Op0.getOperand(0).getValueType();
25171   if (!InVT.isVector() ||
25172       (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25173                           Op1.getOperand(0).getValueType() != InVT)))
25174     return SDValue();
25175   if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25176       (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25177     return SDValue();
25178 
25179   int VTLanes = VT.getVectorNumElements();
25180   int InLanes = InVT.getVectorNumElements();
25181   if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25182       (LegalOperations &&
25183        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25184     return SDValue();
25185   int Factor = VTLanes / InLanes;
25186 
  // Check that each group of lanes in the mask is either all-undef or makes a
  // valid mask for the wider lane type.
25189   ArrayRef<int> Mask = SVN->getMask();
25190   SmallVector<int> NewMask;
25191   if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25192     return SDValue();
25193 
25194   if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25195     return SDValue();
25196 
25197   // Create the new shuffle with the new mask and bitcast it back to the
25198   // original type.
25199   SDLoc DL(SVN);
25200   Op0 = Op0.getOperand(0);
25201   Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25202   SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25203   return DAG.getBitcast(VT, NewShuf);
25204 }
25205 
25206 /// Combine shuffle of shuffle of the form:
25207 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
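/// e.g. shuf (shuf X, undef, <1,1,2,3>), undef, <0,1,0,1>
///        --> shuf X, undef, <1,1,1,1>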
25208 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25209                                      SelectionDAG &DAG) {
25210   if (!OuterShuf->getOperand(1).isUndef())
25211     return SDValue();
25212   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25213   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25214     return SDValue();
25215 
25216   ArrayRef<int> OuterMask = OuterShuf->getMask();
25217   ArrayRef<int> InnerMask = InnerShuf->getMask();
25218   unsigned NumElts = OuterMask.size();
25219   assert(NumElts == InnerMask.size() && "Mask length mismatch");
25220   SmallVector<int, 32> CombinedMask(NumElts, -1);
25221   int SplatIndex = -1;
25222   for (unsigned i = 0; i != NumElts; ++i) {
25223     // Undef lanes remain undef.
25224     int OuterMaskElt = OuterMask[i];
25225     if (OuterMaskElt == -1)
25226       continue;
25227 
25228     // Peek through the shuffle masks to get the underlying source element.
25229     int InnerMaskElt = InnerMask[OuterMaskElt];
25230     if (InnerMaskElt == -1)
25231       continue;
25232 
25233     // Initialize the splatted element.
25234     if (SplatIndex == -1)
25235       SplatIndex = InnerMaskElt;
25236 
25237     // Non-matching index - this is not a splat.
25238     if (SplatIndex != InnerMaskElt)
25239       return SDValue();
25240 
25241     CombinedMask[i] = InnerMaskElt;
25242   }
25243   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25244           getSplatIndex(CombinedMask) != -1) &&
25245          "Expected a splat mask");
25246 
25247   // TODO: The transform may be a win even if the mask is not legal.
25248   EVT VT = OuterShuf->getValueType(0);
25249   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25250   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25251     return SDValue();
25252 
25253   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25254                               InnerShuf->getOperand(1), CombinedMask);
25255 }
25256 
25257 /// If the shuffle mask is taking exactly one element from the first vector
25258 /// operand and passing through all other elements from the second vector
25259 /// operand, return the index of the mask element that is choosing an element
25260 /// from the first operand. Otherwise, return -1.
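/// e.g. with 4-element operands, mask <4,5,2,7> takes only element 2 from
/// operand 0 and passes lanes 0, 1 and 3 through from operand 1, so this
/// returns 2; mask <0,1,6,7> takes two elements from operand 0, so it
/// returns -1.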
25261 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25262   int MaskSize = Mask.size();
25263   int EltFromOp0 = -1;
25264   // TODO: This does not match if there are undef elements in the shuffle mask.
25265   // Should we ignore undefs in the shuffle mask instead? The trade-off is
25266   // removing an instruction (a shuffle), but losing the knowledge that some
25267   // vector lanes are not needed.
25268   for (int i = 0; i != MaskSize; ++i) {
25269     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25270       // We're looking for a shuffle of exactly one element from operand 0.
25271       if (EltFromOp0 != -1)
25272         return -1;
25273       EltFromOp0 = i;
25274     } else if (Mask[i] != i + MaskSize) {
25275       // Nothing from operand 1 can change lanes.
25276       return -1;
25277     }
25278   }
25279   return EltFromOp0;
25280 }
25281 
25282 /// If a shuffle inserts exactly one element from a source vector operand into
25283 /// another vector operand and we can access the specified element as a scalar,
25284 /// then we can eliminate the shuffle.
25285 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25286                                       SelectionDAG &DAG) {
25287   // First, check if we are taking one element of a vector and shuffling that
25288   // element into another vector.
25289   ArrayRef<int> Mask = Shuf->getMask();
25290   SmallVector<int, 16> CommutedMask(Mask);
25291   SDValue Op0 = Shuf->getOperand(0);
25292   SDValue Op1 = Shuf->getOperand(1);
25293   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25294   if (ShufOp0Index == -1) {
25295     // Commute mask and check again.
25296     ShuffleVectorSDNode::commuteMask(CommutedMask);
25297     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25298     if (ShufOp0Index == -1)
25299       return SDValue();
25300     // Commute operands to match the commuted shuffle mask.
25301     std::swap(Op0, Op1);
25302     Mask = CommutedMask;
25303   }
25304 
25305   // The shuffle inserts exactly one element from operand 0 into operand 1.
25306   // Now see if we can access that element as a scalar via a real insert element
25307   // instruction.
25308   // TODO: We can try harder to locate the element as a scalar. Examples: it
25309   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25310   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25311          "Shuffle mask value must be from operand 0");
25312   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25313     return SDValue();
25314 
25315   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25316   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25317     return SDValue();
25318 
25319   // There's an existing insertelement with constant insertion index, so we
25320   // don't need to check the legality/profitability of a replacement operation
25321   // that differs at most in the constant value. The target should be able to
25322   // lower any of those in a similar way. If not, legalization will expand this
25323   // to a scalar-to-vector plus shuffle.
25324   //
25325   // Note that the shuffle may move the scalar from the position that the insert
25326   // element used. Therefore, our new insert element occurs at the shuffle's
25327   // mask index value, not the insert's index value.
25328   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
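  // e.g. shuffle (insertelt v1, x, 3), v2, <4,5,3,7> --> insertelt v2, x, 2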
25329   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25330   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25331                      Op1, Op0.getOperand(1), NewInsIndex);
25332 }
25333 
25334 /// If we have a unary shuffle of a shuffle, see if it can be folded away
25335 /// completely. This has the potential to lose undef knowledge because the first
25336 /// shuffle may not have an undef mask element where the second one does. So
25337 /// only call this after doing simplifications based on demanded elements.
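/// e.g. shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2> --> shuf0 X, Y,
/// <0,0,2,2>, since every outer lane selects the same source element as the
/// corresponding inner lane.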
25338 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25339   // shuf (shuf0 X, Y, Mask0), undef, Mask
25340   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25341   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25342     return SDValue();
25343 
25344   ArrayRef<int> Mask = Shuf->getMask();
25345   ArrayRef<int> Mask0 = Shuf0->getMask();
25346   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25347     // Ignore undef elements.
25348     if (Mask[i] == -1)
25349       continue;
25350     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25351 
25352     // Is the element of the shuffle operand chosen by this shuffle the same as
25353     // the element chosen by the shuffle operand itself?
25354     if (Mask0[Mask[i]] != Mask0[i])
25355       return SDValue();
25356   }
25357   // Every element of this shuffle is identical to the result of the previous
25358   // shuffle, so we can replace this value.
25359   return Shuf->getOperand(0);
25360 }
25361 
25362 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25363   EVT VT = N->getValueType(0);
25364   unsigned NumElts = VT.getVectorNumElements();
25365 
25366   SDValue N0 = N->getOperand(0);
25367   SDValue N1 = N->getOperand(1);
25368 
25369   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25370 
25371   // Canonicalize shuffle undef, undef -> undef
25372   if (N0.isUndef() && N1.isUndef())
25373     return DAG.getUNDEF(VT);
25374 
25375   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25376 
25377   // Canonicalize shuffle v, v -> v, undef
25378   if (N0 == N1)
25379     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25380                                 createUnaryMask(SVN->getMask(), NumElts));
25381 
25382   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
25383   if (N0.isUndef())
25384     return DAG.getCommutedVectorShuffle(*SVN);
25385 
25386   // Remove references to rhs if it is undef
25387   if (N1.isUndef()) {
25388     bool Changed = false;
25389     SmallVector<int, 8> NewMask;
25390     for (unsigned i = 0; i != NumElts; ++i) {
25391       int Idx = SVN->getMaskElt(i);
25392       if (Idx >= (int)NumElts) {
25393         Idx = -1;
25394         Changed = true;
25395       }
25396       NewMask.push_back(Idx);
25397     }
25398     if (Changed)
25399       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25400   }
25401 
25402   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25403     return InsElt;
25404 
25405   // A shuffle of a single vector that is a splatted value can always be folded.
25406   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25407     return V;
25408 
25409   if (SDValue V = formSplatFromShuffles(SVN, DAG))
25410     return V;
25411 
25412   // If it is a splat, check if the argument vector is another splat or a
25413   // build_vector.
25414   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25415     int SplatIndex = SVN->getSplatIndex();
25416     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25417         TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25418       // splat (vector_bo L, R), Index -->
25419       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
25420       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25421       SDLoc DL(N);
25422       EVT EltVT = VT.getScalarType();
25423       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25424       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25425       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25426       SDValue NewBO =
25427           DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25428       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25429       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25430       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25431     }
25432 
25433     // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25434     // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25435     if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25436         N0.hasOneUse()) {
25437       if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25438         return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25439 
25440       if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25441         if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25442           if (Idx->getAPIntValue() == SplatIndex)
25443             return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25444 
      // Look through a bitcast if little-endian and splatting lane 0, through
      // to a scalar_to_vector or a build_vector.
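      // Illustrative example (hypothetical types): on a little-endian target,
      // splatting lane 0 of (v4i32 (bitcast (v2i64 scalar_to_vector X)))
      // reads the low 32 bits of X, so the whole pattern can become a v4i32
      // splat build_vector of X zero-extended or truncated to the lane type.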
25447       if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25448           SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25449           (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25450            N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25451         EVT N00VT = N0.getOperand(0).getValueType();
25452         if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25453             VT.isInteger() && N00VT.isInteger()) {
25454           EVT InVT =
25455               TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25456           SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25457                                           SDLoc(N), InVT);
25458           return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25459         }
25460       }
25461     }
25462 
25463     // If this is a bit convert that changes the element type of the vector but
25464     // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
25466     SDNode *V = N0.getNode();
25467     if (V->getOpcode() == ISD::BITCAST) {
25468       SDValue ConvInput = V->getOperand(0);
25469       if (ConvInput.getValueType().isVector() &&
25470           ConvInput.getValueType().getVectorNumElements() == NumElts)
25471         V = ConvInput.getNode();
25472     }
25473 
25474     if (V->getOpcode() == ISD::BUILD_VECTOR) {
25475       assert(V->getNumOperands() == NumElts &&
25476              "BUILD_VECTOR has wrong number of operands");
25477       SDValue Base;
25478       bool AllSame = true;
25479       for (unsigned i = 0; i != NumElts; ++i) {
25480         if (!V->getOperand(i).isUndef()) {
25481           Base = V->getOperand(i);
25482           break;
25483         }
25484       }
25485       // Splat of <u, u, u, u>, return <u, u, u, u>
25486       if (!Base.getNode())
25487         return N0;
25488       for (unsigned i = 0; i != NumElts; ++i) {
25489         if (V->getOperand(i) != Base) {
25490           AllSame = false;
25491           break;
25492         }
25493       }
25494       // Splat of <x, x, x, x>, return <x, x, x, x>
25495       if (AllSame)
25496         return N0;
25497 
25498       // Canonicalize any other splat as a build_vector.
25499       SDValue Splatted = V->getOperand(SplatIndex);
25500       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25501       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25502 
25503       // We may have jumped through bitcasts, so the type of the
25504       // BUILD_VECTOR may not match the type of the shuffle.
25505       if (V->getValueType(0) != VT)
25506         NewBV = DAG.getBitcast(VT, NewBV);
25507       return NewBV;
25508     }
25509   }
25510 
25511   // Simplify source operands based on shuffle mask.
25512   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25513     return SDValue(N, 0);
25514 
25515   // This is intentionally placed after demanded elements simplification because
25516   // it could eliminate knowledge of undef elements created by this shuffle.
25517   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25518     return ShufOp;
25519 
25520   // Match shuffles that can be converted to any_vector_extend_in_reg.
25521   if (SDValue V =
25522           combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25523     return V;
25524 
25525   // Combine "truncate_vector_in_reg" style shuffles.
25526   if (SDValue V = combineTruncationShuffle(SVN, DAG))
25527     return V;
25528 
25529   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25530       Level < AfterLegalizeVectorOps &&
25531       (N1.isUndef() ||
25532       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25533        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25534     if (SDValue V = partitionShuffleOfConcats(N, DAG))
25535       return V;
25536   }
25537 
25538   // A shuffle of a concat of the same narrow vector can be reduced to use
25539   // only low-half elements of a concat with undef:
25540   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
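  //
  // Illustrative example (hypothetical mask): with X = v2i32, mask <2,3,0,1>
  // on (concat X, X) rewrites to mask <0,1,0,1> on (concat X, undef), since
  // elements 2 and 3 of the concat are the same values as elements 0 and 1.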
25541   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25542       N0.getNumOperands() == 2 &&
25543       N0.getOperand(0) == N0.getOperand(1)) {
25544     int HalfNumElts = (int)NumElts / 2;
25545     SmallVector<int, 8> NewMask;
25546     for (unsigned i = 0; i != NumElts; ++i) {
25547       int Idx = SVN->getMaskElt(i);
25548       if (Idx >= HalfNumElts) {
25549         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25550         Idx -= HalfNumElts;
25551       }
25552       NewMask.push_back(Idx);
25553     }
25554     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25555       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25556       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25557                                    N0.getOperand(0), UndefVec);
25558       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25559     }
25560   }
25561 
25562   // See if we can replace a shuffle with an insert_subvector.
25563   // e.g. v2i32 into v8i32:
25564   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25565   // --> insert_subvector(lhs,rhs1,4).
25566   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25567       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25568     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25569       // Ensure RHS subvectors are legal.
25570       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25571       EVT SubVT = RHS.getOperand(0).getValueType();
25572       int NumSubVecs = RHS.getNumOperands();
25573       int NumSubElts = SubVT.getVectorNumElements();
25574       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25575       if (!TLI.isTypeLegal(SubVT))
25576         return SDValue();
25577 
      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25579       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25580         return SDValue();
25581 
25582       // Search [NumSubElts] spans for RHS sequence.
25583       // TODO: Can we avoid nested loops to increase performance?
25584       SmallVector<int> InsertionMask(NumElts);
25585       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25586         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25587           // Reset mask to identity.
25588           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25589 
25590           // Add subvector insertion.
25591           std::iota(InsertionMask.begin() + SubIdx,
25592                     InsertionMask.begin() + SubIdx + NumSubElts,
25593                     NumElts + (SubVec * NumSubElts));
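
          // For illustration (hypothetical sizes): with NumElts = 8,
          // NumSubElts = 2, SubVec = 1, and SubIdx = 4, this produces the
          // reference mask <0,1,2,3,10,11,6,7> - identity except that lanes
          // 4-5 take subvector 1 of RHS.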
25594 
25595           // See if the shuffle mask matches the reference insertion mask.
25596           bool MatchingShuffle = true;
25597           for (int i = 0; i != (int)NumElts; ++i) {
25598             int ExpectIdx = InsertionMask[i];
25599             int ActualIdx = Mask[i];
25600             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25601               MatchingShuffle = false;
25602               break;
25603             }
25604           }
25605 
25606           if (MatchingShuffle)
25607             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25608                                RHS.getOperand(SubVec),
25609                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25610         }
25611       }
25612       return SDValue();
25613     };
25614     ArrayRef<int> Mask = SVN->getMask();
25615     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25616       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25617         return InsertN1;
25618     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25619       SmallVector<int> CommuteMask(Mask);
25620       ShuffleVectorSDNode::commuteMask(CommuteMask);
25621       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25622         return InsertN0;
25623     }
25624   }
25625 
  // If we're not performing a select/blend shuffle, see if we can convert the
  // shuffle into an AND node, where all the out-of-lane elements are known
  // zero.
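  // Illustrative example (hypothetical mask): v4i32 shuffle(X, Y, <0,6,2,7>)
  // can become and(X, <-1,0,-1,0>) when lanes 2 and 3 of Y are known zero,
  // since the lanes taken from Y then only ever contribute zeros.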
25628   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25629     bool IsInLaneMask = true;
25630     ArrayRef<int> Mask = SVN->getMask();
25631     SmallVector<int, 16> ClearMask(NumElts, -1);
25632     APInt DemandedLHS = APInt::getZero(NumElts);
25633     APInt DemandedRHS = APInt::getZero(NumElts);
25634     for (int I = 0; I != (int)NumElts; ++I) {
25635       int M = Mask[I];
25636       if (M < 0)
25637         continue;
25638       ClearMask[I] = M == I ? I : (I + NumElts);
25639       IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25640       if (M != I) {
25641         APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25642         Demanded.setBit(M % NumElts);
25643       }
25644     }
25645     // TODO: Should we try to mask with N1 as well?
25646     if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25647         (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25648         (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25649       SDLoc DL(N);
25650       EVT IntVT = VT.changeVectorElementTypeToInteger();
25651       EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
      // Transform the type to a legal type so that the buildvector constant
      // elements are not illegal. Make sure that the result is no smaller
      // than the original type, in case the value is split into two
      // (e.g. i64->i32).
25655       if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25656         IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25657       if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25658         SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25659         SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25660         SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25661         for (int I = 0; I != (int)NumElts; ++I)
25662           if (0 <= Mask[I])
25663             AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25664 
25665         // See if a clear mask is legal instead of going via
25666         // XformToShuffleWithZero which loses UNDEF mask elements.
25667         if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25668           return DAG.getBitcast(
25669               VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25670                                       DAG.getConstant(0, DL, IntVT), ClearMask));
25671 
25672         if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25673           return DAG.getBitcast(
25674               VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25675                               DAG.getBuildVector(IntVT, DL, AndMask)));
25676       }
25677     }
25678   }
25679 
25680   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25681   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25682   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25683     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25684       return Res;
25685 
25686   // If this shuffle only has a single input that is a bitcasted shuffle,
25687   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25688   // back to their original types.
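  //
  // Illustrative example (hypothetical types): an outer v4i32 shuffle with
  // mask <2,3,0,1> of (bitcast (v2i64 shuffle X, Y, <1,0>)) merges to a
  // single v4i32 shuffle of the bitcast inputs with mask <0,1,2,3>, because
  // the two lane swaps cancel out.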
25689   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25690       N1.isUndef() && Level < AfterLegalizeVectorOps &&
25691       TLI.isTypeLegal(VT)) {
25692 
25693     SDValue BC0 = peekThroughOneUseBitcasts(N0);
25694     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25695       EVT SVT = VT.getScalarType();
25696       EVT InnerVT = BC0->getValueType(0);
25697       EVT InnerSVT = InnerVT.getScalarType();
25698 
25699       // Determine which shuffle works with the smaller scalar type.
25700       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25701       EVT ScaleSVT = ScaleVT.getScalarType();
25702 
25703       if (TLI.isTypeLegal(ScaleVT) &&
25704           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25705           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25706         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25707         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25708 
25709         // Scale the shuffle masks to the smaller scalar type.
25710         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25711         SmallVector<int, 8> InnerMask;
25712         SmallVector<int, 8> OuterMask;
25713         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25714         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25715 
25716         // Merge the shuffle masks.
25717         SmallVector<int, 8> NewMask;
25718         for (int M : OuterMask)
25719           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25720 
25721         // Test for shuffle mask legality over both commutations.
25722         SDValue SV0 = BC0->getOperand(0);
25723         SDValue SV1 = BC0->getOperand(1);
25724         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25725         if (!LegalMask) {
25726           std::swap(SV0, SV1);
25727           ShuffleVectorSDNode::commuteMask(NewMask);
25728           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25729         }
25730 
25731         if (LegalMask) {
25732           SV0 = DAG.getBitcast(ScaleVT, SV0);
25733           SV1 = DAG.getBitcast(ScaleVT, SV1);
25734           return DAG.getBitcast(
25735               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25736         }
25737       }
25738     }
25739   }
25740 
25741   // Match shuffles of bitcasts, so long as the mask can be treated as the
25742   // larger type.
25743   if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25744     return V;
25745 
25746   // Compute the combined shuffle mask for a shuffle with SV0 as the first
25747   // operand, and SV1 as the second operand.
25748   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25749   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
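  //
  // Illustrative example (hypothetical masks): for
  //   shuffle(shuffle(A, B, <0,5,2,7>), B, <0,4,2,6>)
  // the merged result is shuffle(A, B, <0,4,2,6>), with SV0 = A and SV1 = B.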
25750   auto MergeInnerShuffle =
25751       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25752                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
25753                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25754                      SmallVectorImpl<int> &Mask) -> bool {
25755     // Don't try to fold splats; they're likely to simplify somehow, or they
25756     // might be free.
25757     if (OtherSVN->isSplat())
25758       return false;
25759 
25760     SV0 = SV1 = SDValue();
25761     Mask.clear();
25762 
25763     for (unsigned i = 0; i != NumElts; ++i) {
25764       int Idx = SVN->getMaskElt(i);
25765       if (Idx < 0) {
25766         // Propagate Undef.
25767         Mask.push_back(Idx);
25768         continue;
25769       }
25770 
25771       if (Commute)
25772         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25773 
25774       SDValue CurrentVec;
25775       if (Idx < (int)NumElts) {
25776         // This shuffle index refers to the inner shuffle N0. Lookup the inner
25777         // shuffle mask to identify which vector is actually referenced.
25778         Idx = OtherSVN->getMaskElt(Idx);
25779         if (Idx < 0) {
25780           // Propagate Undef.
25781           Mask.push_back(Idx);
25782           continue;
25783         }
25784         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25785                                           : OtherSVN->getOperand(1);
25786       } else {
25787         // This shuffle index references an element within N1.
25788         CurrentVec = N1;
25789       }
25790 
25791       // Simple case where 'CurrentVec' is UNDEF.
25792       if (CurrentVec.isUndef()) {
25793         Mask.push_back(-1);
25794         continue;
25795       }
25796 
25797       // Canonicalize the shuffle index. We don't know yet if CurrentVec
25798       // will be the first or second operand of the combined shuffle.
25799       Idx = Idx % NumElts;
25800       if (!SV0.getNode() || SV0 == CurrentVec) {
25801         // Ok. CurrentVec is the left hand side.
25802         // Update the mask accordingly.
25803         SV0 = CurrentVec;
25804         Mask.push_back(Idx);
25805         continue;
25806       }
25807       if (!SV1.getNode() || SV1 == CurrentVec) {
25808         // Ok. CurrentVec is the right hand side.
25809         // Update the mask accordingly.
25810         SV1 = CurrentVec;
25811         Mask.push_back(Idx + NumElts);
25812         continue;
25813       }
25814 
25815       // Last chance - see if the vector is another shuffle and if it
25816       // uses one of the existing candidate shuffle ops.
25817       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25818         int InnerIdx = CurrentSVN->getMaskElt(Idx);
25819         if (InnerIdx < 0) {
25820           Mask.push_back(-1);
25821           continue;
25822         }
25823         SDValue InnerVec = (InnerIdx < (int)NumElts)
25824                                ? CurrentSVN->getOperand(0)
25825                                : CurrentSVN->getOperand(1);
25826         if (InnerVec.isUndef()) {
25827           Mask.push_back(-1);
25828           continue;
25829         }
25830         InnerIdx %= NumElts;
25831         if (InnerVec == SV0) {
25832           Mask.push_back(InnerIdx);
25833           continue;
25834         }
25835         if (InnerVec == SV1) {
25836           Mask.push_back(InnerIdx + NumElts);
25837           continue;
25838         }
25839       }
25840 
25841       // Bail out if we cannot convert the shuffle pair into a single shuffle.
25842       return false;
25843     }
25844 
25845     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25846       return true;
25847 
25848     // Avoid introducing shuffles with illegal mask.
25849     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25850     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25851     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25852     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25853     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25854     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25855     if (TLI.isShuffleMaskLegal(Mask, VT))
25856       return true;
25857 
25858     std::swap(SV0, SV1);
25859     ShuffleVectorSDNode::commuteMask(Mask);
25860     return TLI.isShuffleMaskLegal(Mask, VT);
25861   };
25862 
25863   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25864     // Canonicalize shuffles according to rules:
25865     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25866     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25867     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25868     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25869         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25870       // The incoming shuffle must be of the same type as the result of the
25871       // current shuffle.
25872       assert(N1->getOperand(0).getValueType() == VT &&
25873              "Shuffle types don't match");
25874 
25875       SDValue SV0 = N1->getOperand(0);
25876       SDValue SV1 = N1->getOperand(1);
25877       bool HasSameOp0 = N0 == SV0;
25878       bool IsSV1Undef = SV1.isUndef();
25879       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
25880         // Commute the operands of this shuffle so merging below will trigger.
25881         return DAG.getCommutedVectorShuffle(*SVN);
25882     }
25883 
25884     // Canonicalize splat shuffles to the RHS to improve merging below.
25885     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
25886     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
25887         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25888         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
25889         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
25890       return DAG.getCommutedVectorShuffle(*SVN);
25891     }
25892 
25893     // Try to fold according to rules:
25894     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25895     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25896     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25897     // Don't try to fold shuffles with illegal type.
25898     // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
25900     for (int i = 0; i != 2; ++i) {
25901       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
25902           N->isOnlyUserOf(N->getOperand(i).getNode())) {
25903         // The incoming shuffle must be of the same type as the result of the
25904         // current shuffle.
25905         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
25906         assert(OtherSV->getOperand(0).getValueType() == VT &&
25907                "Shuffle types don't match");
25908 
25909         SDValue SV0, SV1;
25910         SmallVector<int, 4> Mask;
25911         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
25912                               SV0, SV1, Mask)) {
          // Check if all indices in Mask are Undef. If so, propagate Undef.
25914           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25915             return DAG.getUNDEF(VT);
25916 
25917           return DAG.getVectorShuffle(VT, SDLoc(N),
25918                                       SV0 ? SV0 : DAG.getUNDEF(VT),
25919                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
25920         }
25921       }
25922     }
25923 
    // Merge shuffles through binops if we are able to merge them with at
    // least one other shuffle.
25926     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
25927     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
25928     unsigned SrcOpcode = N0.getOpcode();
25929     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
25930         (N1.isUndef() ||
25931          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
25932       // Get binop source ops, or just pass on the undef.
25933       SDValue Op00 = N0.getOperand(0);
25934       SDValue Op01 = N0.getOperand(1);
25935       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
25936       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
25937       // TODO: We might be able to relax the VT check but we don't currently
25938       // have any isBinOp() that has different result/ops VTs so play safe until
25939       // we have test coverage.
25940       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
25941           Op01.getValueType() == VT && Op11.getValueType() == VT &&
25942           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
25943            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
25944            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
25945            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
25946         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
25947                                         SmallVectorImpl<int> &Mask, bool LeftOp,
25948                                         bool Commute) {
25949           SDValue InnerN = Commute ? N1 : N0;
25950           SDValue Op0 = LeftOp ? Op00 : Op01;
25951           SDValue Op1 = LeftOp ? Op10 : Op11;
25952           if (Commute)
25953             std::swap(Op0, Op1);
25954           // Only accept the merged shuffle if we don't introduce undef elements,
25955           // or the inner shuffle already contained undef elements.
25956           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
25957           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
25958                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
25959                                    Mask) &&
25960                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
25961                   llvm::none_of(Mask, [](int M) { return M < 0; }));
25962         };
25963 
25964         // Ensure we don't increase the number of shuffles - we must merge a
25965         // shuffle from at least one of the LHS and RHS ops.
25966         bool MergedLeft = false;
25967         SDValue LeftSV0, LeftSV1;
25968         SmallVector<int, 4> LeftMask;
25969         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
25970             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
25971           MergedLeft = true;
25972         } else {
25973           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
25974           LeftSV0 = Op00, LeftSV1 = Op10;
25975         }
25976 
25977         bool MergedRight = false;
25978         SDValue RightSV0, RightSV1;
25979         SmallVector<int, 4> RightMask;
25980         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
25981             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
25982           MergedRight = true;
25983         } else {
25984           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
25985           RightSV0 = Op01, RightSV1 = Op11;
25986         }
25987 
25988         if (MergedLeft || MergedRight) {
25989           SDLoc DL(N);
25990           SDValue LHS = DAG.getVectorShuffle(
25991               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
25992               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
25993           SDValue RHS = DAG.getVectorShuffle(
25994               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
25995               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
25996           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
25997         }
25998       }
25999     }
26000   }
26001 
26002   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26003     return V;
26004 
26005   // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26006   // Perform this really late, because it could eliminate knowledge
26007   // of undef elements created by this shuffle.
26008   if (Level < AfterLegalizeTypes)
26009     if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26010                                                           LegalOperations))
26011       return V;
26012 
26013   return SDValue();
26014 }
26015 
26016 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26017   EVT VT = N->getValueType(0);
26018   if (!VT.isFixedLengthVector())
26019     return SDValue();
26020 
26021   // Try to convert a scalar binop with an extracted vector element to a vector
26022   // binop. This is intended to reduce potentially expensive register moves.
26023   // TODO: Check if both operands are extracted.
  // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26025   // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26026   SDValue Scalar = N->getOperand(0);
26027   unsigned Opcode = Scalar.getOpcode();
26028   EVT VecEltVT = VT.getScalarType();
26029   if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26030       TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26031       Scalar.getOperand(0).getValueType() == VecEltVT &&
26032       Scalar.getOperand(1).getValueType() == VecEltVT &&
26033       Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26034       Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26035       DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26036     // Match an extract element and get a shuffle mask equivalent.
26037     SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26038 
26039     for (int i : {0, 1}) {
26040       // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26041       // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
26042       SDValue EE = Scalar.getOperand(i);
26043       auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26044       if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26045           EE.getOperand(0).getValueType() == VT &&
26046           isa<ConstantSDNode>(EE.getOperand(1))) {
26047         // Mask = {ExtractIndex, undef, undef....}
26048         ShufMask[0] = EE.getConstantOperandVal(1);
26049         // Make sure the shuffle is legal if we are crossing lanes.
26050         if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26051           SDLoc DL(N);
26052           SDValue V[] = {EE.getOperand(0),
26053                          DAG.getConstant(C->getAPIntValue(), DL, VT)};
26054           SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26055           return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26056                                       ShufMask);
26057         }
26058       }
26059     }
26060   }
26061 
26062   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26063   // with a VECTOR_SHUFFLE and possible truncate.
26064   if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26065       !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26066     return SDValue();
26067 
26068   // If we have an implicit truncate, truncate here if it is legal.
26069   if (VecEltVT != Scalar.getValueType() &&
26070       Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26071     SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26072     return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26073   }
26074 
26075   auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26076   if (!ExtIndexC)
26077     return SDValue();
26078 
26079   SDValue SrcVec = Scalar.getOperand(0);
26080   EVT SrcVT = SrcVec.getValueType();
26081   unsigned SrcNumElts = SrcVT.getVectorNumElements();
26082   unsigned VTNumElts = VT.getVectorNumElements();
26083   if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26084     // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26085     SmallVector<int, 8> Mask(SrcNumElts, -1);
26086     Mask[0] = ExtIndexC->getZExtValue();
26087     SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26088         SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26089     if (!LegalShuffle)
26090       return SDValue();
26091 
26092     // If the initial vector is the same size, the shuffle is the result.
26093     if (VT == SrcVT)
26094       return LegalShuffle;
26095 
26096     // If not, shorten the shuffled vector.
26097     if (VTNumElts != SrcNumElts) {
26098       SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26099       EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26100                                    SrcVT.getVectorElementType(), VTNumElts);
26101       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26102                          ZeroIdx);
26103     }
26104   }
26105 
26106   return SDValue();
26107 }
26108 
26109 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26110   EVT VT = N->getValueType(0);
26111   SDValue N0 = N->getOperand(0);
26112   SDValue N1 = N->getOperand(1);
26113   SDValue N2 = N->getOperand(2);
26114   uint64_t InsIdx = N->getConstantOperandVal(2);
26115 
26116   // If inserting an UNDEF, just return the original vector.
26117   if (N1.isUndef())
26118     return N0;
26119 
26120   // If this is an insert of an extracted vector into an undef vector, we can
26121   // just use the input to the extract if the types match, and can simplify
26122   // in some cases even if they don't.
26123   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26124       N1.getOperand(1) == N2) {
26125     EVT SrcVT = N1.getOperand(0).getValueType();
26126     if (SrcVT == VT)
26127       return N1.getOperand(0);
26128     // TODO: To remove the zero check, need to adjust the offset to
26129     // a multiple of the new src type.
26130     if (isNullConstant(N2) &&
26131         VT.isScalableVector() == SrcVT.isScalableVector()) {
26132       if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26133         return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26134                            VT, N0, N1.getOperand(0), N2);
26135       else
26136         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26137                            VT, N1.getOperand(0), N2);
26138     }
26139   }
26140 
26141   // Simplify scalar inserts into an undef vector:
26142   // insert_subvector undef, (splat X), N2 -> splat X
26143   if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26144     if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26145       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26146 
26147   // If we are inserting a bitcast value into an undef, with the same
26148   // number of elements, just use the bitcast input of the extract.
26149   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26150   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26151   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26152       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26153       N1.getOperand(0).getOperand(1) == N2 &&
26154       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26155           VT.getVectorElementCount() &&
26156       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26157           VT.getSizeInBits()) {
26158     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26159   }
26160 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
26163   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26164   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26165   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26166     SDValue CN0 = N0.getOperand(0);
26167     SDValue CN1 = N1.getOperand(0);
26168     EVT CN0VT = CN0.getValueType();
26169     EVT CN1VT = CN1.getValueType();
26170     if (CN0VT.isVector() && CN1VT.isVector() &&
26171         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26172         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26173       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26174                                       CN0.getValueType(), CN0, CN1, N2);
26175       return DAG.getBitcast(VT, NewINSERT);
26176     }
26177   }
26178 
26179   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26180   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26181   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26182   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26183       N0.getOperand(1).getValueType() == N1.getValueType() &&
26184       N0.getOperand(2) == N2)
26185     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26186                        N1, N2);
26187 
26188   // Eliminate an intermediate insert into an undef vector:
26189   // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26190   // insert_subvector undef, X, 0
26191   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26192       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26193       isNullConstant(N2))
26194     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26195                        N1.getOperand(1), N2);
26196 
26197   // Push subvector bitcasts to the output, adjusting the index as we go.
26198   // insert_subvector(bitcast(v), bitcast(s), c1)
26199   // -> bitcast(insert_subvector(v, s, c2))
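  //
  // Illustrative example (hypothetical types):
  //   insert_subvector (v4i64 bitcast (v8i32 V)), (v1i64 bitcast (v2i32 S)), 1
  // -> bitcast (v8i32 insert_subvector V, S, 2)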
26200   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26201       N1.getOpcode() == ISD::BITCAST) {
26202     SDValue N0Src = peekThroughBitcasts(N0);
26203     SDValue N1Src = peekThroughBitcasts(N1);
26204     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26205     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26206     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26207         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26208       EVT NewVT;
26209       SDLoc DL(N);
26210       SDValue NewIdx;
26211       LLVMContext &Ctx = *DAG.getContext();
26212       ElementCount NumElts = VT.getVectorElementCount();
26213       unsigned EltSizeInBits = VT.getScalarSizeInBits();
26214       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26215         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26216         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26217         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26218       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26219         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26220         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26221           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26222                                    NumElts.divideCoefficientBy(Scale));
26223           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26224         }
26225       }
26226       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26227         SDValue Res = DAG.getBitcast(NewVT, N0Src);
26228         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26229         return DAG.getBitcast(VT, Res);
26230       }
26231     }
26232   }
26233 
  // Canonicalize insert_subvector dag nodes.
  // Example:
  // (insert_subvector (insert_subvector A, B, Idx0), C, Idx1)
  // -> (insert_subvector (insert_subvector A, C, Idx1), B, Idx0)
  // when Idx1 < Idx0.
26238   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26239       N1.getValueType() == N0.getOperand(1).getValueType()) {
26240     unsigned OtherIdx = N0.getConstantOperandVal(2);
26241     if (InsIdx < OtherIdx) {
26242       // Swap nodes.
26243       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26244                                   N0.getOperand(0), N1, N2);
26245       AddToWorklist(NewOp.getNode());
26246       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26247                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26248     }
26249   }
26250 
26251   // If the input vector is a concatenation, and the insert replaces
26252   // one of the pieces, we can optimize into a single concat_vectors.
26253   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26254       N0.getOperand(0).getValueType() == N1.getValueType() &&
26255       N0.getOperand(0).getValueType().isScalableVector() ==
26256           N1.getValueType().isScalableVector()) {
26257     unsigned Factor = N1.getValueType().getVectorMinNumElements();
26258     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26259     Ops[InsIdx / Factor] = N1;
26260     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26261   }
26262 
26263   // Simplify source operands based on insertion.
26264   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26265     return SDValue(N, 0);
26266 
26267   return SDValue();
26268 }
26269 
26270 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26271   SDValue N0 = N->getOperand(0);
26272 
26273   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26274   if (N0->getOpcode() == ISD::FP16_TO_FP)
26275     return N0->getOperand(0);
26276 
26277   return SDValue();
26278 }
26279 
26280 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26281   auto Op = N->getOpcode();
26282   assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
26283          "opcode should be FP16_TO_FP or BF16_TO_FP.");
26284   SDValue N0 = N->getOperand(0);
26285 
26286   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26287   // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26288   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26289     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26290     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26291       return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26292     }
26293   }
26294 
26295   return SDValue();
26296 }
26297 
26298 SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26299   SDValue N0 = N->getOperand(0);
26300 
26301   // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26302   if (N0->getOpcode() == ISD::BF16_TO_FP)
26303     return N0->getOperand(0);
26304 
26305   return SDValue();
26306 }
26307 
26308 SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26309   // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26310   return visitFP16_TO_FP(N);
26311 }
26312 
26313 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26314   SDValue N0 = N->getOperand(0);
26315   EVT VT = N0.getValueType();
26316   unsigned Opcode = N->getOpcode();
26317 
26318   // VECREDUCE over 1-element vector is just an extract.
26319   if (VT.getVectorElementCount().isScalar()) {
26320     SDLoc dl(N);
26321     SDValue Res =
26322         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26323                     DAG.getVectorIdxConstant(0, dl));
26324     if (Res.getValueType() != N->getValueType(0))
26325       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26326     return Res;
26327   }
26328 
  // On a boolean vector an and/or reduction is the same as a umin/umax
26330   // reduction. Convert them if the latter is legal while the former isn't.
26331   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26332     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26333         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26334     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26335         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26336         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26337       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26338   }
26339 
26340   // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26341   // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26342   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26343       TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26344     SDValue Vec = N0.getOperand(0);
26345     SDValue Subvec = N0.getOperand(1);
26346     if ((Opcode == ISD::VECREDUCE_OR &&
26347          (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26348         (Opcode == ISD::VECREDUCE_AND &&
26349          (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26350       return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26351   }
26352 
26353   return SDValue();
26354 }
26355 
26356 SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26357   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26358 
26359   // FSUB -> FMA combines:
26360   if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26361     AddToWorklist(Fused.getNode());
26362     return Fused;
26363   }
26364   return SDValue();
26365 }
26366 
26367 SDValue DAGCombiner::visitVPOp(SDNode *N) {
26368 
26369   if (N->getOpcode() == ISD::VP_GATHER)
26370     if (SDValue SD = visitVPGATHER(N))
26371       return SD;
26372 
26373   if (N->getOpcode() == ISD::VP_SCATTER)
26374     if (SDValue SD = visitVPSCATTER(N))
26375       return SD;
26376 
26377   if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26378     if (SDValue SD = visitVP_STRIDED_LOAD(N))
26379       return SD;
26380 
26381   if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26382     if (SDValue SD = visitVP_STRIDED_STORE(N))
26383       return SD;
26384 
26385   // VP operations in which all vector elements are disabled - either by
26386   // determining that the mask is all false or that the EVL is 0 - can be
26387   // eliminated.
26388   bool AreAllEltsDisabled = false;
26389   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26390     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26391   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26392     AreAllEltsDisabled |=
26393         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26394 
26395   // This is the only generic VP combine we support for now.
26396   if (!AreAllEltsDisabled) {
26397     switch (N->getOpcode()) {
26398     case ISD::VP_FADD:
26399       return visitVP_FADD(N);
26400     case ISD::VP_FSUB:
26401       return visitVP_FSUB(N);
26402     case ISD::VP_FMA:
26403       return visitFMA<VPMatchContext>(N);
26404     }
26405     return SDValue();
26406   }
26407 
26408   // Binary operations can be replaced by UNDEF.
26409   if (ISD::isVPBinaryOp(N->getOpcode()))
26410     return DAG.getUNDEF(N->getValueType(0));
26411 
26412   // VP Memory operations can be replaced by either the chain (stores) or the
26413   // chain + undef (loads).
26414   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26415     if (MemSD->writeMem())
26416       return MemSD->getChain();
26417     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26418   }
26419 
26420   // Reduction operations return the start operand when no elements are active.
26421   if (ISD::isVPReduction(N->getOpcode()))
26422     return N->getOperand(0);
26423 
26424   return SDValue();
26425 }
26426 
26427 SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26428   SDValue Chain = N->getOperand(0);
26429   SDValue Ptr = N->getOperand(1);
26430   EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26431 
  // Check if the memory where the FP state is written is used only in a
  // single load operation.
26434   LoadSDNode *LdNode = nullptr;
26435   for (auto *U : Ptr->uses()) {
26436     if (U == N)
26437       continue;
26438     if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26439       if (LdNode && LdNode != Ld)
26440         return SDValue();
26441       LdNode = Ld;
26442       continue;
26443     }
26444     return SDValue();
26445   }
26446   if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26447       !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26448       !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26449     return SDValue();
26450 
26451   // Check if the loaded value is used only in a store operation.
26452   StoreSDNode *StNode = nullptr;
26453   for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26454     SDUse &U = I.getUse();
26455     if (U.getResNo() == 0) {
26456       if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26457         if (StNode)
26458           return SDValue();
26459         StNode = St;
26460       } else {
26461         return SDValue();
26462       }
26463     }
26464   }
26465   if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26466       !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26467       !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26468     return SDValue();
26469 
  // Create new node GET_FPENV_MEM, which uses the store address to write the
  // FP environment.
26472   SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26473                                 StNode->getMemOperand());
26474   CombineTo(StNode, Res, false);
26475   return Res;
26476 }
26477 
26478 SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26479   SDValue Chain = N->getOperand(0);
26480   SDValue Ptr = N->getOperand(1);
26481   EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26482 
  // Check if the address of the FP state is also used only in a single store
  // operation.
26484   StoreSDNode *StNode = nullptr;
26485   for (auto *U : Ptr->uses()) {
26486     if (U == N)
26487       continue;
26488     if (auto *St = dyn_cast<StoreSDNode>(U)) {
26489       if (StNode && StNode != St)
26490         return SDValue();
26491       StNode = St;
26492       continue;
26493     }
26494     return SDValue();
26495   }
26496   if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26497       !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26498       !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26499     return SDValue();
26500 
26501   // Check if the stored value is loaded from some location and the loaded
26502   // value is used only in the store operation.
26503   SDValue StValue = StNode->getValue();
26504   auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26505   if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26506       !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26507       !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26508     return SDValue();
26509 
  // Create new node SET_FPENV_MEM, which uses the load address to read the FP
  // environment.
26512   SDValue Res =
26513       DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26514                       LdNode->getMemOperand());
26515   return Res;
26516 }
26517 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
26520 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26521 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
26522 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26523   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26524 
26525   EVT VT = N->getValueType(0);
26526   SDValue LHS = N->getOperand(0);
26527   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26528   SDLoc DL(N);
26529 
26530   // Make sure we're not running after operation legalization where it
26531   // may have custom lowered the vector shuffles.
26532   if (LegalOperations)
26533     return SDValue();
26534 
26535   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26536     return SDValue();
26537 
26538   EVT RVT = RHS.getValueType();
26539   unsigned NumElts = RHS.getNumOperands();
26540 
  // Attempt to create a valid clear mask, splitting the mask into sub
  // elements and checking to see if each is all zeros or all ones - suitable
  // for shuffle masking.
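  //
  // Illustrative example (hypothetical constants, little-endian): for
  //   and v2i64 V, <0x00000000FFFFFFFF, 0xFFFFFFFF00000000>
  // a Split of 2 views the mask as four i32 sub-elements and yields
  //   bitcast (shuffle (v4i32 bitcast V), zero, <0,5,6,3>)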
26544   auto BuildClearMask = [&](int Split) {
26545     int NumSubElts = NumElts * Split;
26546     int NumSubBits = RVT.getScalarSizeInBits() / Split;
26547 
26548     SmallVector<int, 8> Indices;
26549     for (int i = 0; i != NumSubElts; ++i) {
26550       int EltIdx = i / Split;
26551       int SubIdx = i % Split;
26552       SDValue Elt = RHS.getOperand(EltIdx);
26553       // X & undef --> 0 (not undef). So this lane must be converted to choose
26554       // from the zero constant vector (same as if the element had all 0-bits).
26555       if (Elt.isUndef()) {
26556         Indices.push_back(i + NumSubElts);
26557         continue;
26558       }
26559 
26560       APInt Bits;
26561       if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26562         Bits = Cst->getAPIntValue();
26563       else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26564         Bits = CstFP->getValueAPF().bitcastToAPInt();
26565       else
26566         return SDValue();
26567 
26568       // Extract the sub element from the constant bit mask.
26569       if (DAG.getDataLayout().isBigEndian())
26570         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26571       else
26572         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26573 
26574       if (Bits.isAllOnes())
26575         Indices.push_back(i);
26576       else if (Bits == 0)
26577         Indices.push_back(i + NumSubElts);
26578       else
26579         return SDValue();
26580     }
26581 
26582     // Let's see if the target supports this vector_shuffle.
26583     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26584     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26585     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26586       return SDValue();
26587 
26588     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26589     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26590                                                    DAG.getBitcast(ClearVT, LHS),
26591                                                    Zero, Indices));
26592   };
26593 
26594   // Determine maximum split level (byte level masking).
26595   int MaxSplit = 1;
26596   if (RVT.getScalarSizeInBits() % 8 == 0)
26597     MaxSplit = RVT.getScalarSizeInBits() / 8;
26598 
26599   for (int Split = 1; Split <= MaxSplit; ++Split)
26600     if (RVT.getScalarSizeInBits() % Split == 0)
26601       if (SDValue S = BuildClearMask(Split))
26602         return S;
26603 
26604   return SDValue();
26605 }
26606 
26607 /// If a vector binop is performed on splat values, it may be profitable to
26608 /// extract, scalarize, and insert/splat.
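/// e.g. add (splat X), (splat Y) --> splat (add X, Y), when the extracts are
/// cheap and the scalar op is legal or custom.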
26609 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26610                                       const SDLoc &DL) {
26611   SDValue N0 = N->getOperand(0);
26612   SDValue N1 = N->getOperand(1);
26613   unsigned Opcode = N->getOpcode();
26614   EVT VT = N->getValueType(0);
26615   EVT EltVT = VT.getVectorElementType();
26616   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26617 
26618   // TODO: Remove/replace the extract cost check? If the elements are available
26619   //       as scalars, then there may be no extract cost. Should we ask if
26620   //       inserting a scalar back into a vector is cheap instead?
26621   int Index0, Index1;
26622   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26623   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  // Extracting an element from a splat_vector should be free.
26625   // TODO: use DAG.isSplatValue instead?
26626   bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26627                            N1.getOpcode() == ISD::SPLAT_VECTOR;
26628   if (!Src0 || !Src1 || Index0 != Index1 ||
26629       Src0.getValueType().getVectorElementType() != EltVT ||
26630       Src1.getValueType().getVectorElementType() != EltVT ||
26631       !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26632       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26633     return SDValue();
26634 
26635   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26636   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26637   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26638   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26639 
26640   // If all lanes but 1 are undefined, no need to splat the scalar result.
26641   // TODO: Keep track of undefs and use that info in the general case.
26642   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26643       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26644       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26645     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26646     // build_vec ..undef, (bo X, Y), undef...
26647     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
26648     Ops[Index0] = ScalarBO;
26649     return DAG.getBuildVector(VT, DL, Ops);
26650   }
26651 
26652   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26653   return DAG.getSplat(VT, DL, ScalarBO);
26654 }
26655 
26656 /// Visit a vector cast operation, like FP_EXTEND.
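/// e.g. fp_extend (splat X) --> splat (fp_extend X), when the target prefers
/// the scalarized form.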
26657 SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26658   EVT VT = N->getValueType(0);
26659   assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26660   EVT EltVT = VT.getVectorElementType();
26661   unsigned Opcode = N->getOpcode();
26662 
26663   SDValue N0 = N->getOperand(0);
26664   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26665 
  // TODO: promoting the operation might also be good here?
26667   int Index0;
26668   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26669   if (Src0 &&
26670       (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26671        TLI.isExtractVecEltCheap(VT, Index0)) &&
26672       TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26673       TLI.preferScalarizeSplat(N)) {
26674     EVT SrcVT = N0.getValueType();
26675     EVT SrcEltVT = SrcVT.getVectorElementType();
26676     SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26677     SDValue Elt =
26678         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26679     SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26680     if (VT.isScalableVector())
26681       return DAG.getSplatVector(VT, DL, ScalarBO);
26682     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26683     return DAG.getBuildVector(VT, DL, Ops);
26684   }
26685 
26686   return SDValue();
26687 }
26688 
26689 /// Visit a binary vector operation, like ADD.
26690 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26691   EVT VT = N->getValueType(0);
26692   assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26693 
26694   SDValue LHS = N->getOperand(0);
26695   SDValue RHS = N->getOperand(1);
26696   unsigned Opcode = N->getOpcode();
26697   SDNodeFlags Flags = N->getFlags();
26698 
26699   // Move unary shuffles with identical masks after a vector binop:
26700   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
26701   //   --> shuffle (VBinOp A, B), Undef, Mask
26702   // This does not require type legality checks because we are creating the
26703   // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
26705   // though. This code is adapted from the identical transform in instcombine.
26706   if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26707     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26708     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26709     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26710         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26711         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26712       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26713                                      RHS.getOperand(0), Flags);
26714       SDValue UndefV = LHS.getOperand(1);
26715       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26716     }
26717 
26718     // Try to sink a splat shuffle after a binop with a uniform constant.
26719     // This is limited to cases where neither the shuffle nor the constant have
26720     // undefined elements because that could be poison-unsafe or inhibit
26721     // demanded elements analysis. It is further limited to not change a splat
26722     // of an inserted scalar because that may be optimized better by
26723     // load-folding or other target-specific behaviors.
26724     if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26725         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26726         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26727       // binop (splat X), (splat C) --> splat (binop X, C)
26728       SDValue X = Shuf0->getOperand(0);
26729       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26730       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26731                                   Shuf0->getMask());
26732     }
26733     if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26734         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26735         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26736       // binop (splat C), (splat X) --> splat (binop C, X)
26737       SDValue X = Shuf1->getOperand(0);
26738       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26739       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26740                                   Shuf1->getMask());
26741     }
26742   }
26743 
26744   // The following pattern is likely to emerge with vector reduction ops. Moving
26745   // the binary operation ahead of insertion may allow using a narrower vector
26746   // instruction that has better performance than the wide version of the op:
26747   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
26748   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26749       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26750       LHS.getOperand(2) == RHS.getOperand(2) &&
26751       (LHS.hasOneUse() || RHS.hasOneUse())) {
26752     SDValue X = LHS.getOperand(1);
26753     SDValue Y = RHS.getOperand(1);
26754     SDValue Z = LHS.getOperand(2);
26755     EVT NarrowVT = X.getValueType();
26756     if (NarrowVT == Y.getValueType() &&
26757         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26758                                               LegalOperations)) {
26759       // (binop undef, undef) may not return undef, so compute that result.
26760       SDValue VecC =
26761           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26762       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26763       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26764     }
26765   }
26766 
26767   // Make sure all but the first op are undef or constant.
26768   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26769     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26770            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26771              return Op.isUndef() ||
26772                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26773            });
26774   };
26775 
26776   // The following pattern is likely to emerge with vector reduction ops. Moving
26777   // the binary operation ahead of the concat may allow using a narrower vector
26778   // instruction that has better performance than the wide version of the op:
26779   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26780   //   concat (VBinOp X, Y), VecC
26781   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26782       (LHS.hasOneUse() || RHS.hasOneUse())) {
26783     EVT NarrowVT = LHS.getOperand(0).getValueType();
26784     if (NarrowVT == RHS.getOperand(0).getValueType() &&
26785         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26786       unsigned NumOperands = LHS.getNumOperands();
26787       SmallVector<SDValue, 4> ConcatOps;
26788       for (unsigned i = 0; i != NumOperands; ++i) {
        // This constant folds for operands 1 and up.
26790         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26791                                         RHS.getOperand(i)));
26792       }
26793 
26794       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26795     }
26796   }
26797 
26798   if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26799     return V;
26800 
26801   return SDValue();
26802 }
26803 
26804 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26805                                     SDValue N2) {
26806   assert(N0.getOpcode() == ISD::SETCC &&
26807          "First argument must be a SetCC node!");
26808 
26809   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26810                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
26811 
  // If we got a simplified select_cc node back from SimplifySelectCC, break
  // it down into a new SETCC node and a new SELECT node, then return the
  // SELECT node, since we were called with a SELECT node.
26815   if (SCC.getNode()) {
26816     // Check to see if we got a select_cc back (to turn into setcc/select).
26817     // Otherwise, just return whatever node we got back, like fabs.
26818     if (SCC.getOpcode() == ISD::SELECT_CC) {
26819       const SDNodeFlags Flags = N0->getFlags();
26820       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26821                                   N0.getValueType(),
26822                                   SCC.getOperand(0), SCC.getOperand(1),
26823                                   SCC.getOperand(4), Flags);
26824       AddToWorklist(SETCC.getNode());
26825       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26826                                          SCC.getOperand(2), SCC.getOperand(3));
26827       SelectNode->setFlags(Flags);
26828       return SelectNode;
26829     }
26830 
26831     return SCC;
26832   }
26833   return SDValue();
26834 }
26835 
26836 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26837 /// being selected between, see if we can simplify the select.  Callers of this
26838 /// should assume that TheSelect is deleted if this returns true.  As such, they
26839 /// should return the appropriate thing (e.g. the node) back to the top-level of
26840 /// the DAG combiner loop to avoid it being looked at.
26841 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26842                                     SDValue RHS) {
26843   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26844   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26845   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26846     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26847       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26848       SDValue Sqrt = RHS;
26849       ISD::CondCode CC;
26850       SDValue CmpLHS;
26851       const ConstantFPSDNode *Zero = nullptr;
26852 
26853       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26854         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26855         CmpLHS = TheSelect->getOperand(0);
26856         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26857       } else {
26858         // SELECT or VSELECT
26859         SDValue Cmp = TheSelect->getOperand(0);
26860         if (Cmp.getOpcode() == ISD::SETCC) {
26861           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26862           CmpLHS = Cmp.getOperand(0);
26863           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
26864         }
26865       }
26866       if (Zero && Zero->isZero() &&
26867           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
26868           CC == ISD::SETULT || CC == ISD::SETLT)) {
26869         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26870         CombineTo(TheSelect, Sqrt);
26871         return true;
26872       }
26873     }
26874   }
  // Cannot simplify a select with a vector condition.
  if (TheSelect->getOperand(0).getValueType().isVector())
    return false;
26877 
26878   // If this is a select from two identical things, try to pull the operation
26879   // through the select.
26880   if (LHS.getOpcode() != RHS.getOpcode() ||
26881       !LHS.hasOneUse() || !RHS.hasOneUse())
26882     return false;
26883 
26884   // If this is a load and the token chain is identical, replace the select
26885   // of two loads with a load through a select of the address to load from.
26886   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
26887   // constants have been dropped into the constant pool.
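  // i.e. select Cond, (load A), (load B) --> load (select Cond, A, B), when
  // both loads are simple and hang off the same token chain.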
26888   if (LHS.getOpcode() == ISD::LOAD) {
26889     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
26890     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
26891 
26892     // Token chains must be identical.
26893     if (LHS.getOperand(0) != RHS.getOperand(0) ||
26894         // Do not let this transformation reduce the number of volatile loads.
26895         // Be conservative for atomics for the moment
26896         // TODO: This does appear to be legal for unordered atomics (see D66309)
26897         !LLD->isSimple() || !RLD->isSimple() ||
26898         // FIXME: If either is a pre/post inc/dec load,
26899         // we'd need to split out the address adjustment.
26900         LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VTs must match.
26902         LLD->getMemoryVT() != RLD->getMemoryVT() ||
26903         // If this is an EXTLOAD, the kind of extension must match.
26904         (LLD->getExtensionType() != RLD->getExtensionType() &&
26905          // The only exception is if one of the extensions is anyext.
26906          LLD->getExtensionType() != ISD::EXTLOAD &&
26907          RLD->getExtensionType() != ISD::EXTLOAD) ||
26908         // FIXME: this discards src value information.  This is
26909         // over-conservative. It would be beneficial to be able to remember
26910         // both potential memory locations.  Since we are discarding
26911         // src value info, don't do the transformation if the memory
26912         // locations are not in the default address space.
26913         LLD->getPointerInfo().getAddrSpace() != 0 ||
26914         RLD->getPointerInfo().getAddrSpace() != 0 ||
26915         // We can't produce a CMOV of a TargetFrameIndex since we won't
26916         // generate the address generation required.
26917         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26918         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26919         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
26920                                       LLD->getBasePtr().getValueType()))
26921       return false;
26922 
26923     // The loads must not depend on one another.
26924     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
26925       return false;
26926 
26927     // Check that the select condition doesn't reach either load.  If so,
26928     // folding this will induce a cycle into the DAG.  If not, this is safe to
26929     // xform, so create a select of the addresses.
26930 
26931     SmallPtrSet<const SDNode *, 32> Visited;
26932     SmallVector<const SDNode *, 16> Worklist;
26933 
    // Always fail if LLD and RLD are not independent. TheSelect is a
    // predecessor to all nodes in question, so we need not search past it.
26936 
26937     Visited.insert(TheSelect);
26938     Worklist.push_back(LLD);
26939     Worklist.push_back(RLD);
26940 
26941     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
26942         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
26943       return false;
26944 
26945     SDValue Addr;
26946     if (TheSelect->getOpcode() == ISD::SELECT) {
26947       // We cannot do this optimization if any pair of {RLD, LLD} is a
26948       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
26949       // Loads, we only need to check if CondNode is a successor to one of the
26950       // loads. We can further avoid this if there's no use of their chain
26951       // value.
26952       SDNode *CondNode = TheSelect->getOperand(0).getNode();
26953       Worklist.push_back(CondNode);
26954 
26955       if ((LLD->hasAnyUseOfValue(1) &&
26956            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
26957           (RLD->hasAnyUseOfValue(1) &&
26958            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
26959         return false;
26960 
26961       Addr = DAG.getSelect(SDLoc(TheSelect),
26962                            LLD->getBasePtr().getValueType(),
26963                            TheSelect->getOperand(0), LLD->getBasePtr(),
26964                            RLD->getBasePtr());
26965     } else {  // Otherwise SELECT_CC
26966       // We cannot do this optimization if any pair of {RLD, LLD} is a
26967       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
26968       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
26969       // one of the loads. We can further avoid this if there's no use of their
26970       // chain value.
26971 
26972       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
26973       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
26974       Worklist.push_back(CondLHS);
26975       Worklist.push_back(CondRHS);
26976 
26977       if ((LLD->hasAnyUseOfValue(1) &&
26978            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
26979           (RLD->hasAnyUseOfValue(1) &&
26980            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
26981         return false;
26982 
26983       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
26984                          LLD->getBasePtr().getValueType(),
26985                          TheSelect->getOperand(0),
26986                          TheSelect->getOperand(1),
26987                          LLD->getBasePtr(), RLD->getBasePtr(),
26988                          TheSelect->getOperand(4));
26989     }
26990 
26991     SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must use the minimum (most restrictive) alignment of
    // the inputs.
26995     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
26996     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
26997     if (!RLD->isInvariant())
26998       MMOFlags &= ~MachineMemOperand::MOInvariant;
26999     if (!RLD->isDereferenceable())
27000       MMOFlags &= ~MachineMemOperand::MODereferenceable;
27001     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27002       // FIXME: Discards pointer and AA info.
27003       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27004                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27005                          MMOFlags);
27006     } else {
27007       // FIXME: Discards pointer and AA info.
27008       Load = DAG.getExtLoad(
27009           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27010                                                   : LLD->getExtensionType(),
27011           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27012           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27013     }
27014 
27015     // Users of the select now use the result of the load.
27016     CombineTo(TheSelect, Load);
27017 
27018     // Users of the old loads now use the new load's chain.  We know the
27019     // old-load value is dead now.
27020     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27021     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27022     return true;
27023   }
27024 
27025   return false;
27026 }
27027 
27028 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27029 /// bitwise 'and'.
27030 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27031                                             SDValue N1, SDValue N2, SDValue N3,
27032                                             ISD::CondCode CC) {
27033   // If this is a select where the false operand is zero and the compare is a
27034   // check of the sign bit, see if we can perform the "gzip trick":
27035   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27036   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
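  // e.g. for i32: select_cc setlt X, 0, A, 0 --> and (sra X, 31), A, since
  // the arithmetic shift produces all-ones when X is negative and zero
  // otherwise.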
27037   EVT XType = N0.getValueType();
27038   EVT AType = N2.getValueType();
27039   if (!isNullConstant(N3) || !XType.bitsGE(AType))
27040     return SDValue();
27041 
27042   // If the comparison is testing for a positive value, we have to invert
27043   // the sign bit mask, so only do that transform if the target has a bitwise
27044   // 'and not' instruction (the invert is free).
27045   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27046     // (X > -1) ? A : 0
27047     // (X >  0) ? X : 0 <-- This is canonical signed max.
27048     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27049       return SDValue();
27050   } else if (CC == ISD::SETLT) {
27051     // (X <  0) ? A : 0
27052     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
27053     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27054       return SDValue();
27055   } else {
27056     return SDValue();
27057   }
27058 
27059   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27060   // constant.
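  // e.g. for i32 with A == 4 (bit 2 set), C2 == 32 - 2 - 1 == 29: the sign
  // bit lands directly in bit 2, so no all-ones mask is required.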
27061   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27062   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27063   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27064     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27065     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27066       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27067       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27068       AddToWorklist(Shift.getNode());
27069 
27070       if (XType.bitsGT(AType)) {
27071         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27072         AddToWorklist(Shift.getNode());
27073       }
27074 
27075       if (CC == ISD::SETGT)
27076         Shift = DAG.getNOT(DL, Shift, AType);
27077 
27078       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27079     }
27080   }
27081 
27082   unsigned ShCt = XType.getSizeInBits() - 1;
27083   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27084     return SDValue();
27085 
27086   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27087   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27088   AddToWorklist(Shift.getNode());
27089 
27090   if (XType.bitsGT(AType)) {
27091     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27092     AddToWorklist(Shift.getNode());
27093   }
27094 
27095   if (CC == ISD::SETGT)
27096     Shift = DAG.getNOT(DL, Shift, AType);
27097 
27098   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27099 }
27100 
27101 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27102 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27103   SDValue N0 = N->getOperand(0);
27104   SDValue N1 = N->getOperand(1);
27105   SDValue N2 = N->getOperand(2);
27106   SDLoc DL(N);
27107 
27108   unsigned BinOpc = N1.getOpcode();
27109   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27110       (N1.getResNo() != N2.getResNo()))
27111     return SDValue();
27112 
27113   // The use checks are intentionally on SDNode because we may be dealing
27114   // with opcodes that produce more than one SDValue.
27115   // TODO: Do we really need to check N0 (the condition operand of the select)?
27116   //       But removing that clause could cause an infinite loop...
27117   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27118     return SDValue();
27119 
27120   // Binops may include opcodes that return multiple values, so all values
27121   // must be created/propagated from the newly created binops below.
27122   SDVTList OpVTs = N1->getVTList();
27123 
27124   // Fold select(cond, binop(x, y), binop(z, y))
27125   //  --> binop(select(cond, x, z), y)
27126   if (N1.getOperand(1) == N2.getOperand(1)) {
27127     SDValue N10 = N1.getOperand(0);
27128     SDValue N20 = N2.getOperand(0);
27129     SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27130     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27131     NewBinOp->setFlags(N1->getFlags());
27132     NewBinOp->intersectFlagsWith(N2->getFlags());
27133     return SDValue(NewBinOp.getNode(), N1.getResNo());
27134   }
27135 
27136   // Fold select(cond, binop(x, y), binop(x, z))
27137   //  --> binop(x, select(cond, y, z))
27138   if (N1.getOperand(0) == N2.getOperand(0)) {
27139     SDValue N11 = N1.getOperand(1);
27140     SDValue N21 = N2.getOperand(1);
27141     // Second op VT might be different (e.g. shift amount type)
27142     if (N11.getValueType() == N21.getValueType()) {
27143       SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27144       SDValue NewBinOp =
27145           DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27146       NewBinOp->setFlags(N1->getFlags());
27147       NewBinOp->intersectFlagsWith(N2->getFlags());
27148       return SDValue(NewBinOp.getNode(), N1.getResNo());
27149     }
27150   }
27151 
27152   // TODO: Handle isCommutativeBinOp patterns as well?
27153   return SDValue();
27154 }
27155 
27156 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27157 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27158   SDValue N0 = N->getOperand(0);
27159   EVT VT = N->getValueType(0);
27160   bool IsFabs = N->getOpcode() == ISD::FABS;
27161   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27162 
27163   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27164     return SDValue();
27165 
27166   SDValue Int = N0.getOperand(0);
27167   EVT IntVT = Int.getValueType();
27168 
  // The operand of the bitcast should be a scalar integer.
27170   if (!IntVT.isInteger() || IntVT.isVector())
27171     return SDValue();
27172 
27173   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27174   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
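  // e.g. for f32: sign == 0x80000000, ~sign == 0x7fffffff.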
27175   APInt SignMask;
27176   if (N0.getValueType().isVector()) {
27177     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27178     // 0x7f...) per element and splat it.
27179     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27180     if (IsFabs)
27181       SignMask = ~SignMask;
27182     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27183   } else {
27184     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27185     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27186     if (IsFabs)
27187       SignMask = ~SignMask;
27188   }
27189   SDLoc DL(N0);
27190   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27191                     DAG.getConstant(SignMask, DL, IntVT));
27192   AddToWorklist(Int.getNode());
27193   return DAG.getBitcast(VT, Int);
27194 }
27195 
/// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4))"
27197 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27198 /// in it. This may be a win when the constant is not otherwise available
27199 /// because it replaces two constant pool loads with one.
27200 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27201     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27202     ISD::CondCode CC) {
27203   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27204     return SDValue();
27205 
27206   // If we are before legalize types, we want the other legalization to happen
27207   // first (for example, to avoid messing with soft float).
27208   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27209   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27210   EVT VT = N2.getValueType();
27211   if (!TV || !FV || !TLI.isTypeLegal(VT))
27212     return SDValue();
27213 
27214   // If a constant can be materialized without loads, this does not make sense.
27215   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27216       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27217       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27218     return SDValue();
27219 
27220   // If both constants have multiple uses, then we won't need to do an extra
27221   // load. The values are likely around in registers for other users.
27222   if (!TV->hasOneUse() && !FV->hasOneUse())
27223     return SDValue();
27224 
27225   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27226                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27227   Type *FPTy = Elts[0]->getType();
27228   const DataLayout &TD = DAG.getDataLayout();
27229 
27230   // Create a ConstantArray of the two constants.
27231   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27232   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27233                                       TD.getPrefTypeAlign(FPTy));
27234   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27235 
27236   // Get offsets to the 0 and 1 elements of the array, so we can select between
27237   // them.
27238   SDValue Zero = DAG.getIntPtrConstant(0, DL);
27239   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27240   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27241   SDValue Cond =
27242       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27243   AddToWorklist(Cond.getNode());
27244   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27245   AddToWorklist(CstOffset.getNode());
27246   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27247   AddToWorklist(CPIdx.getNode());
27248   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27249                      MachinePointerInfo::getConstantPool(
27250                          DAG.getMachineFunction()), Alignment);
27251 }
27252 
27253 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27254 /// where 'cond' is the comparison specified by CC.
27255 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27256                                       SDValue N2, SDValue N3, ISD::CondCode CC,
27257                                       bool NotExtCompare) {
27258   // (x ? y : y) -> y.
27259   if (N2 == N3) return N2;
27260 
27261   EVT CmpOpVT = N0.getValueType();
27262   EVT CmpResVT = getSetCCResultType(CmpOpVT);
27263   EVT VT = N2.getValueType();
27264   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27265   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27266   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27267 
27268   // Determine if the condition we're dealing with is constant.
27269   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27270     AddToWorklist(SCC.getNode());
27271     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27272       // fold select_cc true, x, y -> x
27273       // fold select_cc false, x, y -> y
27274       return !(SCCC->isZero()) ? N2 : N3;
27275     }
27276   }
27277 
27278   if (SDValue V =
27279           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27280     return V;
27281 
27282   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27283     return V;
27284 
  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be: we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
27291   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27292       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27293     SDValue AndLHS = N0->getOperand(0);
27294     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27295     if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27296       // Shift the tested bit over the sign bit.
27297       const APInt &AndMask = ConstAndRHS->getAPIntValue();
27298       if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27299         unsigned ShCt = AndMask.getBitWidth() - 1;
27300         SDValue ShlAmt =
27301             DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27302                             getShiftAmountTy(AndLHS.getValueType()));
27303         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27304 
        // Now arithmetic right shift it all the way over, so the result is
        // either all-ones or zero.
27307         SDValue ShrAmt =
27308           DAG.getConstant(ShCt, SDLoc(Shl),
27309                           getShiftAmountTy(Shl.getValueType()));
27310         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27311 
27312         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27313       }
27314     }
27315   }
27316 
27317   // fold select C, 16, 0 -> shl C, 4
27318   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27319   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27320 
27321   if ((Fold || Swap) &&
27322       TLI.getBooleanContents(CmpOpVT) ==
27323           TargetLowering::ZeroOrOneBooleanContent &&
27324       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27325 
27326     if (Swap) {
27327       CC = ISD::getSetCCInverse(CC, CmpOpVT);
27328       std::swap(N2C, N3C);
27329     }
27330 
27331     // If the caller doesn't want us to simplify this into a zext of a compare,
27332     // don't do it.
27333     if (NotExtCompare && N2C->isOne())
27334       return SDValue();
27335 
27336     SDValue Temp, SCC;
27337     // zext (setcc n0, n1)
27338     if (LegalTypes) {
27339       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27340       Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27341     } else {
27342       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27343       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27344     }
27345 
27346     AddToWorklist(SCC.getNode());
27347     AddToWorklist(Temp.getNode());
27348 
27349     if (N2C->isOne())
27350       return Temp;
27351 
27352     unsigned ShCt = N2C->getAPIntValue().logBase2();
27353     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27354       return SDValue();
27355 
27356     // shl setcc result by log2 n2c
27357     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27358                        DAG.getConstant(ShCt, SDLoc(Temp),
27359                                        getShiftAmountTy(Temp.getValueType())));
27360   }
27361 
27362   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27363   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27364   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27365   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27366   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27367   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27368   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27369   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27370   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27371     SDValue ValueOnZero = N2;
27372     SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
27374     if (CC == ISD::SETNE)
27375       std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bit width of the
    // type.
27377     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27378       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27379         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27380         // legal, combine to just cttz.
27381         if ((Count.getOpcode() == ISD::CTTZ ||
27382              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27383             N0 == Count.getOperand(0) &&
27384             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27385           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27386         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27387         // legal, combine to just ctlz.
27388         if ((Count.getOpcode() == ISD::CTLZ ||
27389              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27390             N0 == Count.getOperand(0) &&
27391             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27392           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27393       }
27394     }
27395   }
27396 
27397   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27398   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27399   if (!NotExtCompare && N1C && N2C && N3C &&
27400       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27401       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27402        (N1C->isZero() && CC == ISD::SETLT)) &&
27403       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27404     SDValue ASR = DAG.getNode(
27405         ISD::SRA, DL, CmpOpVT, N0,
27406         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27407     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27408                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27409   }
27410 
27411   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27412     return S;
27413   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27414     return S;
27415 
27416   return SDValue();
27417 }
27418 
27419 /// This is a stub for TargetLowering::SimplifySetCC.
27420 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27421                                    ISD::CondCode Cond, const SDLoc &DL,
27422                                    bool foldBooleans) {
27423   TargetLowering::DAGCombinerInfo
27424     DagCombineInfo(DAG, Level, false, this);
27425   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27426 }
27427 
/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression that will generate the same value by multiplying
/// by a magic number.
27431 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
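/// e.g. for i32, X sdiv 10 can be rewritten as a mulhs with the magic number
/// 0x66666667 followed by shifts and a sign correction, with no actual
/// divide. (Illustrative only; TLI.BuildSDIV emits the real sequence.)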
27432 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div into a
  // mul and a shift.
27435   if (DAG.getMachineFunction().getFunction().hasMinSize())
27436     return SDValue();
27437 
27438   SmallVector<SDNode *, 8> Built;
27439   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27440     for (SDNode *N : Built)
27441       AddToWorklist(N);
27442     return S;
27443   }
27444 
27445   return SDValue();
27446 }
27447 
27448 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27449 /// DAG expression that will generate the same value by right shifting.
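/// e.g. for i32, X sdiv 8 can become (X + ((X sra 31) srl 29)) sra 3: the
/// bias term rounds negative values toward zero. (Illustrative only;
/// TLI.BuildSDIVPow2 emits the target's sequence.)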
27450 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27451   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27452   if (!C)
27453     return SDValue();
27454 
27455   // Avoid division by zero.
27456   if (C->isZero())
27457     return SDValue();
27458 
27459   SmallVector<SDNode *, 8> Built;
27460   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27461     for (SDNode *N : Built)
27462       AddToWorklist(N);
27463     return S;
27464   }
27465 
27466   return SDValue();
27467 }
27468 
27469 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27470 /// expression that will generate the same value by multiplying by a magic
27471 /// number.
27472 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
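/// e.g. for i32, X udiv 10 can be rewritten as (mulhu X, 0xCCCCCCCD) lshr 3,
/// with no actual divide. (Illustrative only; TLI.BuildUDIV emits the real
/// sequence.)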
27473 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div into a
  // mul and a shift.
27476   if (DAG.getMachineFunction().getFunction().hasMinSize())
27477     return SDValue();
27478 
27479   SmallVector<SDNode *, 8> Built;
27480   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27481     for (SDNode *N : Built)
27482       AddToWorklist(N);
27483     return S;
27484   }
27485 
27486   return SDValue();
27487 }
27488 
27489 /// Given an ISD::SREM node expressing a remainder by constant power of 2,
27490 /// return a DAG expression that will generate the same value.
27491 SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27492   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27493   if (!C)
27494     return SDValue();
27495 
27496   // Avoid division by zero.
27497   if (C->isZero())
27498     return SDValue();
27499 
27500   SmallVector<SDNode *, 8> Built;
27501   if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27502     for (SDNode *N : Built)
27503       AddToWorklist(N);
27504     return S;
27505   }
27506 
27507   return SDValue();
27508 }
27509 
// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp.
//
// Returns the node that represents `Log2(Op)`. This may create a new node. If
// we are unable to compute `Log2(Op)`, it returns `SDValue()`.
//
// All nodes will be created at `DL` and the output will be of type `VT`.
//
// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
// `AssumeNonZero` if this function should simply assume (rather than prove)
// that `Op` is non-zero.
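//
// e.g. a constant 16 yields the constant 4, and (shl nuw X, Y) yields
// (add Log2(X), Y) when Log2(X) is itself computable.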
27520 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27521                                    SDValue Op, unsigned Depth,
27522                                    bool AssumeNonZero) {
27523   assert(VT.isInteger() && "Only integer types are supported!");
27524 
27525   auto PeekThroughCastsAndTrunc = [](SDValue V) {
27526     while (true) {
27527       switch (V.getOpcode()) {
27528       case ISD::TRUNCATE:
27529       case ISD::ZERO_EXTEND:
27530         V = V.getOperand(0);
27531         break;
27532       default:
27533         return V;
27534       }
27535     }
27536   };
27537 
27538   if (VT.isScalableVector())
27539     return SDValue();
27540 
27541   Op = PeekThroughCastsAndTrunc(Op);
27542 
  // Helper for determining whether a value is a power-of-2 constant scalar or
  // a vector of such elements.
27545   SmallVector<APInt> Pow2Constants;
27546   auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27547     if (C->isZero() || C->isOpaque())
27548       return false;
27549     // TODO: We may also be able to support negative powers of 2 here.
27550     if (C->getAPIntValue().isPowerOf2()) {
27551       Pow2Constants.emplace_back(C->getAPIntValue());
27552       return true;
27553     }
27554     return false;
27555   };
27556 
27557   if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27558     if (!VT.isVector())
27559       return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
    // We need to create a build vector.
27561     SmallVector<SDValue> Log2Ops;
27562     for (const APInt &Pow2 : Pow2Constants)
27563       Log2Ops.emplace_back(
27564           DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27565     return DAG.getBuildVector(VT, DL, Log2Ops);
27566   }
27567 
27568   if (Depth >= DAG.MaxRecursionDepth)
27569     return SDValue();
27570 
27571   auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27572     ToCast = PeekThroughCastsAndTrunc(ToCast);
27573     EVT CurVT = ToCast.getValueType();
27574     if (NewVT == CurVT)
27575       return ToCast;
27576 
27577     if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27578       return DAG.getBitcast(NewVT, ToCast);
27579 
27580     return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27581   };
27582 
27583   // log2(X << Y) -> log2(X) + Y
27584   if (Op.getOpcode() == ISD::SHL) {
27585     // 1 << Y and X nuw/nsw << Y are all non-zero.
27586     if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27587         Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27588       if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27589                                              Depth + 1, AssumeNonZero))
27590         return DAG.getNode(ISD::ADD, DL, VT, LogX,
27591                            CastToVT(VT, Op.getOperand(1)));
27592   }
27593 
27594   // c ? X : Y -> c ? Log2(X) : Log2(Y)
27595   if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27596       Op.hasOneUse()) {
27597     if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27598                                            Depth + 1, AssumeNonZero))
27599       if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27600                                              Depth + 1, AssumeNonZero))
27601         return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27602   }
27603 
27604   // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27605   // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27606   if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27607       Op.hasOneUse()) {
    // Use AssumeNonZero as false here. Otherwise we can hit a case where
    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27610     if (SDValue LogX =
27611             takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27612                                 /*AssumeNonZero*/ false))
27613       if (SDValue LogY =
27614               takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27615                                   /*AssumeNonZero*/ false))
27616         return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27617   }
27618 
27619   return SDValue();
27620 }
27621 
27622 /// Determines the LogBase2 value for a non-null input value using the
27623 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
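/// e.g. for an i32 value known to be 16, ctlz(16) == 27, so
/// LogBase2 == (32 - 1) - 27 == 4.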
27624 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27625                                    bool KnownNonZero, bool InexpensiveOnly,
27626                                    std::optional<EVT> OutVT) {
27627   EVT VT = OutVT ? *OutVT : V.getValueType();
27628   SDValue InexpensiveLogBase2 =
27629       takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27630   if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27631     return InexpensiveLogBase2;
27632 
27633   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27634   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27635   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27636   return LogBase2;
27637 }
27638 
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
27640 /// For the reciprocal, we need to find the zero of the function:
27641 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
27642 ///     =>
27643 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27644 ///     does not require additional intermediate precision]
27645 /// For the last iteration, put numerator N into it to gain more precision:
27646 ///   Result = N X_i + X_i (N - N A X_i)
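/// (The update rule follows from F'(X) = -1/X^2, so that
///   X - F(X)/F'(X) = X + X^2 (1/X - A) = X (2 - A X).)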
27647 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27648                                       SDNodeFlags Flags) {
27649   if (LegalDAG)
27650     return SDValue();
27651 
27652   // TODO: Handle extended types?
27653   EVT VT = Op.getValueType();
27654   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27655       VT.getScalarType() != MVT::f64)
27656     return SDValue();
27657 
27658   // If estimates are explicitly disabled for this function, we're done.
27659   MachineFunction &MF = DAG.getMachineFunction();
27660   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27661   if (Enabled == TLI.ReciprocalEstimate::Disabled)
27662     return SDValue();
27663 
27664   // Estimates may be explicitly enabled for this type with a custom number of
27665   // refinement steps.
27666   int Iterations = TLI.getDivRefinementSteps(VT, MF);
27667   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27668     AddToWorklist(Est.getNode());
27669 
27670     SDLoc DL(Op);
27671     if (Iterations) {
27672       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27673 
27674       // Newton iterations: Est = Est + Est (N - Arg * Est)
27675       // If this is the last iteration, also multiply by the numerator.
27676       for (int i = 0; i < Iterations; ++i) {
27677         SDValue MulEst = Est;
27678 
27679         if (i == Iterations - 1) {
27680           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27681           AddToWorklist(MulEst.getNode());
27682         }
27683 
27684         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27685         AddToWorklist(NewEst.getNode());
27686 
27687         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27688                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27689         AddToWorklist(NewEst.getNode());
27690 
27691         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27692         AddToWorklist(NewEst.getNode());
27693 
27694         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27695         AddToWorklist(Est.getNode());
27696       }
27697     } else {
      // If no iterations are available, multiply by N.
27699       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27700       AddToWorklist(Est.getNode());
27701     }
27702 
27703     return Est;
27704   }
27705 
27706   return SDValue();
27707 }
27708 
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
27710 /// For the reciprocal sqrt, we need to find the zero of the function:
27711 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27712 ///     =>
27713 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27714 /// As a result, we precompute A/2 prior to the iteration loop.
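/// (The update rule follows from F'(X) = -2/X^3, so that
///   X - F(X)/F'(X) = X + (X^3/2)(1/X^2 - A) = X (1.5 - A X^2 / 2).)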
27715 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27716                                          unsigned Iterations,
27717                                          SDNodeFlags Flags, bool Reciprocal) {
27718   EVT VT = Arg.getValueType();
27719   SDLoc DL(Arg);
27720   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27721 
27722   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27723   // this entire sequence requires only one FP constant.
27724   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27725   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27726 
27727   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27728   for (unsigned i = 0; i < Iterations; ++i) {
27729     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27730     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27731     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27732     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27733   }
27734 
27735   // If non-reciprocal square root is requested, multiply the result by Arg.
27736   if (!Reciprocal)
27737     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27738 
27739   return Est;
27740 }
27741 
/// Newton iteration for a function F(X): X_{i+1} = X_i - F(X_i)/F'(X_i)
27743 /// For the reciprocal sqrt, we need to find the zero of the function:
27744 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27745 ///     =>
27746 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
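/// This is the same Newton step as buildSqrtNROneConst, rearranged so that it
/// needs only the two constants -0.5 and -3.0:
///   X (1.5 - A X^2 / 2) == (-0.5 X)(A X^2 - 3.0)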
27747 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27748                                          unsigned Iterations,
27749                                          SDNodeFlags Flags, bool Reciprocal) {
27750   EVT VT = Arg.getValueType();
27751   SDLoc DL(Arg);
27752   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27753   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27754 
27755   // This routine must enter the loop below to work correctly
27756   // when (Reciprocal == false).
27757   assert(Iterations > 0);
27758 
27759   // Newton iterations for reciprocal square root:
27760   // E = (E * -0.5) * ((A * E) * E + -3.0)
27761   for (unsigned i = 0; i < Iterations; ++i) {
27762     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27763     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27764     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27765 
    // When calculating a square root, at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
27769     SDValue LHS;
27770     if (Reciprocal || (i + 1) < Iterations) {
27771       // RSQRT: LHS = (E * -0.5)
27772       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27773     } else {
27774       // SQRT: LHS = (A * E) * -0.5
27775       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27776     }
27777 
27778     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27779   }
27780 
27781   return Est;
27782 }
27783 
27784 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27785 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27786 /// Op can be zero.
27787 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27788                                            bool Reciprocal) {
27789   if (LegalDAG)
27790     return SDValue();
27791 
27792   // TODO: Handle extended types?
27793   EVT VT = Op.getValueType();
27794   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27795       VT.getScalarType() != MVT::f64)
27796     return SDValue();
27797 
27798   // If estimates are explicitly disabled for this function, we're done.
27799   MachineFunction &MF = DAG.getMachineFunction();
27800   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27801   if (Enabled == TLI.ReciprocalEstimate::Disabled)
27802     return SDValue();
27803 
27804   // Estimates may be explicitly enabled for this type with a custom number of
27805   // refinement steps.
27806   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27807 
27808   bool UseOneConstNR = false;
27809   if (SDValue Est =
27810       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27811                           Reciprocal)) {
27812     AddToWorklist(Est.getNode());
27813 
27814     if (Iterations > 0)
27815       Est = UseOneConstNR
27816             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27817             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27818     if (!Reciprocal) {
27819       SDLoc DL(Op);
27820       // Try the target specific test first.
27821       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27822 
27823       // The estimate is now completely wrong if the input was exactly 0.0 or
27824       // possibly a denormal. Force the answer to 0.0 or value provided by
27825       // target for those cases.
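      // (E.g. for Op == +0.0 the reciprocal estimate is typically +Inf, and
      // multiplying back by Op gives 0.0 * Inf == NaN rather than the
      // expected sqrt(0.0) == 0.0.)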
27826       Est = DAG.getNode(
27827           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27828           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27829     }
27830     return Est;
27831   }
27832 
27833   return SDValue();
27834 }
27835 
27836 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27837   return buildSqrtEstimateImpl(Op, Flags, true);
27838 }
27839 
27840 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27841   return buildSqrtEstimateImpl(Op, Flags, false);
27842 }
27843 
27844 /// Return true if there is any possibility that the two addresses overlap.
27845 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27846 
27847   struct MemUseCharacteristics {
27848     bool IsVolatile;
27849     bool IsAtomic;
27850     SDValue BasePtr;
27851     int64_t Offset;
27852     std::optional<int64_t> NumBytes;
27853     MachineMemOperand *MMO;
27854   };
27855 
27856   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27857     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) {
        if (LSN->getAddressingMode() == ISD::PRE_INC)
          Offset = C->getSExtValue();
        else if (LSN->getAddressingMode() == ISD::PRE_DEC)
          Offset = -C->getSExtValue();
      }
27865       uint64_t Size =
27866           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
27867       return {LSN->isVolatile(),
27868               LSN->isAtomic(),
27869               LSN->getBasePtr(),
27870               Offset /*base offset*/,
27871               std::optional<int64_t>(Size),
27872               LSN->getMemOperand()};
27873     }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
      return {false /*IsVolatile*/,
              /*IsAtomic*/ false,
              LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
                                : std::optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    // Default: no memory characteristics are known.
    return {false /*IsVolatile*/,
            /*IsAtomic*/ false,
            SDValue(),
            (int64_t)0 /*Offset*/,
            std::optional<int64_t>() /*NumBytes*/,
            (MachineMemOperand *)nullptr};
27887   };
27888 
27889   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
27890                         MUC1 = getCharacteristics(Op1);
27891 
27892   // If they are to the same address, then they must be aliases.
27893   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
27894       MUC0.Offset == MUC1.Offset)
27895     return true;
27896 
27897   // If they are both volatile then they cannot be reordered.
27898   if (MUC0.IsVolatile && MUC1.IsVolatile)
27899     return true;
27900 
27901   // Be conservative about atomics for the moment
27902   // TODO: This is way overconservative for unordered atomics (see D66309)
27903   if (MUC0.IsAtomic && MUC1.IsAtomic)
27904     return true;
27905 
27906   if (MUC0.MMO && MUC1.MMO) {
27907     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27908         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27909       return false;
27910   }
27911 
27912   // Try to prove that there is aliasing, or that there is no aliasing. Either
27913   // way, we can return now. If nothing can be proved, proceed with more tests.
27914   bool IsAlias;
27915   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
27916                                        DAG, IsAlias))
27917     return IsAlias;
27918 
27919   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
27920   // either are not known.
27921   if (!MUC0.MMO || !MUC1.MMO)
27922     return true;
27923 
  // If one operation reads from invariant memory and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load/store.
27927   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27928       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27929     return false;
27930 
  // If we know that SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types; it only works when the offsets
  // are multiples of the size of the data.
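  // For example, two 4-byte accesses whose base is 16-byte aligned and whose
  // src-value offsets are 4 and 8 occupy bytes [4,8) and [8,12) of any aligned
  // 16-byte window, so they cannot overlap.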
27936   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
27937   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
27938   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
27939   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
27940   auto &Size0 = MUC0.NumBytes;
27941   auto &Size1 = MUC1.NumBytes;
27942   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
27943       Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
27944       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
27945       SrcValOffset1 % *Size1 == 0) {
27946     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
27947     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
27948 
27949     // There is no overlap between these relatively aligned accesses of
27950     // similar size. Return no alias.
27951     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
27952       return false;
27953   }
27954 
27955   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
27956                    ? CombinerGlobalAA
27957                    : DAG.getSubtarget().useAA();
27958 #ifndef NDEBUG
27959   if (CombinerAAOnlyFunc.getNumOccurrences() &&
27960       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
27961     UseAA = false;
27962 #endif
27963 
27964   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
27965       Size1) {
27966     // Use alias analysis information.
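    // MemoryLocation cannot express the src-value offsets directly, so anchor
    // both locations at the smaller of the two offsets and widen each size by
    // its access's distance from that anchor; the resulting ranges
    // conservatively cover both accesses.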
27967     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
27968     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
27969     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
27970     if (AA->isNoAlias(
27971             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
27972                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
27973             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
27974                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
27975       return false;
27976   }
27977 
27978   // Otherwise we have to assume they alias.
27979   return true;
27980 }
27981 
27982 /// Walk up chain skipping non-aliasing memory nodes,
27983 /// looking for aliasing nodes and adding them to the Aliases vector.
27984 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
27985                                    SmallVectorImpl<SDValue> &Aliases) {
27986   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
27987   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
27988 
27989   // Get alias information for node.
27990   // TODO: relax aliasing for unordered atomics (see D66309)
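  // (Two simple loads can always be reordered, so load/load pairs are skipped
  // below without an alias query.)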
27991   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
27992 
27993   // Starting off.
27994   Chains.push_back(OriginalChain);
27995   unsigned Depth = 0;
27996 
27997   // Attempt to improve chain by a single step
27998   auto ImproveChain = [&](SDValue &C) -> bool {
27999     switch (C.getOpcode()) {
28000     case ISD::EntryToken:
28001       // No need to mark EntryToken.
28002       C = SDValue();
28003       return true;
28004     case ISD::LOAD:
28005     case ISD::STORE: {
28006       // Get alias information for C.
28007       // TODO: Relax aliasing for unordered atomics (see D66309)
28008       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28009                       cast<LSBaseSDNode>(C.getNode())->isSimple();
28010       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28011         // Look further up the chain.
28012         C = C.getOperand(0);
28013         return true;
28014       }
28015       // Alias, so stop here.
28016       return false;
28017     }
28018 
28019     case ISD::CopyFromReg:
28020       // Always forward past CopyFromReg.
28021       C = C.getOperand(0);
28022       return true;
28023 
28024     case ISD::LIFETIME_START:
28025     case ISD::LIFETIME_END: {
28026       // We can forward past any lifetime start/end that can be proven not to
28027       // alias the memory access.
28028       if (!mayAlias(N, C.getNode())) {
28029         // Look further up the chain.
28030         C = C.getOperand(0);
28031         return true;
28032       }
28033       return false;
28034     }
28035     default:
28036       return false;
28037     }
28038   };
28039 
28040   // Look at each chain and determine if it is an alias.  If so, add it to the
28041   // aliases list.  If not, then continue up the chain looking for the next
28042   // candidate.
28043   while (!Chains.empty()) {
28044     SDValue Chain = Chains.pop_back_val();
28045 
28046     // Don't bother if we've seen Chain before.
28047     if (!Visited.insert(Chain.getNode()).second)
28048       continue;
28049 
28050     // For TokenFactor nodes, look at each operand and only continue up the
28051     // chain until we reach the depth limit.
28052     //
28053     // FIXME: The depth check could be made to return the last non-aliasing
28054     // chain we found before we hit a tokenfactor rather than the original
28055     // chain.
28056     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28057       Aliases.clear();
28058       Aliases.push_back(OriginalChain);
28059       return;
28060     }
28061 
28062     if (Chain.getOpcode() == ISD::TokenFactor) {
28063       // We have to check each of the operands of the token factor for "small"
28064       // token factors, so we queue them up.  Adding the operands to the queue
28065       // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE).
28067       if (Chain.getNumOperands() > 16) {
28068         Aliases.push_back(Chain);
28069         continue;
28070       }
28071       for (unsigned n = Chain.getNumOperands(); n;)
28072         Chains.push_back(Chain.getOperand(--n));
28073       ++Depth;
28074       continue;
28075     }
28076     // Everything else
28077     if (ImproveChain(Chain)) {
28078       // Updated Chain Found, Consider new chain if one exists.
28079       if (Chain.getNode())
28080         Chains.push_back(Chain);
28081       ++Depth;
28082       continue;
28083     }
28084     // No Improved Chain Possible, treat as Alias.
28085     Aliases.push_back(Chain);
28086   }
28087 }
28088 
28089 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node).
28091 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28092   if (OptLevel == CodeGenOptLevel::None)
28093     return OldChain;
28094 
28095   // Ops for replacing token factor.
28096   SmallVector<SDValue, 8> Aliases;
28097 
28098   // Accumulate all the aliases to this node.
28099   GatherAllAliases(N, OldChain, Aliases);
28100 
28101   // If no operands then chain to entry token.
28102   if (Aliases.empty())
28103     return DAG.getEntryNode();
28104 
28105   // If a single operand then chain to it.  We don't need to revisit it.
28106   if (Aliases.size() == 1)
28107     return Aliases[0];
28108 
28109   // Construct a custom tailored token factor.
28110   return DAG.getTokenFactor(SDLoc(N), Aliases);
28111 }
28112 
28113 // This function tries to collect a bunch of potentially interesting
28114 // nodes to improve the chains of, all at once. This might seem
28115 // redundant, as this function gets called when visiting every store
28116 // node, so why not let the work be done on each store as it's visited?
28117 //
28118 // I believe this is mainly important because mergeConsecutiveStores
28119 // is unable to deal with merging stores of different sizes, so unless
28120 // we improve the chains of all the potential candidates up-front
28121 // before running mergeConsecutiveStores, it might only see some of
28122 // the nodes that will eventually be candidates, and then not be able
28123 // to go from a partially-merged state to the desired final
28124 // fully-merged state.
28125 
28126 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28127   SmallVector<StoreSDNode *, 8> ChainedStores;
28128   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, each store visited while walking up the chain writes
  // immediately adjacent to the previous one, so its interval is merged with
  // the previous interval at insertion time.
28132 
28133   using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28134                                  IntervalMapHalfOpenInfo<int64_t>>;
28135   IMap::Allocator A;
28136   IMap Intervals(A);
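  // IntervalMap coalesces adjacent intervals that map to equal values, and
  // every interval here carries std::monostate, so contiguous stores collapse
  // into a single interval.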
28137 
28138   // This holds the base pointer, index, and the offset in bytes from the base
28139   // pointer.
28140   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28141 
28142   // We must have a base and an offset.
28143   if (!BasePtr.getBase().getNode())
28144     return false;
28145 
28146   // Do not handle stores to undef base pointers.
28147   if (BasePtr.getBase().isUndef())
28148     return false;
28149 
28150   // Do not handle stores to opaque types
28151   if (St->getMemoryVT().isZeroSized())
28152     return false;
28153 
28154   // BaseIndexOffset assumes that offsets are fixed-size, which
28155   // is not valid for scalable vectors where the offsets are
28156   // scaled by `vscale`, so bail out early.
28157   if (St->getMemoryVT().isScalableVT())
28158     return false;
28159 
28160   // Add ST's interval.
28161   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28162                    std::monostate{});
28163 
28164   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28165     if (Chain->getMemoryVT().isScalableVector())
28166       return false;
28167 
28168     // If the chain has more than one use, then we can't reorder the mem ops.
28169     if (!SDValue(Chain, 0)->hasOneUse())
28170       break;
28171     // TODO: Relax for unordered atomics (see D66309)
28172     if (!Chain->isSimple() || Chain->isIndexed())
28173       break;
28174 
28175     // Find the base pointer and offset for this memory node.
28176     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28177     // Check that the base pointer is the same as the original one.
28178     int64_t Offset;
28179     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28180       break;
28181     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28182     // Make sure we don't overlap with other intervals by checking the ones to
28183     // the left or right before inserting.
28184     auto I = Intervals.find(Offset);
28185     // If there's a next interval, we should end before it.
28186     if (I != Intervals.end() && I.start() < (Offset + Length))
28187       break;
28188     // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() > Offset)
28190       break;
28191     Intervals.insert(Offset, Offset + Length, std::monostate{});
28192 
28193     ChainedStores.push_back(Chain);
28194     STChain = Chain;
28195   }
28196 
28197   // If we didn't find a chained store, exit.
28198   if (ChainedStores.empty())
28199     return false;
28200 
28201   // Improve all chained stores (St and ChainedStores members) starting from
28202   // where the store chain ended and return single TokenFactor.
28203   SDValue NewChain = STChain->getChain();
28204   SmallVector<SDValue, 8> TFOps;
28205   for (unsigned I = ChainedStores.size(); I;) {
28206     StoreSDNode *S = ChainedStores[--I];
28207     SDValue BetterChain = FindBetterChain(S, NewChain);
28208     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28209         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28210     TFOps.push_back(SDValue(S, 0));
28211     ChainedStores[I] = S;
28212   }
28213 
28214   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28215   SDValue BetterChain = FindBetterChain(St, NewChain);
28216   SDValue NewST;
28217   if (St->isTruncatingStore())
28218     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28219                               St->getBasePtr(), St->getMemoryVT(),
28220                               St->getMemOperand());
28221   else
28222     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28223                          St->getBasePtr(), St->getMemOperand());
28224 
28225   TFOps.push_back(NewST);
28226 
28227   // If we improved every element of TFOps, then we've lost the dependence on
28228   // NewChain to successors of St and we need to add it back to TFOps. Do so at
28229   // the beginning to keep relative order consistent with FindBetterChains.
28230   auto hasImprovedChain = [&](SDValue ST) -> bool {
28231     return ST->getOperand(0) != NewChain;
28232   };
28233   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28234   if (AddNewChain)
28235     TFOps.insert(TFOps.begin(), NewChain);
28236 
28237   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28238   CombineTo(St, TF);
28239 
28240   // Add TF and its operands to the worklist.
28241   AddToWorklist(TF.getNode());
28242   for (const SDValue &Op : TF->ops())
28243     AddToWorklist(Op.getNode());
28244   AddToWorklist(STChain);
28245   return true;
28246 }
28247 
28248 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28249   if (OptLevel == CodeGenOptLevel::None)
28250     return false;
28251 
28252   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28253 
28254   // We must have a base and an offset.
28255   if (!BasePtr.getBase().getNode())
28256     return false;
28257 
28258   // Do not handle stores to undef base pointers.
28259   if (BasePtr.getBase().isUndef())
28260     return false;
28261 
28262   // Directly improve a chain of disjoint stores starting at St.
28263   if (parallelizeChainedStores(St))
28264     return true;
28265 
  // Improve St's chain.
28267   SDValue BetterChain = FindBetterChain(St, St->getChain());
28268   if (St->getChain() != BetterChain) {
28269     replaceStoreChain(St, BetterChain);
28270     return true;
28271   }
28272   return false;
28273 }
28274 
28275 /// This is the entry point for the file.
28276 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
28277                            CodeGenOptLevel OptLevel) {
  // This is the main entry point to this class.
28279   DAGCombiner(*this, AA, OptLevel).Run(Level);
28280 }
28281