//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines DAG nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ByteProvider.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <variant>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of DAG nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store sequences narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

DEBUG_COUNTER(DAGCombineCounter, "dagcombine",
              "Controls whether a DAG combine is performed for a node");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));

static cl::opt<bool>
  MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                    cl::desc("DAG combiner may split indexing from loads"));

static cl::opt<bool>
    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
                       cl::desc("DAG combiner enable merging multiple stores "
                                "into a wider store"));

static cl::opt<unsigned> TokenFactorInlineLimit(
    "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
    cl::desc("Limit the number of operands to inline for Token Factors"));

static cl::opt<unsigned> StoreMergeDependenceLimit(
    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
    cl::desc("Limit the number of times for the same StoreNode and RootNode "
             "to bail out in store merging dependence check"));

static cl::opt<bool> EnableReduceLoadOpStoreWidth(
    "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable reducing the width of load/op/store "
             "sequence"));

static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore(
    "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true),
    cl::desc("DAG combiner enable load/<replace bytes>/store with "
             "a narrower store"));

static cl::opt<bool> EnableVectorFCopySignExtendRound(
    "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false),
    cl::desc(
        "Enable merging extends and rounds into FCOPYSIGN on vector types"));

namespace {

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    const SelectionDAGTargetInfo *STI;
    CombineLevel Level = BeforeLegalizeTypes;
    CodeGenOptLevel OptLevel;
    bool LegalDAG = false;
    bool LegalOperations = false;
    bool LegalTypes = false;
    bool ForCodeSize;
    bool DisableGenericCombines;

    /// Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// This records all nodes attempted to be added to the worklist since we
    /// considered a new worklist entry. Since we do not add duplicate nodes
    /// to the worklist, this is different from the tail of the worklist.
    SmallSetVector<SDNode *, 32> PruningList;

    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Map from candidate StoreNode to the pair of RootNode and count.
    /// The count is used to track how many times we have seen the StoreNode
    /// with the same RootNode bail out in dependence check. If we have seen
    /// the bail out for the same pair many times over a limit, we won't
    /// consider the StoreNode with the same RootNode as store merging
    /// candidate again.
    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis *AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Convenient shorthand to add a node and all of its users to the
    /// worklist.
    void AddToWorklistWithUsers(SDNode *N) {
      AddUsersToWorklist(N);
      AddToWorklist(N);
    }

    // Prune potentially dangling nodes. This is called after
    // any visit to a node, but should also be called during a visit after any
    // failed combine which may have created a DAG node.
    void clearAddedDanglingWorklistEntries() {
      // Check any nodes added to the worklist to see if they are prunable.
      while (!PruningList.empty()) {
        auto *N = PruningList.pop_back_val();
        if (N->use_empty())
          recursivelyDeleteUnusedNodes(N);
      }
    }

    SDNode *getNextWorklistEntry() {
      // Before we do any work, remove nodes that are not in use.
      clearAddedDanglingWorklistEntries();
      SDNode *N = nullptr;
      // The Worklist holds the SDNodes in order, but it may contain null
      // entries.
      while (!N && !Worklist.empty()) {
        N = Worklist.pop_back_val();
      }

      if (N) {
        bool GoodWorklistEntry = WorklistMap.erase(N);
        (void)GoodWorklistEntry;
        assert(GoodWorklistEntry &&
               "Found a worklist entry without a corresponding map entry!");
      }
      return N;
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()),
          STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) {
      ForCodeSize = DAG.shouldOptForSize();
      DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel);

      MaximumLegalStoreInBits = 0;
      // We use the minimum store size here, since that's all we can guarantee
      // for the scalable vector types.
      for (MVT VT : MVT::all_valuetypes())
        if (EVT(VT).isSimple() && VT != MVT::Other &&
            TLI.isTypeLegal(EVT(VT)) &&
            VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits)
          MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue();
    }
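
    // For example (an illustrative sketch, not from the original source): on
    // a hypothetical target whose widest legal type is v4i32,
    // MaximumLegalStoreInBits ends up as 128. A scalable type such as nxv4i32
    // would contribute only its known minimum size (also 128 bits), which is
    // why the minimum store size is used in the loop above.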

    void ConsiderForPruning(SDNode *N) {
      // Mark this for potential pruning.
      PruningList.insert(N);
    }

    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed).
    void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
      assert(N->getOpcode() != ISD::DELETED_NODE &&
             "Deleted Node added to Worklist");

      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      if (IsCandidateForPruning)
        ConsiderForPruning(N);

      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);
      PruningList.remove(N);
      StoreRootCountMap.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:
    unsigned MaximumLegalStoreInBits;

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getScalarValueSizeInBits();
      APInt DemandedBits = APInt::getAllOnes(BitWidth);
      return SimplifyDemandedBits(Op, DemandedBits);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
      TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
      KnownBits Known;
      if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false))
        return false;

      // Revisit the node.
      AddToWorklist(Op.getNode());

      CommitTargetLoweringOpt(TLO);
      return true;
    }
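
    // For illustration (a sketch, not from the original source): for
    // Op = (and X, 0xFF) where a caller only demands the low 4 bits,
    // calling SimplifyDemandedBits(Op, APInt(BitWidth, 0xF)) lets
    // TLI.SimplifyDemandedBits shrink the mask or fold the AND away
    // entirely, after which the node is queued for another visit.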

    /// Check the specified vector node value to see if it can be simplified or
    /// if things it uses can be simplified as it only uses some of the
    /// elements. If so, return true.
    bool SimplifyDemandedVectorElts(SDValue Op) {
      // TODO: For now just pretend it cannot be simplified.
      if (Op.getValueType().isScalableVector())
        return false;

      unsigned NumElts = Op.getValueType().getVectorNumElements();
      APInt DemandedElts = APInt::getAllOnes(NumElts);
      return SimplifyDemandedVectorElts(Op, DemandedElts);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                              const APInt &DemandedElts,
                              bool AssumeSingleUse = false);
    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                    bool AssumeSingleUse = false);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    // Looks up the chain to find a unique (unaliased) store feeding the passed
    // load. If no such store is found, returns a nullptr.
    // Note: This will look past a CALLSEQ_START if the load is chained to it
    //       so that it can find stack stores for byval params.
    StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
    // Scalars have size 0 to distinguish from singleton vectors.
    SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
    bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
    bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success, SDValue() on failure.
    SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                         SDValue EltNo,
                                         LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
                                SDValue RHS, SDValue True, SDValue False,
                                ISD::CondCode CC);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement DAG node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
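    // For illustration (a sketch, not from the original source), a visit
    // routine reports each outcome as:
    //   return SDValue();                            // no change was made
    //   return CombineTo(N, NewVal);                 // N replaced and dead
    //   return DAG.getNode(ISD::SUB, DL, VT, A, B);  // replace N with result
    //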
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitADDLike(SDNode *N);
    SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDSAT(SDNode *N);
    SDValue visitSUBSAT(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitADDO(SDNode *N);
    SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitSUBO(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitUADDO_CARRY(SDNode *N);
    SDValue visitSADDO_CARRY(SDNode *N);
    SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                 SDNode *N);
    SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
                                 SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitUSUBO_CARRY(SDNode *N);
    SDValue visitSSUBO_CARRY(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitMULFIX(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitAVG(SDNode *N);
    SDValue visitABD(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL);
    SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitFunnelShift(SDNode *N);
    SDValue visitSHLSAT(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitABS(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCCARRY(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitAssertExt(SDNode *N);
    SDValue visitAssertAlign(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitEXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitFREEZE(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitVP_FADD(SDNode *N);
    SDValue visitVP_FSUB(SDNode *N);
    SDValue visitSTRICT_FADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    template <class MatchContextClass> SDValue visitFMA(SDNode *N);
    SDValue visitFMAD(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitFPOW(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitXRINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFREXP(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMinMax(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
    SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);

    bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);

    SDValue visitSTORE(SDNode *N);
    SDValue visitLIFETIME_END(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitVPGATHER(SDNode *N);
    SDValue visitVPSCATTER(SDNode *N);
    SDValue visitVP_STRIDED_LOAD(SDNode *N);
    SDValue visitVP_STRIDED_STORE(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);
    SDValue visitFP_TO_BF16(SDNode *N);
    SDValue visitBF16_TO_FP(SDNode *N);
    SDValue visitVECREDUCE(SDNode *N);
    SDValue visitVPOp(SDNode *N);
    SDValue visitGET_FPENV_MEM(SDNode *N);
    SDValue visitSET_FPENV_MEM(SDNode *N);

    template <class MatchContextClass>
    SDValue visitFADDForFMACombine(SDNode *N);
    template <class MatchContextClass>
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMADistributiveCombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    bool reassociationCanBreakAddressingModePattern(unsigned Opc,
                                                    const SDLoc &DL,
                                                    SDNode *N,
                                                    SDValue N0,
                                                    SDValue N1);
    SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                      SDValue N1, SDNodeFlags Flags);
    SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                           SDValue N1, SDNodeFlags Flags);
    SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL,
                                 EVT VT, SDValue N0, SDValue N1,
                                 SDNodeFlags Flags = SDNodeFlags());

    SDValue visitShiftByConstant(SDNode *N);

    SDValue foldSelectOfConstants(SDNode *N);
    SDValue foldVSelectOfConstants(SDNode *N);
    SDValue foldBinOpIntoSelect(SDNode *BO);
    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue convertSelectOfFPConstantsToLoadOffset(
        const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
        ISD::CondCode CC);
    SDValue foldSignChangeInBitcast(SDNode *N);
    SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                   SDValue N2, SDValue N3, ISD::CondCode CC);
    SDValue foldSelectOfBinops(SDNode *N);
    SDValue foldSextSetcc(SDNode *N);
    SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                              const SDLoc &DL);
    SDValue foldSubToUSubSat(EVT DstVT, SDNode *N);
    SDValue foldABSToABD(SDNode *N);
    SDValue unfoldMaskedMerge(SDNode *N);
    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans);
    SDValue rebuildSetCC(SDValue N);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC, bool MatchStrict = false) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
                                 const TargetLowering &TLI);

    SDValue CombineExtLoad(SDNode *N);
    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
    SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
    SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
    SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildSREMPow2(SDNode *N);
    SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N);
    SDValue BuildLogBase2(SDValue V, const SDLoc &DL,
                          bool KnownNeverZero = false,
                          bool InexpensiveOnly = false,
                          std::optional<EVT> OutVT = std::nullopt);
    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                SDNodeFlags Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg, bool HasPos,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue MatchLoadCombine(SDNode *N);
    SDValue mergeTruncStores(StoreSDNode *N);
    SDValue reduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue splitMergedValStore(StoreSDNode *ST);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue convertBuildVecZextToZext(SDNode *N);
    SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecTruncToBitCast(SDNode *N);
    SDValue reduceBuildVecToShuffle(SDNode *N);
    SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                  ArrayRef<int> VectorMask, SDValue VecIn1,
                                  SDValue VecIn2, unsigned LeftIdx,
                                  bool DidSplitVec);
    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool mayAlias(SDNode *Op0, SDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node).
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    // Helper for findBetterNeighborChains. Walk up the store chain, adding
    // additional chained stores that do not overlap and can be parallelized.
    bool parallelizeChainedStores(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;

      // Offset from the base ptr.
      int64_t OffsetFromBase;

      MemOpLink(LSBaseSDNode *N, int64_t Offset)
          : MemNode(N), OffsetFromBase(Offset) {}
    };

    // Classify the origin of a stored value.
    enum class StoreSource { Unknown, Constant, Extract, Load };
    StoreSource getStoreSource(SDValue StoreVal) {
      switch (StoreVal.getOpcode()) {
      case ISD::Constant:
      case ISD::ConstantFP:
        return StoreSource::Constant;
      case ISD::BUILD_VECTOR:
        if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) ||
            ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode()))
          return StoreSource::Constant;
        return StoreSource::Unknown;
      case ISD::EXTRACT_VECTOR_ELT:
      case ISD::EXTRACT_SUBVECTOR:
        return StoreSource::Extract;
      case ISD::LOAD:
        return StoreSource::Load;
      default:
        return StoreSource::Unknown;
      }
    }
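
    // For example, a stored value of (extract_vector_elt v, i) classifies as
    // StoreSource::Extract, (load p) as StoreSource::Load, and a BUILD_VECTOR
    // made entirely of constant (FP) elements as StoreSource::Constant.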

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
                                     SDValue ConstNode);

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT);

    /// Helper function to calculate whether the given Load/Store can have its
    /// width reduced to ExtVT.
    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                           EVT &MemVT, unsigned ShAmt = 0);

    /// Used by BackwardsPropagateMask to find suitable loads.
    bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                           ConstantSDNode *Mask, SDNode *&NodeToMask);
    /// Attempt to propagate a given AND node back to load leaves so that they
    /// can be combined into narrow loads.
    bool BackwardsPropagateMask(SDNode *N);

    /// Helper function for mergeConsecutiveStores which merges the component
    /// store chains.
    SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                unsigned NumStores);

    /// Helper function for mergeConsecutiveStores which checks if all the store
    /// nodes have the same underlying object. We can still reuse the first
    /// store's pointer info if all the stores are from the same object.
    bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);

    /// This is a helper function for mergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store introducing
    /// bitcasts if necessary.  \return True if a merged store was created.
    bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector,
                                         bool UseTrunc);

    /// This is a helper function for mergeConsecutiveStores. Stores that
    /// potentially may be merged with St are placed in StoreNodes. RootNode is
    /// a chain predecessor to all store candidates.
    void getStoreMergeCandidates(StoreSDNode *St,
                                 SmallVectorImpl<MemOpLink> &StoreNodes,
                                 SDNode *&Root);

    /// Helper function for mergeConsecutiveStores. Checks if candidate stores
    /// have indirect dependency through their operands. RootNode is the
    /// predecessor to all stores calculated by getStoreMergeCandidates and is
    /// used to prune the dependency check. \return True if safe to merge.
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
        SDNode *RootNode);

    /// This is a helper function for mergeConsecutiveStores. Given a list of
    /// store candidates, find the first N that are consecutive in memory.
    /// Returns 0 if there are not at least 2 consecutive stores to try merging.
    unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  int64_t ElementSizeBytes) const;

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of constant values.
    bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes,
                                  unsigned NumConsecutiveStores,
                                  EVT MemVT, SDNode *Root, bool AllowVectors);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of extracted vector elements.
    /// When extracting multiple vector elements, try to store them in one
    /// vector store rather than a sequence of scalar stores.
    bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                 unsigned NumConsecutiveStores, EVT MemVT,
                                 SDNode *Root);

    /// This is a helper function for mergeConsecutiveStores. It is used for
    /// store chains that are composed entirely of loaded values.
    bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumConsecutiveStores, EVT MemVT,
                              SDNode *Root, bool AllowVectors,
                              bool IsNonTemporalStore, bool IsNonTemporalLoad);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return true if stores were merged.
    bool mergeConsecutiveStores(StoreSDNode *St);

    /// Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that the trunc is
    /// single-use); if any requirement is not met, an empty SDValue is
    /// returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

    /// Helper function to determine whether the target supports the operation
    /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
    bool hasOperation(unsigned Opcode, EVT VT) {
      return TLI.isOperationLegalOrCustom(Opcode, VT, LegalOperations);
    }

  public:
    /// Runs the DAG combiner on all nodes in the work list.
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
  };

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes exponential
  //        compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

class EmptyMatchContext {
  SelectionDAG &DAG;
  const TargetLowering &TLI;

public:
  EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
      : DAG(DAG), TLI(TLI) {}

  bool match(SDValue OpN, unsigned Opcode) const {
    return Opcode == OpN->getOpcode();
  }

  // Same as SelectionDAG::getNode().
  template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
    return DAG.getNode(std::forward<ArgT>(Args)...);
  }

  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
                                bool LegalOnly = false) const {
    return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
  }
};

class VPMatchContext {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  SDValue RootMaskOp;
  SDValue RootVectorLenOp;

public:
  VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
      : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
    assert(Root->isVPOpcode());
    if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
      RootMaskOp = Root->getOperand(*RootMaskPos);

    if (auto RootVLenPos =
            ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
      RootVectorLenOp = Root->getOperand(*RootVLenPos);
  }

  /// Returns whether \p OpVal is a node that is functionally compatible with
  /// the NodeType \p Opc.
  bool match(SDValue OpVal, unsigned Opc) const {
    if (!OpVal->isVPOpcode())
      return OpVal->getOpcode() == Opc;

    auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
                                           !OpVal->getFlags().hasNoFPExcept());
    if (BaseOpc != Opc)
      return false;

    // Make sure the mask of OpVal is an all-true mask or the same as Root's.
    unsigned VPOpcode = OpVal->getOpcode();
    if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
      SDValue MaskOp = OpVal.getOperand(*MaskPos);
      if (RootMaskOp != MaskOp &&
          !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
        return false;
    }

    // Make sure the EVL of OpVal is the same as Root's.
    if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
      if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
        return false;
    return true;
  }

  // Specialize based on number of operands.
  // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
  // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
  // DAG.getNode(Opcode, DL, VT); }
  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
    return DAG.getNode(VPOpcode, DL, VT,
                       {Operand, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDValue N3) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, N3, RootMaskOp, RootVectorLenOp});
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
                  SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
    return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
                       Flags);
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
    return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
                       Flags);
  }

  SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
                  SDValue N2, SDValue N3, SDNodeFlags Flags) {
    unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
    assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
           ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
    return DAG.getNode(VPOpcode, DL, VT,
                       {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
  }

  bool isOperationLegalOrCustom(unsigned Op, EVT VT,
                                bool LegalOnly = false) const {
    unsigned VPOp = ISD::getVPForBaseOpcode(Op);
    return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
  }
};
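
// For illustration (a sketch, not from the original source): the two match
// contexts let a single templated combine serve both plain and VP nodes.
// For example, visitFADDForFMACombine<VPMatchContext> can match
// (vp_fadd (vp_fmul x, y), z) and emit a vp_fma through getNode, reusing the
// root's mask and EVL, while the EmptyMatchContext instantiation handles the
// ordinary FADD/FMUL case with no extra operands.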

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

bool TargetLowering::DAGCombinerInfo::
recursivelyDeleteUnusedNodes(SDNode *N) {
  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

// APInts must be the same size for most operations; this helper function zero
// extends the shorter of the pair so that they match. We provide an Offset so
// that we can create bit widths that won't overflow.
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zext(Bits);
  RHS = RHS.zext(Bits);
}
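
// Worked example (illustrative): with LHS an i8 APInt and RHS an i16 APInt,
// zeroExtendToMatch(LHS, RHS, /*Offset=*/1) zero-extends both to 17 bits
// (1 + max(8, 16)), leaving one spare bit so a following shift or add on the
// pair cannot overflow.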

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC, bool MatchStrict) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (MatchStrict &&
      (N.getOpcode() == ISD::STRICT_FSETCC ||
       N.getOpcode() == ISD::STRICT_FSETCCS)) {
    LHS = N.getOperand(1);
    RHS = N.getOperand(2);
    CC  = N.getOperand(3);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
      !TLI.isConstFalseVal(N.getOperand(3)))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}
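
// For example (illustrative), (select_cc LHS, RHS, TrueVal, FalseVal, CC)
// where TrueVal/FalseVal are the target's canonical boolean true/false
// constants is reported as equivalent to (setcc LHS, RHS, CC), with the LHS,
// RHS, and CC out-parameters set accordingly.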

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N->hasOneUse())
    return true;
  return false;
}

static bool isConstantSplatVectorMaskForType(SDNode *N, EVT ScalarTy) {
  if (!ScalarTy.isSimple())
    return false;

  uint64_t MaskForTy = 0ULL;
  switch (ScalarTy.getSimpleVT().SimpleTy) {
  case MVT::i8:
    MaskForTy = 0xFFULL;
    break;
  case MVT::i16:
    MaskForTy = 0xFFFFULL;
    break;
  case MVT::i32:
    MaskForTy = 0xFFFFFFFFULL;
    break;
  default:
    return false;
  }

  APInt Val;
  if (ISD::isConstantSplatVector(N, Val))
    return Val.getLimitedValue() == MaskForTy;

  return false;
}
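
// For example (illustrative), a v4i32 splat of the constant 0xFFFF is a mask
// for ScalarTy == MVT::i16, since it preserves exactly the low 16 bits of
// each element.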

// Determines if N is a constant integer or a splat/build vector of constant
// integers (and undefs). Build vector implicit truncation is not permitted.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR && N.getOpcode() != ISD::SPLAT_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}
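
// For example (illustrative), (BUILD_VECTOR i32 C1, i32 undef, i32 C2) with
// 32-bit constant operands qualifies, but a build vector whose constant
// operands are wider than the element type (implicit truncation) does not.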
1183 
1184 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1185 // undef's.
1186 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1187   if (V.getOpcode() != ISD::BUILD_VECTOR)
1188     return false;
1189   return isConstantOrConstantVector(V, NoOpaques) ||
1190          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1191 }
1192 
1193 // Determine if this an indexed load with an opaque target constant index.
1194 static bool canSplitIdx(LoadSDNode *LD) {
1195   return MaySplitLoadIndex &&
1196          (LD->getOperand(2).getOpcode() != ISD::TargetConstant ||
1197           !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque());
1198 }
1199 
1200 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1201                                                              const SDLoc &DL,
1202                                                              SDNode *N,
1203                                                              SDValue N0,
1204                                                              SDValue N1) {
1205   // Currently this only tries to ensure we don't undo the GEP splits done by
1206   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1207   // we check if the following transformation would be problematic:
1208   // (load/store (add, (add, x, offset1), offset2)) ->
1209   // (load/store (add, x, offset1+offset2)).
1210 
1211   // (load/store (add, (add, x, y), offset2)) ->
1212   // (load/store (add, (add, x, offset2), y)).
1213 
1214   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1215     return false;
1216 
1217   auto *C2 = dyn_cast<ConstantSDNode>(N1);
1218   if (!C2)
1219     return false;
1220 
1221   const APInt &C2APIntVal = C2->getAPIntValue();
1222   if (C2APIntVal.getSignificantBits() > 64)
1223     return false;
1224 
1225   if (auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
1226     if (N0.hasOneUse())
1227       return false;
1228 
1229     const APInt &C1APIntVal = C1->getAPIntValue();
1230     const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1231     if (CombinedValueIntVal.getSignificantBits() > 64)
1232       return false;
1233     const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1234 
1235     for (SDNode *Node : N->uses()) {
1236       if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
1237         // Is x[offset2] already not a legal addressing mode? If so then
1238         // reassociating the constants breaks nothing (we test offset2 because
1239         // that's the one we hope to fold into the load or store).
1240         TargetLoweringBase::AddrMode AM;
1241         AM.HasBaseReg = true;
1242         AM.BaseOffs = C2APIntVal.getSExtValue();
1243         EVT VT = LoadStore->getMemoryVT();
1244         unsigned AS = LoadStore->getAddressSpace();
1245         Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1246         if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1247           continue;
1248 
1249         // Would x[offset1+offset2] still be a legal addressing mode?
1250         AM.BaseOffs = CombinedValue;
1251         if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1252           return true;
1253       }
1254     }
1255   } else {
1256     if (auto *GA = dyn_cast<GlobalAddressSDNode>(N0.getOperand(1)))
1257       if (GA->getOpcode() == ISD::GlobalAddress && TLI.isOffsetFoldingLegal(GA))
1258         return false;
1259 
1260     for (SDNode *Node : N->uses()) {
1261       auto *LoadStore = dyn_cast<MemSDNode>(Node);
1262       if (!LoadStore)
1263         return false;
1264 
      // Is x[offset2] a legal addressing mode? If so, then reassociating
      // the constants would break the address pattern.
1267       TargetLoweringBase::AddrMode AM;
1268       AM.HasBaseReg = true;
1269       AM.BaseOffs = C2APIntVal.getSExtValue();
1270       EVT VT = LoadStore->getMemoryVT();
1271       unsigned AS = LoadStore->getAddressSpace();
1272       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1273       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1274         return false;
1275     }
1276     return true;
1277   }
1278 
1279   return false;
1280 }
1281 
1282 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1283 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1284 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1285                                                SDValue N0, SDValue N1,
1286                                                SDNodeFlags Flags) {
1287   EVT VT = N0.getValueType();
1288 
1289   if (N0.getOpcode() != Opc)
1290     return SDValue();
1291 
1292   SDValue N00 = N0.getOperand(0);
1293   SDValue N01 = N0.getOperand(1);
1294 
1295   if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) {
1296     if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) {
1297       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1298       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1}))
1299         return DAG.getNode(Opc, DL, VT, N00, OpNode);
1300       return SDValue();
1301     }
1302     if (TLI.isReassocProfitable(DAG, N0, N1)) {
1303       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1304       //              iff (op x, c1) has one use
1305       SDNodeFlags NewFlags;
1306       if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
1307           Flags.hasNoUnsignedWrap())
1308         NewFlags.setNoUnsignedWrap(true);
1309       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
1310       return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
1311     }
1312   }
1313 
1314   // Check for repeated operand logic simplifications.
1315   if (Opc == ISD::AND || Opc == ISD::OR) {
1316     // (N00 & N01) & N00 --> N00 & N01
1317     // (N00 & N01) & N01 --> N00 & N01
1318     // (N00 | N01) | N00 --> N00 | N01
1319     // (N00 | N01) | N01 --> N00 | N01
1320     if (N1 == N00 || N1 == N01)
1321       return N0;
1322   }
1323   if (Opc == ISD::XOR) {
1324     // (N00 ^ N01) ^ N00 --> N01
1325     if (N1 == N00)
1326       return N01;
1327     // (N00 ^ N01) ^ N01 --> N00
1328     if (N1 == N01)
1329       return N00;
1330   }
1331 
1332   if (TLI.isReassocProfitable(DAG, N0, N1)) {
1333     if (N1 != N01) {
      // Reassociate if (op N00, N1) already exists.
1335       if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N00, N1})) {
        // If (Op (Op N00, N1), N01) already exists, we need to stop
        // reassociating here to avoid an infinite loop.
1338         if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N01}))
1339           return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N01);
1340       }
1341     }
1342 
1343     if (N1 != N00) {
      // Reassociate if (op N01, N1) already exists.
1345       if (SDNode *NE = DAG.getNodeIfExists(Opc, DAG.getVTList(VT), {N01, N1})) {
        // If (Op (Op N01, N1), N00) already exists, we need to stop
        // reassociating here to avoid an infinite loop.
1348         if (!DAG.doesNodeExist(Opc, DAG.getVTList(VT), {SDValue(NE, 0), N00}))
1349           return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00);
1350       }
1351     }
1352 
    // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND
    // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same
    // predicate, or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are
    // comparisons with the same predicate. This enables optimizations such as
    // the following:
    // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C)
    // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C)
1360     if (Opc == ISD::AND || Opc == ISD::OR) {
1361       if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC &&
1362           N01->getOpcode() == ISD::SETCC) {
1363         ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get();
1364         ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get();
1365         ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get();
1366         if (CC1 == CC00 && CC1 != CC01) {
1367           SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags);
1368           return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags);
1369         }
1370         if (CC1 == CC01 && CC1 != CC00) {
1371           SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags);
1372           return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags);
1373         }
1374       }
1375     }
1376   }
1377 
1378   return SDValue();
1379 }
1380 
1381 // Try to reassociate commutative binops.
1382 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1383                                     SDValue N1, SDNodeFlags Flags) {
1384   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1385 
1386   // Floating-point reassociation is not allowed without loose FP math.
1387   if (N0.getValueType().isFloatingPoint() ||
1388       N1.getValueType().isFloatingPoint())
1389     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1390       return SDValue();
1391 
1392   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
1393     return Combined;
1394   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
1395     return Combined;
1396   return SDValue();
1397 }
1398 
1399 // Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
1400 // Note that we only expect Flags to be passed from FP operations. For integer
1401 // operations they need to be dropped.
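// For example, with RedOpc == ISD::VECREDUCE_ADD and Opc == ISD::ADD:
//   add (vecreduce_add x), (vecreduce_add y) -> vecreduce_add (add x, y)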
1402 SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
1403                                           const SDLoc &DL, EVT VT, SDValue N0,
1404                                           SDValue N1, SDNodeFlags Flags) {
1405   if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
1406       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
1407       N0->hasOneUse() && N1->hasOneUse() &&
1408       TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
1409       TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
1410     SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
1411     return DAG.getNode(RedOpc, DL, VT,
1412                        DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
1413                                    N0.getOperand(0), N1.getOperand(0)));
1414   }
1415   return SDValue();
1416 }
1417 
1418 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1419                                bool AddTo) {
1420   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1421   ++NodesCombined;
1422   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1423              To[0].dump(&DAG);
1424              dbgs() << " and " << NumTo - 1 << " other values\n");
1425   for (unsigned i = 0, e = NumTo; i != e; ++i)
1426     assert((!To[i].getNode() ||
1427             N->getValueType(i) == To[i].getValueType()) &&
1428            "Cannot combine value to value of different type!");
1429 
1430   WorklistRemover DeadNodes(*this);
1431   DAG.ReplaceAllUsesWith(N, To);
1432   if (AddTo) {
1433     // Push the new nodes and any users onto the worklist
1434     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1435       if (To[i].getNode())
1436         AddToWorklistWithUsers(To[i].getNode());
1437     }
1438   }
1439 
1440   // Finally, if the node is now dead, remove it from the graph.  The node
1441   // may not be dead if the replacement process recursively simplified to
1442   // something else needing this node.
1443   if (N->use_empty())
1444     deleteAndRecombine(N);
1445   return SDValue(N, 0);
1446 }
1447 
1448 void DAGCombiner::
1449 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1450   // Replace the old value with the new one.
1451   ++NodesCombined;
1452   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG);
1453              dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n');
1454 
1455   // Replace all uses.
1456   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1457 
1458   // Push the new node and any (possibly new) users onto the worklist.
1459   AddToWorklistWithUsers(TLO.New.getNode());
1460 
1461   // Finally, if the node is now dead, remove it from the graph.
1462   recursivelyDeleteUnusedNodes(TLO.Old.getNode());
1463 }
1464 
1465 /// Check the specified integer node value to see if it can be simplified or if
1466 /// things it uses can be simplified by bit propagation. If so, return true.
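/// For example, if only the low byte of a value is demanded, an AND with a
/// mask that already covers those bits can be removed entirely.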
1467 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1468                                        const APInt &DemandedElts,
1469                                        bool AssumeSingleUse) {
1470   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1471   KnownBits Known;
1472   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0,
1473                                 AssumeSingleUse))
1474     return false;
1475 
1476   // Revisit the node.
1477   AddToWorklist(Op.getNode());
1478 
1479   CommitTargetLoweringOpt(TLO);
1480   return true;
1481 }
1482 
1483 /// Check the specified vector node value to see if it can be simplified or
1484 /// if things it uses can be simplified as it only uses some of the elements.
1485 /// If so, return true.
1486 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1487                                              const APInt &DemandedElts,
1488                                              bool AssumeSingleUse) {
1489   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1490   APInt KnownUndef, KnownZero;
1491   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1492                                       TLO, 0, AssumeSingleUse))
1493     return false;
1494 
1495   // Revisit the node.
1496   AddToWorklist(Op.getNode());
1497 
1498   CommitTargetLoweringOpt(TLO);
1499   return true;
1500 }
1501 
1502 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1503   SDLoc DL(Load);
1504   EVT VT = Load->getValueType(0);
1505   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1506 
1507   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1508              Trunc.dump(&DAG); dbgs() << '\n');
1509 
1510   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1511   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1512 
1513   AddToWorklist(Trunc.getNode());
1514   recursivelyDeleteUnusedNodes(Load);
1515 }
1516 
1517 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1518   Replace = false;
1519   SDLoc DL(Op);
1520   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1521     LoadSDNode *LD = cast<LoadSDNode>(Op);
1522     EVT MemVT = LD->getMemoryVT();
1523     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1524                                                       : LD->getExtensionType();
1525     Replace = true;
1526     return DAG.getExtLoad(ExtType, DL, PVT,
1527                           LD->getChain(), LD->getBasePtr(),
1528                           MemVT, LD->getMemOperand());
1529   }
1530 
1531   unsigned Opc = Op.getOpcode();
1532   switch (Opc) {
1533   default: break;
1534   case ISD::AssertSext:
1535     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1536       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1537     break;
1538   case ISD::AssertZext:
1539     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1540       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1541     break;
1542   case ISD::Constant: {
1543     unsigned ExtOpc =
1544       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1545     return DAG.getNode(ExtOpc, DL, PVT, Op);
1546   }
1547   }
1548 
1549   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1550     return SDValue();
1551   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1552 }
1553 
1554 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1555   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1556     return SDValue();
1557   EVT OldVT = Op.getValueType();
1558   SDLoc DL(Op);
1559   bool Replace = false;
1560   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1561   if (!NewOp.getNode())
1562     return SDValue();
1563   AddToWorklist(NewOp.getNode());
1564 
1565   if (Replace)
1566     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1567   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1568                      DAG.getValueType(OldVT));
1569 }
1570 
1571 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1572   EVT OldVT = Op.getValueType();
1573   SDLoc DL(Op);
1574   bool Replace = false;
1575   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1576   if (!NewOp.getNode())
1577     return SDValue();
1578   AddToWorklist(NewOp.getNode());
1579 
1580   if (Replace)
1581     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1582   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1583 }
1584 
/// Promote the specified integer binary operation if the target indicates it
/// is beneficial, e.g. on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
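/// For example, (i16 (add a, b)) is rewritten to
///   (i16 (trunc (i32 (add (ext a), (ext b)))))
/// where the extension kind for each operand is chosen by PromoteOperand.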
1588 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1589   if (!LegalOperations)
1590     return SDValue();
1591 
1592   EVT VT = Op.getValueType();
1593   if (VT.isVector() || !VT.isInteger())
1594     return SDValue();
1595 
1596   // If operation type is 'undesirable', e.g. i16 on x86, consider
1597   // promoting it.
1598   unsigned Opc = Op.getOpcode();
1599   if (TLI.isTypeDesirableForOp(Opc, VT))
1600     return SDValue();
1601 
1602   EVT PVT = VT;
1603   // Consult target whether it is a good idea to promote this operation and
1604   // what's the right type to promote it to.
1605   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1606     assert(PVT != VT && "Don't know what type to promote to!");
1607 
1608     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1609 
1610     bool Replace0 = false;
1611     SDValue N0 = Op.getOperand(0);
1612     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1613 
1614     bool Replace1 = false;
1615     SDValue N1 = Op.getOperand(1);
1616     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1617     SDLoc DL(Op);
1618 
1619     SDValue RV =
1620         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1621 
1622     // We are always replacing N0/N1's use in N and only need additional
1623     // replacements if there are additional uses.
1624     // Note: We are checking uses of the *nodes* (SDNode) rather than values
1625     //       (SDValue) here because the node may reference multiple values
1626     //       (for example, the chain value of a load node).
1627     Replace0 &= !N0->hasOneUse();
1628     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1629 
1630     // Combine Op here so it is preserved past replacements.
1631     CombineTo(Op.getNode(), RV);
1632 
1633     // If operands have a use ordering, make sure we deal with
1634     // predecessor first.
1635     if (Replace0 && Replace1 && N0->isPredecessorOf(N1.getNode())) {
1636       std::swap(N0, N1);
1637       std::swap(NN0, NN1);
1638     }
1639 
1640     if (Replace0) {
1641       AddToWorklist(NN0.getNode());
1642       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1643     }
1644     if (Replace1) {
1645       AddToWorklist(NN1.getNode());
1646       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1647     }
1648     return Op;
1649   }
1650   return SDValue();
1651 }
1652 
/// Promote the specified integer shift operation if the target indicates it
/// is beneficial, e.g. on x86 it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
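/// For example, (i16 (srl x, c)) is rewritten to
///   (i16 (trunc (i32 (srl (zext x), c))))
/// The shifted operand is sign-extended for SRA and zero-extended for SRL so
/// that the shifted-in bits match the original operation.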
1656 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1657   if (!LegalOperations)
1658     return SDValue();
1659 
1660   EVT VT = Op.getValueType();
1661   if (VT.isVector() || !VT.isInteger())
1662     return SDValue();
1663 
1664   // If operation type is 'undesirable', e.g. i16 on x86, consider
1665   // promoting it.
1666   unsigned Opc = Op.getOpcode();
1667   if (TLI.isTypeDesirableForOp(Opc, VT))
1668     return SDValue();
1669 
1670   EVT PVT = VT;
1671   // Consult target whether it is a good idea to promote this operation and
1672   // what's the right type to promote it to.
1673   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1674     assert(PVT != VT && "Don't know what type to promote to!");
1675 
1676     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1677 
1678     bool Replace = false;
1679     SDValue N0 = Op.getOperand(0);
1680     if (Opc == ISD::SRA)
1681       N0 = SExtPromoteOperand(N0, PVT);
1682     else if (Opc == ISD::SRL)
1683       N0 = ZExtPromoteOperand(N0, PVT);
1684     else
1685       N0 = PromoteOperand(N0, PVT, Replace);
1686 
1687     if (!N0.getNode())
1688       return SDValue();
1689 
1690     SDLoc DL(Op);
1691     SDValue N1 = Op.getOperand(1);
1692     SDValue RV =
1693         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1694 
1695     if (Replace)
1696       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1697 
1698     // Deal with Op being deleted.
1699     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1700       return RV;
1701   }
1702   return SDValue();
1703 }
1704 
1705 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1706   if (!LegalOperations)
1707     return SDValue();
1708 
1709   EVT VT = Op.getValueType();
1710   if (VT.isVector() || !VT.isInteger())
1711     return SDValue();
1712 
1713   // If operation type is 'undesirable', e.g. i16 on x86, consider
1714   // promoting it.
1715   unsigned Opc = Op.getOpcode();
1716   if (TLI.isTypeDesirableForOp(Opc, VT))
1717     return SDValue();
1718 
1719   EVT PVT = VT;
1720   // Consult target whether it is a good idea to promote this operation and
1721   // what's the right type to promote it to.
1722   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1723     assert(PVT != VT && "Don't know what type to promote to!");
1724     // fold (aext (aext x)) -> (aext x)
1725     // fold (aext (zext x)) -> (zext x)
1726     // fold (aext (sext x)) -> (sext x)
1727     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.dump(&DAG));
1728     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1729   }
1730   return SDValue();
1731 }
1732 
1733 bool DAGCombiner::PromoteLoad(SDValue Op) {
1734   if (!LegalOperations)
1735     return false;
1736 
1737   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1738     return false;
1739 
1740   EVT VT = Op.getValueType();
1741   if (VT.isVector() || !VT.isInteger())
1742     return false;
1743 
1744   // If operation type is 'undesirable', e.g. i16 on x86, consider
1745   // promoting it.
1746   unsigned Opc = Op.getOpcode();
1747   if (TLI.isTypeDesirableForOp(Opc, VT))
1748     return false;
1749 
1750   EVT PVT = VT;
1751   // Consult target whether it is a good idea to promote this operation and
1752   // what's the right type to promote it to.
1753   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1754     assert(PVT != VT && "Don't know what type to promote to!");
1755 
1756     SDLoc DL(Op);
1757     SDNode *N = Op.getNode();
1758     LoadSDNode *LD = cast<LoadSDNode>(N);
1759     EVT MemVT = LD->getMemoryVT();
1760     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1761                                                       : LD->getExtensionType();
1762     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1763                                    LD->getChain(), LD->getBasePtr(),
1764                                    MemVT, LD->getMemOperand());
1765     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1766 
1767     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1768                Result.dump(&DAG); dbgs() << '\n');
1769 
1770     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1771     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1772 
1773     AddToWorklist(Result.getNode());
1774     recursivelyDeleteUnusedNodes(N);
1775     return true;
1776   }
1777 
1778   return false;
1779 }
1780 
1781 /// Recursively delete a node which has no uses and any operands for
1782 /// which it is the only use.
1783 ///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1787 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1788   if (!N->use_empty())
1789     return false;
1790 
1791   SmallSetVector<SDNode *, 16> Nodes;
1792   Nodes.insert(N);
1793   do {
1794     N = Nodes.pop_back_val();
1795     if (!N)
1796       continue;
1797 
1798     if (N->use_empty()) {
1799       for (const SDValue &ChildN : N->op_values())
1800         Nodes.insert(ChildN.getNode());
1801 
1802       removeFromWorklist(N);
1803       DAG.DeleteNode(N);
1804     } else {
1805       AddToWorklist(N);
1806     }
1807   } while (!Nodes.empty());
1808   return true;
1809 }
1810 
1811 //===----------------------------------------------------------------------===//
1812 //  Main DAG Combiner implementation
1813 //===----------------------------------------------------------------------===//
1814 
1815 void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
1817   Level = AtLevel;
1818   LegalDAG = Level >= AfterLegalizeDAG;
1819   LegalOperations = Level >= AfterLegalizeVectorOps;
1820   LegalTypes = Level >= AfterLegalizeTypes;
1821 
1822   WorklistInserter AddNodes(*this);
1823 
1824   // Add all the dag nodes to the worklist.
1825   //
  // Note: Not all nodes are added to the PruningList here. The only nodes
  // which can be deleted are those which have no uses, and all other nodes
  // which would otherwise be added to the worklist by the first call to
  // getNextWorklistEntry are already present in it.
1830   for (SDNode &Node : DAG.allnodes())
1831     AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
1832 
1833   // Create a dummy node (which is not added to allnodes), that adds a reference
1834   // to the root node, preventing it from being deleted, and tracking any
1835   // changes of the root.
1836   HandleSDNode Dummy(DAG.getRoot());
1837 
1838   // While we have a valid worklist entry node, try to combine it.
1839   while (SDNode *N = getNextWorklistEntry()) {
1840     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1841     // N is deleted from the DAG, since they too may now be dead or may have a
1842     // reduced number of uses, allowing other xforms.
1843     if (recursivelyDeleteUnusedNodes(N))
1844       continue;
1845 
1846     WorklistRemover DeadNodes(*this);
1847 
1848     // If this combine is running after legalizing the DAG, re-legalize any
1849     // nodes pulled off the worklist.
1850     if (LegalDAG) {
1851       SmallSetVector<SDNode *, 16> UpdatedNodes;
1852       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1853 
1854       for (SDNode *LN : UpdatedNodes)
1855         AddToWorklistWithUsers(LN);
1856 
1857       if (!NIsValid)
1858         continue;
1859     }
1860 
1861     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1862 
1863     // Add any operands of the new node which have not yet been combined to the
1864     // worklist as well. Because the worklist uniques things already, this
1865     // won't repeatedly process the same operand.
1866     for (const SDValue &ChildN : N->op_values())
1867       if (!CombinedNodes.count(ChildN.getNode()))
1868         AddToWorklist(ChildN.getNode());
1869 
1870     CombinedNodes.insert(N);
1871     SDValue RV = combine(N);
1872 
1873     if (!RV.getNode())
1874       continue;
1875 
1876     ++NodesCombined;
1877 
1878     // If we get back the same node we passed in, rather than a new node or
1879     // zero, we know that the node must have defined multiple values and
1880     // CombineTo was used.  Since CombineTo takes care of the worklist
1881     // mechanics for us, we have no work to do in this case.
1882     if (RV.getNode() == N)
1883       continue;
1884 
1885     assert(N->getOpcode() != ISD::DELETED_NODE &&
1886            RV.getOpcode() != ISD::DELETED_NODE &&
1887            "Node was deleted but visit returned new node!");
1888 
1889     LLVM_DEBUG(dbgs() << " ... into: "; RV.dump(&DAG));
1890 
1891     if (N->getNumValues() == RV->getNumValues())
1892       DAG.ReplaceAllUsesWith(N, RV.getNode());
1893     else {
1894       assert(N->getValueType(0) == RV.getValueType() &&
1895              N->getNumValues() == 1 && "Type mismatch");
1896       DAG.ReplaceAllUsesWith(N, &RV);
1897     }
1898 
1899     // Push the new node and any users onto the worklist.  Omit this if the
1900     // new node is the EntryToken (e.g. if a store managed to get optimized
1901     // out), because re-visiting the EntryToken and its users will not uncover
1902     // any additional opportunities, but there may be a large number of such
1903     // users, potentially causing compile time explosion.
1904     if (RV.getOpcode() != ISD::EntryToken)
1905       AddToWorklistWithUsers(RV.getNode());
1906 
1907     // Finally, if the node is now dead, remove it from the graph.  The node
1908     // may not be dead if the replacement process recursively simplified to
1909     // something else needing this node. This will also take care of adding any
1910     // operands which have lost a user to the worklist.
1911     recursivelyDeleteUnusedNodes(N);
1912   }
1913 
  // If the root changed (e.g. it was a dead load), update the root.
1915   DAG.setRoot(Dummy.getValue());
1916   DAG.RemoveDeadNodes();
1917 }
1918 
1919 SDValue DAGCombiner::visit(SDNode *N) {
1920   // clang-format off
1921   switch (N->getOpcode()) {
1922   default: break;
1923   case ISD::TokenFactor:        return visitTokenFactor(N);
1924   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1925   case ISD::ADD:                return visitADD(N);
1926   case ISD::SUB:                return visitSUB(N);
1927   case ISD::SADDSAT:
1928   case ISD::UADDSAT:            return visitADDSAT(N);
1929   case ISD::SSUBSAT:
1930   case ISD::USUBSAT:            return visitSUBSAT(N);
1931   case ISD::ADDC:               return visitADDC(N);
1932   case ISD::SADDO:
1933   case ISD::UADDO:              return visitADDO(N);
1934   case ISD::SUBC:               return visitSUBC(N);
1935   case ISD::SSUBO:
1936   case ISD::USUBO:              return visitSUBO(N);
1937   case ISD::ADDE:               return visitADDE(N);
1938   case ISD::UADDO_CARRY:        return visitUADDO_CARRY(N);
1939   case ISD::SADDO_CARRY:        return visitSADDO_CARRY(N);
1940   case ISD::SUBE:               return visitSUBE(N);
1941   case ISD::USUBO_CARRY:        return visitUSUBO_CARRY(N);
1942   case ISD::SSUBO_CARRY:        return visitSSUBO_CARRY(N);
1943   case ISD::SMULFIX:
1944   case ISD::SMULFIXSAT:
1945   case ISD::UMULFIX:
1946   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1947   case ISD::MUL:                return visitMUL(N);
1948   case ISD::SDIV:               return visitSDIV(N);
1949   case ISD::UDIV:               return visitUDIV(N);
1950   case ISD::SREM:
1951   case ISD::UREM:               return visitREM(N);
1952   case ISD::MULHU:              return visitMULHU(N);
1953   case ISD::MULHS:              return visitMULHS(N);
1954   case ISD::AVGFLOORS:
1955   case ISD::AVGFLOORU:
1956   case ISD::AVGCEILS:
1957   case ISD::AVGCEILU:           return visitAVG(N);
1958   case ISD::ABDS:
1959   case ISD::ABDU:               return visitABD(N);
1960   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1961   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1962   case ISD::SMULO:
1963   case ISD::UMULO:              return visitMULO(N);
1964   case ISD::SMIN:
1965   case ISD::SMAX:
1966   case ISD::UMIN:
1967   case ISD::UMAX:               return visitIMINMAX(N);
1968   case ISD::AND:                return visitAND(N);
1969   case ISD::OR:                 return visitOR(N);
1970   case ISD::XOR:                return visitXOR(N);
1971   case ISD::SHL:                return visitSHL(N);
1972   case ISD::SRA:                return visitSRA(N);
1973   case ISD::SRL:                return visitSRL(N);
1974   case ISD::ROTR:
1975   case ISD::ROTL:               return visitRotate(N);
1976   case ISD::FSHL:
1977   case ISD::FSHR:               return visitFunnelShift(N);
1978   case ISD::SSHLSAT:
1979   case ISD::USHLSAT:            return visitSHLSAT(N);
1980   case ISD::ABS:                return visitABS(N);
1981   case ISD::BSWAP:              return visitBSWAP(N);
1982   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1983   case ISD::CTLZ:               return visitCTLZ(N);
1984   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1985   case ISD::CTTZ:               return visitCTTZ(N);
1986   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1987   case ISD::CTPOP:              return visitCTPOP(N);
1988   case ISD::SELECT:             return visitSELECT(N);
1989   case ISD::VSELECT:            return visitVSELECT(N);
1990   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1991   case ISD::SETCC:              return visitSETCC(N);
1992   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1993   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1994   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1995   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1996   case ISD::AssertSext:
1997   case ISD::AssertZext:         return visitAssertExt(N);
1998   case ISD::AssertAlign:        return visitAssertAlign(N);
1999   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
2000   case ISD::SIGN_EXTEND_VECTOR_INREG:
2001   case ISD::ZERO_EXTEND_VECTOR_INREG:
2002   case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N);
2003   case ISD::TRUNCATE:           return visitTRUNCATE(N);
2004   case ISD::BITCAST:            return visitBITCAST(N);
2005   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
2006   case ISD::FADD:               return visitFADD(N);
2007   case ISD::STRICT_FADD:        return visitSTRICT_FADD(N);
2008   case ISD::FSUB:               return visitFSUB(N);
2009   case ISD::FMUL:               return visitFMUL(N);
2010   case ISD::FMA:                return visitFMA<EmptyMatchContext>(N);
2011   case ISD::FMAD:               return visitFMAD(N);
2012   case ISD::FDIV:               return visitFDIV(N);
2013   case ISD::FREM:               return visitFREM(N);
2014   case ISD::FSQRT:              return visitFSQRT(N);
2015   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
2016   case ISD::FPOW:               return visitFPOW(N);
2017   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
2018   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
2019   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
2020   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
2021   case ISD::LRINT:
2022   case ISD::LLRINT:             return visitXRINT(N);
2023   case ISD::FP_ROUND:           return visitFP_ROUND(N);
2024   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
2025   case ISD::FNEG:               return visitFNEG(N);
2026   case ISD::FABS:               return visitFABS(N);
2027   case ISD::FFLOOR:             return visitFFLOOR(N);
2028   case ISD::FMINNUM:
2029   case ISD::FMAXNUM:
2030   case ISD::FMINIMUM:
2031   case ISD::FMAXIMUM:           return visitFMinMax(N);
2032   case ISD::FCEIL:              return visitFCEIL(N);
2033   case ISD::FTRUNC:             return visitFTRUNC(N);
2034   case ISD::FFREXP:             return visitFFREXP(N);
2035   case ISD::BRCOND:             return visitBRCOND(N);
2036   case ISD::BR_CC:              return visitBR_CC(N);
2037   case ISD::LOAD:               return visitLOAD(N);
2038   case ISD::STORE:              return visitSTORE(N);
2039   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
2040   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
2041   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
2042   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
2043   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
2044   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
2045   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
2046   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
2047   case ISD::MGATHER:            return visitMGATHER(N);
2048   case ISD::MLOAD:              return visitMLOAD(N);
2049   case ISD::MSCATTER:           return visitMSCATTER(N);
2050   case ISD::MSTORE:             return visitMSTORE(N);
2051   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
2052   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
2053   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
2054   case ISD::FP_TO_BF16:         return visitFP_TO_BF16(N);
2055   case ISD::BF16_TO_FP:         return visitBF16_TO_FP(N);
2056   case ISD::FREEZE:             return visitFREEZE(N);
2057   case ISD::GET_FPENV_MEM:      return visitGET_FPENV_MEM(N);
2058   case ISD::SET_FPENV_MEM:      return visitSET_FPENV_MEM(N);
2059   case ISD::VECREDUCE_FADD:
2060   case ISD::VECREDUCE_FMUL:
2061   case ISD::VECREDUCE_ADD:
2062   case ISD::VECREDUCE_MUL:
2063   case ISD::VECREDUCE_AND:
2064   case ISD::VECREDUCE_OR:
2065   case ISD::VECREDUCE_XOR:
2066   case ISD::VECREDUCE_SMAX:
2067   case ISD::VECREDUCE_SMIN:
2068   case ISD::VECREDUCE_UMAX:
2069   case ISD::VECREDUCE_UMIN:
2070   case ISD::VECREDUCE_FMAX:
2071   case ISD::VECREDUCE_FMIN:
2072   case ISD::VECREDUCE_FMAXIMUM:
2073   case ISD::VECREDUCE_FMINIMUM:     return visitVECREDUCE(N);
2074 #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
2075 #include "llvm/IR/VPIntrinsics.def"
2076     return visitVPOp(N);
2077   }
2078   // clang-format on
2079   return SDValue();
2080 }
2081 
2082 SDValue DAGCombiner::combine(SDNode *N) {
2083   if (!DebugCounter::shouldExecute(DAGCombineCounter))
2084     return SDValue();
2085 
2086   SDValue RV;
2087   if (!DisableGenericCombines)
2088     RV = visit(N);
2089 
2090   // If nothing happened, try a target-specific DAG combine.
2091   if (!RV.getNode()) {
2092     assert(N->getOpcode() != ISD::DELETED_NODE &&
2093            "Node was deleted but visit returned NULL!");
2094 
2095     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
2096         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
2097 
2098       // Expose the DAG combiner to the target combiner impls.
2099       TargetLowering::DAGCombinerInfo
2100         DagCombineInfo(DAG, Level, false, this);
2101 
2102       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
2103     }
2104   }
2105 
2106   // If nothing happened still, try promoting the operation.
2107   if (!RV.getNode()) {
2108     switch (N->getOpcode()) {
2109     default: break;
2110     case ISD::ADD:
2111     case ISD::SUB:
2112     case ISD::MUL:
2113     case ISD::AND:
2114     case ISD::OR:
2115     case ISD::XOR:
2116       RV = PromoteIntBinOp(SDValue(N, 0));
2117       break;
2118     case ISD::SHL:
2119     case ISD::SRA:
2120     case ISD::SRL:
2121       RV = PromoteIntShiftOp(SDValue(N, 0));
2122       break;
2123     case ISD::SIGN_EXTEND:
2124     case ISD::ZERO_EXTEND:
2125     case ISD::ANY_EXTEND:
2126       RV = PromoteExtend(SDValue(N, 0));
2127       break;
2128     case ISD::LOAD:
2129       if (PromoteLoad(SDValue(N, 0)))
2130         RV = SDValue(N, 0);
2131       break;
2132     }
2133   }
2134 
2135   // If N is a commutative binary node, try to eliminate it if the commuted
2136   // version is already present in the DAG.
2137   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) {
2138     SDValue N0 = N->getOperand(0);
2139     SDValue N1 = N->getOperand(1);
2140 
2141     // Constant operands are canonicalized to RHS.
2142     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
2143       SDValue Ops[] = {N1, N0};
2144       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
2145                                             N->getFlags());
2146       if (CSENode)
2147         return SDValue(CSENode, 0);
2148     }
2149   }
2150 
2151   return RV;
2152 }
2153 
/// Given a node, return its input chain if it has one, otherwise return a
/// null SDValue.
2156 static SDValue getInputChainForNode(SDNode *N) {
2157   if (unsigned NumOps = N->getNumOperands()) {
2158     if (N->getOperand(0).getValueType() == MVT::Other)
2159       return N->getOperand(0);
2160     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
2161       return N->getOperand(NumOps-1);
2162     for (unsigned i = 1; i < NumOps-1; ++i)
2163       if (N->getOperand(i).getValueType() == MVT::Other)
2164         return N->getOperand(i);
2165   }
2166   return SDValue();
2167 }
2168 
2169 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
2170   // If N has two operands, where one has an input chain equal to the other,
2171   // the 'other' chain is redundant.
2172   if (N->getNumOperands() == 2) {
2173     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
2174       return N->getOperand(0);
2175     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
2176       return N->getOperand(1);
2177   }
2178 
2179   // Don't simplify token factors if optnone.
2180   if (OptLevel == CodeGenOptLevel::None)
2181     return SDValue();
2182 
2183   // Don't simplify the token factor if the node itself has too many operands.
2184   if (N->getNumOperands() > TokenFactorInlineLimit)
2185     return SDValue();
2186 
2187   // If the sole user is a token factor, we should make sure we have a
2188   // chance to merge them together. This prevents TF chains from inhibiting
2189   // optimizations.
2190   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
2191     AddToWorklist(*(N->use_begin()));
2192 
2193   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
2194   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
2195   SmallPtrSet<SDNode*, 16> SeenOps;
2196   bool Changed = false;             // If we should replace this token factor.
2197 
2198   // Start out with this token factor.
2199   TFs.push_back(N);
2200 
  // Iterate through token factors. The TFs list grows as new token factors
  // are encountered.
2203   for (unsigned i = 0; i < TFs.size(); ++i) {
2204     // Limit number of nodes to inline, to avoid quadratic compile times.
2205     // We have to add the outstanding Token Factors to Ops, otherwise we might
2206     // drop Ops from the resulting Token Factors.
2207     if (Ops.size() > TokenFactorInlineLimit) {
2208       for (unsigned j = i; j < TFs.size(); j++)
2209         Ops.emplace_back(TFs[j], 0);
2210       // Drop unprocessed Token Factors from TFs, so we do not add them to the
2211       // combiner worklist later.
2212       TFs.resize(i);
2213       break;
2214     }
2215 
2216     SDNode *TF = TFs[i];
2217     // Check each of the operands.
2218     for (const SDValue &Op : TF->op_values()) {
2219       switch (Op.getOpcode()) {
2220       case ISD::EntryToken:
2221         // Entry tokens don't need to be added to the list. They are
2222         // redundant.
2223         Changed = true;
2224         break;
2225 
2226       case ISD::TokenFactor:
2227         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2228           // Queue up for processing.
2229           TFs.push_back(Op.getNode());
2230           Changed = true;
2231           break;
2232         }
2233         [[fallthrough]];
2234 
2235       default:
2236         // Only add if it isn't already in the list.
2237         if (SeenOps.insert(Op.getNode()).second)
2238           Ops.push_back(Op);
2239         else
2240           Changed = true;
2241         break;
2242       }
2243     }
2244   }
2245 
2246   // Re-visit inlined Token Factors, to clean them up in case they have been
2247   // removed. Skip the first Token Factor, as this is the current node.
2248   for (unsigned i = 1, e = TFs.size(); i < e; i++)
2249     AddToWorklist(TFs[i]);
2250 
  // Remove Nodes that are chained to another node in the list. Do so by
  // walking up chains breadth-first, stopping when we've seen another operand.
  // In general we must climb to the EntryNode, but we can exit early if we
  // find all remaining work is associated with just one operand as no further
  // pruning is possible.
2256 
2257   // List of nodes to search through and original Ops from which they originate.
2258   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2259   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2260   SmallPtrSet<SDNode *, 16> SeenChains;
2261   bool DidPruneOps = false;
2262 
2263   unsigned NumLeftToConsider = 0;
2264   for (const SDValue &Op : Ops) {
2265     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2266     OpWorkCount.push_back(1);
2267   }
2268 
2269   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
    // If this is an Op, we can remove the op from the list. Re-mark any
    // search associated with it as belonging to the current OpNumber.
2272     if (SeenOps.contains(Op)) {
2273       Changed = true;
2274       DidPruneOps = true;
2275       unsigned OrigOpNumber = 0;
2276       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2277         OrigOpNumber++;
2278       assert((OrigOpNumber != Ops.size()) &&
2279              "expected to find TokenFactor Operand");
2280       // Re-mark worklist from OrigOpNumber to OpNumber
2281       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2282         if (Worklist[i].second == OrigOpNumber) {
2283           Worklist[i].second = OpNumber;
2284         }
2285       }
2286       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2287       OpWorkCount[OrigOpNumber] = 0;
2288       NumLeftToConsider--;
2289     }
2290     // Add if it's a new chain
2291     if (SeenChains.insert(Op).second) {
2292       OpWorkCount[OpNumber]++;
2293       Worklist.push_back(std::make_pair(Op, OpNumber));
2294     }
2295   };
2296 
2297   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
    // We need to consider at least 2 Ops to prune.
2299     if (NumLeftToConsider <= 1)
2300       break;
2301     auto CurNode = Worklist[i].first;
2302     auto CurOpNumber = Worklist[i].second;
2303     assert((OpWorkCount[CurOpNumber] > 0) &&
2304            "Node should not appear in worklist");
2305     switch (CurNode->getOpcode()) {
2306     case ISD::EntryToken:
      // Hitting EntryToken is the only way for the search to terminate without
      // hitting another operand's search. Prevent us from marking this operand
      // considered.
2311       NumLeftToConsider++;
2312       break;
2313     case ISD::TokenFactor:
2314       for (const SDValue &Op : CurNode->op_values())
2315         AddToWorklist(i, Op.getNode(), CurOpNumber);
2316       break;
2317     case ISD::LIFETIME_START:
2318     case ISD::LIFETIME_END:
2319     case ISD::CopyFromReg:
2320     case ISD::CopyToReg:
2321       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2322       break;
2323     default:
2324       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2325         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2326       break;
2327     }
2328     OpWorkCount[CurOpNumber]--;
2329     if (OpWorkCount[CurOpNumber] == 0)
2330       NumLeftToConsider--;
2331   }
2332 
2333   // If we've changed things around then replace token factor.
2334   if (Changed) {
2335     SDValue Result;
2336     if (Ops.empty()) {
2337       // The entry token is the only possible outcome.
2338       Result = DAG.getEntryNode();
2339     } else {
2340       if (DidPruneOps) {
2341         SmallVector<SDValue, 8> PrunedOps;
2342         //
2343         for (const SDValue &Op : Ops) {
2344           if (SeenChains.count(Op.getNode()) == 0)
2345             PrunedOps.push_back(Op);
2346         }
2347         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2348       } else {
2349         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2350       }
2351     }
2352     return Result;
2353   }
2354   return SDValue();
2355 }
2356 
2357 /// MERGE_VALUES can always be eliminated.
2358 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2359   WorklistRemover DeadNodes(*this);
2360   // Replacing results may cause a different MERGE_VALUES to suddenly
2361   // be CSE'd with N, and carry its uses with it. Iterate until no
2362   // uses remain, to ensure that the node can be safely deleted.
2363   // First add the users of this node to the work list so that they
2364   // can be tried again once they have new operands.
2365   AddUsersToWorklist(N);
2366   do {
2367     // Do as a single replacement to avoid rewalking use lists.
2368     SmallVector<SDValue, 8> Ops;
2369     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2370       Ops.push_back(N->getOperand(i));
2371     DAG.ReplaceAllUsesWith(N, Ops.data());
2372   } while (!N->use_empty());
2373   deleteAndRecombine(N);
2374   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2375 }
2376 
/// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
/// ConstantSDNode pointer; otherwise return nullptr.
2379 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2380   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2381   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2382 }
2383 
2384 // isTruncateOf - If N is a truncate of some other value, return true, record
2385 // the value being truncated in Op and which of Op's bits are zero/one in Known.
2386 // This function computes KnownBits to avoid a duplicated call to
2387 // computeKnownBits in the caller.
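// Besides a plain TRUNCATE, a (setcc ne X, 0) whose operand is known to be
// either 0 or 1 also acts as a truncate of X to i1.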
2388 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
2389                          KnownBits &Known) {
2390   if (N->getOpcode() == ISD::TRUNCATE) {
2391     Op = N->getOperand(0);
2392     Known = DAG.computeKnownBits(Op);
2393     return true;
2394   }
2395 
2396   if (N.getOpcode() != ISD::SETCC ||
2397       N.getValueType().getScalarType() != MVT::i1 ||
2398       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
2399     return false;
2400 
2401   SDValue Op0 = N->getOperand(0);
2402   SDValue Op1 = N->getOperand(1);
2403   assert(Op0.getValueType() == Op1.getValueType());
2404 
2405   if (isNullOrNullSplat(Op0))
2406     Op = Op1;
2407   else if (isNullOrNullSplat(Op1))
2408     Op = Op0;
2409   else
2410     return false;
2411 
2412   Known = DAG.computeKnownBits(Op);
2413 
2414   return (Known.Zero | 1).isAllOnes();
2415 }
2416 
2417 /// Return true if 'Use' is a load or a store that uses N as its base pointer
2418 /// and that N may be folded in the load / store addressing mode.
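/// For example, (add base, 16) used as the address of a load can fold when
/// the target supports [reg + imm] addressing for that type, and
/// (add base, index) can fold when [reg + reg] addressing is legal.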
2419 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
2420                                     const TargetLowering &TLI) {
2421   EVT VT;
2422   unsigned AS;
2423 
2424   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
2425     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2426       return false;
2427     VT = LD->getMemoryVT();
2428     AS = LD->getAddressSpace();
2429   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
2430     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2431       return false;
2432     VT = ST->getMemoryVT();
2433     AS = ST->getAddressSpace();
2434   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(Use)) {
2435     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
2436       return false;
2437     VT = LD->getMemoryVT();
2438     AS = LD->getAddressSpace();
2439   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(Use)) {
2440     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
2441       return false;
2442     VT = ST->getMemoryVT();
2443     AS = ST->getAddressSpace();
2444   } else {
2445     return false;
2446   }
2447 
2448   TargetLowering::AddrMode AM;
2449   if (N->getOpcode() == ISD::ADD) {
2450     AM.HasBaseReg = true;
2451     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2452     if (Offset)
2453       // [reg +/- imm]
2454       AM.BaseOffs = Offset->getSExtValue();
2455     else
2456       // [reg +/- reg]
2457       AM.Scale = 1;
2458   } else if (N->getOpcode() == ISD::SUB) {
2459     AM.HasBaseReg = true;
2460     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
2461     if (Offset)
2462       // [reg +/- imm]
2463       AM.BaseOffs = -Offset->getSExtValue();
2464     else
2465       // [reg +/- reg]
2466       AM.Scale = 1;
2467   } else {
2468     return false;
2469   }
2470 
2471   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
2472                                    VT.getTypeForEVT(*DAG.getContext()), AS);
2473 }
2474 
2475 /// This inverts a canonicalization in IR that replaces a variable select arm
2476 /// with an identity constant. Codegen improves if we re-use the variable
2477 /// operand rather than load a constant. This can also be converted into a
2478 /// masked vector operation if the target supports it.
2479 static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
2480                                               bool ShouldCommuteOperands) {
2481   // Match a select as operand 1. The identity constant that we are looking for
2482   // is only valid as operand 1 of a non-commutative binop.
2483   SDValue N0 = N->getOperand(0);
2484   SDValue N1 = N->getOperand(1);
2485   if (ShouldCommuteOperands)
2486     std::swap(N0, N1);
2487 
2488   // TODO: Should this apply to scalar select too?
2489   if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
2490     return SDValue();
2491 
  // We can't hoist all instructions because of immediate UB (not
  // speculatable), e.g. div/rem by zero.
2494   if (!DAG.isSafeToSpeculativelyExecuteNode(N))
2495     return SDValue();
2496 
2497   unsigned Opcode = N->getOpcode();
2498   EVT VT = N->getValueType(0);
2499   SDValue Cond = N1.getOperand(0);
2500   SDValue TVal = N1.getOperand(1);
2501   SDValue FVal = N1.getOperand(2);
2502 
2503   // This transform increases uses of N0, so freeze it to be safe.
2504   // binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal)
2505   unsigned OpNo = ShouldCommuteOperands ? 0 : 1;
2506   if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) {
2507     SDValue F0 = DAG.getFreeze(N0);
2508     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags());
2509     return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO);
2510   }
2511   // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0
2512   if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) {
2513     SDValue F0 = DAG.getFreeze(N0);
2514     SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags());
2515     return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0);
2516   }
2517 
2518   return SDValue();
2519 }
2520 
2521 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2522   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2523          "Unexpected binary operator");
2524 
2525   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2526   auto BinOpcode = BO->getOpcode();
2527   EVT VT = BO->getValueType(0);
2528   if (TLI.shouldFoldSelectWithIdentityConstant(BinOpcode, VT)) {
2529     if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, false))
2530       return Sel;
2531 
2532     if (TLI.isCommutativeBinOp(BO->getOpcode()))
2533       if (SDValue Sel = foldSelectWithIdentityConstant(BO, DAG, true))
2534         return Sel;
2535   }
2536 
2537   // Don't do this unless the old select is going away. We want to eliminate the
2538   // binary operator, not replace a binop with a select.
2539   // TODO: Handle ISD::SELECT_CC.
2540   unsigned SelOpNo = 0;
2541   SDValue Sel = BO->getOperand(0);
2542   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2543     SelOpNo = 1;
2544     Sel = BO->getOperand(1);
2545 
2546     // Peek through trunc to shift amount type.
2547     if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
2548          BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
2549       // This is valid when the truncated bits of x are already zero.
2550       SDValue Op;
2551       KnownBits Known;
2552       if (isTruncateOf(DAG, Sel, Op, Known) &&
2553           Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
2554         Sel = Op;
2555     }
2556   }
2557 
2558   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2559     return SDValue();
2560 
2561   SDValue CT = Sel.getOperand(1);
2562   if (!isConstantOrConstantVector(CT, true) &&
2563       !DAG.isConstantFPBuildVectorOrConstantFP(CT))
2564     return SDValue();
2565 
2566   SDValue CF = Sel.getOperand(2);
2567   if (!isConstantOrConstantVector(CF, true) &&
2568       !DAG.isConstantFPBuildVectorOrConstantFP(CF))
2569     return SDValue();
2570 
2571   // Bail out if any constants are opaque because we can't constant fold those.
2572   // The exception is "and" and "or" with either 0 or -1 in which case we can
2573   // propagate non constant operands into select. I.e.:
2574   // and (select Cond, 0, -1), X --> select Cond, 0, X
2575   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2576   bool CanFoldNonConst =
2577       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2578       ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) ||
2579        (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT)));
2580 
2581   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2582   if (!CanFoldNonConst &&
2583       !isConstantOrConstantVector(CBO, true) &&
2584       !DAG.isConstantFPBuildVectorOrConstantFP(CBO))
2585     return SDValue();
2586 
2587   SDLoc DL(Sel);
2588   SDValue NewCT, NewCF;
2589 
2590   if (CanFoldNonConst) {
2591     // If CBO is an opaque constant, we can't rely on getNode to constant fold.
2592     if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) ||
2593         (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT)))
2594       NewCT = CT;
2595     else
2596       NewCT = CBO;
2597 
2598     if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) ||
2599         (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF)))
2600       NewCF = CF;
2601     else
2602       NewCF = CBO;
2603   } else {
    // We have a select-of-constants followed by a binary operator with a
    // constant. Eliminate the binop by pulling the constant math into the
    // select. Example:
    //   add (select Cond, CT, CF), CBO --> select Cond, (CT + CBO), (CF + CBO)
2608     NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
2609                     : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
2610     if (!NewCT)
2611       return SDValue();
2612 
2613     NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
2614                     : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
2615     if (!NewCF)
2616       return SDValue();
2617   }
2618 
2619   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2620   SelectOp->setFlags(BO->getFlags());
2621   return SelectOp;
2622 }
2623 
2624 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2625   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2626          "Expecting add or sub");
2627 
2628   // Match a constant operand and a zext operand for the math instruction:
2629   // add Z, C
2630   // sub C, Z
2631   bool IsAdd = N->getOpcode() == ISD::ADD;
2632   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2633   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2634   auto *CN = dyn_cast<ConstantSDNode>(C);
2635   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2636     return SDValue();
2637 
2638   // Match the zext operand as a setcc of a boolean.
2639   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2640       Z.getOperand(0).getValueType() != MVT::i1)
2641     return SDValue();
2642 
2643   // Match the compare as: setcc (X & 1), 0, eq.
2644   SDValue SetCC = Z.getOperand(0);
2645   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2646   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2647       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2648       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2649     return SDValue();
2650 
2651   // We are adding/subtracting a constant and an inverted low bit. Turn that
2652   // into a subtract/add of the low bit with incremented/decremented constant:
2653   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2654   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
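  // For example, with C = 5:
  //   add (zext i1 (seteq (X & 1), 0)), 5 --> sub 6, (zext (X & 1))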
2655   EVT VT = C.getValueType();
2656   SDLoc DL(N);
2657   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2658   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2659                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2660   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2661 }
2662 
2663 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2664 /// a shift and add with a different constant.
2665 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2666   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2667          "Expecting add or sub");
2668 
2669   // We need a constant operand for the add/sub, and the other operand is a
2670   // logical shift right: add (srl), C or sub C, (srl).
2671   bool IsAdd = N->getOpcode() == ISD::ADD;
2672   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2673   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2674   if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) ||
2675       ShiftOp.getOpcode() != ISD::SRL)
2676     return SDValue();
2677 
2678   // The shift must be of a 'not' value.
2679   SDValue Not = ShiftOp.getOperand(0);
2680   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2681     return SDValue();
2682 
2683   // The shift must be moving the sign bit to the least-significant-bit.
2684   EVT VT = ShiftOp.getValueType();
2685   SDValue ShAmt = ShiftOp.getOperand(1);
2686   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2687   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2688     return SDValue();
2689 
2690   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2691   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2692   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
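  // This works because (srl (not X), 31) == 1 - (srl X, 31), and
  // (sra X, 31) == -(srl X, 31), so:
  //   add (srl (not X), 31), C == C + 1 + (sra X, 31)
  //   sub C, (srl (not X), 31) == C - 1 + (srl X, 31)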
2693   SDLoc DL(N);
2694   if (SDValue NewC = DAG.FoldConstantArithmetic(
2695           IsAdd ? ISD::ADD : ISD::SUB, DL, VT,
2696           {ConstantOp, DAG.getConstant(1, DL, VT)})) {
2697     SDValue NewShift = DAG.getNode(IsAdd ? ISD::SRA : ISD::SRL, DL, VT,
2698                                    Not.getOperand(0), ShAmt);
2699     return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2700   }
2701 
2702   return SDValue();
2703 }
2704 
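/// Return true if one of the two operands is the bitwise 'not' (xor with -1)
/// of the other.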
static bool areBitwiseNotOfEachOther(SDValue Op0, SDValue Op1) {
  return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
         (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
}
2710 
2711 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2712 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2713 /// are no common bits set in the operands).
2714 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2715   SDValue N0 = N->getOperand(0);
2716   SDValue N1 = N->getOperand(1);
2717   EVT VT = N0.getValueType();
2718   SDLoc DL(N);
2719 
2720   // fold (add x, undef) -> undef
2721   if (N0.isUndef())
2722     return N0;
2723   if (N1.isUndef())
2724     return N1;
2725 
2726   // fold (add c1, c2) -> c1+c2
2727   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}))
2728     return C;
2729 
2730   // canonicalize constant to RHS
2731   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2732       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2733     return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2734 
  if (areBitwiseNotOfEachOther(N0, N1))
    return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()), DL,
                           VT);
2738 
2739   // fold vector ops
2740   if (VT.isVector()) {
2741     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
2742       return FoldedVOp;
2743 
2744     // fold (add x, 0) -> x, vector edition
2745     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
2746       return N0;
2747   }
2748 
2749   // fold (add x, 0) -> x
2750   if (isNullConstant(N1))
2751     return N0;
2752 
2753   if (N0.getOpcode() == ISD::SUB) {
2754     SDValue N00 = N0.getOperand(0);
2755     SDValue N01 = N0.getOperand(1);
2756 
    // fold ((A-c1)+c2) -> (A+(c2-c1))
    if (SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N01}))
      return DAG.getNode(ISD::ADD, DL, VT, N00, Sub);

    // fold ((c1-A)+c2) -> (c1+c2)-A
    if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N00}))
      return DAG.getNode(ISD::SUB, DL, VT, Add, N01);
2764   }
2765 
2766   // add (sext i1 X), 1 -> zext (not i1 X)
2767   // We don't transform this pattern:
2768   //   add (zext i1 X), -1 -> sext (not i1 X)
2769   // because most (?) targets generate better code for the zext form.
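  // This holds because (sext i1 X) is 0 or -1, so adding 1 yields 1 or 0,
  // which is exactly (zext (not i1 X)).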
2770   if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2771       isOneOrOneSplat(N1)) {
2772     SDValue X = N0.getOperand(0);
2773     if ((!LegalOperations ||
2774          (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2775           TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2776         X.getScalarValueSizeInBits() == 1) {
2777       SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2778       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2779     }
2780   }
2781 
2782   // Fold (add (or x, c0), c1) -> (add x, (c0 + c1))
2783   // iff (or x, c0) is equivalent to (add x, c0).
2784   // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1))
2785   // iff (xor x, c0) is equivalent to (add x, c0).
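  // E.g., (add (or x, 1), 16) --> (add x, 17) when the low bit of x is known
  // to be zero: with no common bits set, the 'or' behaves as an 'add'.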
2786   if (DAG.isADDLike(N0)) {
2787     SDValue N01 = N0.getOperand(1);
2788     if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01}))
2789       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add);
2790   }
2791 
2792   if (SDValue NewSel = foldBinOpIntoSelect(N))
2793     return NewSel;
2794 
2795   // reassociate add
2796   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N, N0, N1)) {
2797     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2798       return RADD;
2799 
    // Reassociate (add (or x, c), y) -> (add (add x, y), c) if (or x, c) is
    // equivalent to (add x, c).
    // Reassociate (add (xor x, c), y) -> (add (add x, y), c) if (xor x, c) is
    // equivalent to (add x, c).
    // Do this optimization only when adding c does not introduce instructions
    // for adding carries.
2806     auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
2807       if (DAG.isADDLike(N0) && N0.hasOneUse() &&
2808           isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
        // If N0's type does not need to be split (it is legal or promoted),
        // or the constant is a sign mask (minimum signed value), the
        // reassociation does not introduce an add-with-carry.
2811         auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
2812         bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
2813                           TyActn == TargetLoweringBase::TypePromoteInteger ||
2814                           isMinSignedConstant(N0.getOperand(1));
2815         if (NoAddCarry)
2816           return DAG.getNode(
2817               ISD::ADD, DL, VT,
2818               DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2819               N0.getOperand(1));
2820       }
2821       return SDValue();
2822     };
2823     if (SDValue Add = ReassociateAddOr(N0, N1))
2824       return Add;
2825     if (SDValue Add = ReassociateAddOr(N1, N0))
2826       return Add;
2827 
2828     // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
2829     if (SDValue SD =
2830             reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
2831       return SD;
2832   }
2833   // fold ((0-A) + B) -> B-A
2834   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2835     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2836 
2837   // fold (A + (0-B)) -> A-B
2838   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2839     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2840 
2841   // fold (A+(B-A)) -> B
2842   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2843     return N1.getOperand(0);
2844 
2845   // fold ((B-A)+A) -> B
2846   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2847     return N0.getOperand(0);
2848 
2849   // fold ((A-B)+(C-A)) -> (C-B)
2850   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2851       N0.getOperand(0) == N1.getOperand(1))
2852     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2853                        N0.getOperand(1));
2854 
2855   // fold ((A-B)+(B-C)) -> (A-C)
2856   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2857       N0.getOperand(1) == N1.getOperand(0))
2858     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2859                        N1.getOperand(1));
2860 
2861   // fold (A+(B-(A+C))) to (B-C)
2862   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2863       N0 == N1.getOperand(1).getOperand(0))
2864     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2865                        N1.getOperand(1).getOperand(1));
2866 
2867   // fold (A+(B-(C+A))) to (B-C)
2868   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2869       N0 == N1.getOperand(1).getOperand(1))
2870     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2871                        N1.getOperand(1).getOperand(0));
2872 
2873   // fold (A+((B-A)+or-C)) to (B+or-C)
2874   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2875       N1.getOperand(0).getOpcode() == ISD::SUB &&
2876       N0 == N1.getOperand(0).getOperand(1))
2877     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2878                        N1.getOperand(1));
2879 
2880   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2881   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2882       N0->hasOneUse() && N1->hasOneUse()) {
2883     SDValue N00 = N0.getOperand(0);
2884     SDValue N01 = N0.getOperand(1);
2885     SDValue N10 = N1.getOperand(0);
2886     SDValue N11 = N1.getOperand(1);
2887 
2888     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2889       return DAG.getNode(ISD::SUB, DL, VT,
2890                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2891                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2892   }
2893 
2894   // fold (add (umax X, C), -C) --> (usubsat X, C)
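  // E.g., (add (umax X, 42), -42) --> (usubsat X, 42).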
2895   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2896     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2897       return (!Max && !Op) ||
2898              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2899     };
2900     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2901                                   /*AllowUndefs*/ true))
2902       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2903                          N0.getOperand(1));
2904   }
2905 
2906   if (SimplifyDemandedBits(SDValue(N, 0)))
2907     return SDValue(N, 0);
2908 
2909   if (isOneOrOneSplat(N1)) {
2910     // fold (add (xor a, -1), 1) -> (sub 0, a)
2911     if (isBitwiseNot(N0))
2912       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2913                          N0.getOperand(0));
2914 
2915     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2916     if (N0.getOpcode() == ISD::ADD) {
2917       SDValue A, Xor;
2918 
2919       if (isBitwiseNot(N0.getOperand(0))) {
2920         A = N0.getOperand(1);
2921         Xor = N0.getOperand(0);
2922       } else if (isBitwiseNot(N0.getOperand(1))) {
2923         A = N0.getOperand(0);
2924         Xor = N0.getOperand(1);
2925       }
2926 
2927       if (Xor)
2928         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2929     }
2930 
2931     // Look for:
2932     //   add (add x, y), 1
2933     // And if the target does not like this form then turn into:
2934     //   sub y, (xor x, -1)
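    // These are equivalent because (xor x, -1) == -x - 1, and therefore
    // sub y, (xor x, -1) == y + x + 1 == add (add x, y), 1.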
2935     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
2936         N0.hasOneUse() &&
2937         // Limit this to after legalization if the add has wrap flags
2938         (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
2939                                        !N->getFlags().hasNoSignedWrap()))) {
2940       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2941                                 DAG.getAllOnesConstant(DL, VT));
2942       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2943     }
2944   }
2945 
2946   // (x - y) + -1  ->  add (xor y, -1), x
2947   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
2948       isAllOnesOrAllOnesSplat(N1)) {
2949     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2950     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2951   }
2952 
2953   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2954     return Combined;
2955 
2956   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2957     return Combined;
2958 
2959   return SDValue();
2960 }
2961 
2962 SDValue DAGCombiner::visitADD(SDNode *N) {
2963   SDValue N0 = N->getOperand(0);
2964   SDValue N1 = N->getOperand(1);
2965   EVT VT = N0.getValueType();
2966   SDLoc DL(N);
2967 
2968   if (SDValue Combined = visitADDLike(N))
2969     return Combined;
2970 
2971   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2972     return V;
2973 
2974   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2975     return V;
2976 
2977   // fold (a+b) -> (a|b) iff a and b share no bits.
2978   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2979       DAG.haveNoCommonBitsSet(N0, N1))
2980     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2981 
2982   // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)).
2983   if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) {
2984     const APInt &C0 = N0->getConstantOperandAPInt(0);
2985     const APInt &C1 = N1->getConstantOperandAPInt(0);
2986     return DAG.getVScale(DL, VT, C0 + C1);
2987   }
2988 
2989   // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2)
2990   if (N0.getOpcode() == ISD::ADD &&
2991       N0.getOperand(1).getOpcode() == ISD::VSCALE &&
2992       N1.getOpcode() == ISD::VSCALE) {
2993     const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0);
2994     const APInt &VS1 = N1->getConstantOperandAPInt(0);
2995     SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1);
2996     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS);
2997   }
2998 
  // Fold (add step_vector(c1), step_vector(c2)) to step_vector(c1 + c2)
3000   if (N0.getOpcode() == ISD::STEP_VECTOR &&
3001       N1.getOpcode() == ISD::STEP_VECTOR) {
3002     const APInt &C0 = N0->getConstantOperandAPInt(0);
3003     const APInt &C1 = N1->getConstantOperandAPInt(0);
3004     APInt NewStep = C0 + C1;
3005     return DAG.getStepVector(DL, VT, NewStep);
3006   }
3007 
3008   // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2)
3009   if (N0.getOpcode() == ISD::ADD &&
3010       N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR &&
3011       N1.getOpcode() == ISD::STEP_VECTOR) {
3012     const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0);
3013     const APInt &SV1 = N1->getConstantOperandAPInt(0);
3014     APInt NewStep = SV0 + SV1;
3015     SDValue SV = DAG.getStepVector(DL, VT, NewStep);
3016     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), SV);
3017   }
3018 
3019   return SDValue();
3020 }
3021 
3022 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
3023   unsigned Opcode = N->getOpcode();
3024   SDValue N0 = N->getOperand(0);
3025   SDValue N1 = N->getOperand(1);
3026   EVT VT = N0.getValueType();
3027   bool IsSigned = Opcode == ISD::SADDSAT;
3028   SDLoc DL(N);
3029 
3030   // fold (add_sat x, undef) -> -1
3031   if (N0.isUndef() || N1.isUndef())
3032     return DAG.getAllOnesConstant(DL, VT);
3033 
3034   // fold (add_sat c1, c2) -> c3
3035   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
3036     return C;
3037 
3038   // canonicalize constant to RHS
3039   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3040       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3041     return DAG.getNode(Opcode, DL, VT, N1, N0);
3042 
3043   // fold vector ops
3044   if (VT.isVector()) {
3045     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3046       return FoldedVOp;
3047 
3048     // fold (add_sat x, 0) -> x, vector edition
3049     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3050       return N0;
3051   }
3052 
3053   // fold (add_sat x, 0) -> x
3054   if (isNullConstant(N1))
3055     return N0;
3056 
3057   // If it cannot overflow, transform into an add.
3058   if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3059     return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
3060 
3061   return SDValue();
3062 }
3063 
3064 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
3065                           bool ForceCarryReconstruction = false) {
3066   bool Masked = false;
3067 
3068   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
3069   while (true) {
3070     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
3071       V = V.getOperand(0);
3072       continue;
3073     }
3074 
3075     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
3076       if (ForceCarryReconstruction)
3077         return V;
3078 
3079       Masked = true;
3080       V = V.getOperand(0);
3081       continue;
3082     }
3083 
3084     if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
3085       return V;
3086 
3087     break;
3088   }
3089 
3090   // If this is not a carry, return.
3091   if (V.getResNo() != 1)
3092     return SDValue();
3093 
3094   if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
3095       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
3096     return SDValue();
3097 
3098   EVT VT = V->getValueType(0);
3099   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
3100     return SDValue();
3101 
  // If the result is masked, then no matter what kind of bool it is we can
  // return. If it isn't, then we need to make sure the bool is known to be
  // either 0 or 1 and not some other value.
3105   if (Masked ||
3106       TLI.getBooleanContents(V.getValueType()) ==
3107           TargetLoweringBase::ZeroOrOneBooleanContent)
3108     return V;
3109 
3110   return SDValue();
3111 }
3112 
3113 /// Given the operands of an add/sub operation, see if the 2nd operand is a
3114 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
3115 /// the opcode and bypass the mask operation.
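/// E.g., if the mask source is known to be all-sign-bits (0 or -1), then
/// (and X, 1) is 0 or 1 while X is 0 or -1, so add N0, (and X, 1) computes
/// the same value as sub N0, X.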
3116 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
3117                                  SelectionDAG &DAG, const SDLoc &DL) {
3118   if (N1.getOpcode() == ISD::ZERO_EXTEND)
3119     N1 = N1.getOperand(0);
3120 
3121   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
3122     return SDValue();
3123 
3124   EVT VT = N0.getValueType();
3125   SDValue N10 = N1.getOperand(0);
3126   if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE)
3127     N10 = N10.getOperand(0);
3128 
3129   if (N10.getValueType() != VT)
3130     return SDValue();
3131 
3132   if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits())
3133     return SDValue();
3134 
3135   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
3136   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
3137   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10);
3138 }
3139 
3140 /// Helper for doing combines based on N0 and N1 being added to each other.
SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
                                             SDNode *LocReference) {
3143   EVT VT = N0.getValueType();
3144   SDLoc DL(LocReference);
3145 
3146   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
3147   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
3148       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
3149     return DAG.getNode(ISD::SUB, DL, VT, N0,
3150                        DAG.getNode(ISD::SHL, DL, VT,
3151                                    N1.getOperand(0).getOperand(1),
3152                                    N1.getOperand(1)));
3153 
3154   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
3155     return V;
3156 
3157   // Look for:
3158   //   add (add x, 1), y
3159   // And if the target does not like this form then turn into:
3160   //   sub y, (xor x, -1)
3161   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
3162       N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
3163       // Limit this to after legalization if the add has wrap flags
3164       (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
3165                                      !N0->getFlags().hasNoSignedWrap()))) {
3166     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
3167                               DAG.getAllOnesConstant(DL, VT));
3168     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
3169   }
3170 
3171   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse()) {
3172     // Hoist one-use subtraction by non-opaque constant:
3173     //   (x - C) + y  ->  (x + y) - C
3174     // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
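    // (The generic (sub x, c) -> (add x, -c) fold in visitSUB only matches a
    // scalar ConstantSDNode, not a constant vector.)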
3175     if (isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3176       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
3177       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
3178     }
3179     // Hoist one-use subtraction from non-opaque constant:
3180     //   (C - x) + y  ->  (y - x) + C
3181     if (isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3182       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
3183       return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
3184     }
3185   }
3186 
3187   // add (mul x, C), x -> mul x, C+1
3188   if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
3189       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
3190       N0.hasOneUse()) {
3191     SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
3192                                DAG.getConstant(1, DL, VT));
3193     return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
3194   }
3195 
3196   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
3197   // rather than 'add 0/-1' (the zext should get folded).
3198   // add (sext i1 Y), X --> sub X, (zext i1 Y)
3199   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
3200       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
3201       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
3202     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
3203     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
3204   }
3205 
3206   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
3207   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3208     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3209     if (TN->getVT() == MVT::i1) {
      SDValue And = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, And);
3213     }
3214   }
3215 
3216   // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3217   if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
3218       N1.getResNo() == 0)
3219     return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
3220                        N0, N1.getOperand(0), N1.getOperand(2));
3221 
3222   // (add X, Carry) -> (uaddo_carry X, 0, Carry)
3223   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3224     if (SDValue Carry = getAsCarry(TLI, N1))
3225       return DAG.getNode(ISD::UADDO_CARRY, DL,
3226                          DAG.getVTList(VT, Carry.getValueType()), N0,
3227                          DAG.getConstant(0, DL, VT), Carry);
3228 
3229   return SDValue();
3230 }
3231 
3232 SDValue DAGCombiner::visitADDC(SDNode *N) {
3233   SDValue N0 = N->getOperand(0);
3234   SDValue N1 = N->getOperand(1);
3235   EVT VT = N0.getValueType();
3236   SDLoc DL(N);
3237 
3238   // If the flag result is dead, turn this into an ADD.
3239   if (!N->hasAnyUseOfValue(1))
3240     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3241                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3242 
3243   // canonicalize constant to RHS.
3244   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3245   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3246   if (N0C && !N1C)
3247     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
3248 
3249   // fold (addc x, 0) -> x + no carry out
3250   if (isNullConstant(N1))
3251     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
3252                                         DL, MVT::Glue));
3253 
3254   // If it cannot overflow, transform into an add.
3255   if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
3256     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3257                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3258 
3259   return SDValue();
3260 }
3261 
/**
 * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
 * then the flip also occurs if computing the inverse is the same cost.
 * This function returns an empty SDValue in case it cannot flip the boolean
 * without increasing the cost of the computation. If you want to flip a
 * boolean no matter what, use DAG.getLogicalNOT.
 */
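// E.g., with ZeroOrOneBooleanContent, if V == (xor X, 1), then !V == X and X
// itself is returned as the cheaper-to-compute flipped value.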
3269 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
3270                                   const TargetLowering &TLI,
3271                                   bool Force) {
3272   if (Force && isa<ConstantSDNode>(V))
3273     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3274 
3275   if (V.getOpcode() != ISD::XOR)
3276     return SDValue();
3277 
3278   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3279   if (!Const)
3280     return SDValue();
3281 
3282   EVT VT = V.getValueType();
3283 
  bool IsFlip = false;
  switch (TLI.getBooleanContents(VT)) {
  case TargetLowering::ZeroOrOneBooleanContent:
    IsFlip = Const->isOne();
    break;
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    IsFlip = Const->isAllOnes();
    break;
  case TargetLowering::UndefinedBooleanContent:
    IsFlip = (Const->getAPIntValue() & 0x01) == 1;
    break;
  }
3296 
3297   if (IsFlip)
3298     return V.getOperand(0);
3299   if (Force)
3300     return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
3301   return SDValue();
3302 }
3303 
3304 SDValue DAGCombiner::visitADDO(SDNode *N) {
3305   SDValue N0 = N->getOperand(0);
3306   SDValue N1 = N->getOperand(1);
3307   EVT VT = N0.getValueType();
3308   bool IsSigned = (ISD::SADDO == N->getOpcode());
3309 
3310   EVT CarryVT = N->getValueType(1);
3311   SDLoc DL(N);
3312 
3313   // If the flag result is dead, turn this into an ADD.
3314   if (!N->hasAnyUseOfValue(1))
3315     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3316                      DAG.getUNDEF(CarryVT));
3317 
3318   // canonicalize constant to RHS.
3319   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3320       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3321     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
3322 
3323   // fold (addo x, 0) -> x + no carry out
3324   if (isNullOrNullSplat(N1))
3325     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3326 
3327   // If it cannot overflow, transform into an add.
3328   if (DAG.willNotOverflowAdd(IsSigned, N0, N1))
3329     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
3330                      DAG.getConstant(0, DL, CarryVT));
3331 
3332   if (IsSigned) {
3333     // fold (saddo (xor a, -1), 1) -> (ssub 0, a).
3334     if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
3335       return DAG.getNode(ISD::SSUBO, DL, N->getVTList(),
3336                          DAG.getConstant(0, DL, VT), N0.getOperand(0));
3337   } else {
3338     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
3339     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
3340       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
3341                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
3342       return CombineTo(
3343           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3344     }
3345 
3346     if (SDValue Combined = visitUADDOLike(N0, N1, N))
3347       return Combined;
3348 
3349     if (SDValue Combined = visitUADDOLike(N1, N0, N))
3350       return Combined;
3351   }
3352 
3353   return SDValue();
3354 }
3355 
3356 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
3357   EVT VT = N0.getValueType();
3358   if (VT.isVector())
3359     return SDValue();
3360 
3361   // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
3362   // If Y + 1 cannot overflow.
3363   if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
3364     SDValue Y = N1.getOperand(0);
3365     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
3366     if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
3367       return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
3368                          N1.getOperand(2));
3369   }
3370 
3371   // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
3372   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
3373     if (SDValue Carry = getAsCarry(TLI, N1))
3374       return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
3375                          DAG.getConstant(0, SDLoc(N), VT), Carry);
3376 
3377   return SDValue();
3378 }
3379 
3380 SDValue DAGCombiner::visitADDE(SDNode *N) {
3381   SDValue N0 = N->getOperand(0);
3382   SDValue N1 = N->getOperand(1);
3383   SDValue CarryIn = N->getOperand(2);
3384 
3385   // canonicalize constant to RHS
3386   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3387   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3388   if (N0C && !N1C)
3389     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
3390                        N1, N0, CarryIn);
3391 
3392   // fold (adde x, y, false) -> (addc x, y)
3393   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3394     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
3395 
3396   return SDValue();
3397 }
3398 
3399 SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
3400   SDValue N0 = N->getOperand(0);
3401   SDValue N1 = N->getOperand(1);
3402   SDValue CarryIn = N->getOperand(2);
3403   SDLoc DL(N);
3404 
3405   // canonicalize constant to RHS
3406   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3407   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3408   if (N0C && !N1C)
3409     return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3410 
3411   // fold (uaddo_carry x, y, false) -> (uaddo x, y)
3412   if (isNullConstant(CarryIn)) {
3413     if (!LegalOperations ||
3414         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
3415       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
3416   }
3417 
3418   // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
3419   if (isNullConstant(N0) && isNullConstant(N1)) {
3420     EVT VT = N0.getValueType();
3421     EVT CarryVT = CarryIn.getValueType();
3422     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
3423     AddToWorklist(CarryExt.getNode());
3424     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
3425                                     DAG.getConstant(1, DL, VT)),
3426                      DAG.getConstant(0, DL, CarryVT));
3427   }
3428 
3429   if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
3430     return Combined;
3431 
3432   if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
3433     return Combined;
3434 
  // We want to avoid useless duplication.
  // TODO: This is done automatically for binary operations. As UADDO_CARRY is
  // not a binary operation, it is not possible to leverage that existing
  // mechanism for it. However, if more operations require the same
  // deduplication logic, it may be worth generalizing.
3440   SDValue Ops[] = {N1, N0, CarryIn};
3441   SDNode *CSENode =
3442       DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
3443   if (CSENode)
3444     return SDValue(CSENode, 0);
3445 
3446   return SDValue();
3447 }
3448 
/**
 * If we are facing some sort of diamond carry propagation pattern try to
 * break it up to generate something like:
 *   (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
 *
 * The end result is usually an increase in the number of operations required,
 * but because the carry is now linearized, other transforms can kick in and
 * optimize the DAG.
 *
 * Patterns typically look something like
 *                (uaddo A, B)
 *                /          \
 *             Carry         Sum
 *               |             \
 *               | (uaddo_carry *, 0, Z)
 *               |       /
 *                \   Carry
 *                 |   /
 * (uaddo_carry X, *, *)
 *
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
 * produce a combine with a single path for carry propagation.
 */
3471 static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
3472                                          SelectionDAG &DAG, SDValue X,
3473                                          SDValue Carry0, SDValue Carry1,
3474                                          SDNode *N) {
3475   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
3476     return SDValue();
3477   if (Carry1.getOpcode() != ISD::UADDO)
3478     return SDValue();
3479 
3480   SDValue Z;
3481 
3482   /**
3483    * First look for a suitable Z. It will present itself in the form of
3484    * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
3485    */
3486   if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
3487       isNullConstant(Carry0.getOperand(1))) {
3488     Z = Carry0.getOperand(2);
3489   } else if (Carry0.getOpcode() == ISD::UADDO &&
3490              isOneConstant(Carry0.getOperand(1))) {
3491     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
3492     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3493   } else {
3494     // We couldn't find a suitable Z.
3495     return SDValue();
3496   }

  auto cancelDiamond = [&](SDValue A, SDValue B) {
3500     SDLoc DL(N);
3501     SDValue NewY =
3502         DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
3503     Combiner.AddToWorklist(NewY.getNode());
3504     return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
3505                        DAG.getConstant(0, DL, X.getValueType()),
3506                        NewY.getValue(1));
3507   };
3508 
3509   /**
3510    *         (uaddo A, B)
3511    *              |
3512    *             Sum
3513    *              |
3514    * (uaddo_carry *, 0, Z)
3515    */
3516   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3517     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3518   }
3519 
3520   /**
3521    * (uaddo_carry A, 0, Z)
3522    *         |
3523    *        Sum
3524    *         |
3525    *  (uaddo *, B)
3526    */
3527   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3528     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3529   }
3530 
3531   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3532     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3533   }
3534 
3535   return SDValue();
3536 }
3537 
3538 // If we are facing some sort of diamond carry/borrow in/out pattern try to
3539 // match patterns like:
3540 //
3541 //          (uaddo A, B)            CarryIn
3542 //            |  \                     |
3543 //            |   \                    |
3544 //    PartialSum   PartialCarryOutX   /
3545 //            |        |             /
3546 //            |    ____|____________/
3547 //            |   /    |
3548 //     (uaddo *, *)    \________
3549 //       |  \                   \
3550 //       |   \                   |
3551 //       |    PartialCarryOutY   |
3552 //       |        \              |
3553 //       |         \            /
3554 //   AddCarrySum    |    ______/
3555 //                  |   /
3556 //   CarryOut = (or *, *)
3557 //
3558 // And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
3559 //
3560 //    {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
3561 //
3562 // Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
3563 // with a single path for carry/borrow out propagation.
3564 static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
3565                                    SDValue N0, SDValue N1, SDNode *N) {
3566   SDValue Carry0 = getAsCarry(TLI, N0);
3567   if (!Carry0)
3568     return SDValue();
3569   SDValue Carry1 = getAsCarry(TLI, N1);
3570   if (!Carry1)
3571     return SDValue();
3572 
3573   unsigned Opcode = Carry0.getOpcode();
3574   if (Opcode != Carry1.getOpcode())
3575     return SDValue();
3576   if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
3577     return SDValue();
3578   // Guarantee identical type of CarryOut
3579   EVT CarryOutType = N->getValueType(0);
3580   if (CarryOutType != Carry0.getValue(1).getValueType() ||
3581       CarryOutType != Carry1.getValue(1).getValueType())
3582     return SDValue();
3583 
3584   // Canonicalize the add/sub of A and B (the top node in the above ASCII art)
3585   // as Carry0 and the add/sub of the carry in as Carry1 (the middle node).
3586   if (Carry1.getNode()->isOperandOf(Carry0.getNode()))
3587     std::swap(Carry0, Carry1);
3588 
3589   // Check if nodes are connected in expected way.
3590   if (Carry1.getOperand(0) != Carry0.getValue(0) &&
3591       Carry1.getOperand(1) != Carry0.getValue(0))
3592     return SDValue();
3593 
  // The carry-in value must be on the right-hand side for subtraction.
3595   unsigned CarryInOperandNum =
3596       Carry1.getOperand(0) == Carry0.getValue(0) ? 1 : 0;
3597   if (Opcode == ISD::USUBO && CarryInOperandNum != 1)
3598     return SDValue();
3599   SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
3600 
3601   unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
3602   if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
3603     return SDValue();
3604 
3605   // Verify that the carry/borrow in is plausibly a carry/borrow bit.
3606   CarryIn = getAsCarry(TLI, CarryIn, true);
3607   if (!CarryIn)
3608     return SDValue();
3609 
3610   SDLoc DL(N);
3611   SDValue Merged =
3612       DAG.getNode(NewOp, DL, Carry1->getVTList(), Carry0.getOperand(0),
3613                   Carry0.getOperand(1), CarryIn);
3614 
  // Because we have proven that the result of the UADDO/USUBO of A and B
  // feeds into the UADDO/USUBO that consumes the carry/borrow in, we know
  // that if the first UADDO/USUBO overflows, the second one cannot. For
  // example, consider 8-bit numbers where 0xFF is the maximum value.
3620   //
3621   //   0xFF + 0xFF == 0xFE with carry but 0xFE + 1 does not carry
3622   //   0x00 - 0xFF == 1 with a carry/borrow but 1 - 1 == 0 (no carry/borrow)
3623   //
3624   // This is important because it means that OR and XOR can be used to merge
3625   // carry flags; and that AND can return a constant zero.
3626   //
3627   // TODO: match other operations that can merge flags (ADD, etc)
3628   DAG.ReplaceAllUsesOfValueWith(Carry1.getValue(0), Merged.getValue(0));
3629   if (N->getOpcode() == ISD::AND)
3630     return DAG.getConstant(0, DL, CarryOutType);
3631   return Merged.getValue(1);
3632 }
3633 
3634 SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
3635                                           SDValue CarryIn, SDNode *N) {
3636   // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
3637   // carry.
3638   if (isBitwiseNot(N0))
3639     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3640       SDLoc DL(N);
3641       SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
3642                                 N0.getOperand(0), NotC);
3643       return CombineTo(
3644           N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
3645     }
3646 
3647   // Iff the flag result is dead:
3648   // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
3649   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3650   // or the dependency between the instructions.
3651   if ((N0.getOpcode() == ISD::ADD ||
3652        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3653         N0.getValue(1) != CarryIn)) &&
3654       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3655     return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
3656                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3657 
  /**
   * When one of the uaddo_carry arguments is itself a carry, we may be facing
   * a diamond carry propagation, in which case we try to transform the DAG
   * to ensure linear carry propagation if that is possible.
   */
3663   if (auto Y = getAsCarry(TLI, N1)) {
3664     // Because both are carries, Y and Z can be swapped.
3665     if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3666       return R;
3667     if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3668       return R;
3669   }
3670 
3671   return SDValue();
3672 }
3673 
3674 SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1,
3675                                           SDValue CarryIn, SDNode *N) {
3676   // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c)
3677   if (isBitwiseNot(N0)) {
3678     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true))
3679       return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1,
3680                          N0.getOperand(0), NotC);
3681   }
3682 
3683   return SDValue();
3684 }
3685 
3686 SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
3687   SDValue N0 = N->getOperand(0);
3688   SDValue N1 = N->getOperand(1);
3689   SDValue CarryIn = N->getOperand(2);
3690   SDLoc DL(N);
3691 
3692   // canonicalize constant to RHS
3693   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3694   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3695   if (N0C && !N1C)
3696     return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
3697 
3698   // fold (saddo_carry x, y, false) -> (saddo x, y)
3699   if (isNullConstant(CarryIn)) {
3700     if (!LegalOperations ||
3701         TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
3702       return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
3703   }
3704 
3705   if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N))
3706     return Combined;
3707 
3708   if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N))
3709     return Combined;
3710 
3711   return SDValue();
3712 }
3713 
3714 // Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
3715 // clamp/truncation if necessary.
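// E.g., truncating from i16 to i8 with a zero-extended LHS: clamp RHS to
// umin(RHS, 255), truncate both operands to i8, and emit the i8 USUBSAT.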
3716 static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
3717                                    SDValue RHS, SelectionDAG &DAG,
3718                                    const SDLoc &DL) {
3719   assert(DstVT.getScalarSizeInBits() <= SrcVT.getScalarSizeInBits() &&
3720          "Illegal truncation");
3721 
3722   if (DstVT == SrcVT)
3723     return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3724 
3725   // If the LHS is zero-extended then we can perform the USUBSAT as DstVT by
3726   // clamping RHS.
3727   APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
3728                                           DstVT.getScalarSizeInBits());
3729   if (!DAG.MaskedValueIsZero(LHS, UpperBits))
3730     return SDValue();
3731 
3732   SDValue SatLimit =
3733       DAG.getConstant(APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
3734                                            DstVT.getScalarSizeInBits()),
3735                       DL, SrcVT);
3736   RHS = DAG.getNode(ISD::UMIN, DL, SrcVT, RHS, SatLimit);
3737   RHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, RHS);
3738   LHS = DAG.getNode(ISD::TRUNCATE, DL, DstVT, LHS);
3739   return DAG.getNode(ISD::USUBSAT, DL, DstVT, LHS, RHS);
3740 }
3741 
3742 // Try to find umax(a,b) - b or a - umin(a,b) patterns that may be converted to
3743 // usubsat(a,b), optionally as a truncated type.
3744 SDValue DAGCombiner::foldSubToUSubSat(EVT DstVT, SDNode *N) {
3745   if (N->getOpcode() != ISD::SUB ||
3746       !(!LegalOperations || hasOperation(ISD::USUBSAT, DstVT)))
3747     return SDValue();
3748 
3749   EVT SubVT = N->getValueType(0);
3750   SDValue Op0 = N->getOperand(0);
3751   SDValue Op1 = N->getOperand(1);
3752 
  // Try to find umax(a,b) - b or a - umin(a,b) patterns that
  // may be converted to usubsat(a,b).
3755   if (Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
3756     SDValue MaxLHS = Op0.getOperand(0);
3757     SDValue MaxRHS = Op0.getOperand(1);
3758     if (MaxLHS == Op1)
3759       return getTruncatedUSUBSAT(DstVT, SubVT, MaxRHS, Op1, DAG, SDLoc(N));
3760     if (MaxRHS == Op1)
3761       return getTruncatedUSUBSAT(DstVT, SubVT, MaxLHS, Op1, DAG, SDLoc(N));
3762   }
3763 
3764   if (Op1.getOpcode() == ISD::UMIN && Op1.hasOneUse()) {
3765     SDValue MinLHS = Op1.getOperand(0);
3766     SDValue MinRHS = Op1.getOperand(1);
3767     if (MinLHS == Op0)
3768       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinRHS, DAG, SDLoc(N));
3769     if (MinRHS == Op0)
3770       return getTruncatedUSUBSAT(DstVT, SubVT, Op0, MinLHS, DAG, SDLoc(N));
3771   }
3772 
3773   // sub(a,trunc(umin(zext(a),b))) -> usubsat(a,trunc(umin(b,SatLimit)))
3774   if (Op1.getOpcode() == ISD::TRUNCATE &&
3775       Op1.getOperand(0).getOpcode() == ISD::UMIN &&
3776       Op1.getOperand(0).hasOneUse()) {
3777     SDValue MinLHS = Op1.getOperand(0).getOperand(0);
3778     SDValue MinRHS = Op1.getOperand(0).getOperand(1);
3779     if (MinLHS.getOpcode() == ISD::ZERO_EXTEND && MinLHS.getOperand(0) == Op0)
3780       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinLHS, MinRHS,
3781                                  DAG, SDLoc(N));
3782     if (MinRHS.getOpcode() == ISD::ZERO_EXTEND && MinRHS.getOperand(0) == Op0)
3783       return getTruncatedUSUBSAT(DstVT, MinLHS.getValueType(), MinRHS, MinLHS,
3784                                  DAG, SDLoc(N));
3785   }
3786 
3787   return SDValue();
3788 }
3789 
// Since it may not be valid to emit a fold to zero for vector initializers,
// check if we can before folding.
3792 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3793                              SelectionDAG &DAG, bool LegalOperations) {
3794   if (!VT.isVector())
3795     return DAG.getConstant(0, DL, VT);
3796   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3797     return DAG.getConstant(0, DL, VT);
3798   return SDValue();
3799 }
3800 
3801 SDValue DAGCombiner::visitSUB(SDNode *N) {
3802   SDValue N0 = N->getOperand(0);
3803   SDValue N1 = N->getOperand(1);
3804   EVT VT = N0.getValueType();
3805   SDLoc DL(N);
3806 
3807   auto PeekThroughFreeze = [](SDValue N) {
3808     if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
3809       return N->getOperand(0);
3810     return N;
3811   };
3812 
3813   // fold (sub x, x) -> 0
3814   // FIXME: Refactor this and xor and other similar operations together.
3815   if (PeekThroughFreeze(N0) == PeekThroughFreeze(N1))
3816     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3817 
3818   // fold (sub c1, c2) -> c3
3819   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1}))
3820     return C;
3821 
3822   // fold vector ops
3823   if (VT.isVector()) {
3824     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
3825       return FoldedVOp;
3826 
3827     // fold (sub x, 0) -> x, vector edition
3828     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
3829       return N0;
3830   }
3831 
3832   if (SDValue NewSel = foldBinOpIntoSelect(N))
3833     return NewSel;
3834 
3835   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3836 
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3842 
3843   if (isNullOrNullSplat(N0)) {
3844     unsigned BitWidth = VT.getScalarSizeInBits();
3845     // Right-shifting everything out but the sign bit followed by negation is
3846     // the same as flipping arithmetic/logical shift type without the negation:
3847     // -(X >>u 31) -> (X >>s 31)
3848     // -(X >>s 31) -> (X >>u 31)
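    // This holds because (X >>u 31) is 0 or 1, and negating it yields 0 or
    // -1, which is exactly (X >>s 31); the reverse direction is symmetric.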
3849     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3850       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3851       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3852         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3853         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3854           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3855       }
3856     }
3857 
3858     // 0 - X --> 0 if the sub is NUW.
3859     if (N->getFlags().hasNoUnsignedWrap())
3860       return N0;
3861 
3862     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3863       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3864       // N1 must be 0 because negating the minimum signed value is undefined.
3865       if (N->getFlags().hasNoSignedWrap())
3866         return N0;
3867 
3868       // 0 - X --> X if X is 0 or the minimum signed value.
3869       return N1;
3870     }
3871 
3872     // Convert 0 - abs(x).
3873     if (N1.getOpcode() == ISD::ABS && N1.hasOneUse() &&
3874         !TLI.isOperationLegalOrCustom(ISD::ABS, VT))
3875       if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true))
3876         return Result;
3877 
    // Fold neg(splat(neg(x))) -> splat(x)
3879     if (VT.isVector()) {
3880       SDValue N1S = DAG.getSplatValue(N1, true);
3881       if (N1S && N1S.getOpcode() == ISD::SUB &&
3882           isNullConstant(N1S.getOperand(0)))
3883         return DAG.getSplat(VT, DL, N1S.getOperand(1));
3884     }
3885   }
3886 
3887   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3888   if (isAllOnesOrAllOnesSplat(N0))
3889     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3890 
3891   // fold (A - (0-B)) -> A+B
3892   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3893     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3894 
3895   // fold A-(A-B) -> B
3896   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3897     return N1.getOperand(1);
3898 
3899   // fold (A+B)-A -> B
3900   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3901     return N0.getOperand(1);
3902 
3903   // fold (A+B)-B -> A
3904   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3905     return N0.getOperand(0);
3906 
3907   // fold (A+C1)-C2 -> A+(C1-C2)
3908   if (N0.getOpcode() == ISD::ADD) {
3909     SDValue N01 = N0.getOperand(1);
3910     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N01, N1}))
3911       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3912   }
3913 
3914   // fold C2-(A+C1) -> (C2-C1)-A
3915   if (N1.getOpcode() == ISD::ADD) {
3916     SDValue N11 = N1.getOperand(1);
3917     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}))
3918       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3919   }
3920 
3921   // fold (A-C1)-C2 -> A-(C1+C2)
3922   if (N0.getOpcode() == ISD::SUB) {
3923     SDValue N01 = N0.getOperand(1);
3924     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N01, N1}))
3925       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3926   }
3927 
3928   // fold (c1-A)-c2 -> (c1-c2)-A
3929   if (N0.getOpcode() == ISD::SUB) {
3930     SDValue N00 = N0.getOperand(0);
3931     if (SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N00, N1}))
3932       return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3933   }
3934 
3935   // fold ((A+(B+or-C))-B) -> A+or-C
3936   if (N0.getOpcode() == ISD::ADD &&
3937       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3938        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3939       N0.getOperand(1).getOperand(0) == N1)
3940     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3941                        N0.getOperand(1).getOperand(1));
3942 
3943   // fold ((A+(C+B))-B) -> A+C
3944   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3945       N0.getOperand(1).getOperand(1) == N1)
3946     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3947                        N0.getOperand(1).getOperand(0));
3948 
3949   // fold ((A-(B-C))-C) -> A-B
3950   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3951       N0.getOperand(1).getOperand(1) == N1)
3952     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3953                        N0.getOperand(1).getOperand(0));
3954 
3955   // fold (A-(B-C)) -> A+(C-B)
3956   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3957     return DAG.getNode(ISD::ADD, DL, VT, N0,
3958                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3959                                    N1.getOperand(0)));
3960 
3961   // A - (A & B)  ->  A & (~B)
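  // The set bits of (A & B) are a subset of the set bits of A, so the
  // subtraction never borrows; it simply clears B's bits from A.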
3962   if (N1.getOpcode() == ISD::AND) {
3963     SDValue A = N1.getOperand(0);
3964     SDValue B = N1.getOperand(1);
3965     if (A != N0)
3966       std::swap(A, B);
3967     if (A == N0 &&
3968         (N1.hasOneUse() || isConstantOrConstantVector(B, /*NoOpaques=*/true))) {
3969       SDValue InvB =
3970           DAG.getNode(ISD::XOR, DL, VT, B, DAG.getAllOnesConstant(DL, VT));
3971       return DAG.getNode(ISD::AND, DL, VT, A, InvB);
3972     }
3973   }
3974 
3975   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3976   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3977     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3978         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3979       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3980                                 N1.getOperand(0).getOperand(1),
3981                                 N1.getOperand(1));
3982       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3983     }
3984     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3985         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3986       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3987                                 N1.getOperand(0),
3988                                 N1.getOperand(1).getOperand(1));
3989       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3990     }
3991   }
3992 
3993   // If either operand of a sub is undef, the result is undef
3994   if (N0.isUndef())
3995     return N0;
3996   if (N1.isUndef())
3997     return N1;
3998 
3999   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
4000     return V;
4001 
4002   if (SDValue V = foldAddSubOfSignBit(N, DAG))
4003     return V;
4004 
4005   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
4006     return V;
4007 
4008   if (SDValue V = foldSubToUSubSat(VT, N))
4009     return V;
4010 
4011   // (x - y) - 1  ->  add (xor y, -1), x
4012   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) {
4013     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
4014                               DAG.getAllOnesConstant(DL, VT));
4015     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
4016   }
4017 
4018   // Look for:
4019   //   sub y, (xor x, -1)
4020   // And if the target does not like this form then turn into:
4021   //   add (add x, y), 1
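  // (This uses the two's complement identity ~x == -x - 1, so
  // y - ~x == y + x + 1.)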
4022   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
4023     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
4024     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
4025   }
4026 
4027   // Hoist one-use addition by non-opaque constant:
4028   //   (x + C) - y  ->  (x - y) + C
4029   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
4030       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4031     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4032     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
4033   }
4034   // y - (x + C)  ->  (y - x) - C
4035   if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() &&
4036       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
4037     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
4038     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
4039   }
4040   // (x - C) - y  ->  (x - y) - C
4041   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
4042   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4043       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
4044     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
4045     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
4046   }
4047   // (C - x) - y  ->  C - (x + y)
4048   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
4049       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
4050     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
4051     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
4052   }
4053 
4054   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
4055   // rather than 'sub 0/1' (the sext should get folded).
4056   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
4057   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
4058       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
4059       TLI.getBooleanContents(VT) ==
4060           TargetLowering::ZeroOrNegativeOneBooleanContent) {
4061     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
4062     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
4063   }
4064 
4065   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
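  // This is a branchless abs idiom: Y is 0 or -1 (the sign mask), so
  // (X ^ Y) - Y is (~X + 1) == -X when X is negative and X otherwise.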
4066   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
4067     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
4068       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
4069       SDValue S0 = N1.getOperand(0);
4070       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0))
4071         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
4072           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
4073             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
4074     }
4075   }
4076 
4077   // If the relocation model supports it, consider symbol offsets.
4078   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
4079     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
4080       // fold (sub Sym+c1, Sym+c2) -> c1-c2
4081       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
4082         if (GA->getGlobal() == GB->getGlobal())
4083           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
4084                                  DL, VT);
4085     }
4086 
4087   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
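  // (sextinreg Y i1) is 0 or -1 depending on Y's low bit, and subtracting
  // 0/-1 is the same as adding 0/1, i.e. adding (and Y 1).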
4088   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
4089     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
4090     if (TN->getVT() == MVT::i1) {
4091       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
4092                                  DAG.getConstant(1, DL, VT));
4093       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
4094     }
4095   }
4096 
4097   // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C))
4098   if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) {
4099     const APInt &IntVal = N1.getConstantOperandAPInt(0);
4100     return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal));
4101   }
4102 
4103   // canonicalize (sub X, step_vector(C)) to (add X, step_vector(-C))
4104   if (N1.getOpcode() == ISD::STEP_VECTOR && N1.hasOneUse()) {
4105     APInt NewStep = -N1.getConstantOperandAPInt(0);
4106     return DAG.getNode(ISD::ADD, DL, VT, N0,
4107                        DAG.getStepVector(DL, VT, NewStep));
4108   }
4109 
4110   // Prefer an add for more folding potential and possibly better codegen:
4111   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
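  // Both shifts extract the sign bit: lshr yields 0/1 and ashr yields 0/-1,
  // so subtracting the former is the same as adding the latter.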
4112   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
4113     SDValue ShAmt = N1.getOperand(1);
4114     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
4115     if (ShAmtC &&
4116         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
4117       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
4118       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
4119     }
4120   }
4121 
4122   // As with the previous fold, prefer add for more folding potential.
4123   // Subtracting SMIN/0 is the same as adding SMIN/0:
4124   // N0 - (X << BW-1) --> N0 + (X << BW-1)
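  // (X << BW-1) is either 0 or SMIN, and SMIN is its own negation in two's
  // complement, so the sub and the add produce the same value.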
4125   if (N1.getOpcode() == ISD::SHL) {
4126     ConstantSDNode *ShlC = isConstOrConstSplat(N1.getOperand(1));
4127     if (ShlC && ShlC->getAPIntValue() == VT.getScalarSizeInBits() - 1)
4128       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
4129   }
4130 
4131   // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
4132   if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
4133       N0.getResNo() == 0 && N0.hasOneUse())
4134     return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
4135                        N0.getOperand(0), N1, N0.getOperand(2));
4136 
4137   if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
4138     // (sub Carry, X)  ->  (uaddo_carry (sub 0, X), 0, Carry)
4139     if (SDValue Carry = getAsCarry(TLI, N0)) {
4140       SDValue X = N1;
4141       SDValue Zero = DAG.getConstant(0, DL, VT);
4142       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
4143       return DAG.getNode(ISD::UADDO_CARRY, DL,
4144                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
4145                          Carry);
4146     }
4147   }
4148 
4149   // If there's no chance of borrowing from adjacent bits, then sub is xor:
4150   // sub C0, X --> xor X, C0
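  // e.g. with C0 == 0b1111 and X known to use only the low 4 bits:
  // 15 - 5 == 15 ^ 5 == 10, because no bit position can borrow.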
4151   if (ConstantSDNode *C0 = isConstOrConstSplat(N0)) {
4152     if (!C0->isOpaque()) {
4153       const APInt &C0Val = C0->getAPIntValue();
4154       const APInt &MaybeOnes = ~DAG.computeKnownBits(N1).Zero;
4155       if ((C0Val - MaybeOnes) == (C0Val ^ MaybeOnes))
4156         return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
4157     }
4158   }
4159 
4160   // max(a,b) - min(a,b) --> abd(a,b)
4161   auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) {
4162     if (N0.getOpcode() != Max || N1.getOpcode() != Min)
4163       return SDValue();
4164     if ((N0.getOperand(0) != N1.getOperand(0) ||
4165          N0.getOperand(1) != N1.getOperand(1)) &&
4166         (N0.getOperand(0) != N1.getOperand(1) ||
4167          N0.getOperand(1) != N1.getOperand(0)))
4168       return SDValue();
4169     if (!hasOperation(Abd, VT))
4170       return SDValue();
4171     return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
4172   };
4173   if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS))
4174     return R;
4175   if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU))
4176     return R;
4177 
4178   return SDValue();
4179 }
4180 
4181 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
4182   unsigned Opcode = N->getOpcode();
4183   SDValue N0 = N->getOperand(0);
4184   SDValue N1 = N->getOperand(1);
4185   EVT VT = N0.getValueType();
4186   bool IsSigned = Opcode == ISD::SSUBSAT;
4187   SDLoc DL(N);
4188 
4189   // fold (sub_sat x, undef) -> 0
4190   if (N0.isUndef() || N1.isUndef())
4191     return DAG.getConstant(0, DL, VT);
4192 
4193   // fold (sub_sat x, x) -> 0
4194   if (N0 == N1)
4195     return DAG.getConstant(0, DL, VT);
4196 
4197   // fold (sub_sat c1, c2) -> c3
4198   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
4199     return C;
4200 
4201   // fold vector ops
4202   if (VT.isVector()) {
4203     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4204       return FoldedVOp;
4205 
4206     // fold (sub_sat x, 0) -> x, vector edition
4207     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
4208       return N0;
4209   }
4210 
4211   // fold (sub_sat x, 0) -> x
4212   if (isNullConstant(N1))
4213     return N0;
4214 
  // If it cannot overflow, transform into a sub.
4216   if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4217     return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
4218 
4219   return SDValue();
4220 }
4221 
4222 SDValue DAGCombiner::visitSUBC(SDNode *N) {
4223   SDValue N0 = N->getOperand(0);
4224   SDValue N1 = N->getOperand(1);
4225   EVT VT = N0.getValueType();
4226   SDLoc DL(N);
4227 
4228   // If the flag result is dead, turn this into an SUB.
4229   if (!N->hasAnyUseOfValue(1))
4230     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4231                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4232 
4233   // fold (subc x, x) -> 0 + no borrow
4234   if (N0 == N1)
4235     return CombineTo(N, DAG.getConstant(0, DL, VT),
4236                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4237 
4238   // fold (subc x, 0) -> x + no borrow
4239   if (isNullConstant(N1))
4240     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4241 
4242   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4243   if (isAllOnesConstant(N0))
4244     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4245                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
4246 
4247   return SDValue();
4248 }
4249 
4250 SDValue DAGCombiner::visitSUBO(SDNode *N) {
4251   SDValue N0 = N->getOperand(0);
4252   SDValue N1 = N->getOperand(1);
4253   EVT VT = N0.getValueType();
4254   bool IsSigned = (ISD::SSUBO == N->getOpcode());
4255 
4256   EVT CarryVT = N->getValueType(1);
4257   SDLoc DL(N);
4258 
4259   // If the flag result is dead, turn this into an SUB.
4260   if (!N->hasAnyUseOfValue(1))
4261     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4262                      DAG.getUNDEF(CarryVT));
4263 
4264   // fold (subo x, x) -> 0 + no borrow
4265   if (N0 == N1)
4266     return CombineTo(N, DAG.getConstant(0, DL, VT),
4267                      DAG.getConstant(0, DL, CarryVT));
4268 
4269   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4270 
  // fold (ssubo x, c) -> (saddo x, -c)
  // (excluding the minimum signed value, whose negation would itself overflow)
4272   if (IsSigned && N1C && !N1C->isMinSignedValue()) {
4273     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
4274                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
4275   }
4276 
4277   // fold (subo x, 0) -> x + no borrow
4278   if (isNullOrNullSplat(N1))
4279     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
4280 
  // If it cannot overflow, transform into a sub.
4282   if (DAG.willNotOverflowSub(IsSigned, N0, N1))
4283     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
4284                      DAG.getConstant(0, DL, CarryVT));
4285 
4286   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
4287   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
4288     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
4289                      DAG.getConstant(0, DL, CarryVT));
4290 
4291   return SDValue();
4292 }
4293 
4294 SDValue DAGCombiner::visitSUBE(SDNode *N) {
4295   SDValue N0 = N->getOperand(0);
4296   SDValue N1 = N->getOperand(1);
4297   SDValue CarryIn = N->getOperand(2);
4298 
4299   // fold (sube x, y, false) -> (subc x, y)
4300   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
4301     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
4302 
4303   return SDValue();
4304 }
4305 
4306 SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
4307   SDValue N0 = N->getOperand(0);
4308   SDValue N1 = N->getOperand(1);
4309   SDValue CarryIn = N->getOperand(2);
4310 
4311   // fold (usubo_carry x, y, false) -> (usubo x, y)
4312   if (isNullConstant(CarryIn)) {
4313     if (!LegalOperations ||
4314         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
4315       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
4316   }
4317 
4318   return SDValue();
4319 }
4320 
4321 SDValue DAGCombiner::visitSSUBO_CARRY(SDNode *N) {
4322   SDValue N0 = N->getOperand(0);
4323   SDValue N1 = N->getOperand(1);
4324   SDValue CarryIn = N->getOperand(2);
4325 
4326   // fold (ssubo_carry x, y, false) -> (ssubo x, y)
4327   if (isNullConstant(CarryIn)) {
4328     if (!LegalOperations ||
4329         TLI.isOperationLegalOrCustom(ISD::SSUBO, N->getValueType(0)))
4330       return DAG.getNode(ISD::SSUBO, SDLoc(N), N->getVTList(), N0, N1);
4331   }
4332 
4333   return SDValue();
4334 }
4335 
4336 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
4337 // UMULFIXSAT here.
4338 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
4339   SDValue N0 = N->getOperand(0);
4340   SDValue N1 = N->getOperand(1);
4341   SDValue Scale = N->getOperand(2);
4342   EVT VT = N0.getValueType();
4343 
4344   // fold (mulfix x, undef, scale) -> 0
4345   if (N0.isUndef() || N1.isUndef())
4346     return DAG.getConstant(0, SDLoc(N), VT);
4347 
4348   // Canonicalize constant to RHS (vector doesn't have to splat)
4349   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4350      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4351     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
4352 
4353   // fold (mulfix x, 0, scale) -> 0
4354   if (isNullConstant(N1))
4355     return DAG.getConstant(0, SDLoc(N), VT);
4356 
4357   return SDValue();
4358 }
4359 
4360 SDValue DAGCombiner::visitMUL(SDNode *N) {
4361   SDValue N0 = N->getOperand(0);
4362   SDValue N1 = N->getOperand(1);
4363   EVT VT = N0.getValueType();
4364   SDLoc DL(N);
4365 
4366   // fold (mul x, undef) -> 0
4367   if (N0.isUndef() || N1.isUndef())
4368     return DAG.getConstant(0, DL, VT);
4369 
4370   // fold (mul c1, c2) -> c1*c2
4371   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, DL, VT, {N0, N1}))
4372     return C;
4373 
4374   // canonicalize constant to RHS (vector doesn't have to splat)
4375   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4376       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4377     return DAG.getNode(ISD::MUL, DL, VT, N1, N0);
4378 
4379   bool N1IsConst = false;
4380   bool N1IsOpaqueConst = false;
4381   APInt ConstValue1;
4382 
4383   // fold vector ops
4384   if (VT.isVector()) {
4385     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4386       return FoldedVOp;
4387 
4388     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
4389     assert((!N1IsConst ||
4390             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
4391            "Splat APInt should be element width");
4392   } else {
4393     N1IsConst = isa<ConstantSDNode>(N1);
4394     if (N1IsConst) {
4395       ConstValue1 = N1->getAsAPIntVal();
4396       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
4397     }
4398   }
4399 
4400   // fold (mul x, 0) -> 0
4401   if (N1IsConst && ConstValue1.isZero())
4402     return N1;
4403 
4404   // fold (mul x, 1) -> x
4405   if (N1IsConst && ConstValue1.isOne())
4406     return N0;
4407 
4408   if (SDValue NewSel = foldBinOpIntoSelect(N))
4409     return NewSel;
4410 
4411   // fold (mul x, -1) -> 0-x
4412   if (N1IsConst && ConstValue1.isAllOnes())
4413     return DAG.getNegative(N0, DL, VT);
4414 
4415   // fold (mul x, (1 << c)) -> x << c
4416   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4417       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
4418     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4419       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4420       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4421       return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
4422     }
4423   }
4424 
4425   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
4426   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
4427     unsigned Log2Val = (-ConstValue1).logBase2();
4428     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4429 
4430     // FIXME: If the input is something that is easily negated (e.g. a
4431     // single-use add), we should put the negate there.
4432     return DAG.getNode(ISD::SUB, DL, VT,
4433                        DAG.getConstant(0, DL, VT),
4434                        DAG.getNode(ISD::SHL, DL, VT, N0,
4435                             DAG.getConstant(Log2Val, DL, ShiftVT)));
4436   }
4437 
4438   // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
4439   // hi result is in use in case we hit this mid-legalization.
4440   for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
4441     if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) {
4442       SDVTList LoHiVT = DAG.getVTList(VT, VT);
4443       // TODO: Can we match commutable operands with getNodeIfExists?
4444       if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1}))
4445         if (LoHi->hasAnyUseOfValue(1))
4446           return SDValue(LoHi, 0);
4447       if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0}))
4448         if (LoHi->hasAnyUseOfValue(1))
4449           return SDValue(LoHi, 0);
4450     }
4451   }
4452 
4453   // Try to transform:
4454   // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub.
4455   // mul x, (2^N + 1) --> add (shl x, N), x
4456   // mul x, (2^N - 1) --> sub (shl x, N), x
4457   // Examples: x * 33 --> (x << 5) + x
4458   //           x * 15 --> (x << 4) - x
4459   //           x * -33 --> -((x << 5) + x)
4460   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
4461   // (2) multiply-by-(power-of-2 +/- power-of-2) into shifts and add/sub.
4462   // mul x, (2^N + 2^M) --> (add (shl x, N), (shl x, M))
4463   // mul x, (2^N - 2^M) --> (sub (shl x, N), (shl x, M))
4464   // Examples: x * 0x8800 --> (x << 15) + (x << 11)
4465   //           x * 0xf800 --> (x << 16) - (x << 11)
4466   //           x * -0x8800 --> -((x << 15) + (x << 11))
4467   //           x * -0xf800 --> -((x << 16) - (x << 11)) ; (x << 11) - (x << 16)
4468   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
4469     // TODO: We could handle more general decomposition of any constant by
4470     //       having the target set a limit on number of ops and making a
4471     //       callback to determine that sequence (similar to sqrt expansion).
4472     unsigned MathOp = ISD::DELETED_NODE;
4473     APInt MulC = ConstValue1.abs();
4474     // The constant `2` should be treated as (2^0 + 1).
4475     unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
4476     MulC.lshrInPlace(TZeros);
4477     if ((MulC - 1).isPowerOf2())
4478       MathOp = ISD::ADD;
4479     else if ((MulC + 1).isPowerOf2())
4480       MathOp = ISD::SUB;
4481 
4482     if (MathOp != ISD::DELETED_NODE) {
4483       unsigned ShAmt =
4484           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
4485       ShAmt += TZeros;
4486       assert(ShAmt < VT.getScalarSizeInBits() &&
4487              "multiply-by-constant generated out of bounds shift");
4488       SDValue Shl =
4489           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
4490       SDValue R =
4491           TZeros ? DAG.getNode(MathOp, DL, VT, Shl,
4492                                DAG.getNode(ISD::SHL, DL, VT, N0,
4493                                            DAG.getConstant(TZeros, DL, VT)))
4494                  : DAG.getNode(MathOp, DL, VT, Shl, N0);
4495       if (ConstValue1.isNegative())
4496         R = DAG.getNegative(R, DL, VT);
4497       return R;
4498     }
4499   }
4500 
4501   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
4502   if (N0.getOpcode() == ISD::SHL) {
4503     SDValue N01 = N0.getOperand(1);
4504     if (SDValue C3 = DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {N1, N01}))
4505       return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), C3);
4506   }
4507 
4508   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
4509   // use.
4510   {
4511     SDValue Sh, Y;
4512 
4513     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
4514     if (N0.getOpcode() == ISD::SHL &&
4515         isConstantOrConstantVector(N0.getOperand(1)) && N0->hasOneUse()) {
4516       Sh = N0; Y = N1;
4517     } else if (N1.getOpcode() == ISD::SHL &&
4518                isConstantOrConstantVector(N1.getOperand(1)) &&
4519                N1->hasOneUse()) {
4520       Sh = N1; Y = N0;
4521     }
4522 
4523     if (Sh.getNode()) {
4524       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, Sh.getOperand(0), Y);
4525       return DAG.getNode(ISD::SHL, DL, VT, Mul, Sh.getOperand(1));
4526     }
4527   }
4528 
4529   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
4530   if (N0.getOpcode() == ISD::ADD &&
4531       DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
4532       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
4533       isMulAddWithConstProfitable(N, N0, N1))
4534     return DAG.getNode(
4535         ISD::ADD, DL, VT,
4536         DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1),
4537         DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1));
4538 
4539   // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)).
4540   ConstantSDNode *NC1 = isConstOrConstSplat(N1);
4541   if (N0.getOpcode() == ISD::VSCALE && NC1) {
4542     const APInt &C0 = N0.getConstantOperandAPInt(0);
4543     const APInt &C1 = NC1->getAPIntValue();
4544     return DAG.getVScale(DL, VT, C0 * C1);
4545   }
4546 
4547   // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
4548   APInt MulVal;
4549   if (N0.getOpcode() == ISD::STEP_VECTOR &&
4550       ISD::isConstantSplatVector(N1.getNode(), MulVal)) {
4551     const APInt &C0 = N0.getConstantOperandAPInt(0);
4552     APInt NewStep = C0 * MulVal;
4553     return DAG.getStepVector(DL, VT, NewStep);
4554   }
4555 
  // Fold (mul x, 0/undef) -> 0 and (mul x, 1) -> x
  // into and(x, mask).
4559   // We can replace vectors with '0' and '1' factors with a clearing mask.
4560   if (VT.isFixedLengthVector()) {
4561     unsigned NumElts = VT.getVectorNumElements();
4562     SmallBitVector ClearMask;
4563     ClearMask.reserve(NumElts);
4564     auto IsClearMask = [&ClearMask](ConstantSDNode *V) {
4565       if (!V || V->isZero()) {
4566         ClearMask.push_back(true);
4567         return true;
4568       }
4569       ClearMask.push_back(false);
4570       return V->isOne();
4571     };
4572     if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::AND, VT)) &&
4573         ISD::matchUnaryPredicate(N1, IsClearMask, /*AllowUndefs*/ true)) {
4574       assert(N1.getOpcode() == ISD::BUILD_VECTOR && "Unknown constant vector");
4575       EVT LegalSVT = N1.getOperand(0).getValueType();
4576       SDValue Zero = DAG.getConstant(0, DL, LegalSVT);
4577       SDValue AllOnes = DAG.getAllOnesConstant(DL, LegalSVT);
4578       SmallVector<SDValue, 16> Mask(NumElts, AllOnes);
4579       for (unsigned I = 0; I != NumElts; ++I)
4580         if (ClearMask[I])
4581           Mask[I] = Zero;
4582       return DAG.getNode(ISD::AND, DL, VT, N0, DAG.getBuildVector(VT, DL, Mask));
4583     }
4584   }
4585 
4586   // reassociate mul
4587   if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
4588     return RMUL;
4589 
4590   // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
4591   if (SDValue SD =
4592           reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
4593     return SD;
4594 
4595   // Simplify the operands using demanded-bits information.
4596   if (SimplifyDemandedBits(SDValue(N, 0)))
4597     return SDValue(N, 0);
4598 
4599   return SDValue();
4600 }
4601 
4602 /// Return true if divmod libcall is available.
4603 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
4604                                      const TargetLowering &TLI) {
4605   RTLIB::Libcall LC;
4606   EVT NodeType = Node->getValueType(0);
4607   if (!NodeType.isSimple())
4608     return false;
4609   switch (NodeType.getSimpleVT().SimpleTy) {
4610   default: return false; // No libcall for vector types.
4611   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
4612   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
4613   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
4614   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
4615   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
4616   }
4617 
4618   return TLI.getLibcallName(LC) != nullptr;
4619 }
4620 
4621 /// Issue divrem if both quotient and remainder are needed.
4622 SDValue DAGCombiner::useDivRem(SDNode *Node) {
4623   if (Node->use_empty())
4624     return SDValue(); // This is a dead node, leave it alone.
4625 
4626   unsigned Opcode = Node->getOpcode();
4627   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
4628   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
4629 
  // DivRem libcalls can still handle types that are not legal for the target.
4631   EVT VT = Node->getValueType(0);
4632   if (VT.isVector() || !VT.isInteger())
4633     return SDValue();
4634 
4635   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
4636     return SDValue();
4637 
4638   // If DIVREM is going to get expanded into a libcall,
4639   // but there is no libcall available, then don't combine.
4640   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
4641       !isDivRemLibcallAvailable(Node, isSigned, TLI))
4642     return SDValue();
4643 
4644   // If div is legal, it's better to do the normal expansion
4645   unsigned OtherOpcode = 0;
4646   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
4647     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
4648     if (TLI.isOperationLegalOrCustom(Opcode, VT))
4649       return SDValue();
4650   } else {
4651     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4652     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
4653       return SDValue();
4654   }
4655 
4656   SDValue Op0 = Node->getOperand(0);
4657   SDValue Op1 = Node->getOperand(1);
4658   SDValue combined;
4659   for (SDNode *User : Op0->uses()) {
4660     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
4661         User->use_empty())
4662       continue;
4663     // Convert the other matching node(s), too;
4664     // otherwise, the DIVREM may get target-legalized into something
4665     // target-specific that we won't be able to recognize.
4666     unsigned UserOpc = User->getOpcode();
4667     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
4668         User->getOperand(0) == Op0 &&
4669         User->getOperand(1) == Op1) {
4670       if (!combined) {
4671         if (UserOpc == OtherOpcode) {
4672           SDVTList VTs = DAG.getVTList(VT, VT);
4673           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
4674         } else if (UserOpc == DivRemOpc) {
4675           combined = SDValue(User, 0);
4676         } else {
4677           assert(UserOpc == Opcode);
4678           continue;
4679         }
4680       }
4681       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
4682         CombineTo(User, combined);
4683       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
4684         CombineTo(User, combined.getValue(1));
4685     }
4686   }
4687   return combined;
4688 }
4689 
4690 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
4691   SDValue N0 = N->getOperand(0);
4692   SDValue N1 = N->getOperand(1);
4693   EVT VT = N->getValueType(0);
4694   SDLoc DL(N);
4695 
4696   unsigned Opc = N->getOpcode();
4697   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
4698   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4699 
4700   // X / undef -> undef
4701   // X % undef -> undef
4702   // X / 0 -> undef
4703   // X % 0 -> undef
4704   // NOTE: This includes vectors where any divisor element is zero/undef.
4705   if (DAG.isUndef(Opc, {N0, N1}))
4706     return DAG.getUNDEF(VT);
4707 
4708   // undef / X -> 0
4709   // undef % X -> 0
4710   if (N0.isUndef())
4711     return DAG.getConstant(0, DL, VT);
4712 
4713   // 0 / X -> 0
4714   // 0 % X -> 0
4715   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4716   if (N0C && N0C->isZero())
4717     return N0;
4718 
4719   // X / X -> 1
4720   // X % X -> 0
4721   if (N0 == N1)
4722     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
4723 
4724   // X / 1 -> X
4725   // X % 1 -> 0
4726   // If this is a boolean op (single-bit element type), we can't have
4727   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
4728   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
4729   // it's a 1.
4730   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
4731     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
4732 
4733   return SDValue();
4734 }
4735 
4736 SDValue DAGCombiner::visitSDIV(SDNode *N) {
4737   SDValue N0 = N->getOperand(0);
4738   SDValue N1 = N->getOperand(1);
4739   EVT VT = N->getValueType(0);
4740   EVT CCVT = getSetCCResultType(VT);
4741   SDLoc DL(N);
4742 
4743   // fold (sdiv c1, c2) -> c1/c2
4744   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1}))
4745     return C;
4746 
4747   // fold vector ops
4748   if (VT.isVector())
4749     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4750       return FoldedVOp;
4751 
4752   // fold (sdiv X, -1) -> 0-X
4753   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4754   if (N1C && N1C->isAllOnes())
4755     return DAG.getNegative(N0, DL, VT);
4756 
4757   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
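  // Every other dividend has strictly smaller magnitude than MIN_SIGNED, so
  // the quotient is 0 unless X is MIN_SIGNED itself.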
4758   if (N1C && N1C->isMinSignedValue())
4759     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4760                          DAG.getConstant(1, DL, VT),
4761                          DAG.getConstant(0, DL, VT));
4762 
4763   if (SDValue V = simplifyDivRem(N, DAG))
4764     return V;
4765 
4766   if (SDValue NewSel = foldBinOpIntoSelect(N))
4767     return NewSel;
4768 
4769   // If we know the sign bits of both operands are zero, strength reduce to a
4770   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
4771   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4772     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
4773 
4774   if (SDValue V = visitSDIVLike(N0, N1, N)) {
4775     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4777     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
4778                                               { N0, N1 })) {
4779       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4780       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4781       AddToWorklist(Mul.getNode());
4782       AddToWorklist(Sub.getNode());
4783       CombineTo(RemNode, Sub);
4784     }
4785     return V;
4786   }
4787 
4788   // sdiv, srem -> sdivrem
4789   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4790   // true.  Otherwise, we break the simplification logic in visitREM().
4791   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4792   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4793     if (SDValue DivRem = useDivRem(N))
4794         return DivRem;
4795 
4796   return SDValue();
4797 }
4798 
4799 static bool isDivisorPowerOfTwo(SDValue Divisor) {
  // Helper for determining whether a value is a power-of-2 constant scalar or
  // a vector of such elements.
4802   auto IsPowerOfTwo = [](ConstantSDNode *C) {
4803     if (C->isZero() || C->isOpaque())
4804       return false;
4805     if (C->getAPIntValue().isPowerOf2())
4806       return true;
4807     if (C->getAPIntValue().isNegatedPowerOf2())
4808       return true;
4809     return false;
4810   };
4811 
4812   return ISD::matchUnaryPredicate(Divisor, IsPowerOfTwo);
4813 }
4814 
4815 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4816   SDLoc DL(N);
4817   EVT VT = N->getValueType(0);
4818   EVT CCVT = getSetCCResultType(VT);
4819   unsigned BitWidth = VT.getScalarSizeInBits();
4820 
4821   // fold (sdiv X, pow2) -> simple ops after legalize
4822   // FIXME: We check for the exact bit here because the generic lowering gives
4823   // better results in that case. The target-specific lowering should learn how
4824   // to handle exact sdivs efficiently.
4825   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1)) {
4826     // Target-specific implementation of sdiv x, pow2.
4827     if (SDValue Res = BuildSDIVPow2(N))
4828       return Res;
4829 
4830     // Create constants that are functions of the shift amount value.
4831     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
4832     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
4833     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
4834     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
4835     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
4836     if (!isConstantOrConstantVector(Inexact))
4837       return SDValue();
4838 
4839     // Splat the sign bit into the register
4840     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
4841                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
4842     AddToWorklist(Sign.getNode());
4843 
    // Add (N0 < 0) ? |Divisor| - 1 : 0; this rounds the quotient toward zero.
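    // e.g. for (sdiv -7, 4): Srl is 3, so we compute (-7 + 3) >> 2 == -1,
    // which matches C's truncation toward zero.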
4845     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
4846     AddToWorklist(Srl.getNode());
4847     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
4848     AddToWorklist(Add.getNode());
4849     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
4850     AddToWorklist(Sra.getNode());
4851 
    // Special case: (sdiv X, 1) -> X
    // Special case: (sdiv X, -1) -> 0-X
4854     SDValue One = DAG.getConstant(1, DL, VT);
4855     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
4856     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
4857     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
4858     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
4859     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
4860 
4861     // If dividing by a positive value, we're done. Otherwise, the result must
4862     // be negated.
4863     SDValue Zero = DAG.getConstant(0, DL, VT);
4864     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
4865 
4866     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
4867     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
4868     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
4869     return Res;
4870   }
4871 
4872   // If integer divide is expensive and we satisfy the requirements, emit an
4873   // alternate sequence.  Targets may check function attributes for size/speed
4874   // trade-offs.
4875   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4876   if (isConstantOrConstantVector(N1) &&
4877       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4878     if (SDValue Op = BuildSDIV(N))
4879       return Op;
4880 
4881   return SDValue();
4882 }
4883 
4884 SDValue DAGCombiner::visitUDIV(SDNode *N) {
4885   SDValue N0 = N->getOperand(0);
4886   SDValue N1 = N->getOperand(1);
4887   EVT VT = N->getValueType(0);
4888   EVT CCVT = getSetCCResultType(VT);
4889   SDLoc DL(N);
4890 
4891   // fold (udiv c1, c2) -> c1/c2
4892   if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1}))
4893     return C;
4894 
4895   // fold vector ops
4896   if (VT.isVector())
4897     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
4898       return FoldedVOp;
4899 
4900   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4901   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4902   if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) {
4903     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4904                          DAG.getConstant(1, DL, VT),
4905                          DAG.getConstant(0, DL, VT));
4906   }
4907 
4908   if (SDValue V = simplifyDivRem(N, DAG))
4909     return V;
4910 
4911   if (SDValue NewSel = foldBinOpIntoSelect(N))
4912     return NewSel;
4913 
4914   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4915     // If the corresponding remainder node exists, update its users with
    // (Dividend - (Quotient * Divisor)).
4917     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4918                                               { N0, N1 })) {
4919       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4920       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4921       AddToWorklist(Mul.getNode());
4922       AddToWorklist(Sub.getNode());
4923       CombineTo(RemNode, Sub);
4924     }
4925     return V;
4926   }
4927 
  // udiv, urem -> udivrem
4929   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4930   // true.  Otherwise, we break the simplification logic in visitREM().
4931   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4932   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4933     if (SDValue DivRem = useDivRem(N))
4934         return DivRem;
4935 
4936   return SDValue();
4937 }
4938 
4939 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4940   SDLoc DL(N);
4941   EVT VT = N->getValueType(0);
4942 
4943   // fold (udiv x, (1 << c)) -> x >>u c
4944   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) {
4945     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
4946       AddToWorklist(LogBase2.getNode());
4947 
4948       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4949       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4950       AddToWorklist(Trunc.getNode());
4951       return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4952     }
4953   }
4954 
4955   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4956   if (N1.getOpcode() == ISD::SHL) {
4957     SDValue N10 = N1.getOperand(0);
4958     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) {
4959       if (SDValue LogBase2 = BuildLogBase2(N10, DL)) {
4960         AddToWorklist(LogBase2.getNode());
4961 
4962         EVT ADDVT = N1.getOperand(1).getValueType();
4963         SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4964         AddToWorklist(Trunc.getNode());
4965         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4966         AddToWorklist(Add.getNode());
4967         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4968       }
4969     }
4970   }
4971 
4972   // fold (udiv x, c) -> alternate
4973   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4974   if (isConstantOrConstantVector(N1) &&
4975       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4976     if (SDValue Op = BuildUDIV(N))
4977       return Op;
4978 
4979   return SDValue();
4980 }
4981 
4982 SDValue DAGCombiner::buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N) {
4983   if (!N->getFlags().hasExact() && isDivisorPowerOfTwo(N1) &&
4984       !DAG.doesNodeExist(ISD::SDIV, N->getVTList(), {N0, N1})) {
4985     // Target-specific implementation of srem x, pow2.
4986     if (SDValue Res = BuildSREMPow2(N))
4987       return Res;
4988   }
4989   return SDValue();
4990 }
4991 
// Handles ISD::SREM and ISD::UREM.
4993 SDValue DAGCombiner::visitREM(SDNode *N) {
4994   unsigned Opcode = N->getOpcode();
4995   SDValue N0 = N->getOperand(0);
4996   SDValue N1 = N->getOperand(1);
4997   EVT VT = N->getValueType(0);
4998   EVT CCVT = getSetCCResultType(VT);
4999 
5000   bool isSigned = (Opcode == ISD::SREM);
5001   SDLoc DL(N);
5002 
5003   // fold (rem c1, c2) -> c1%c2
5004   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5005     return C;
5006 
5007   // fold (urem X, -1) -> select(FX == -1, 0, FX)
5008   // Freeze the numerator to avoid a miscompile with an undefined value.
5009   if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) &&
5010       CCVT.isVector() == VT.isVector()) {
5011     SDValue F0 = DAG.getFreeze(N0);
5012     SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ);
5013     return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0);
5014   }
5015 
5016   if (SDValue V = simplifyDivRem(N, DAG))
5017     return V;
5018 
5019   if (SDValue NewSel = foldBinOpIntoSelect(N))
5020     return NewSel;
5021 
5022   if (isSigned) {
5023     // If we know the sign bits of both operands are zero, strength reduce to a
5024     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
5025     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
5026       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
5027   } else {
5028     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
5029       // fold (urem x, pow2) -> (and x, pow2-1)
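      // e.g. (urem x, 8) --> (and x, 7)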
5030       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5031       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5032       AddToWorklist(Add.getNode());
5033       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5034     }
5035     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
5036     // fold (urem x, (lshr pow2, y)) -> (and x, (add (lshr pow2, y), -1))
5037     // TODO: We should sink the following into isKnownToBePowerOfTwo
    // using an OrZero parameter analogous to our handling in ValueTracking.
5039     if ((N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) &&
5040         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
5041       SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
5042       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
5043       AddToWorklist(Add.getNode());
5044       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
5045     }
5046   }
5047 
5048   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
5049 
5050   // If X/C can be simplified by the division-by-constant logic, lower
5051   // X%C to the equivalent of X-X/C*C.
5052   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
5053   // speculative DIV must not cause a DIVREM conversion.  We guard against this
5054   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
5055   // combine will not return a DIVREM.  Regardless, checking cheapness here
5056   // makes sense since the simplification results in fatter code.
5057   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
5058     if (isSigned) {
      // Check if we can build a faster implementation for srem.
5060       if (SDValue OptimizedRem = buildOptimizedSREM(N0, N1, N))
5061         return OptimizedRem;
5062     }
5063 
5064     SDValue OptimizedDiv =
5065         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
5066     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != N) {
5067       // If the equivalent Div node also exists, update its users.
5068       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
5069       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
5070                                                 { N0, N1 }))
5071         CombineTo(DivNode, OptimizedDiv);
5072       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
5073       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
5074       AddToWorklist(OptimizedDiv.getNode());
5075       AddToWorklist(Mul.getNode());
5076       return Sub;
5077     }
5078   }
5079 
  // sdiv, srem -> sdivrem / udiv, urem -> udivrem
5081   if (SDValue DivRem = useDivRem(N))
5082     return DivRem.getValue(1);
5083 
5084   return SDValue();
5085 }
5086 
5087 SDValue DAGCombiner::visitMULHS(SDNode *N) {
5088   SDValue N0 = N->getOperand(0);
5089   SDValue N1 = N->getOperand(1);
5090   EVT VT = N->getValueType(0);
5091   SDLoc DL(N);
5092 
5093   // fold (mulhs c1, c2)
5094   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1}))
5095     return C;
5096 
5097   // canonicalize constant to RHS.
5098   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5099       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5100     return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0);
5101 
5102   if (VT.isVector()) {
5103     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5104       return FoldedVOp;
5105 
5106     // fold (mulhs x, 0) -> 0
    // Do not return N1, because it may contain undef elements.
5108     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5109       return DAG.getConstant(0, DL, VT);
5110   }
5111 
5112   // fold (mulhs x, 0) -> 0
5113   if (isNullConstant(N1))
5114     return N1;
5115 
5116   // fold (mulhs x, 1) -> (sra x, size(x)-1)
5117   if (isOneConstant(N1))
5118     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
5119                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
5120                                        getShiftAmountTy(N0.getValueType())));
5121 
5122   // fold (mulhs x, undef) -> 0
5123   if (N0.isUndef() || N1.isUndef())
5124     return DAG.getConstant(0, DL, VT);
5125 
5126   // If the type twice as wide is legal, transform the mulhs to a wider multiply
5127   // plus a shift.
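  // e.g. for i32: mulhs x, y --> trunc ((sext x * sext y) >> 32), computed
  // in i64.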
5128   if (!TLI.isOperationLegalOrCustom(ISD::MULHS, VT) && VT.isSimple() &&
5129       !VT.isVector()) {
5130     MVT Simple = VT.getSimpleVT();
5131     unsigned SimpleSize = Simple.getSizeInBits();
5132     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5133     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5134       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5135       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5136       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5137       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5138             DAG.getConstant(SimpleSize, DL,
5139                             getShiftAmountTy(N1.getValueType())));
5140       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5141     }
5142   }
5143 
5144   return SDValue();
5145 }
5146 
5147 SDValue DAGCombiner::visitMULHU(SDNode *N) {
5148   SDValue N0 = N->getOperand(0);
5149   SDValue N1 = N->getOperand(1);
5150   EVT VT = N->getValueType(0);
5151   SDLoc DL(N);
5152 
5153   // fold (mulhu c1, c2)
5154   if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1}))
5155     return C;
5156 
5157   // canonicalize constant to RHS.
5158   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5159       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5160     return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0);
5161 
5162   if (VT.isVector()) {
5163     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5164       return FoldedVOp;
5165 
5166     // fold (mulhu x, 0) -> 0
    // Do not return N1, because it may contain undef elements.
5168     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
5169       return DAG.getConstant(0, DL, VT);
5170   }
5171 
5172   // fold (mulhu x, 0) -> 0
5173   if (isNullConstant(N1))
5174     return N1;
5175 
5176   // fold (mulhu x, 1) -> 0
5177   if (isOneConstant(N1))
5178     return DAG.getConstant(0, DL, N0.getValueType());
5179 
5180   // fold (mulhu x, undef) -> 0
5181   if (N0.isUndef() || N1.isUndef())
5182     return DAG.getConstant(0, DL, VT);
5183 
5184   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
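  // (mulhu x, 2^c) == (x * 2^c) >> bitwidth == x >> (bitwidth - c).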
5185   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
5186       hasOperation(ISD::SRL, VT)) {
5187     if (SDValue LogBase2 = BuildLogBase2(N1, DL)) {
5188       unsigned NumEltBits = VT.getScalarSizeInBits();
5189       SDValue SRLAmt = DAG.getNode(
5190           ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
5191       EVT ShiftVT = getShiftAmountTy(N0.getValueType());
5192       SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
5193       return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
5194     }
5195   }
5196 
5197   // If the type twice as wide is legal, transform the mulhu to a wider multiply
5198   // plus a shift.
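  // e.g. for i32: mulhu x, y --> trunc ((zext x * zext y) >> 32), computed
  // in i64.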
5199   if (!TLI.isOperationLegalOrCustom(ISD::MULHU, VT) && VT.isSimple() &&
5200       !VT.isVector()) {
5201     MVT Simple = VT.getSimpleVT();
5202     unsigned SimpleSize = Simple.getSizeInBits();
5203     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5204     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5205       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5206       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5207       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
5208       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
5209             DAG.getConstant(SimpleSize, DL,
5210                             getShiftAmountTy(N1.getValueType())));
5211       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
5212     }
5213   }
5214 
5215   // Simplify the operands using demanded-bits information.
5216   // We don't have demanded bits support for MULHU so this just enables constant
5217   // folding based on known bits.
5218   if (SimplifyDemandedBits(SDValue(N, 0)))
5219     return SDValue(N, 0);
5220 
5221   return SDValue();
5222 }
5223 
5224 SDValue DAGCombiner::visitAVG(SDNode *N) {
5225   unsigned Opcode = N->getOpcode();
5226   SDValue N0 = N->getOperand(0);
5227   SDValue N1 = N->getOperand(1);
5228   EVT VT = N->getValueType(0);
5229   SDLoc DL(N);
5230 
5231   // fold (avg c1, c2)
5232   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5233     return C;
5234 
5235   // canonicalize constant to RHS.
5236   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5237       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5238     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5239 
5240   if (VT.isVector()) {
5241     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5242       return FoldedVOp;
5243 
5244     // fold (avgfloor x, 0) -> x >> 1
5245     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5246       if (Opcode == ISD::AVGFLOORS)
5247         return DAG.getNode(ISD::SRA, DL, VT, N0, DAG.getConstant(1, DL, VT));
5248       if (Opcode == ISD::AVGFLOORU)
5249         return DAG.getNode(ISD::SRL, DL, VT, N0, DAG.getConstant(1, DL, VT));
5250     }
5251   }
5252 
5253   // fold (avg x, undef) -> x
5254   if (N0.isUndef())
5255     return N1;
5256   if (N1.isUndef())
5257     return N0;
5258 
5259   // Fold (avg x, x) --> x
5260   if (N0 == N1 && Level >= AfterLegalizeTypes)
5261     return N0;
5262 
  // TODO: If we use avg for scalars anywhere, we can add (avgfloor x, 0) -> x >> 1
5264 
5265   return SDValue();
5266 }
5267 
5268 SDValue DAGCombiner::visitABD(SDNode *N) {
5269   unsigned Opcode = N->getOpcode();
5270   SDValue N0 = N->getOperand(0);
5271   SDValue N1 = N->getOperand(1);
5272   EVT VT = N->getValueType(0);
5273   SDLoc DL(N);
5274 
5275   // fold (abd c1, c2)
5276   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5277     return C;
5278 
5279   // canonicalize constant to RHS.
5280   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5281       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5282     return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
5283 
5284   if (VT.isVector()) {
5285     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5286       return FoldedVOp;
5287 
5288     // fold (abds x, 0) -> abs x
5289     // fold (abdu x, 0) -> x
5290     if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
5291       if (Opcode == ISD::ABDS)
5292         return DAG.getNode(ISD::ABS, DL, VT, N0);
5293       if (Opcode == ISD::ABDU)
5294         return N0;
5295     }
5296   }
5297 
5298   // fold (abd x, undef) -> 0
5299   if (N0.isUndef() || N1.isUndef())
5300     return DAG.getConstant(0, DL, VT);
5301 
  // fold (abds x, y) -> (abdu x, y) iff both args are known non-negative
5303   if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
5304       DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
5305     return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
5306 
5307   return SDValue();
5308 }
5309 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// the combined value, or an empty SDValue if no simplification was made.
5313 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
5314                                                 unsigned HiOp) {
5315   // If the high half is not needed, just compute the low half.
5316   bool HiExists = N->hasAnyUseOfValue(1);
5317   if (!HiExists && (!LegalOperations ||
5318                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
5319     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5320     return CombineTo(N, Res, Res);
5321   }
5322 
5323   // If the low half is not needed, just compute the high half.
5324   bool LoExists = N->hasAnyUseOfValue(0);
5325   if (!LoExists && (!LegalOperations ||
5326                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
5327     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5328     return CombineTo(N, Res, Res);
5329   }
5330 
  // If both halves are used, leave the node as it is.
5332   if (LoExists && HiExists)
5333     return SDValue();
5334 
5335   // If the two computed results can be simplified separately, separate them.
5336   if (LoExists) {
5337     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
5338     AddToWorklist(Lo.getNode());
5339     SDValue LoOpt = combine(Lo.getNode());
5340     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
5341         (!LegalOperations ||
5342          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
5343       return CombineTo(N, LoOpt, LoOpt);
5344   }
5345 
5346   if (HiExists) {
5347     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
5348     AddToWorklist(Hi.getNode());
5349     SDValue HiOpt = combine(Hi.getNode());
5350     if (HiOpt.getNode() && HiOpt != Hi &&
5351         (!LegalOperations ||
5352          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
5353       return CombineTo(N, HiOpt, HiOpt);
5354   }
5355 
5356   return SDValue();
5357 }
5358 
5359 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
5360   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
5361     return Res;
5362 
5363   SDValue N0 = N->getOperand(0);
5364   SDValue N1 = N->getOperand(1);
5365   EVT VT = N->getValueType(0);
5366   SDLoc DL(N);
5367 
5368   // Constant fold.
5369   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5370     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1);
5371 
5372   // canonicalize constant to RHS (vector doesn't have to splat)
5373   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5374       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5375     return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N1, N0);
5376 
  // If a type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
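  // Illustrative sketch of the rewrite below for VT = i32 with a legal i64
  // MUL:
  //   t  = (mul (sext x to i64), (sext y to i64))
  //   Lo = (trunc t to i32)
  //   Hi = (trunc (srl t, 32) to i32)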
5379   if (VT.isSimple() && !VT.isVector()) {
5380     MVT Simple = VT.getSimpleVT();
5381     unsigned SimpleSize = Simple.getSizeInBits();
5382     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5383     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5384       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
5385       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
5386       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high half of the result by shifting the wide product down.
5388       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5389             DAG.getConstant(SimpleSize, DL,
5390                             getShiftAmountTy(Lo.getValueType())));
5391       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low half by truncating the wide product.
5393       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5394       return CombineTo(N, Lo, Hi);
5395     }
5396   }
5397 
5398   return SDValue();
5399 }
5400 
5401 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
5402   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
5403     return Res;
5404 
5405   SDValue N0 = N->getOperand(0);
5406   SDValue N1 = N->getOperand(1);
5407   EVT VT = N->getValueType(0);
5408   SDLoc DL(N);
5409 
5410   // Constant fold.
5411   if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1))
5412     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1);
5413 
5414   // canonicalize constant to RHS (vector doesn't have to splat)
5415   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5416       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5417     return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N1, N0);
5418 
5419   // (umul_lohi N0, 0) -> (0, 0)
5420   if (isNullConstant(N1)) {
5421     SDValue Zero = DAG.getConstant(0, DL, VT);
5422     return CombineTo(N, Zero, Zero);
5423   }
5424 
5425   // (umul_lohi N0, 1) -> (N0, 0)
5426   if (isOneConstant(N1)) {
5427     SDValue Zero = DAG.getConstant(0, DL, VT);
5428     return CombineTo(N, N0, Zero);
5429   }
5430 
  // If a type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
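  // Illustrative sketch of the rewrite below for VT = i32 with a legal i64
  // MUL:
  //   t  = (mul (zext x to i64), (zext y to i64))
  //   Lo = (trunc t to i32)
  //   Hi = (trunc (srl t, 32) to i32)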
5433   if (VT.isSimple() && !VT.isVector()) {
5434     MVT Simple = VT.getSimpleVT();
5435     unsigned SimpleSize = Simple.getSizeInBits();
5436     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
5437     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
5438       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
5439       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
5440       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high half of the result by shifting the wide product down.
5442       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
5443             DAG.getConstant(SimpleSize, DL,
5444                             getShiftAmountTy(Lo.getValueType())));
5445       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low half by truncating the wide product.
5447       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
5448       return CombineTo(N, Lo, Hi);
5449     }
5450   }
5451 
5452   return SDValue();
5453 }
5454 
5455 SDValue DAGCombiner::visitMULO(SDNode *N) {
5456   SDValue N0 = N->getOperand(0);
5457   SDValue N1 = N->getOperand(1);
5458   EVT VT = N0.getValueType();
5459   bool IsSigned = (ISD::SMULO == N->getOpcode());
5460 
5461   EVT CarryVT = N->getValueType(1);
5462   SDLoc DL(N);
5463 
5464   ConstantSDNode *N0C = isConstOrConstSplat(N0);
5465   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5466 
5467   // fold operation with constant operands.
5468   // TODO: Move this to FoldConstantArithmetic when it supports nodes with
5469   // multiple results.
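  // For example (illustrative): (umulo i8 200, 2) folds to the pair
  // (144, carry = 1), since 400 wraps to 144 in 8 bits.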
5470   if (N0C && N1C) {
5471     bool Overflow;
5472     APInt Result =
5473         IsSigned ? N0C->getAPIntValue().smul_ov(N1C->getAPIntValue(), Overflow)
5474                  : N0C->getAPIntValue().umul_ov(N1C->getAPIntValue(), Overflow);
5475     return CombineTo(N, DAG.getConstant(Result, DL, VT),
5476                      DAG.getBoolConstant(Overflow, DL, CarryVT, CarryVT));
5477   }
5478 
5479   // canonicalize constant to RHS.
5480   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5481       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5482     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
5483 
5484   // fold (mulo x, 0) -> 0 + no carry out
5485   if (isNullOrNullSplat(N1))
5486     return CombineTo(N, DAG.getConstant(0, DL, VT),
5487                      DAG.getConstant(0, DL, CarryVT));
5488 
5489   // (mulo x, 2) -> (addo x, x)
5490   // FIXME: This needs a freeze.
5491   if (N1C && N1C->getAPIntValue() == 2 &&
5492       (!IsSigned || VT.getScalarSizeInBits() > 2))
5493     return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
5494                        N->getVTList(), N0, N0);
5495 
  // A 1 bit SMULO overflows if both inputs are 1, i.e. both are -1 and the
  // product +1 is not representable in 1 bit.
5497   if (IsSigned && VT.getScalarSizeInBits() == 1) {
5498     SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1);
5499     SDValue Cmp = DAG.getSetCC(DL, CarryVT, And,
5500                                DAG.getConstant(0, DL, VT), ISD::SETNE);
5501     return CombineTo(N, And, Cmp);
5502   }
5503 
5504   // If it cannot overflow, transform into a mul.
5505   if (DAG.willNotOverflowMul(IsSigned, N0, N1))
5506     return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1),
5507                      DAG.getConstant(0, DL, CarryVT));
5508   return SDValue();
5509 }
5510 
// Function to calculate whether the Min/Max pair of SDNodes (potentially
// swapped around) make a signed saturate pattern, clamping to between a signed
// saturate of -2^(BW-1) and 2^(BW-1)-1, or an unsigned saturate of 0 and
// 2^BW-1. Returns the node being clamped and the bitwidth of the clamp in BW.
// Should work with both SMIN/SMAX nodes and setcc/select combos. The operands
// are the same as SimplifySelectCC: N0 < N1 ? N2 : N3.
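// For example (illustrative):
//   smin(smax(x, -128), 127) clamps to a signed 8-bit saturate (BW = 8), and
//   smax(smin(x, 255), 0) clamps to an unsigned 8-bit saturate (BW = 8).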
5517 static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
5518                                   SDValue N3, ISD::CondCode CC, unsigned &BW,
5519                                   bool &Unsigned, SelectionDAG &DAG) {
5520   auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
5521                             ISD::CondCode CC) {
5522     // The compare and select operand should be the same or the select operands
5523     // should be truncated versions of the comparison.
5524     if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
5525       return 0;
5526     // The constants need to be the same or a truncated version of each other.
5527     ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1));
5528     ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3));
5529     if (!N1C || !N3C)
5530       return 0;
5531     const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits());
5532     const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits());
5533     if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth()))
5534       return 0;
5535     return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
5536   };
5537 
5538   // Check the initial value is a SMIN/SMAX equivalent.
5539   unsigned Opcode0 = isSignedMinMax(N0, N1, N2, N3, CC);
5540   if (!Opcode0)
5541     return SDValue();
5542 
  // We may need only one range check if the fptosi can never produce the
  // upper value.
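  // For example (illustrative): in smax(fptosi(x : f16) to i32, 0), every
  // finite f16 already fits in the signed i32 range, so the upper clamp is
  // unnecessary and the clamp at 0 alone forms an unsigned saturate.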
5545   if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
5546     if (isNullOrNullSplat(N3)) {
5547       EVT IntVT = N0.getValueType().getScalarType();
5548       EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
5549       if (FPVT.isSimple()) {
5550         Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
5551         const fltSemantics &Semantics = InputTy->getFltSemantics();
5552         uint32_t MinBitWidth =
5553           APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
5554         if (IntVT.getSizeInBits() >= MinBitWidth) {
5555           Unsigned = true;
5556           BW = PowerOf2Ceil(MinBitWidth);
5557           return N0;
5558         }
5559       }
5560     }
5561   }
5562 
5563   SDValue N00, N01, N02, N03;
5564   ISD::CondCode N0CC;
5565   switch (N0.getOpcode()) {
5566   case ISD::SMIN:
5567   case ISD::SMAX:
5568     N00 = N02 = N0.getOperand(0);
5569     N01 = N03 = N0.getOperand(1);
5570     N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
5571     break;
5572   case ISD::SELECT_CC:
5573     N00 = N0.getOperand(0);
5574     N01 = N0.getOperand(1);
5575     N02 = N0.getOperand(2);
5576     N03 = N0.getOperand(3);
5577     N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
5578     break;
5579   case ISD::SELECT:
5580   case ISD::VSELECT:
5581     if (N0.getOperand(0).getOpcode() != ISD::SETCC)
5582       return SDValue();
5583     N00 = N0.getOperand(0).getOperand(0);
5584     N01 = N0.getOperand(0).getOperand(1);
5585     N02 = N0.getOperand(1);
5586     N03 = N0.getOperand(2);
5587     N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
5588     break;
5589   default:
5590     return SDValue();
5591   }
5592 
5593   unsigned Opcode1 = isSignedMinMax(N00, N01, N02, N03, N0CC);
5594   if (!Opcode1 || Opcode0 == Opcode1)
5595     return SDValue();
5596 
5597   ConstantSDNode *MinCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N1 : N01);
5598   ConstantSDNode *MaxCOp = isConstOrConstSplat(Opcode0 == ISD::SMIN ? N01 : N1);
5599   if (!MinCOp || !MaxCOp || MinCOp->getValueType(0) != MaxCOp->getValueType(0))
5600     return SDValue();
5601 
5602   const APInt &MinC = MinCOp->getAPIntValue();
5603   const APInt &MaxC = MaxCOp->getAPIntValue();
5604   APInt MinCPlus1 = MinC + 1;
5605   if (-MaxC == MinCPlus1 && MinCPlus1.isPowerOf2()) {
5606     BW = MinCPlus1.exactLogBase2() + 1;
5607     Unsigned = false;
5608     return N02;
5609   }
5610 
5611   if (MaxC == 0 && MinCPlus1.isPowerOf2()) {
5612     BW = MinCPlus1.exactLogBase2();
5613     Unsigned = true;
5614     return N02;
5615   }
5616 
5617   return SDValue();
5618 }
5619 
5620 static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5621                                            SDValue N3, ISD::CondCode CC,
5622                                            SelectionDAG &DAG) {
5623   unsigned BW;
5624   bool Unsigned;
5625   SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
5626   if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
5627     return SDValue();
5628   EVT FPVT = Fp.getOperand(0).getValueType();
5629   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5630   if (FPVT.isVector())
5631     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5632                              FPVT.getVectorElementCount());
5633   unsigned NewOpc = Unsigned ? ISD::FP_TO_UINT_SAT : ISD::FP_TO_SINT_SAT;
5634   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(NewOpc, FPVT, NewVT))
5635     return SDValue();
5636   SDLoc DL(Fp);
5637   SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
5638                             DAG.getValueType(NewVT.getScalarType()));
5639   return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
5640 }
5641 
5642 static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
5643                                          SDValue N3, ISD::CondCode CC,
5644                                          SelectionDAG &DAG) {
5645   // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
  // select/vselect/select_cc. The two operand pairs for the select (N2/N3) may
5647   // be truncated versions of the setcc (N0/N1).
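  // For example (illustrative): umin(fptoui(x), 255) becomes
  // (fp_to_uint_sat x to i8), zero-extended back to the original type.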
5648   if ((N0 != N2 &&
5649        (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
5650       N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
5651     return SDValue();
5652   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5653   ConstantSDNode *N3C = isConstOrConstSplat(N3);
5654   if (!N1C || !N3C)
5655     return SDValue();
5656   const APInt &C1 = N1C->getAPIntValue();
5657   const APInt &C3 = N3C->getAPIntValue();
5658   if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
5659       C1 != C3.zext(C1.getBitWidth()))
5660     return SDValue();
5661 
5662   unsigned BW = (C1 + 1).exactLogBase2();
5663   EVT FPVT = N0.getOperand(0).getValueType();
5664   EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
5665   if (FPVT.isVector())
5666     NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
5667                              FPVT.getVectorElementCount());
5668   if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
5669                                                         FPVT, NewVT))
5670     return SDValue();
5671 
5672   SDValue Sat =
5673       DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
5674                   DAG.getValueType(NewVT.getScalarType()));
5675   return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
5676 }
5677 
5678 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
5679   SDValue N0 = N->getOperand(0);
5680   SDValue N1 = N->getOperand(1);
5681   EVT VT = N0.getValueType();
5682   unsigned Opcode = N->getOpcode();
5683   SDLoc DL(N);
5684 
5685   // fold operation with constant operands.
5686   if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
5687     return C;
5688 
5689   // If the operands are the same, this is a no-op.
5690   if (N0 == N1)
5691     return N0;
5692 
5693   // canonicalize constant to RHS
5694   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5695       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5696     return DAG.getNode(Opcode, DL, VT, N1, N0);
5697 
5698   // fold vector ops
5699   if (VT.isVector())
5700     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
5701       return FoldedVOp;
5702 
  // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
5704   // Only do this if the current op isn't legal and the flipped is.
5705   if (!TLI.isOperationLegal(Opcode, VT) &&
5706       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
5707       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
5708     unsigned AltOpcode;
5709     switch (Opcode) {
5710     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
5711     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
5712     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
5713     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
5714     default: llvm_unreachable("Unknown MINMAX opcode");
5715     }
5716     if (TLI.isOperationLegal(AltOpcode, VT))
5717       return DAG.getNode(AltOpcode, DL, VT, N0, N1);
5718   }
5719 
5720   if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
5721     if (SDValue S = PerformMinMaxFpToSatCombine(
5722             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
5723       return S;
5724   if (Opcode == ISD::UMIN)
5725     if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
5726       return S;
5727 
5728   // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
5729   auto ReductionOpcode = [](unsigned Opcode) {
5730     switch (Opcode) {
5731     case ISD::SMIN:
5732       return ISD::VECREDUCE_SMIN;
5733     case ISD::SMAX:
5734       return ISD::VECREDUCE_SMAX;
5735     case ISD::UMIN:
5736       return ISD::VECREDUCE_UMIN;
5737     case ISD::UMAX:
5738       return ISD::VECREDUCE_UMAX;
5739     default:
5740       llvm_unreachable("Unexpected opcode");
5741     }
5742   };
5743   if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
5744                                         SDLoc(N), VT, N0, N1))
5745     return SD;
5746 
5747   // Simplify the operands using demanded-bits information.
5748   if (SimplifyDemandedBits(SDValue(N, 0)))
5749     return SDValue(N, 0);
5750 
5751   return SDValue();
5752 }
5753 
5754 /// If this is a bitwise logic instruction and both operands have the same
5755 /// opcode, try to sink the other opcode after the logic instruction.
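/// For example (illustrative): (and (zext x), (zext y)) can be rewritten as
/// (zext (and x, y)), performing the logic op on the narrower type.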
5756 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
5757   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
5758   EVT VT = N0.getValueType();
5759   unsigned LogicOpcode = N->getOpcode();
5760   unsigned HandOpcode = N0.getOpcode();
5761   assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
5762   assert(HandOpcode == N1.getOpcode() && "Bad input!");
5763 
5764   // Bail early if none of these transforms apply.
5765   if (N0.getNumOperands() == 0)
5766     return SDValue();
5767 
5768   // FIXME: We should check number of uses of the operands to not increase
5769   //        the instruction count for all transforms.
5770 
5771   // Handle size-changing casts (or sign_extend_inreg).
5772   SDValue X = N0.getOperand(0);
5773   SDValue Y = N1.getOperand(0);
5774   EVT XVT = X.getValueType();
5775   SDLoc DL(N);
5776   if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
5777       (HandOpcode == ISD::SIGN_EXTEND_INREG &&
5778        N0.getOperand(1) == N1.getOperand(1))) {
5779     // If both operands have other uses, this transform would create extra
5780     // instructions without eliminating anything.
5781     if (!N0.hasOneUse() && !N1.hasOneUse())
5782       return SDValue();
5783     // We need matching integer source types.
5784     if (XVT != Y.getValueType())
5785       return SDValue();
5786     // Don't create an illegal op during or after legalization. Don't ever
5787     // create an unsupported vector op.
5788     if ((VT.isVector() || LegalOperations) &&
5789         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
5790       return SDValue();
5791     // Avoid infinite looping with PromoteIntBinOp.
5792     // TODO: Should we apply desirable/legal constraints to all opcodes?
5793     if ((HandOpcode == ISD::ANY_EXTEND ||
5794          HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
5795         LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
5796       return SDValue();
5797     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
5798     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5799     if (HandOpcode == ISD::SIGN_EXTEND_INREG)
5800       return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5801     return DAG.getNode(HandOpcode, DL, VT, Logic);
5802   }
5803 
5804   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
5805   if (HandOpcode == ISD::TRUNCATE) {
5806     // If both operands have other uses, this transform would create extra
5807     // instructions without eliminating anything.
5808     if (!N0.hasOneUse() && !N1.hasOneUse())
5809       return SDValue();
5810     // We need matching source types.
5811     if (XVT != Y.getValueType())
5812       return SDValue();
5813     // Don't create an illegal op during or after legalization.
5814     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
5815       return SDValue();
5816     // Be extra careful sinking truncate. If it's free, there's no benefit in
5817     // widening a binop. Also, don't create a logic op on an illegal type.
5818     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
5819       return SDValue();
5820     if (!TLI.isTypeLegal(XVT))
5821       return SDValue();
5822     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5823     return DAG.getNode(HandOpcode, DL, VT, Logic);
5824   }
5825 
5826   // For binops SHL/SRL/SRA/AND:
5827   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
5828   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
5829        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
5830       N0.getOperand(1) == N1.getOperand(1)) {
5831     // If either operand has other uses, this transform is not an improvement.
5832     if (!N0.hasOneUse() || !N1.hasOneUse())
5833       return SDValue();
5834     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5835     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
5836   }
5837 
5838   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
5839   if (HandOpcode == ISD::BSWAP) {
5840     // If either operand has other uses, this transform is not an improvement.
5841     if (!N0.hasOneUse() || !N1.hasOneUse())
5842       return SDValue();
5843     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5844     return DAG.getNode(HandOpcode, DL, VT, Logic);
5845   }
5846 
5847   // For funnel shifts FSHL/FSHR:
5848   // logic_op (OP x, x1, s), (OP y, y1, s) -->
5849   // --> OP (logic_op x, y), (logic_op, x1, y1), s
5850   if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) &&
5851       N0.getOperand(2) == N1.getOperand(2)) {
5852     if (!N0.hasOneUse() || !N1.hasOneUse())
5853       return SDValue();
5854     SDValue X1 = N0.getOperand(1);
5855     SDValue Y1 = N1.getOperand(1);
5856     SDValue S = N0.getOperand(2);
5857     SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y);
5858     SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1);
5859     return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S);
5860   }
5861 
5862   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
5863   // Only perform this optimization up until type legalization, before
  // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
5865   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
5866   // we don't want to undo this promotion.
5867   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
5868   // on scalars.
5869   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
5870        Level <= AfterLegalizeTypes) {
5871     // Input types must be integer and the same.
5872     if (XVT.isInteger() && XVT == Y.getValueType() &&
5873         !(VT.isVector() && TLI.isTypeLegal(VT) &&
5874           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
5875       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
5876       return DAG.getNode(HandOpcode, DL, VT, Logic);
5877     }
5878   }
5879 
5880   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
5881   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
5882   // If both shuffles use the same mask, and both shuffle within a single
5883   // vector, then it is worthwhile to move the swizzle after the operation.
5884   // The type-legalizer generates this pattern when loading illegal
5885   // vector types from memory. In many cases this allows additional shuffle
5886   // optimizations.
5887   // There are other cases where moving the shuffle after the xor/and/or
5888   // is profitable even if shuffles don't perform a swizzle.
5889   // If both shuffles use the same mask, and both shuffles have the same first
5890   // or second operand, then it might still be profitable to move the shuffle
5891   // after the xor/and/or operation.
5892   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
5893     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
5894     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
5895     assert(X.getValueType() == Y.getValueType() &&
5896            "Inputs to shuffles are not the same type");
5897 
5898     // Check that both shuffles use the same mask. The masks are known to be of
5899     // the same length because the result vector type is the same.
5900     // Check also that shuffles have only one use to avoid introducing extra
5901     // instructions.
5902     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
5903         !SVN0->getMask().equals(SVN1->getMask()))
5904       return SDValue();
5905 
5906     // Don't try to fold this node if it requires introducing a
5907     // build vector of all zeros that might be illegal at this stage.
5908     SDValue ShOp = N0.getOperand(1);
5909     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5910       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5911 
5912     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
5913     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
5914       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
5915                                   N0.getOperand(0), N1.getOperand(0));
5916       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
5917     }
5918 
5919     // Don't try to fold this node if it requires introducing a
5920     // build vector of all zeros that might be illegal at this stage.
5921     ShOp = N0.getOperand(0);
5922     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
5923       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
5924 
5925     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
5926     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
5927       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
5928                                   N1.getOperand(1));
5929       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
5930     }
5931   }
5932 
5933   return SDValue();
5934 }
5935 
5936 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
5937 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
5938                                        const SDLoc &DL) {
5939   SDValue LL, LR, RL, RR, N0CC, N1CC;
5940   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
5941       !isSetCCEquivalent(N1, RL, RR, N1CC))
5942     return SDValue();
5943 
5944   assert(N0.getValueType() == N1.getValueType() &&
5945          "Unexpected operand types for bitwise logic op");
5946   assert(LL.getValueType() == LR.getValueType() &&
5947          RL.getValueType() == RR.getValueType() &&
5948          "Unexpected operand types for setcc");
5949 
5950   // If we're here post-legalization or the logic op type is not i1, the logic
5951   // op type must match a setcc result type. Also, all folds require new
5952   // operations on the left and right operands, so those types must match.
5953   EVT VT = N0.getValueType();
5954   EVT OpVT = LL.getValueType();
5955   if (LegalOperations || VT.getScalarType() != MVT::i1)
5956     if (VT != getSetCCResultType(OpVT))
5957       return SDValue();
5958   if (OpVT != RL.getValueType())
5959     return SDValue();
5960 
5961   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
5962   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
5963   bool IsInteger = OpVT.isInteger();
5964   if (LR == RR && CC0 == CC1 && IsInteger) {
5965     bool IsZero = isNullOrNullSplat(LR);
5966     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
5967 
5968     // All bits clear?
5969     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
5970     // All sign bits clear?
5971     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
5972     // Any bits set?
5973     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
5974     // Any sign bits set?
5975     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
5976 
5977     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
5978     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
5979     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
5980     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
5981     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
5982       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
5983       AddToWorklist(Or.getNode());
5984       return DAG.getSetCC(DL, VT, Or, LR, CC1);
5985     }
5986 
5987     // All bits set?
5988     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
5989     // All sign bits set?
5990     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
5991     // Any bits clear?
5992     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
5993     // Any sign bits clear?
5994     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
5995 
5996     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
5997     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
5998     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
    // (or  (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
6000     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
6001       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
6002       AddToWorklist(And.getNode());
6003       return DAG.getSetCC(DL, VT, And, LR, CC1);
6004     }
6005   }
6006 
6007   // TODO: What is the 'or' equivalent of this fold?
6008   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
6009   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
6010       IsInteger && CC0 == ISD::SETNE &&
6011       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
6012        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
6013     SDValue One = DAG.getConstant(1, DL, OpVT);
6014     SDValue Two = DAG.getConstant(2, DL, OpVT);
6015     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
6016     AddToWorklist(Add.getNode());
6017     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
6018   }
6019 
6020   // Try more general transforms if the predicates match and the only user of
6021   // the compares is the 'and' or 'or'.
6022   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
6023       N0.hasOneUse() && N1.hasOneUse()) {
6024     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
6025     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
6026     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
6027       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
6028       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
6029       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
6030       SDValue Zero = DAG.getConstant(0, DL, OpVT);
6031       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
6032     }
6033 
6034     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
6035     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
6036       // Match a shared variable operand and 2 non-opaque constant operands.
6037       auto MatchDiffPow2 = [&](ConstantSDNode *C0, ConstantSDNode *C1) {
6038         // The difference of the constants must be a single bit.
6039         const APInt &CMax =
6040             APIntOps::umax(C0->getAPIntValue(), C1->getAPIntValue());
6041         const APInt &CMin =
6042             APIntOps::umin(C0->getAPIntValue(), C1->getAPIntValue());
6043         return !C0->isOpaque() && !C1->isOpaque() && (CMax - CMin).isPowerOf2();
6044       };
6045       if (LL == RL && ISD::matchBinaryPredicate(LR, RR, MatchDiffPow2)) {
6046         // and/or (setcc X, CMax, ne), (setcc X, CMin, ne/eq) -->
        // setcc (and (sub X, CMin), ~(CMax - CMin)), 0, ne/eq
6048         SDValue Max = DAG.getNode(ISD::UMAX, DL, OpVT, LR, RR);
6049         SDValue Min = DAG.getNode(ISD::UMIN, DL, OpVT, LR, RR);
6050         SDValue Offset = DAG.getNode(ISD::SUB, DL, OpVT, LL, Min);
6051         SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, Max, Min);
6052         SDValue Mask = DAG.getNOT(DL, Diff, OpVT);
6053         SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Offset, Mask);
6054         SDValue Zero = DAG.getConstant(0, DL, OpVT);
6055         return DAG.getSetCC(DL, VT, And, Zero, CC0);
6056       }
6057     }
6058   }
6059 
6060   // Canonicalize equivalent operands to LL == RL.
6061   if (LL == RR && LR == RL) {
6062     CC1 = ISD::getSetCCSwappedOperands(CC1);
6063     std::swap(RL, RR);
6064   }
6065 
6066   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6067   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
6068   if (LL == RL && LR == RR) {
6069     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, OpVT)
6070                                 : ISD::getSetCCOrOperation(CC0, CC1, OpVT);
6071     if (NewCC != ISD::SETCC_INVALID &&
6072         (!LegalOperations ||
6073          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
6074           TLI.isOperationLegal(ISD::SETCC, OpVT))))
6075       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
6076   }
6077 
6078   return SDValue();
6079 }
6080 
6081 static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2,
6082                                    SelectionDAG &DAG) {
6083   return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1);
6084 }
6085 
6086 static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2,
6087                                   SelectionDAG &DAG) {
6088   return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1);
6089 }
6090 
6091 static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2,
6092                                      ISD::CondCode CC, unsigned OrAndOpcode,
6093                                      SelectionDAG &DAG,
6094                                      bool isFMAXNUMFMINNUM_IEEE,
6095                                      bool isFMAXNUMFMINNUM) {
6096   // The optimization cannot be applied for all the predicates because
6097   // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle
6098   // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be
6099   // applied at all if one of the operands is a signaling NaN.
6100 
  // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands
  // are non-NaN values.
6103   if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) ||
6104       ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND)))
6105     return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6106                    isFMAXNUMFMINNUM_IEEE
6107                ? ISD::FMINNUM_IEEE
6108                : ISD::DELETED_NODE;
6109   else if (((CC == ISD::SETGT || CC == ISD::SETGE) &&
6110             (OrAndOpcode == ISD::OR)) ||
6111            ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6112             (OrAndOpcode == ISD::AND)))
6113     return arebothOperandsNotNan(Operand1, Operand2, DAG) &&
6114                    isFMAXNUMFMINNUM_IEEE
6115                ? ISD::FMAXNUM_IEEE
6116                : ISD::DELETED_NODE;
6117   // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet
6118   // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/
6119   // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove
6120   // that there are not any sNaNs, then the optimization is not valid
6121   // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply
6122   // the optimization using FMINNUM/FMAXNUM for the following cases. If
6123   // we can prove that we do not have any sNaNs, then we can do the
6124   // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following
6125   // cases.
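  // For example (illustrative): for (setolt x, y) feeding an OR, FMINNUM is
  // used when it is legal; otherwise FMINNUM_IEEE is used only if neither
  // operand can be a signaling NaN.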
6126   else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) &&
6127             (OrAndOpcode == ISD::OR)) ||
6128            ((CC == ISD::SETUGT || CC == ISD::SETUGE) &&
6129             (OrAndOpcode == ISD::AND)))
6130     return isFMAXNUMFMINNUM ? ISD::FMINNUM
6131                             : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6132                                       isFMAXNUMFMINNUM_IEEE
6133                                   ? ISD::FMINNUM_IEEE
6134                                   : ISD::DELETED_NODE;
6135   else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) &&
6136             (OrAndOpcode == ISD::OR)) ||
6137            ((CC == ISD::SETULT || CC == ISD::SETULE) &&
6138             (OrAndOpcode == ISD::AND)))
6139     return isFMAXNUMFMINNUM ? ISD::FMAXNUM
6140                             : arebothOperandsNotSNan(Operand1, Operand2, DAG) &&
6141                                       isFMAXNUMFMINNUM_IEEE
6142                                   ? ISD::FMAXNUM_IEEE
6143                                   : ISD::DELETED_NODE;
6144   return ISD::DELETED_NODE;
6145 }
6146 
6147 static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
6148   using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
6149   assert(
6150       (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
6151       "Invalid Op to combine SETCC with");
6152 
6153   // TODO: Search past casts/truncates.
6154   SDValue LHS = LogicOp->getOperand(0);
6155   SDValue RHS = LogicOp->getOperand(1);
6156   if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC ||
6157       !LHS->hasOneUse() || !RHS->hasOneUse())
6158     return SDValue();
6159 
6160   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6161   AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
6162       LogicOp, LHS.getNode(), RHS.getNode());
6163 
6164   SDValue LHS0 = LHS->getOperand(0);
6165   SDValue RHS0 = RHS->getOperand(0);
6166   SDValue LHS1 = LHS->getOperand(1);
6167   SDValue RHS1 = RHS->getOperand(1);
  // TODO: We don't actually need a splat here; for vectors we just need the
6169   // invariants to hold for each element.
6170   auto *LHS1C = isConstOrConstSplat(LHS1);
6171   auto *RHS1C = isConstOrConstSplat(RHS1);
6172   ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
6173   ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
6174   EVT VT = LogicOp->getValueType(0);
6175   EVT OpVT = LHS0.getValueType();
6176   SDLoc DL(LogicOp);
6177 
6178   // Check if the operands of an and/or operation are comparisons and if they
  // compare against the same value. Replace the and/or-cmp-cmp sequence with a
  // min/max-cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
  // sequence will be replaced with a min-cmp sequence:
  // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
  // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
  // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1
  // The optimization does not work for `==` or `!=`.
  // The two comparisons should either have the same predicate, or the
  // predicate of one comparison should be the operand-swapped form of the
  // other.
6188   bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) &&
6189                                TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT);
6190   bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) &&
6191                           TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT);
6192   if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
6193         TLI.isOperationLegal(ISD::SMAX, OpVT) &&
6194         TLI.isOperationLegal(ISD::UMIN, OpVT) &&
6195         TLI.isOperationLegal(ISD::SMIN, OpVT)) ||
6196        (OpVT.isFloatingPoint() &&
6197         (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) &&
6198       !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) &&
6199       CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO &&
6200       CCL != ISD::SETTRUE &&
6201       (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) {
6202 
6203     SDValue CommonValue, Operand1, Operand2;
6204     ISD::CondCode CC = ISD::SETCC_INVALID;
6205     if (CCL == CCR) {
6206       if (LHS0 == RHS0) {
6207         CommonValue = LHS0;
6208         Operand1 = LHS1;
6209         Operand2 = RHS1;
6210         CC = ISD::getSetCCSwappedOperands(CCL);
6211       } else if (LHS1 == RHS1) {
6212         CommonValue = LHS1;
6213         Operand1 = LHS0;
6214         Operand2 = RHS0;
6215         CC = CCL;
6216       }
6217     } else {
6218       assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
6219       if (LHS0 == RHS1) {
6220         CommonValue = LHS0;
6221         Operand1 = LHS1;
6222         Operand2 = RHS0;
6223         CC = CCR;
6224       } else if (RHS0 == LHS1) {
6225         CommonValue = LHS1;
6226         Operand1 = LHS0;
6227         Operand2 = RHS1;
6228         CC = CCL;
6229       }
6230     }
6231 
6232     // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs
6233     // handle it using OR/AND.
6234     if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue))
6235       CC = ISD::SETCC_INVALID;
6236     else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue))
6237       CC = ISD::SETCC_INVALID;
6238 
6239     if (CC != ISD::SETCC_INVALID) {
6240       unsigned NewOpcode = ISD::DELETED_NODE;
6241       bool IsSigned = isSignedIntSetCC(CC);
6242       if (OpVT.isInteger()) {
6243         bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE ||
6244                        CC == ISD::SETLT || CC == ISD::SETULT);
6245         bool IsOr = (LogicOp->getOpcode() == ISD::OR);
6246         if (IsLess == IsOr)
6247           NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
6248         else
6249           NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
6250       } else if (OpVT.isFloatingPoint())
6251         NewOpcode =
6252             getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(),
6253                                  DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM);
6254 
6255       if (NewOpcode != ISD::DELETED_NODE) {
6256         SDValue MinMaxValue =
6257             DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
6258         return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
6259       }
6260     }
6261   }
6262 
6263   if (TargetPreference == AndOrSETCCFoldKind::None)
6264     return SDValue();
6265 
6266   if (CCL == CCR &&
6267       CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
6268       LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) {
6269     const APInt &APLhs = LHS1C->getAPIntValue();
6270     const APInt &APRhs = RHS1C->getAPIntValue();
6271 
6272     // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
6273     // case this is just a compare).
6274     if (APLhs == (-APRhs) &&
6275         ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
6276          DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
6277       const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
6278       // (icmp eq A, C) | (icmp eq A, -C)
6279       //    -> (icmp eq Abs(A), C)
6280       // (icmp ne A, C) & (icmp ne A, -C)
6281       //    -> (icmp ne Abs(A), C)
6282       SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
6283       return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
6284                          DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
6285     } else if (TargetPreference &
6286                (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
6287 
6288       // AndOrSETCCFoldKind::AddAnd:
6289       // A == C0 | A == C1
6290       //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6291       //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
6292       // A != C0 & A != C1
6293       //  IF IsPow2(smax(C0, C1)-smin(C0, C1))
6294       //    -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
6295 
6296       // AndOrSETCCFoldKind::NotAnd:
6297       // A == C0 | A == C1
6298       //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6299       //    -> ~A & smin(C0, C1) == 0
6300       // A != C0 & A != C1
6301       //  IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
6302       //    -> ~A & smin(C0, C1) != 0
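      // Worked example (illustrative) for AddAnd: A == 8 | A == 24 has
      // smin = 8, smax = 24, and 24 - 8 = 16 is a power of 2, so it becomes
      // ((A - 8) & ~16) == 0.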
6303 
6304       const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
6305       const APInt &MinC = APIntOps::smin(APRhs, APLhs);
6306       APInt Dif = MaxC - MinC;
6307       if (!Dif.isZero() && Dif.isPowerOf2()) {
6308         if (MaxC.isAllOnes() &&
6309             (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
6310           SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
6311           SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
6312                                       DAG.getConstant(MinC, DL, OpVT));
6313           return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6314                              DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6315         } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
6316 
6317           SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
6318                                       DAG.getConstant(-MinC, DL, OpVT));
6319           SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
6320                                       DAG.getConstant(~Dif, DL, OpVT));
6321           return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
6322                              DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
6323         }
6324       }
6325     }
6326   }
6327 
6328   return SDValue();
6329 }
6330 
6331 // Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`.
6332 // We canonicalize to the `select` form in the middle end, but the `and` form
// gets better codegen on all tested targets (arm, x86, riscv).
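// For example (illustrative): (select i1 %c, (and i32 %x, 1), 0) becomes
// (and (zext %c to i32), %x).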
6334 static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F,
6335                                      const SDLoc &DL, SelectionDAG &DAG) {
6336   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6337   if (!isNullConstant(F))
6338     return SDValue();
6339 
6340   EVT CondVT = Cond.getValueType();
6341   if (TLI.getBooleanContents(CondVT) !=
6342       TargetLoweringBase::ZeroOrOneBooleanContent)
6343     return SDValue();
6344 
6345   if (T.getOpcode() != ISD::AND)
6346     return SDValue();
6347 
6348   if (!isOneConstant(T.getOperand(1)))
6349     return SDValue();
6350 
6351   EVT OpVT = T.getValueType();
6352 
6353   SDValue CondMask =
6354       OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT);
6355   return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0));
6356 }
6357 
6358 /// This contains all DAGCombine rules which reduce two values combined by
6359 /// an And operation to a single value. This makes them reusable in the context
6360 /// of visitSELECT(). Rules involving constants are not included as
6361 /// visitSELECT() already handles those cases.
6362 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
6363   EVT VT = N1.getValueType();
6364   SDLoc DL(N);
6365 
6366   // fold (and x, undef) -> 0
6367   if (N0.isUndef() || N1.isUndef())
6368     return DAG.getConstant(0, DL, VT);
6369 
6370   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
6371     return V;
6372 
6373   // Canonicalize:
6374   //   and(x, add) -> and(add, x)
6375   if (N1.getOpcode() == ISD::ADD)
6376     std::swap(N0, N1);
6377 
6378   // TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
6379   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
6380       VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
6381     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
6382       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
6383         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
6384         // immediate for an add, but it is legal if its top c2 bits are set,
6385         // transform the ADD so the immediate doesn't need to be materialized
6386         // in a register.
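        // Illustrative, target-dependent example: with c2 == 16 on a 64-bit
        // type, the high 16 bits of the add are masked away, so an immediate
        // like 0x0000FFFFFFFFF000 may be rewritten as 0xFFFFFFFFFFFFF000,
        // which some targets can encode as a sign-extended add immediate.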
6387         APInt ADDC = ADDI->getAPIntValue();
6388         APInt SRLC = SRLI->getAPIntValue();
6389         if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
6390             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6391           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
6392                                              SRLC.getZExtValue());
6393           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
6394             ADDC |= Mask;
6395             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
6396               SDLoc DL0(N0);
6397               SDValue NewAdd =
6398                 DAG.getNode(ISD::ADD, DL0, VT,
6399                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
6400               CombineTo(N0.getNode(), NewAdd);
6401               // Return N so it doesn't get rechecked!
6402               return SDValue(N, 0);
6403             }
6404           }
6405         }
6406       }
6407     }
6408   }
6409 
6410   return SDValue();
6411 }
6412 
6413 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
6414                                    EVT LoadResultTy, EVT &ExtVT) {
6415   if (!AndC->getAPIntValue().isMask())
6416     return false;
6417 
6418   unsigned ActiveBits = AndC->getAPIntValue().countr_one();
6419 
6420   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6421   EVT LoadedVT = LoadN->getMemoryVT();
6422 
6423   if (ExtVT == LoadedVT &&
6424       (!LegalOperations ||
6425        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
6426     // ZEXTLOAD will match without needing to change the size of the value being
6427     // loaded.
6428     return true;
6429   }
6430 
  // Do not change the width of a volatile or atomic load.
6432   if (!LoadN->isSimple())
6433     return false;
6434 
6435   // Do not generate loads of non-round integer types since these can
6436   // be expensive (and would be wrong if the type is not byte sized).
6437   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
6438     return false;
6439 
6440   if (LegalOperations &&
6441       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
6442     return false;
6443 
6444   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
6445     return false;
6446 
6447   return true;
6448 }
6449 
6450 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
6451                                     ISD::LoadExtType ExtType, EVT &MemVT,
6452                                     unsigned ShAmt) {
6453   if (!LDST)
6454     return false;
6455   // Only allow byte offsets.
6456   if (ShAmt % 8)
6457     return false;
6458 
6459   // Do not generate loads of non-round integer types since these can
6460   // be expensive (and would be wrong if the type is not byte sized).
6461   if (!MemVT.isRound())
6462     return false;
6463 
  // Don't change the width of a volatile or atomic load.
6465   if (!LDST->isSimple())
6466     return false;
6467 
6468   EVT LdStMemVT = LDST->getMemoryVT();
6469 
6470   // Bail out when changing the scalable property, since we can't be sure that
6471   // we're actually narrowing here.
6472   if (LdStMemVT.isScalableVector() != MemVT.isScalableVector())
6473     return false;
6474 
6475   // Verify that we are actually reducing a load width here.
6476   if (LdStMemVT.bitsLT(MemVT))
6477     return false;
6478 
6479   // Ensure that this isn't going to produce an unsupported memory access.
6480   if (ShAmt) {
6481     assert(ShAmt % 8 == 0 && "ShAmt is byte offset");
6482     const unsigned ByteShAmt = ShAmt / 8;
6483     const Align LDSTAlign = LDST->getAlign();
6484     const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt);
6485     if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
6486                                 LDST->getAddressSpace(), NarrowAlign,
6487                                 LDST->getMemOperand()->getFlags()))
6488       return false;
6489   }
6490 
6491   // It's not possible to generate a constant of extended or untyped type.
6492   EVT PtrType = LDST->getBasePtr().getValueType();
6493   if (PtrType == MVT::Untyped || PtrType.isExtended())
6494     return false;
6495 
6496   if (isa<LoadSDNode>(LDST)) {
6497     LoadSDNode *Load = cast<LoadSDNode>(LDST);
6498     // Don't transform one with multiple uses, this would require adding a new
6499     // load.
6500     if (!SDValue(Load, 0).hasOneUse())
6501       return false;
6502 
6503     if (LegalOperations &&
6504         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
6505       return false;
6506 
6507     // For the transform to be legal, the load must produce only two values
6508     // (the value loaded and the chain).  Don't transform a pre-increment
6509     // load, for example, which produces an extra value.  Otherwise the
6510     // transformation is not equivalent, and the downstream logic to replace
6511     // uses gets things wrong.
6512     if (Load->getNumValues() > 2)
6513       return false;
6514 
6515     // If the load that we're shrinking is an extload and we're not just
6516     // discarding the extension we can't simply shrink the load. Bail.
6517     // TODO: It would be possible to merge the extensions in some cases.
6518     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
6519         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6520       return false;
6521 
6522     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
6523       return false;
6524   } else {
6525     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
6526     StoreSDNode *Store = cast<StoreSDNode>(LDST);
6527     // Can't write outside the original store
6528     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
6529       return false;
6530 
6531     if (LegalOperations &&
6532         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
6533       return false;
6534   }
6535   return true;
6536 }
6537 
6538 bool DAGCombiner::SearchForAndLoads(SDNode *N,
6539                                     SmallVectorImpl<LoadSDNode*> &Loads,
6540                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
6541                                     ConstantSDNode *Mask,
6542                                     SDNode *&NodeToMask) {
  // Recursively search through the operands, looking for loads which can be
  // narrowed.
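  // For example (illustrative): in (and (or (load i32 p), (load i32 q)), 255),
  // both i32 loads can be narrowed to zextloads of i8.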
6545   for (SDValue Op : N->op_values()) {
6546     if (Op.getValueType().isVector())
6547       return false;
6548 
6549     // Some constants may need fixing up later if they are too large.
6550     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
6551       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
6552           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
6553         NodesWithConsts.insert(N);
6554       continue;
6555     }
6556 
6557     if (!Op.hasOneUse())
6558       return false;
6559 
6560     switch(Op.getOpcode()) {
6561     case ISD::LOAD: {
6562       auto *Load = cast<LoadSDNode>(Op);
6563       EVT ExtVT;
6564       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
6565           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
6566 
6567         // ZEXTLOAD is already small enough.
6568         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
6569             ExtVT.bitsGE(Load->getMemoryVT()))
6570           continue;
6571 
        // Use bitsLE so that equal-sized loads are also converted to zext.
6573         if (ExtVT.bitsLE(Load->getMemoryVT()))
6574           Loads.push_back(Load);
6575 
6576         continue;
6577       }
6578       return false;
6579     }
6580     case ISD::ZERO_EXTEND:
6581     case ISD::AssertZext: {
6582       unsigned ActiveBits = Mask->getAPIntValue().countr_one();
6583       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
6584       EVT VT = Op.getOpcode() == ISD::AssertZext ?
6585         cast<VTSDNode>(Op.getOperand(1))->getVT() :
6586         Op.getOperand(0).getValueType();
6587 
      // We can accept extending nodes if the mask is wider than, or equal in
      // width to, the original type.
6590       if (ExtVT.bitsGE(VT))
6591         continue;
6592       break;
6593     }
6594     case ISD::OR:
6595     case ISD::XOR:
6596     case ISD::AND:
6597       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
6598                              NodeToMask))
6599         return false;
6600       continue;
6601     }
6602 
    // Allow one node which will be masked along with any loads found.
6604     if (NodeToMask)
6605       return false;
6606 
6607     // Also ensure that the node to be masked only produces one data result.
6608     NodeToMask = Op.getNode();
6609     if (NodeToMask->getNumValues() > 1) {
6610       bool HasValue = false;
6611       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
6612         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
6613         if (VT != MVT::Glue && VT != MVT::Other) {
6614           if (HasValue) {
6615             NodeToMask = nullptr;
6616             return false;
6617           }
6618           HasValue = true;
6619         }
6620       }
6621       assert(HasValue && "Node to be masked has no data result?");
6622     }
6623   }
6624   return true;
6625 }
6626 
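/// If \p N is an AND with a constant contiguous mask, try to push the mask
/// back through the tree of logic ops feeding it to the loads at the leaves,
/// so that the loads can be narrowed and the AND itself removed.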
6627 bool DAGCombiner::BackwardsPropagateMask(SDNode *N) {
6628   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
6629   if (!Mask)
6630     return false;
6631 
6632   if (!Mask->getAPIntValue().isMask())
6633     return false;
6634 
6635   // No need to do anything if the and directly uses a load.
6636   if (isa<LoadSDNode>(N->getOperand(0)))
6637     return false;
6638 
6639   SmallVector<LoadSDNode*, 8> Loads;
6640   SmallPtrSet<SDNode*, 2> NodesWithConsts;
6641   SDNode *FixupNode = nullptr;
6642   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
6643     if (Loads.empty())
6644       return false;
6645 
6646     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
6647     SDValue MaskOp = N->getOperand(1);
6648 
6649     // If it exists, fix up the single node we allow in the tree that needs
6650     // masking.
6651     if (FixupNode) {
6652       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
6653       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
6654                                 FixupNode->getValueType(0),
6655                                 SDValue(FixupNode, 0), MaskOp);
6656       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
6657       if (And.getOpcode() == ISD::AND)
6658         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
6659     }
6660 
6661     // Narrow any constants that need it.
6662     for (auto *LogicN : NodesWithConsts) {
6663       SDValue Op0 = LogicN->getOperand(0);
6664       SDValue Op1 = LogicN->getOperand(1);
6665 
6666       if (isa<ConstantSDNode>(Op0))
6667         Op0 =
6668             DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp);
6669 
6670       if (isa<ConstantSDNode>(Op1))
6671         Op1 =
6672             DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp);
6673 
6674       if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1))
6675         std::swap(Op0, Op1);
6676 
6677       DAG.UpdateNodeOperands(LogicN, Op0, Op1);
6678     }
6679 
6680     // Create narrow loads.
6681     for (auto *Load : Loads) {
6682       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
6683       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
6684                                 SDValue(Load, 0), MaskOp);
6685       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
6686       if (And.getOpcode() == ISD::AND)
6687         And = SDValue(
6688             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
6689       SDValue NewLoad = reduceLoadWidth(And.getNode());
6690       assert(NewLoad &&
6691              "Shouldn't be masking the load if it can't be narrowed");
6692       CombineTo(Load, NewLoad, NewLoad.getValue(1));
6693     }
6694     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
6695     return true;
6696   }
6697   return false;
6698 }
6699 
6700 // Unfold
6701 //    x &  (-1 'logical shift' y)
6702 // To
6703 //    (x 'opposite logical shift' y) 'logical shift' y
6704 // if it is better for performance.
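// For example:
//    x & (-1 << y)  -->  (x >> y) << y
//    x & (-1 >> y)  -->  (x << y) >> y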
6705 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
6706   assert(N->getOpcode() == ISD::AND);
6707 
6708   SDValue N0 = N->getOperand(0);
6709   SDValue N1 = N->getOperand(1);
6710 
6711   // Do we actually prefer shifts over a mask?
6712   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
6713     return SDValue();
6714 
6715   // Try to match  (-1 '[outer] logical shift' y)
6716   unsigned OuterShift;
6717   unsigned InnerShift; // The opposite direction to the OuterShift.
6718   SDValue Y;           // Shift amount.
6719   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
6720     if (!M.hasOneUse())
6721       return false;
6722     OuterShift = M->getOpcode();
6723     if (OuterShift == ISD::SHL)
6724       InnerShift = ISD::SRL;
6725     else if (OuterShift == ISD::SRL)
6726       InnerShift = ISD::SHL;
6727     else
6728       return false;
6729     if (!isAllOnesConstant(M->getOperand(0)))
6730       return false;
6731     Y = M->getOperand(1);
6732     return true;
6733   };
6734 
6735   SDValue X;
6736   if (matchMask(N1))
6737     X = N0;
6738   else if (matchMask(N0))
6739     X = N1;
6740   else
6741     return SDValue();
6742 
6743   SDLoc DL(N);
6744   EVT VT = N->getValueType(0);
6745 
6746   //     tmp = x   'opposite logical shift' y
6747   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
6748   //     ret = tmp 'logical shift' y
6749   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
6750 
6751   return T1;
6752 }
6753 
6754 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
6755 /// For a target with a bit test, this is expected to become test + set and save
6756 /// at least 1 instruction.
6757 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
6758   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
6759 
6760   // Look through an optional extension.
6761   SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
6762   if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
6763     And0 = And0.getOperand(0);
6764   if (!isOneConstant(And1) || !And0.hasOneUse())
6765     return SDValue();
6766 
6767   SDValue Src = And0;
6768 
6769   // Attempt to find a 'not' op.
6770   // TODO: Should we favor test+set even without the 'not' op?
6771   bool FoundNot = false;
6772   if (isBitwiseNot(Src)) {
6773     FoundNot = true;
6774     Src = Src.getOperand(0);
6775 
6776     // Look through an optional truncation. The source operand may not be the
6777     // same type as the original 'and', but that is ok because we are masking
6778     // off everything but the low bit.
6779     if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse())
6780       Src = Src.getOperand(0);
6781   }
6782 
6783   // Match a shift-right by constant.
6784   if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
6785     return SDValue();
6786 
6787   // This is probably not worthwhile without a supported type.
6788   EVT SrcVT = Src.getValueType();
6789   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6790   if (!TLI.isTypeLegal(SrcVT))
6791     return SDValue();
6792 
6793   // We might have looked through casts that make this transform invalid.
6794   unsigned BitWidth = SrcVT.getScalarSizeInBits();
6795   SDValue ShiftAmt = Src.getOperand(1);
6796   auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
6797   if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
6798     return SDValue();
6799 
6800   // Set source to shift source.
6801   Src = Src.getOperand(0);
6802 
6803   // Try again to find a 'not' op.
6804   // TODO: Should we favor test+set even with two 'not' ops?
6805   if (!FoundNot) {
6806     if (!isBitwiseNot(Src))
6807       return SDValue();
6808     Src = Src.getOperand(0);
6809   }
6810 
6811   if (!TLI.hasBitTest(Src, ShiftAmt))
6812     return SDValue();
6813 
6814   // Turn this into a bit-test pattern using mask op + setcc:
6815   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
6816   // and (srl (not X), C), 1 --> (and X, 1<<C) == 0
6817   SDLoc DL(And);
6818   SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
6819   EVT CCVT =
6820       TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
6821   SDValue Mask = DAG.getConstant(
6822       APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
6823   SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
6824   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
6825   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
6826   return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
6827 }
6828 
6829 /// For targets that support usubsat, match a bit-hack form of that operation
6830 /// that ends in 'and' and convert it.
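/// The bit-hack computes usubsat(X, SignMask): the arithmetic shift yields
/// all-ones exactly when the sign bit of X is set, selecting X ^ SignMask
/// (equivalently X + SignMask), which equals X - SignMask there; otherwise
/// it yields zero, matching the saturation to 0.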
6831 static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
6832   SDValue N0 = N->getOperand(0);
6833   SDValue N1 = N->getOperand(1);
6834   EVT VT = N1.getValueType();
6835 
6836   // Canonicalize SRA as operand 1.
6837   if (N0.getOpcode() == ISD::SRA)
6838     std::swap(N0, N1);
6839 
6840   // xor/add with SMIN (signmask) are logically equivalent.
6841   if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD)
6842     return SDValue();
6843 
6844   if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() ||
6845       N0.getOperand(0) != N1.getOperand(0))
6846     return SDValue();
6847 
6848   unsigned BitWidth = VT.getScalarSizeInBits();
6849   ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true);
6850   ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true);
6851   if (!XorC || !XorC->getAPIntValue().isSignMask() ||
6852       !SraC || SraC->getAPIntValue() != BitWidth - 1)
6853     return SDValue();
6854 
6855   // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128
6856   // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128
6857   SDLoc DL(N);
6858   SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT);
6859   return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask);
6860 }
6861 
6862 /// Given a bitwise logic operation N with a matching bitwise logic operand,
6863 /// fold a pattern where 2 of the source operands are identically shifted
6864 /// values. For example:
6865 /// ((X0 << Y) | Z) | (X1 << Y) --> ((X0 | X1) << Y) | Z
6866 static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
6867                                  SelectionDAG &DAG) {
6868   unsigned LogicOpcode = N->getOpcode();
6869   assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6870          "Expected bitwise logic operation");
6871 
6872   if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
6873     return SDValue();
6874 
6875   // Match another bitwise logic op and a shift.
6876   unsigned ShiftOpcode = ShiftOp.getOpcode();
6877   if (LogicOp.getOpcode() != LogicOpcode ||
6878       !(ShiftOpcode == ISD::SHL || ShiftOpcode == ISD::SRL ||
6879         ShiftOpcode == ISD::SRA))
6880     return SDValue();
6881 
6882   // Match another shift op inside the first logic operand. Handle both commuted
6883   // possibilities.
6884   // LOGIC (LOGIC (SH X0, Y), Z), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6885   // LOGIC (LOGIC Z, (SH X0, Y)), (SH X1, Y) --> LOGIC (SH (LOGIC X0, X1), Y), Z
6886   SDValue X1 = ShiftOp.getOperand(0);
6887   SDValue Y = ShiftOp.getOperand(1);
6888   SDValue X0, Z;
6889   if (LogicOp.getOperand(0).getOpcode() == ShiftOpcode &&
6890       LogicOp.getOperand(0).getOperand(1) == Y) {
6891     X0 = LogicOp.getOperand(0).getOperand(0);
6892     Z = LogicOp.getOperand(1);
6893   } else if (LogicOp.getOperand(1).getOpcode() == ShiftOpcode &&
6894              LogicOp.getOperand(1).getOperand(1) == Y) {
6895     X0 = LogicOp.getOperand(1).getOperand(0);
6896     Z = LogicOp.getOperand(0);
6897   } else {
6898     return SDValue();
6899   }
6900 
6901   EVT VT = N->getValueType(0);
6902   SDLoc DL(N);
6903   SDValue LogicX = DAG.getNode(LogicOpcode, DL, VT, X0, X1);
6904   SDValue NewShift = DAG.getNode(ShiftOpcode, DL, VT, LogicX, Y);
6905   return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z);
6906 }
6907 
6908 /// Given a tree of logic operations with shape like
6909 /// (LOGIC (LOGIC X, Y), (LOGIC Z, Y))
6910 /// try to match and fold shift operations with the same shift amount.
6911 /// For example:
6912 /// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) -->
6913 ///     LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W)
6914 static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
6915                                      SDValue RightHand, SelectionDAG &DAG) {
6916   unsigned LogicOpcode = N->getOpcode();
6917   assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
6918          "Expected bitwise logic operation");
6919   if (LeftHand.getOpcode() != LogicOpcode ||
6920       RightHand.getOpcode() != LogicOpcode)
6921     return SDValue();
6922   if (!LeftHand.hasOneUse() || !RightHand.hasOneUse())
6923     return SDValue();
6924 
6925   // Try to match one of the following patterns:
6926   // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W)
6927   // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y))
6928   // Note that foldLogicOfShifts will handle commuted versions of the left hand
6929   // itself.
6930   SDValue CombinedShifts, W;
6931   SDValue R0 = RightHand.getOperand(0);
6932   SDValue R1 = RightHand.getOperand(1);
6933   if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG)))
6934     W = R1;
6935   else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG)))
6936     W = R0;
6937   else
6938     return SDValue();
6939 
6940   EVT VT = N->getValueType(0);
6941   SDLoc DL(N);
6942   return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W);
6943 }
6944 
6945 SDValue DAGCombiner::visitAND(SDNode *N) {
6946   SDValue N0 = N->getOperand(0);
6947   SDValue N1 = N->getOperand(1);
6948   EVT VT = N1.getValueType();
6949 
6950   // x & x --> x
6951   if (N0 == N1)
6952     return N0;
6953 
6954   // fold (and c1, c2) -> c1&c2
6955   if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1}))
6956     return C;
6957 
6958   // canonicalize constant to RHS
6959   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6960       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6961     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
6962 
6963   if (areBitwiseNotOfEachother(N0, N1))
6964     return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
6965                            VT);
6966 
6967   // fold vector ops
6968   if (VT.isVector()) {
6969     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
6970       return FoldedVOp;
6971 
6972     // fold (and x, 0) -> 0, vector edition
6973     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
6974       // do not return N1, because an undef node may exist in N1
6975       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
6976                              SDLoc(N), N1.getValueType());
6977 
6978     // fold (and x, -1) -> x, vector edition
6979     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
6980       return N0;
6981 
6982     // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load
6983     auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0);
6984     ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true);
6985     if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat &&
6986         N1.hasOneUse()) {
6987       EVT LoadVT = MLoad->getMemoryVT();
6988       EVT ExtVT = VT;
6989       if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) {
6990         // For this AND to be a zero extension of the masked load, the elements
6991         // of the BuildVec must mask the bottom bits of the extended element
6992         // type.
6993         uint64_t ElementSize =
6994             LoadVT.getVectorElementType().getScalarSizeInBits();
6995         if (Splat->getAPIntValue().isMask(ElementSize)) {
6996           auto NewLoad = DAG.getMaskedLoad(
6997               ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(),
6998               MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(),
6999               LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(),
7000               ISD::ZEXTLOAD, MLoad->isExpandingLoad());
7001           bool LoadHasOtherUsers = !N0.hasOneUse();
7002           CombineTo(N, NewLoad);
7003           if (LoadHasOtherUsers)
7004             CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1));
7005           return SDValue(N, 0);
7006         }
7007       }
7008     }
7009   }
7010 
7011   // fold (and x, -1) -> x
7012   if (isAllOnesConstant(N1))
7013     return N0;
7014 
7015   // if (and x, c) is known to be zero, return 0
7016   unsigned BitWidth = VT.getScalarSizeInBits();
7017   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7018   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
7019     return DAG.getConstant(0, SDLoc(N), VT);
7020 
7021   if (SDValue R = foldAndOrOfSETCC(N, DAG))
7022     return R;
7023 
7024   if (SDValue NewSel = foldBinOpIntoSelect(N))
7025     return NewSel;
7026 
7027   // reassociate and
7028   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
7029     return RAND;
7030 
7031   // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
7032   if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
7033                                         VT, N0, N1))
7034     return SD;
7035 
7036   // fold (and (or x, C), D) -> D if (C & D) == D
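  // e.g. (and (or x, 0xF0), 0x30) --> 0x30, since 0x30 is a subset of 0xF0.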
7037   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7038     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
7039   };
7040   if (N0.getOpcode() == ISD::OR &&
7041       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
7042     return N1;
7043 
7044   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7045     SDValue N0Op0 = N0.getOperand(0);
7046     EVT SrcVT = N0Op0.getValueType();
7047     unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
7048     APInt Mask = ~N1C->getAPIntValue();
7049     Mask = Mask.trunc(SrcBitWidth);
7050 
7051     // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
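    // e.g. (and (any_extend i8 V to i32), 0xFF) --> (zero_extend V to i32).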
7052     if (DAG.MaskedValueIsZero(N0Op0, Mask))
7053       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
7054 
7055     // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
7056     if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
7057         TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
7058         TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
7059         TLI.isNarrowingProfitable(VT, SrcVT)) {
7060       SDLoc DL(N);
7061       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
7062                          DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
7063                                      DAG.getZExtOrTrunc(N1, DL, SrcVT)));
7064     }
7065   }
7066 
7067   // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
7068   if (ISD::isExtOpcode(N0.getOpcode())) {
7069     unsigned ExtOpc = N0.getOpcode();
7070     SDValue N0Op0 = N0.getOperand(0);
7071     if (N0Op0.getOpcode() == ISD::AND &&
7072         (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) &&
7073         DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
7074         DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) &&
7075         N0->hasOneUse() && N0Op0->hasOneUse()) {
7076       SDLoc DL(N);
7077       SDValue NewMask =
7078           DAG.getNode(ISD::AND, DL, VT, N1,
7079                       DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1)));
7080       return DAG.getNode(ISD::AND, DL, VT,
7081                          DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)),
7082                          NewMask);
7083     }
7084   }
7085 
7086   // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
7087   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
7088   // already be zero by virtue of the width of the base type of the load.
7089   //
7090   // The 'X' node here can either be nothing or an extract_vector_elt, to catch
7091   // more cases.
7092   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7093        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
7094        N0.getOperand(0).getOpcode() == ISD::LOAD &&
7095        N0.getOperand(0).getResNo() == 0) ||
7096       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
7097     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
7098                                          N0 : N0.getOperand(0) );
7099 
7100     // Get the constant (if applicable) the zero'th operand is being ANDed with.
7101     // This can be a pure constant or a vector splat, in which case we treat the
7102     // vector as a scalar and use the splat value.
7103     APInt Constant = APInt::getZero(1);
7104     if (const ConstantSDNode *C = isConstOrConstSplat(
7105             N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) {
7106       Constant = C->getAPIntValue();
7107     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
7108       unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
7109       APInt SplatValue, SplatUndef;
7110       unsigned SplatBitSize;
7111       bool HasAnyUndefs;
7112       // Endianness should not matter here. Code below makes sure that we only
7113       // use the result if the SplatBitSize is a multiple of the vector element
7114       // size. And after that we AND all element sized parts of the splat
7115       // together. So the end result should be the same regardless of in which
7116       // order we do those operations.
7117       const bool IsBigEndian = false;
7118       bool IsSplat =
7119           Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
7120                                   HasAnyUndefs, EltBitWidth, IsBigEndian);
7121 
7122       // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
7123       // multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
7124       if (IsSplat && (SplatBitSize % EltBitWidth) == 0) {
7125         // Undef bits can contribute to a possible optimisation if set, so
7126         // set them.
7127         SplatValue |= SplatUndef;
7128 
7129         // The splat value may be something like "0x00FFFFFF", which means 0 for
7130         // the first vector value and FF for the rest, repeating. We need a mask
7131         // that will apply equally to all members of the vector, so AND all the
7132         // lanes of the constant together.
7133         Constant = APInt::getAllOnes(EltBitWidth);
7134         for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
7135           Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
7136       }
7137     }
7138 
7139     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
7140     // actually legal and isn't going to get expanded, else this is a false
7141     // optimisation.
7142     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
7143                                                     Load->getValueType(0),
7144                                                     Load->getMemoryVT());
7145 
7146     // Resize the constant to the same size as the original memory access before
7147     // extension. If it is still the AllOnesValue then this AND is completely
7148     // unneeded.
7149     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
7150 
7151     bool B;
7152     switch (Load->getExtensionType()) {
7153     default: B = false; break;
7154     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
7155     case ISD::ZEXTLOAD:
7156     case ISD::NON_EXTLOAD: B = true; break;
7157     }
7158 
7159     if (B && Constant.isAllOnes()) {
7160       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
7161       // preserve semantics once we get rid of the AND.
7162       SDValue NewLoad(Load, 0);
7163 
7164       // Fold the AND away. NewLoad may get replaced immediately.
7165       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
7166 
7167       if (Load->getExtensionType() == ISD::EXTLOAD) {
7168         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
7169                               Load->getValueType(0), SDLoc(Load),
7170                               Load->getChain(), Load->getBasePtr(),
7171                               Load->getOffset(), Load->getMemoryVT(),
7172                               Load->getMemOperand());
7173         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
7174         if (Load->getNumValues() == 3) {
7175           // PRE/POST_INC loads have 3 values.
7176           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
7177                            NewLoad.getValue(2) };
7178           CombineTo(Load, To, 3, true);
7179         } else {
7180           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
7181         }
7182       }
7183 
7184       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7185     }
7186   }
7187 
7188   // Try to convert a constant mask AND into a shuffle clear mask.
7189   if (VT.isVector())
7190     if (SDValue Shuffle = XformToShuffleWithZero(N))
7191       return Shuffle;
7192 
7193   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7194     return Combined;
7195 
7196   if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C &&
7197       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
7198     SDValue Ext = N0.getOperand(0);
7199     EVT ExtVT = Ext->getValueType(0);
7200     SDValue Extendee = Ext->getOperand(0);
7201 
7202     unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits();
7203     if (N1C->getAPIntValue().isMask(ScalarWidth) &&
7204         (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) {
7205       //    (and (extract_subvector (zext|anyext|sext v) _) iN_mask)
7206       // => (extract_subvector (iN_zeroext v))
7207       SDValue ZeroExtExtendee =
7208           DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee);
7209 
7210       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee,
7211                          N0.getOperand(1));
7212     }
7213   }
7214 
7215   // fold (and (masked_gather x)) -> (zext_masked_gather x)
7216   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
7217     EVT MemVT = GN0->getMemoryVT();
7218     EVT ScalarVT = MemVT.getScalarType();
7219 
7220     if (SDValue(GN0, 0).hasOneUse() &&
7221         isConstantSplatVectorMaskForType(N1.getNode(), ScalarVT) &&
7222         TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
7223       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
7224                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
7225 
7226       SDValue ZExtLoad = DAG.getMaskedGather(
7227           DAG.getVTList(VT, MVT::Other), MemVT, SDLoc(N), Ops,
7228           GN0->getMemOperand(), GN0->getIndexType(), ISD::ZEXTLOAD);
7229 
7230       CombineTo(N, ZExtLoad);
7231       AddToWorklist(ZExtLoad.getNode());
7232       // Avoid recheck of N.
7233       return SDValue(N, 0);
7234     }
7235   }
7236 
7237   // fold (and (load x), 255) -> (zextload x, i8)
7238   // fold (and (extload x, i16), 255) -> (zextload x, i8)
7239   if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector())
7240     if (SDValue Res = reduceLoadWidth(N))
7241       return Res;
7242 
7243   if (LegalTypes) {
7244     // Attempt to propagate the AND back up to the leaves which, if they're
7245     // loads, can be combined to narrow loads and the AND node can be removed.
7246     // Perform after legalization so that extend nodes will already be
7247     // combined into the loads.
7248     if (BackwardsPropagateMask(N))
7249       return SDValue(N, 0);
7250   }
7251 
7252   if (SDValue Combined = visitANDLike(N0, N1, N))
7253     return Combined;
7254 
7255   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
7256   if (N0.getOpcode() == N1.getOpcode())
7257     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7258       return V;
7259 
7260   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7261     return R;
7262   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
7263     return R;
7264 
7265   // Masking the negated extension of a boolean is just the zero-extended
7266   // boolean:
7267   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
7268   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
7269   //
7270   // Note: the SimplifyDemandedBits fold below can make an information-losing
7271   // transform, and then we have no way to find this better fold.
7272   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
7273     if (isNullOrNullSplat(N0.getOperand(0))) {
7274       SDValue SubRHS = N0.getOperand(1);
7275       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
7276           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7277         return SubRHS;
7278       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
7279           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
7280         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
7281     }
7282   }
7283 
7284   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
7285   // fold (and (sra)) -> (and (srl)) when possible.
7286   if (SimplifyDemandedBits(SDValue(N, 0)))
7287     return SDValue(N, 0);
7288 
7289   // fold (zext_inreg (extload x)) -> (zextload x)
7290   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
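  // e.g. (and (sextload x, i8 to i32), 0xFF) --> (zextload x, i8 to i32)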
7291   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
7292       (ISD::isEXTLoad(N0.getNode()) ||
7293        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
7294     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
7295     EVT MemVT = LN0->getMemoryVT();
7296     // If we zero all the possible extended bits, then we can turn this into
7297     // a zextload if we are running before legalize or the operation is legal.
7298     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
7299     unsigned MemBitSize = MemVT.getScalarSizeInBits();
7300     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
7301     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
7302         ((!LegalOperations && LN0->isSimple()) ||
7303          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
7304       SDValue ExtLoad =
7305           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
7306                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
7307       AddToWorklist(N);
7308       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
7309       return SDValue(N, 0); // Return N so it doesn't get rechecked!
7310     }
7311   }
7312 
7313   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
7314   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
7315     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
7316                                            N0.getOperand(1), false))
7317       return BSwap;
7318   }
7319 
7320   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
7321     return Shifts;
7322 
7323   if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
7324     return V;
7325 
7326   // Recognize the following pattern:
7327   //
7328   // AndVT = (and (sign_extend NarrowVT to AndVT) #bitmask)
7329   //
7330   // where bitmask is a contiguous low-bit mask that exactly covers the width
7331   // of NarrowVT, i.e. the bits that the sign extension filled in are cleared.
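  // e.g. (and (sign_extend i8 x to i32), 0xFF) --> (zero_extend i8 x to i32).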
7332   auto IsAndZeroExtMask = [](SDValue LHS, SDValue RHS) {
7333     if (LHS->getOpcode() != ISD::SIGN_EXTEND)
7334       return false;
7335 
7336     auto *C = dyn_cast<ConstantSDNode>(RHS);
7337     if (!C)
7338       return false;
7339 
7340     if (!C->getAPIntValue().isMask(
7341             LHS.getOperand(0).getValueType().getFixedSizeInBits()))
7342       return false;
7343 
7344     return true;
7345   };
7346 
7347   // Replace (and (sign_extend ...) #bitmask) with (zero_extend ...).
7348   if (IsAndZeroExtMask(N0, N1))
7349     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0));
7350 
7351   if (hasOperation(ISD::USUBSAT, VT))
7352     if (SDValue V = foldAndToUsubsat(N, DAG))
7353       return V;
7354 
7355   // Postpone until legalization completed to avoid interference with bswap
7356   // folding
7357   if (LegalOperations || VT.isVector())
7358     if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
7359       return R;
7360 
7361   return SDValue();
7362 }
7363 
7364 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
7365 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
7366                                         bool DemandHighBits) {
7367   if (!LegalOperations)
7368     return SDValue();
7369 
7370   EVT VT = N->getValueType(0);
7371   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
7372     return SDValue();
7373   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7374     return SDValue();
7375 
7376   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
7377   bool LookPassAnd0 = false;
7378   bool LookPassAnd1 = false;
7379   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
7380     std::swap(N0, N1);
7381   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
7382     std::swap(N0, N1);
7383   if (N0.getOpcode() == ISD::AND) {
7384     if (!N0->hasOneUse())
7385       return SDValue();
7386     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7387     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
7388     // This is needed for X86.
7389     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
7390                   N01C->getZExtValue() != 0xFFFF))
7391       return SDValue();
7392     N0 = N0.getOperand(0);
7393     LookPassAnd0 = true;
7394   }
7395 
7396   if (N1.getOpcode() == ISD::AND) {
7397     if (!N1->hasOneUse())
7398       return SDValue();
7399     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7400     if (!N11C || N11C->getZExtValue() != 0xFF)
7401       return SDValue();
7402     N1 = N1.getOperand(0);
7403     LookPassAnd1 = true;
7404   }
7405 
7406   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
7407     std::swap(N0, N1);
7408   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
7409     return SDValue();
7410   if (!N0->hasOneUse() || !N1->hasOneUse())
7411     return SDValue();
7412 
7413   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7414   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
7415   if (!N01C || !N11C)
7416     return SDValue();
7417   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
7418     return SDValue();
7419 
7420   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
7421   SDValue N00 = N0->getOperand(0);
7422   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
7423     if (!N00->hasOneUse())
7424       return SDValue();
7425     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
7426     if (!N001C || N001C->getZExtValue() != 0xFF)
7427       return SDValue();
7428     N00 = N00.getOperand(0);
7429     LookPassAnd0 = true;
7430   }
7431 
7432   SDValue N10 = N1->getOperand(0);
7433   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
7434     if (!N10->hasOneUse())
7435       return SDValue();
7436     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
7437     // Also allow 0xFFFF since the bits will be shifted out. This is needed
7438     // for X86.
7439     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
7440                    N101C->getZExtValue() != 0xFFFF))
7441       return SDValue();
7442     N10 = N10.getOperand(0);
7443     LookPassAnd1 = true;
7444   }
7445 
7446   if (N00 != N10)
7447     return SDValue();
7448 
7449   // Make sure everything beyond the low halfword is known to be zero since the
7450   // SRL 16 will clear the top bits.
7451   unsigned OpSizeInBits = VT.getSizeInBits();
7452   if (OpSizeInBits > 16) {
7453     // If the left-shift isn't masked out then the only way this is a bswap is
7454     // if all bits beyond the low 8 are 0. In that case the entire pattern
7455     // reduces to a left shift anyway: leave it for other parts of the combiner.
7456     if (DemandHighBits && !LookPassAnd0)
7457       return SDValue();
7458 
7459     // However, if the right shift isn't masked out then it might be because
7460     // it's not needed. See if we can spot that too. If the high bits aren't
7461     // demanded, we only need bits 23:16 to be zero. Otherwise, we need all
7462     // upper bits to be zero.
7463     if (!LookPassAnd1) {
7464       unsigned HighBit = DemandHighBits ? OpSizeInBits : 24;
7465       if (!DAG.MaskedValueIsZero(N10,
7466                                  APInt::getBitsSet(OpSizeInBits, 16, HighBit)))
7467         return SDValue();
7468     }
7469   }
7470 
7471   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
7472   if (OpSizeInBits > 16) {
7473     SDLoc DL(N);
7474     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
7475                       DAG.getConstant(OpSizeInBits - 16, DL,
7476                                       getShiftAmountTy(VT)));
7477   }
7478   return Res;
7479 }
7480 
7481 /// Return true if the specified node is an element that makes up a 32-bit
7482 /// packed halfword byteswap.
7483 /// ((x & 0x000000ff) << 8) |
7484 /// ((x & 0x0000ff00) >> 8) |
7485 /// ((x & 0x00ff0000) << 8) |
7486 /// ((x & 0xff000000) >> 8)
7487 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
7488   if (!N->hasOneUse())
7489     return false;
7490 
7491   unsigned Opc = N.getOpcode();
7492   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
7493     return false;
7494 
7495   SDValue N0 = N.getOperand(0);
7496   unsigned Opc0 = N0.getOpcode();
7497   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
7498     return false;
7499 
7500   ConstantSDNode *N1C = nullptr;
7501   // SHL or SRL: look upstream for AND mask operand
7502   if (Opc == ISD::AND)
7503     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7504   else if (Opc0 == ISD::AND)
7505     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7506   if (!N1C)
7507     return false;
7508 
7509   unsigned MaskByteOffset;
7510   switch (N1C->getZExtValue()) {
7511   default:
7512     return false;
7513   case 0xFF:       MaskByteOffset = 0; break;
7514   case 0xFF00:     MaskByteOffset = 1; break;
7515   case 0xFFFF:
7516     // In case demanded bits didn't clear the bits that will be shifted out.
7517     // This is needed for X86.
7518     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
7519       MaskByteOffset = 1;
7520       break;
7521     }
7522     return false;
7523   case 0xFF0000:   MaskByteOffset = 2; break;
7524   case 0xFF000000: MaskByteOffset = 3; break;
7525   }
7526 
7527   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
7528   if (Opc == ISD::AND) {
7529     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
7530       // (x >> 8) & 0xff
7531       // (x >> 8) & 0xff0000
7532       if (Opc0 != ISD::SRL)
7533         return false;
7534       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7535       if (!C || C->getZExtValue() != 8)
7536         return false;
7537     } else {
7538       // (x << 8) & 0xff00
7539       // (x << 8) & 0xff000000
7540       if (Opc0 != ISD::SHL)
7541         return false;
7542       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7543       if (!C || C->getZExtValue() != 8)
7544         return false;
7545     }
7546   } else if (Opc == ISD::SHL) {
7547     // (x & 0xff) << 8
7548     // (x & 0xff0000) << 8
7549     if (MaskByteOffset != 0 && MaskByteOffset != 2)
7550       return false;
7551     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7552     if (!C || C->getZExtValue() != 8)
7553       return false;
7554   } else { // Opc == ISD::SRL
7555     // (x & 0xff00) >> 8
7556     // (x & 0xff000000) >> 8
7557     if (MaskByteOffset != 1 && MaskByteOffset != 3)
7558       return false;
7559     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
7560     if (!C || C->getZExtValue() != 8)
7561       return false;
7562   }
7563 
7564   if (Parts[MaskByteOffset])
7565     return false;
7566 
7567   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
7568   return true;
7569 }
7570 
7571 // Match 2 elements of a packed halfword bswap.
7572 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
7573   if (N.getOpcode() == ISD::OR)
7574     return isBSwapHWordElement(N.getOperand(0), Parts) &&
7575            isBSwapHWordElement(N.getOperand(1), Parts);
7576 
7577   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
7578     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
7579     if (!C || C->getAPIntValue() != 16)
7580       return false;
7581     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
7582     return true;
7583   }
7584 
7585   return false;
7586 }
7587 
7588 // Match this pattern:
7589 //   (or (and (shl A, 8), 0xff00ff00), (and (srl A, 8), 0x00ff00ff))
7590 // And rewrite this to:
7591 //   (rotr (bswap A), 16)
7592 static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI,
7593                                        SelectionDAG &DAG, SDNode *N, SDValue N0,
7594                                        SDValue N1, EVT VT, EVT ShiftAmountTy) {
7595   assert(N->getOpcode() == ISD::OR && VT == MVT::i32 &&
7596          "MatchBSwapHWordOrAndAnd: expecting i32");
7597   if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7598     return SDValue();
7599   if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
7600     return SDValue();
7601   // TODO: this is too restrictive; lifting this restriction requires more tests
7602   if (!N0->hasOneUse() || !N1->hasOneUse())
7603     return SDValue();
7604   ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1));
7605   ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1));
7606   if (!Mask0 || !Mask1)
7607     return SDValue();
7608   if (Mask0->getAPIntValue() != 0xff00ff00 ||
7609       Mask1->getAPIntValue() != 0x00ff00ff)
7610     return SDValue();
7611   SDValue Shift0 = N0.getOperand(0);
7612   SDValue Shift1 = N1.getOperand(0);
7613   if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL)
7614     return SDValue();
7615   ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1));
7616   ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1));
7617   if (!ShiftAmt0 || !ShiftAmt1)
7618     return SDValue();
7619   if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8)
7620     return SDValue();
7621   if (Shift0.getOperand(0) != Shift1.getOperand(0))
7622     return SDValue();
7623 
7624   SDLoc DL(N);
7625   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0));
7626   SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy);
7627   return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7628 }
7629 
7630 /// Match a 32-bit packed halfword bswap. That is
7631 /// ((x & 0x000000ff) << 8) |
7632 /// ((x & 0x0000ff00) >> 8) |
7633 /// ((x & 0x00ff0000) << 8) |
7634 /// ((x & 0xff000000) >> 8)
7635 /// => (rotl (bswap x), 16)
7636 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
7637   if (!LegalOperations)
7638     return SDValue();
7639 
7640   EVT VT = N->getValueType(0);
7641   if (VT != MVT::i32)
7642     return SDValue();
7643   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
7644     return SDValue();
7645 
7646   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT,
7647                                               getShiftAmountTy(VT)))
7648     return BSwap;
7649 
7650   // Try again with commuted operands.
7651   if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT,
7652                                               getShiftAmountTy(VT)))
7653     return BSwap;
7654
7656   // Look for either
7657   // (or (bswaphpair), (bswaphpair))
7658   // (or (or (bswaphpair), (and)), (and))
7659   // (or (or (and), (bswaphpair)), (and))
7660   SDNode *Parts[4] = {};
7661 
7662   if (isBSwapHWordPair(N0, Parts)) {
7663     // (or (or (and), (and)), (or (and), (and)))
7664     if (!isBSwapHWordPair(N1, Parts))
7665       return SDValue();
7666   } else if (N0.getOpcode() == ISD::OR) {
7667     // (or (or (or (and), (and)), (and)), (and))
7668     if (!isBSwapHWordElement(N1, Parts))
7669       return SDValue();
7670     SDValue N00 = N0.getOperand(0);
7671     SDValue N01 = N0.getOperand(1);
7672     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
7673         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
7674       return SDValue();
7675   } else {
7676     return SDValue();
7677   }
7678 
7679   // Make sure the parts are all coming from the same node.
7680   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
7681     return SDValue();
7682 
7683   SDLoc DL(N);
7684   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
7685                               SDValue(Parts[0], 0));
7686 
7687   // Result of the bswap should be rotated by 16. If it's not legal, then
7688   // do  (x << 16) | (x >> 16).
7689   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
7690   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
7691     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
7692   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
7693     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
7694   return DAG.getNode(ISD::OR, DL, VT,
7695                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
7696                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
7697 }
7698 
7699 /// This contains all DAGCombine rules which reduce two values combined by
7700 /// an Or operation to a single value; \see visitANDLike().
7701 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
7702   EVT VT = N1.getValueType();
7703   SDLoc DL(N);
7704 
7705   // fold (or x, undef) -> -1
7706   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
7707     return DAG.getAllOnesConstant(DL, VT);
7708 
7709   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
7710     return V;
7711 
7712   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
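  // e.g. for i8: (or (and X, 0xF0), (and Y, 0x0F)) --> (and (or X, Y), 0xFF)
  // when bits 3:0 of X and bits 7:4 of Y are known to be zero.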
7713   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
7714       // Don't increase # computations.
7715       (N0->hasOneUse() || N1->hasOneUse())) {
7716     // We can only do this xform if we know that bits from X that are set in C2
7717     // but not in C1 are already zero.  Likewise for Y.
7718     if (const ConstantSDNode *N0O1C =
7719         getAsNonOpaqueConstant(N0.getOperand(1))) {
7720       if (const ConstantSDNode *N1O1C =
7721           getAsNonOpaqueConstant(N1.getOperand(1))) {
7724         const APInt &LHSMask = N0O1C->getAPIntValue();
7725         const APInt &RHSMask = N1O1C->getAPIntValue();
7726 
7727         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
7728             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
7729           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7730                                   N0.getOperand(0), N1.getOperand(0));
7731           return DAG.getNode(ISD::AND, DL, VT, X,
7732                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
7733         }
7734       }
7735     }
7736   }
7737 
7738   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
7739   if (N0.getOpcode() == ISD::AND &&
7740       N1.getOpcode() == ISD::AND &&
7741       N0.getOperand(0) == N1.getOperand(0) &&
7742       // Don't increase # computations.
7743       (N0->hasOneUse() || N1->hasOneUse())) {
7744     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
7745                             N0.getOperand(1), N1.getOperand(1));
7746     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
7747   }
7748 
7749   return SDValue();
7750 }
7751 
7752 /// OR combines for which the commuted variant will be tried as well.
7753 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
7754                                   SDNode *N) {
7755   EVT VT = N0.getValueType();
7756 
7757   auto peekThroughResize = [](SDValue V) {
7758     if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
7759       return V->getOperand(0);
7760     return V;
7761   };
7762 
7763   SDValue N0Resized = peekThroughResize(N0);
7764   if (N0Resized.getOpcode() == ISD::AND) {
7765     SDValue N1Resized = peekThroughResize(N1);
7766     SDValue N00 = N0Resized.getOperand(0);
7767     SDValue N01 = N0Resized.getOperand(1);
7768 
7769     // fold or (and x, y), x --> x
7770     if (N00 == N1Resized || N01 == N1Resized)
7771       return N1;
7772 
7773     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
7774     // TODO: Set AllowUndefs = true.
7775     if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
7776                                                   /* AllowUndefs */ false)) {
7777       if (peekThroughResize(NotOperand) == N1Resized)
7778         return DAG.getNode(ISD::OR, SDLoc(N), VT,
7779                            DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
7780     }
7781 
7782     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
7783     if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
7784                                                   /* AllowUndefs */ false)) {
7785       if (peekThroughResize(NotOperand) == N1Resized)
7786         return DAG.getNode(ISD::OR, SDLoc(N), VT,
7787                            DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
7788     }
7789   }
7790 
7791   if (N0.getOpcode() == ISD::XOR) {
7792     // fold or (xor x, y), x --> or x, y
7793     //      or (xor x, y), (x and/or y) --> or x, y
7794     SDValue N00 = N0.getOperand(0);
7795     SDValue N01 = N0.getOperand(1);
7796     if (N00 == N1)
7797       return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
7798     if (N01 == N1)
7799       return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
7800 
7801     if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) {
7802       SDValue N10 = N1.getOperand(0);
7803       SDValue N11 = N1.getOperand(1);
7804       if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10))
7805         return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01);
7806     }
7807   }
7808 
7809   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
7810     return R;
7811 
7812   auto peekThroughZext = [](SDValue V) {
7813     if (V->getOpcode() == ISD::ZERO_EXTEND)
7814       return V->getOperand(0);
7815     return V;
7816   };
7817 
7818   // (fshl X, ?, Y) | (shl X, Y) --> fshl X, ?, Y
7819   if (N0.getOpcode() == ISD::FSHL && N1.getOpcode() == ISD::SHL &&
7820       N0.getOperand(0) == N1.getOperand(0) &&
7821       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7822     return N0;
7823 
7824   // (fshr ?, X, Y) | (srl X, Y) --> fshr ?, X, Y
7825   if (N0.getOpcode() == ISD::FSHR && N1.getOpcode() == ISD::SRL &&
7826       N0.getOperand(1) == N1.getOperand(0) &&
7827       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
7828     return N0;
7829 
7830   return SDValue();
7831 }
7832 
7833 SDValue DAGCombiner::visitOR(SDNode *N) {
7834   SDValue N0 = N->getOperand(0);
7835   SDValue N1 = N->getOperand(1);
7836   EVT VT = N1.getValueType();
7837 
7838   // x | x --> x
7839   if (N0 == N1)
7840     return N0;
7841 
7842   // fold (or c1, c2) -> c1|c2
7843   if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1}))
7844     return C;
7845 
7846   // canonicalize constant to RHS
7847   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7848       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7849     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
7850 
7851   // fold vector ops
7852   if (VT.isVector()) {
7853     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
7854       return FoldedVOp;
7855 
7856     // fold (or x, 0) -> x, vector edition
7857     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
7858       return N0;
7859 
7860     // fold (or x, -1) -> -1, vector edition
7861     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
7862       // do not return N1, because an undef node may exist in N1
7863       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
7864 
7865     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
7866     // Do this only if the resulting type / shuffle is legal.
7867     auto *SV0 = dyn_cast<ShuffleVectorSDNode>(N0);
7868     auto *SV1 = dyn_cast<ShuffleVectorSDNode>(N1);
7869     if (SV0 && SV1 && TLI.isTypeLegal(VT)) {
7870       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
7871       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
7872       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
7873       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
7874       // Ensure both shuffles have a zero input.
7875       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
7876         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
7877         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
7878         bool CanFold = true;
7879         int NumElts = VT.getVectorNumElements();
7880         SmallVector<int, 4> Mask(NumElts, -1);
7881 
7882         for (int i = 0; i != NumElts; ++i) {
7883           int M0 = SV0->getMaskElt(i);
7884           int M1 = SV1->getMaskElt(i);
7885 
7886           // Determine if either index is pointing to a zero vector.
7887           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
7888           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
7889 
7890           // If one element is zero and the other side is undef, keep undef.
7891           // This also handles the case that both are undef.
7892           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0))
7893             continue;
7894 
7895           // Make sure only one of the elements is zero.
7896           if (M0Zero == M1Zero) {
7897             CanFold = false;
7898             break;
7899           }
7900 
7901           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
7902 
7903           // We have a zero and non-zero element. If the non-zero came from
7904           // SV0 make the index a LHS index. If it came from SV1, make it
7905           // a RHS index. We need to mod by NumElts because we don't care
7906           // which operand it came from in the original shuffles.
7907           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
7908         }
7909 
7910         if (CanFold) {
7911           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
7912           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
7913 
7914           SDValue LegalShuffle =
7915               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
7916                                           Mask, DAG);
7917           if (LegalShuffle)
7918             return LegalShuffle;
7919         }
7920       }
7921     }
7922   }
7923 
7924   // fold (or x, 0) -> x
7925   if (isNullConstant(N1))
7926     return N0;
7927 
7928   // fold (or x, -1) -> -1
7929   if (isAllOnesConstant(N1))
7930     return N1;
7931 
7932   if (SDValue NewSel = foldBinOpIntoSelect(N))
7933     return NewSel;
7934 
7935   // fold (or x, c) -> c iff (x & ~c) == 0
7936   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
7937   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
7938     return N1;
7939 
7940   if (SDValue R = foldAndOrOfSETCC(N, DAG))
7941     return R;
7942 
7943   if (SDValue Combined = visitORLike(N0, N1, N))
7944     return Combined;
7945 
7946   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
7947     return Combined;
7948 
7949   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
7950   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
7951     return BSwap;
7952   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
7953     return BSwap;
7954 
7955   // reassociate or
7956   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
7957     return ROR;
7958 
7959   // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
7960   if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
7961                                         VT, N0, N1))
7962     return SD;
7963 
7964   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
7965   // iff (c1 & c2) != 0 or c1/c2 are undef.
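       // E.g. (or (and X, 0x0F), 0x0C) -> (and (or X, 0x0C), 0x0F), since
       // 0x0F & 0x0C != 0 and both sides reduce to (X & 0x0F) | 0x0C.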
7966   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
7967     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
7968   };
7969   if (N0.getOpcode() == ISD::AND && N0->hasOneUse() &&
7970       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
7971     if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
7972                                                  {N1, N0.getOperand(1)})) {
7973       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
7974       AddToWorklist(IOR.getNode());
7975       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
7976     }
7977   }
7978 
7979   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
7980     return Combined;
7981   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
7982     return Combined;
7983 
7984   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
7985   if (N0.getOpcode() == N1.getOpcode())
7986     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7987       return V;
7988 
7989   // See if this is some rotate idiom.
7990   if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
7991     return Rot;
7992 
7993   if (SDValue Load = MatchLoadCombine(N))
7994     return Load;
7995 
7996   // Simplify the operands using demanded-bits information.
7997   if (SimplifyDemandedBits(SDValue(N, 0)))
7998     return SDValue(N, 0);
7999 
8000   // If OR can be rewritten into ADD, try combines based on ADD.
8001   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
8002       DAG.isADDLike(SDValue(N, 0)))
8003     if (SDValue Combined = visitADDLike(N))
8004       return Combined;
8005 
8006   // Postpone until legalization has completed to avoid interference with
8007   // bswap folding.
8008   if (LegalOperations || VT.isVector())
8009     if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
8010       return R;
8011 
8012   return SDValue();
8013 }
8014 
8015 static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op,
8016                                  SDValue &Mask) {
8017   if (Op.getOpcode() == ISD::AND &&
8018       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
8019     Mask = Op.getOperand(1);
8020     return Op.getOperand(0);
8021   }
8022   return Op;
8023 }
8024 
8025 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
8026 static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift,
8027                             SDValue &Mask) {
8028   Op = stripConstantMask(DAG, Op, Mask);
8029   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
8030     Shift = Op;
8031     return true;
8032   }
8033   return false;
8034 }
8035 
8036 /// Helper function for visitOR to extract the needed side of a rotate idiom
8037 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
8038 /// InstCombine merged some outside op with one of the shifts from
8039 /// the rotate pattern.
8040 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
8041 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
8042 /// patterns:
8043 ///
8044 ///   (or (add v v) (srl v bitwidth-1)):
8045 ///     expands (add v v) -> (shl v 1)
8046 ///
8047 ///   (or (mul v c0) (srl (mul v c1) c2)):
8048 ///     expands (mul v c0) -> (shl (mul v c1) c3)
8049 ///
8050 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
8051 ///     expands (udiv v c0) -> (srl (udiv v c1) c3)
8052 ///
8053 ///   (or (shl v c0) (srl (shl v c1) c2)):
8054 ///     expands (shl v c0) -> (shl (shl v c1) c3)
8055 ///
8056 ///   (or (srl v c0) (shl (srl v c1) c2)):
8057 ///     expands (srl v c0) -> (srl (srl v c1) c3)
8058 ///
8059 /// Such that in all cases, c3+c2==bitwidth(op v c1).
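     ///
     /// For example, on i32 with the mul pattern, c1 == 3 and c2 == 28 give
     /// c3 == 4 and c0 == 48: (or (mul v 48) (srl (mul v 3) 28)) expands
     /// (mul v 48) -> (shl (mul v 3) 4), exposing a rotate of (mul v 3) by 4.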
8060 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
8061                                      SDValue ExtractFrom, SDValue &Mask,
8062                                      const SDLoc &DL) {
8063   assert(OppShift && ExtractFrom && "Empty SDValue");
8064   if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL)
8065     return SDValue();
8066 
8067   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
8068 
8069   // Value and Type of the shift.
8070   SDValue OppShiftLHS = OppShift.getOperand(0);
8071   EVT ShiftedVT = OppShiftLHS.getValueType();
8072 
8073   // Amount of the existing shift.
8074   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
8075 
8076   // (add v v) -> (shl v 1)
8077   // TODO: Should this be a general DAG canonicalization?
8078   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
8079       ExtractFrom.getOpcode() == ISD::ADD &&
8080       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
8081       ExtractFrom.getOperand(0) == OppShiftLHS &&
8082       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
8083     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
8084                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
8085 
8086   // Preconditions:
8087   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
8088   //
8089   // Find opcode of the needed shift to be extracted from (op0 v c0).
8090   unsigned Opcode = ISD::DELETED_NODE;
8091   bool IsMulOrDiv = false;
8092   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
8093   // opcode or its arithmetic (mul or udiv) variant.
8094   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
8095     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
8096     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
8097       return false;
8098     Opcode = NeededShift;
8099     return true;
8100   };
8101   // op0 must be either the needed shift opcode or the mul/udiv equivalent
8102   // that the needed shift can be extracted from.
8103   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
8104       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
8105     return SDValue();
8106 
8107   // op0 must be the same opcode on both sides, have the same LHS argument,
8108   // and produce the same value type.
8109   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
8110       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
8111       ShiftedVT != ExtractFrom.getValueType())
8112     return SDValue();
8113 
8114   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
8115   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
8116   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
8117   ConstantSDNode *ExtractFromCst =
8118       isConstOrConstSplat(ExtractFrom.getOperand(1));
8119   // TODO: We should be able to handle non-uniform constant vectors for
       // these values.
8120   // Check that we have constant values.
8121   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
8122       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
8123       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
8124     return SDValue();
8125 
8126   // Compute the shift amount we need to extract to complete the rotate.
8127   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
8128   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
8129     return SDValue();
8130   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
8131   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
8132   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
8133   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
8134   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
8135 
8136   // Now try extract the needed shift from the ExtractFrom op and see if the
8137   // result matches up with the existing shift's LHS op.
8138   if (IsMulOrDiv) {
8139     // Op to extract from is a mul or udiv by a constant.
8140     // Check (using the naming from the preconditions above):
8141     //     c0 / (1 << (bitwidth(op0 v c1) - c2)) == c1
8142     //     c0 % (1 << (bitwidth(op0 v c1) - c2)) == 0
8143     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
8144                                                  NeededShiftAmt.getZExtValue());
8145     APInt ResultAmt;
8146     APInt Rem;
8147     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
8148     if (Rem != 0 || ResultAmt != OppLHSAmt)
8149       return SDValue();
8150   } else {
8151     // Op to extract from is a shift by a constant.
8152     // Check (using the naming from the preconditions above):
8153     //      c0 - (bitwidth(op0 v c1) - c2) == c1
8154     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
8155                                           ExtractFromAmt.getBitWidth()))
8156       return SDValue();
8157   }
8158 
8159   // Return the expanded shift op that should allow a rotate to be formed.
8160   EVT ShiftVT = OppShift.getOperand(1).getValueType();
8161   EVT ResVT = ExtractFrom.getValueType();
8162   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
8163   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
8164 }
8165 
8166 // Return true if we can prove that, whenever Neg and Pos are both in the
8167 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
8168 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
8169 //
8170 //     (or (shift1 X, Neg), (shift2 X, Pos))
8171 //
8172 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
8173 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
8174 // to consider shift amounts with defined behavior.
8175 //
8176 // The IsRotate flag should be set when the LHS of both shifts is the same.
8177 // Otherwise if matching a general funnel shift, it should be clear.
8178 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
8179                            SelectionDAG &DAG, bool IsRotate) {
8180   const auto &TLI = DAG.getTargetLoweringInfo();
8181   // If EltSize is a power of 2 then:
8182   //
8183   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
8184   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
8185   //
8186   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
8187   // for the stronger condition:
8188   //
8189   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
8190   //
8191   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
8192   // we can just replace Neg with Neg' for the rest of the function.
8193   //
8194   // In other cases we check for the even stronger condition:
8195   //
8196   //     Neg == EltSize - Pos                                    [B]
8197   //
8198   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
8199   // behavior if Pos == 0 (and consequently Neg == EltSize).
8200   //
8201   // We could actually use [A] whenever EltSize is a power of 2, but the
8202   // only extra cases that it would match are those uninteresting ones
8203   // where Neg and Pos are never in range at the same time.  E.g. for
8204   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
8205   // as well as (sub 32, Pos), but:
8206   //
8207   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
8208   //
8209   // always invokes undefined behavior for 32-bit X.
8210   //
8211   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
8212   // This allows us to peek through any operations that only affect Mask's
8213   // un-demanded bits.
8214   //
8215   // NOTE: We can only do this when matching operations which won't modify the
8216   // least Log2(EltSize) significant bits and not a general funnel shift.
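       //
       // For example, with EltSize == 32, Neg == (sub 32, Pos) satisfies [B]
       // directly, and Neg == (and (sub 32, Pos), 31) satisfies [A] once the
       // redundant mask is peeled off below.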
8217   unsigned MaskLoBits = 0;
8218   if (IsRotate && isPowerOf2_64(EltSize)) {
8219     unsigned Bits = Log2_64(EltSize);
8220     unsigned NegBits = Neg.getScalarValueSizeInBits();
8221     if (NegBits >= Bits) {
8222       APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits);
8223       if (SDValue Inner =
8224               TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) {
8225         Neg = Inner;
8226         MaskLoBits = Bits;
8227       }
8228     }
8229   }
8230 
8231   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
8232   if (Neg.getOpcode() != ISD::SUB)
8233     return false;
8234   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
8235   if (!NegC)
8236     return false;
8237   SDValue NegOp1 = Neg.getOperand(1);
8238 
8239   // On the RHS of [A], if Pos is the result of operation on Pos' that won't
8240   // affect Mask's demanded bits, just replace Pos with Pos'. These operations
8241   // are redundant for the purpose of the equality.
8242   if (MaskLoBits) {
8243     unsigned PosBits = Pos.getScalarValueSizeInBits();
8244     if (PosBits >= MaskLoBits) {
8245       APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits);
8246       if (SDValue Inner =
8247               TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) {
8248         Pos = Inner;
8249       }
8250     }
8251   }
8252 
8253   // The condition we need is now:
8254   //
8255   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
8256   //
8257   // If NegOp1 == Pos then we need:
8258   //
8259   //              EltSize & Mask == NegC & Mask
8260   //
8261   // (because "x & Mask" is a truncation and distributes through subtraction).
8262   //
8263   // We also need to account for a potential truncation of NegOp1 if the amount
8264   // has already been legalized to a shift amount type.
8265   APInt Width;
8266   if ((Pos == NegOp1) ||
8267       (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0)))
8268     Width = NegC->getAPIntValue();
8269 
8270   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
8271   // Then the condition we want to prove becomes:
8272   //
8273   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
8274   //
8275   // which, again because "x & Mask" is a truncation, becomes:
8276   //
8277   //                NegC & Mask == (EltSize - PosC) & Mask
8278   //             EltSize & Mask == (NegC + PosC) & Mask
8279   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
8280     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
8281       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
8282     else
8283       return false;
8284   } else
8285     return false;
8286 
8287   // Now we just need to check that EltSize & Mask == Width & Mask.
8288   if (MaskLoBits)
8289     // EltSize & Mask is 0 since Mask is EltSize - 1.
8290     return Width.getLoBits(MaskLoBits) == 0;
8291   return Width == EltSize;
8292 }
8293 
8294 // A subroutine of MatchRotate used once we have found an OR of two opposite
8295 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
8296 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
8297 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
8298 // Neg with outer conversions stripped away.
8299 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
8300                                        SDValue Neg, SDValue InnerPos,
8301                                        SDValue InnerNeg, bool HasPos,
8302                                        unsigned PosOpcode, unsigned NegOpcode,
8303                                        const SDLoc &DL) {
8304   // fold (or (shl x, (*ext y)),
8305   //          (srl x, (*ext (sub 32, y)))) ->
8306   //   (rotl x, y) or (rotr x, (sub 32, y))
8307   //
8308   // fold (or (shl x, (*ext (sub 32, y))),
8309   //          (srl x, (*ext y))) ->
8310   //   (rotr x, y) or (rotl x, (sub 32, y))
8311   EVT VT = Shifted.getValueType();
8312   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
8313                      /*IsRotate*/ true)) {
8314     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
8315                        HasPos ? Pos : Neg);
8316   }
8317 
8318   return SDValue();
8319 }
8320 
8321 // A subroutine of MatchRotate used once we have found an OR of two opposite
8322 // shifts of N0 + N1.  If Neg == <operand size> - Pos then the OR reduces
8323 // to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the
8324 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
8325 // Neg with outer conversions stripped away.
8326 // TODO: Merge with MatchRotatePosNeg.
8327 SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos,
8328                                        SDValue Neg, SDValue InnerPos,
8329                                        SDValue InnerNeg, bool HasPos,
8330                                        unsigned PosOpcode, unsigned NegOpcode,
8331                                        const SDLoc &DL) {
8332   EVT VT = N0.getValueType();
8333   unsigned EltBits = VT.getScalarSizeInBits();
8334 
8335   // fold (or (shl x0, (*ext y)),
8336   //          (srl x1, (*ext (sub 32, y)))) ->
8337   //   (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y))
8338   //
8339   // fold (or (shl x0, (*ext (sub 32, y))),
8340   //          (srl x1, (*ext y))) ->
8341   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
8342   if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
8343     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
8344                        HasPos ? Pos : Neg);
8345   }
8346 
8347   // Matching the shift+xor cases, we can't easily use the xor'd shift amount,
8348   // so for now just use the PosOpcode case if it's legal.
8349   // TODO: When can we use the NegOpcode case?
8350   if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) {
8351     auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) {
8352       if (Op.getOpcode() != BinOpc)
8353         return false;
8354       ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1));
8355       return Cst && (Cst->getAPIntValue() == Imm);
8356     };
8357 
8358     // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31)))
8359     //   -> (fshl x0, x1, y)
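         // (For y in [0, 32), (xor y, 31) == 31 - y, so the two srls compute
         // srl(x1, 32 - y) while staying well-defined for y == 0.)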
8360     if (IsBinOpImm(N1, ISD::SRL, 1) &&
8361         IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) &&
8362         InnerPos == InnerNeg.getOperand(0) &&
8363         TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) {
8364       return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos);
8365     }
8366 
8367     // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y))
8368     //   -> (fshr x0, x1, y)
8369     if (IsBinOpImm(N0, ISD::SHL, 1) &&
8370         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8371         InnerNeg == InnerPos.getOperand(0) &&
8372         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8373       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8374     }
8375 
8376     // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y))
8377     //   -> (fshr x0, x1, y)
8378     // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization?
8379     if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) &&
8380         IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) &&
8381         InnerNeg == InnerPos.getOperand(0) &&
8382         TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) {
8383       return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg);
8384     }
8385   }
8386 
8387   return SDValue();
8388 }
8389 
8390 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
8391 // idioms for rotate, and if the target supports rotation instructions, generate
8392 // a rot[lr]. This also matches funnel shift patterns, similar to rotation but
8393 // with different shifted sources.
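     // For example, for i32:
     //   (or (shl x, 8), (srl x, 24)) -> (rotl x, 8)
     //   (or (shl x, 8), (srl y, 24)) -> (fshl x, y, 8)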
8394 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
8395   EVT VT = LHS.getValueType();
8396 
8397   // The target must have at least one rotate/funnel flavor.
8398   // We still try to match rotate by constant pre-legalization.
8399   // TODO: Support pre-legalization funnel-shift by constant.
8400   bool HasROTL = hasOperation(ISD::ROTL, VT);
8401   bool HasROTR = hasOperation(ISD::ROTR, VT);
8402   bool HasFSHL = hasOperation(ISD::FSHL, VT);
8403   bool HasFSHR = hasOperation(ISD::FSHR, VT);
8404 
8405   // If the type is going to be promoted and the target has enabled custom
8406   // lowering for rotate, allow matching rotate by non-constants. Only allow
8407   // this for scalar types.
8408   if (VT.isScalarInteger() && TLI.getTypeAction(*DAG.getContext(), VT) ==
8409                                   TargetLowering::TypePromoteInteger) {
8410     HasROTL |= TLI.getOperationAction(ISD::ROTL, VT) == TargetLowering::Custom;
8411     HasROTR |= TLI.getOperationAction(ISD::ROTR, VT) == TargetLowering::Custom;
8412   }
8413 
8414   if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8415     return SDValue();
8416 
8417   // Check for truncated rotate.
8418   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
8419       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
8420     assert(LHS.getValueType() == RHS.getValueType());
8421     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
8422       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
8423     }
8424   }
8425 
8426   // Match "(X shl/srl V1) & V2" where V2 may not be present.
8427   SDValue LHSShift;   // The shift.
8428   SDValue LHSMask;    // AND value if any.
8429   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
8430 
8431   SDValue RHSShift;   // The shift.
8432   SDValue RHSMask;    // AND value if any.
8433   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
8434 
8435   // If neither side matched a rotate half, bail
8436   if (!LHSShift && !RHSShift)
8437     return SDValue();
8438 
8439   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
8440   // side of the rotate, so try to handle that here. In all cases we need to
8441   // pass the matched shift from the opposite side to compute the opcode and
8442   // needed shift amount to extract.  We still want to do this if both sides
8443   // matched a rotate half because one half may be a potential overshift that
8444   // can be broken down (i.e. if InstCombine merged two shl or srl ops into a
8445   // single one).
8446 
8447   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
8448   if (LHSShift)
8449     if (SDValue NewRHSShift =
8450             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
8451       RHSShift = NewRHSShift;
8452   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
8453   if (RHSShift)
8454     if (SDValue NewLHSShift =
8455             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
8456       LHSShift = NewLHSShift;
8457 
8458   // If a side is still missing, nothing else we can do.
8459   if (!RHSShift || !LHSShift)
8460     return SDValue();
8461 
8462   // At this point we've matched or extracted a shift op on each side.
8463 
8464   if (LHSShift.getOpcode() == RHSShift.getOpcode())
8465     return SDValue(); // Shifts must disagree.
8466 
8467   // Canonicalize shl to left side in a shl/srl pair.
8468   if (RHSShift.getOpcode() == ISD::SHL) {
8469     std::swap(LHS, RHS);
8470     std::swap(LHSShift, RHSShift);
8471     std::swap(LHSMask, RHSMask);
8472   }
8473 
8474   // Something has gone wrong; we've lost the shl/srl pair, so bail.
8475   if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL)
8476     return SDValue();
8477 
8478   unsigned EltSizeInBits = VT.getScalarSizeInBits();
8479   SDValue LHSShiftArg = LHSShift.getOperand(0);
8480   SDValue LHSShiftAmt = LHSShift.getOperand(1);
8481   SDValue RHSShiftArg = RHSShift.getOperand(0);
8482   SDValue RHSShiftAmt = RHSShift.getOperand(1);
8483 
8484   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
8485                                         ConstantSDNode *RHS) {
8486     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
8487   };
8488 
8489   auto ApplyMasks = [&](SDValue Res) {
8490     // If there is an AND of either shifted operand, apply it to the result.
8491     if (LHSMask.getNode() || RHSMask.getNode()) {
8492       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
8493       SDValue Mask = AllOnes;
8494 
8495       if (LHSMask.getNode()) {
8496         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
8497         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8498                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
8499       }
8500       if (RHSMask.getNode()) {
8501         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
8502         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
8503                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
8504       }
8505 
8506       Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask);
8507     }
8508 
8509     return Res;
8510   };
8511 
8512   // TODO: Support pre-legalization funnel-shift by constant.
8513   bool IsRotate = LHSShiftArg == RHSShiftArg;
8514   if (!IsRotate && !(HasFSHL || HasFSHR)) {
8515     if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() &&
8516         ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8517       // Look for a disguised rotate by constant.
8518       // The common shifted operand X may be hidden inside another 'or'.
8519       SDValue X, Y;
8520       auto matchOr = [&X, &Y](SDValue Or, SDValue CommonOp) {
8521         if (!Or.hasOneUse() || Or.getOpcode() != ISD::OR)
8522           return false;
8523         if (CommonOp == Or.getOperand(0)) {
8524           X = CommonOp;
8525           Y = Or.getOperand(1);
8526           return true;
8527         }
8528         if (CommonOp == Or.getOperand(1)) {
8529           X = CommonOp;
8530           Y = Or.getOperand(0);
8531           return true;
8532         }
8533         return false;
8534       };
8535 
8536       SDValue Res;
8537       if (matchOr(LHSShiftArg, RHSShiftArg)) {
8538         // (shl (X | Y), C1) | (srl X, C2) --> (rotl X, C1) | (shl Y, C1)
8539         SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8540         SDValue ShlY = DAG.getNode(ISD::SHL, DL, VT, Y, LHSShiftAmt);
8541         Res = DAG.getNode(ISD::OR, DL, VT, RotX, ShlY);
8542       } else if (matchOr(RHSShiftArg, LHSShiftArg)) {
8543         // (shl X, C1) | (srl (X | Y), C2) --> (rotl X, C1) | (srl Y, C2)
8544         SDValue RotX = DAG.getNode(ISD::ROTL, DL, VT, X, LHSShiftAmt);
8545         SDValue SrlY = DAG.getNode(ISD::SRL, DL, VT, Y, RHSShiftAmt);
8546         Res = DAG.getNode(ISD::OR, DL, VT, RotX, SrlY);
8547       } else {
8548         return SDValue();
8549       }
8550 
8551       return ApplyMasks(Res);
8552     }
8553 
8554     return SDValue(); // Requires funnel shift support.
8555   }
8556 
8557   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
8558   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
8559   // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1)
8560   // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2)
8561   // iff C1+C2 == EltSizeInBits
8562   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
8563     SDValue Res;
8564     if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) {
8565       bool UseROTL = !LegalOperations || HasROTL;
8566       Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
8567                         UseROTL ? LHSShiftAmt : RHSShiftAmt);
8568     } else {
8569       bool UseFSHL = !LegalOperations || HasFSHL;
8570       Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg,
8571                         RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt);
8572     }
8573 
8574     return ApplyMasks(Res);
8575   }
8576 
8577   // Even pre-legalization, we can't easily rotate/funnel-shift by a variable
8578   // shift.
8579   if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR)
8580     return SDValue();
8581 
8582   // If there is a mask here, and we have a variable shift, we can't be sure
8583   // that we're masking out the right stuff.
8584   if (LHSMask.getNode() || RHSMask.getNode())
8585     return SDValue();
8586 
8587   // If the shift amount is sign/zext/any-extended just peel it off.
8588   SDValue LExtOp0 = LHSShiftAmt;
8589   SDValue RExtOp0 = RHSShiftAmt;
8590   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8591        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8592        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8593        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
8594       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
8595        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
8596        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
8597        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
8598     LExtOp0 = LHSShiftAmt.getOperand(0);
8599     RExtOp0 = RHSShiftAmt.getOperand(0);
8600   }
8601 
8602   if (IsRotate && (HasROTL || HasROTR)) {
8603     SDValue TryL =
8604         MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0,
8605                           RExtOp0, HasROTL, ISD::ROTL, ISD::ROTR, DL);
8606     if (TryL)
8607       return TryL;
8608 
8609     SDValue TryR =
8610         MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0,
8611                           LExtOp0, HasROTR, ISD::ROTR, ISD::ROTL, DL);
8612     if (TryR)
8613       return TryR;
8614   }
8615 
8616   SDValue TryL =
8617       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt,
8618                         LExtOp0, RExtOp0, HasFSHL, ISD::FSHL, ISD::FSHR, DL);
8619   if (TryL)
8620     return TryL;
8621 
8622   SDValue TryR =
8623       MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
8624                         RExtOp0, LExtOp0, HasFSHR, ISD::FSHR, ISD::FSHL, DL);
8625   if (TryR)
8626     return TryR;
8627 
8628   return SDValue();
8629 }
8630 
8631 /// Recursively traverses the expression calculating the origin of the requested
8632 /// byte of the given value. Returns std::nullopt if the provider can't be
8633 /// calculated.
8634 ///
8635 /// For all the values except the root of the expression, we verify that the
8636 /// value has exactly one use and if not then return std::nullopt. This way if
8637 /// the origin of the byte is returned it's guaranteed that the values which
8638 /// contribute to the byte are not used outside of this expression.
8639 ///
8640 /// However, there is a special case when dealing with vector loads -- we allow
8641 /// more than one use if the load is a vector type.  Since the values that
8642 /// contribute to the byte ultimately come from the ExtractVectorElements of the
8643 /// Load, we don't care if the Load has uses other than ExtractVectorElements,
8644 /// because those operations are independent from the pattern to be combined.
8645 /// For vector loads, we simply care that the ByteProviders are adjacent
8646 /// positions of the same vector, and their index matches the byte that is being
8647 /// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex
8648 /// is the index used in an ExtractVectorElement, and \p StartingIndex is the
8649 /// byte position we are trying to provide for the LoadCombine. If these do
8650 /// not match, then we cannot combine the vector loads. \p Index uses the
8651 /// byte position we are trying to provide for and is matched against the
8652 /// shl and load size. The \p Index algorithm ensures the requested byte is
8653 /// provided for by the pattern, and the pattern does not over-provide bytes.
8654 ///
8655 ///
8656 /// The supported LoadCombine pattern for vector loads is as follows
8657 ///                              or
8658 ///                          /        \
8659 ///                         or        shl
8660 ///                       /     \      |
8661 ///                     or      shl   zext
8662 ///                   /    \     |     |
8663 ///                 shl   zext  zext  EVE*
8664 ///                  |     |     |     |
8665 ///                 zext  EVE*  EVE*  LOAD
8666 ///                  |     |     |
8667 ///                 EVE*  LOAD  LOAD
8668 ///                  |
8669 ///                 LOAD
8670 ///
8671 /// *ExtractVectorElement
8672 using SDByteProvider = ByteProvider<SDNode *>;
8673 
8674 static std::optional<SDByteProvider>
8675 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
8676                       std::optional<uint64_t> VectorIndex,
8677                       unsigned StartingIndex = 0) {
8678 
8679   // A typical i64-by-i8 pattern requires recursion up to a depth of 8 calls.
8680   if (Depth == 10)
8681     return std::nullopt;
8682 
8683   // Only allow multiple uses if the instruction is a vector load (in which
8684   // case we will use the load for every ExtractVectorElement)
8685   if (Depth && !Op.hasOneUse() &&
8686       (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector()))
8687     return std::nullopt;
8688 
8689   // Fail to combine if we have encountered anything but a LOAD after handling
8690   // an ExtractVectorElement.
8691   if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value())
8692     return std::nullopt;
8693 
8694   unsigned BitWidth = Op.getValueSizeInBits();
8695   if (BitWidth % 8 != 0)
8696     return std::nullopt;
8697   unsigned ByteWidth = BitWidth / 8;
8698   assert(Index < ByteWidth && "invalid index requested");
8699   (void) ByteWidth;
8700 
8701   switch (Op.getOpcode()) {
8702   case ISD::OR: {
8703     auto LHS =
8704         calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex);
8705     if (!LHS)
8706       return std::nullopt;
8707     auto RHS =
8708         calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex);
8709     if (!RHS)
8710       return std::nullopt;
8711 
8712     if (LHS->isConstantZero())
8713       return RHS;
8714     if (RHS->isConstantZero())
8715       return LHS;
8716     return std::nullopt;
8717   }
8718   case ISD::SHL: {
8719     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8720     if (!ShiftOp)
8721       return std::nullopt;
8722 
8723     uint64_t BitShift = ShiftOp->getZExtValue();
8724 
8725     if (BitShift % 8 != 0)
8726       return std::nullopt;
8727     uint64_t ByteShift = BitShift / 8;
8728 
8729     // If we are shifting by an amount greater than the index we are trying to
8730     // provide, then do not provide anything. Otherwise, reduce the index by
8731     // the number of bytes we shifted over.
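         // E.g. for (shl x, 8), ByteShift == 1: byte 0 of the result is a
         // known zero, and byte 2 of the result is byte 1 of x.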
8732     return Index < ByteShift
8733                ? SDByteProvider::getConstantZero()
8734                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
8735                                        Depth + 1, VectorIndex, Index);
8736   }
8737   case ISD::ANY_EXTEND:
8738   case ISD::SIGN_EXTEND:
8739   case ISD::ZERO_EXTEND: {
8740     SDValue NarrowOp = Op->getOperand(0);
8741     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8742     if (NarrowBitWidth % 8 != 0)
8743       return std::nullopt;
8744     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8745 
8746     if (Index >= NarrowByteWidth)
8747       return Op.getOpcode() == ISD::ZERO_EXTEND
8748                  ? std::optional<SDByteProvider>(
8749                        SDByteProvider::getConstantZero())
8750                  : std::nullopt;
8751     return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
8752                                  StartingIndex);
8753   }
8754   case ISD::BSWAP:
8755     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
8756                                  Depth + 1, VectorIndex, StartingIndex);
8757   case ISD::EXTRACT_VECTOR_ELT: {
8758     auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
8759     if (!OffsetOp)
8760       return std::nullopt;
8761 
8762     VectorIndex = OffsetOp->getZExtValue();
8763 
8764     SDValue NarrowOp = Op->getOperand(0);
8765     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
8766     if (NarrowBitWidth % 8 != 0)
8767       return std::nullopt;
8768     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8769 
8770     // Check to see if the position of the element in the vector corresponds
8771     // with the byte we are trying to provide for. In the case of a vector of
8772     // i8, this simply means the VectorIndex == StartingIndex. For non-i8 cases,
8773     // the element will provide a range of bytes. For example, if we have a
8774     // vector of i16s, each element provides two bytes (V[1] provides byte 2 and
8775     // 3).
8776     if (*VectorIndex * NarrowByteWidth > StartingIndex)
8777       return std::nullopt;
8778     if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex)
8779       return std::nullopt;
8780 
8781     return calculateByteProvider(Op->getOperand(0), Index, Depth + 1,
8782                                  VectorIndex, StartingIndex);
8783   }
8784   case ISD::LOAD: {
8785     auto L = cast<LoadSDNode>(Op.getNode());
8786     if (!L->isSimple() || L->isIndexed())
8787       return std::nullopt;
8788 
8789     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
8790     if (NarrowBitWidth % 8 != 0)
8791       return std::nullopt;
8792     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
8793 
8794     // If the width of the load does not reach the byte we are trying to provide
8795     // for and it is not a ZEXTLOAD, then the load does not provide for the byte
8796     // in question.
8797     if (Index >= NarrowByteWidth)
8798       return L->getExtensionType() == ISD::ZEXTLOAD
8799                  ? std::optional<SDByteProvider>(
8800                        SDByteProvider::getConstantZero())
8801                  : std::nullopt;
8802 
8803     unsigned BPVectorIndex = VectorIndex.value_or(0U);
8804     return SDByteProvider::getSrc(L, Index, BPVectorIndex);
8805   }
8806   }
8807 
8808   return std::nullopt;
8809 }
8810 
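     // Map the index of a byte within a value (0 == least significant byte) to
     // its offset in the BW-byte memory image for each endianness. E.g. for
     // BW == 4, byte 0 is at offset 0 little-endian but offset 3 big-endian.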
8811 static unsigned littleEndianByteAt(unsigned BW, unsigned i) {
8812   return i;
8813 }
8814 
8815 static unsigned bigEndianByteAt(unsigned BW, unsigned i) {
8816   return BW - i - 1;
8817 }
8818 
8819 // Check if the byte offsets we are looking at match either a big or little
8820 // endian value load. Return true for big endian, false for little endian,
8821 // and std::nullopt if the match failed.
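     // E.g. relative offsets {0, 1, 2, 3} match little endian (returns false)
     // and {3, 2, 1, 0} match big endian (returns true).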
8822 static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets,
8823                                        int64_t FirstOffset) {
8824   // Endianness can be decided only when there are at least 2 bytes.
8825   unsigned Width = ByteOffsets.size();
8826   if (Width < 2)
8827     return std::nullopt;
8828 
8829   bool BigEndian = true, LittleEndian = true;
8830   for (unsigned i = 0; i < Width; i++) {
8831     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
8832     LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i);
8833     BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i);
8834     if (!BigEndian && !LittleEndian)
8835       return std::nullopt;
8836   }
8837 
8838   assert((BigEndian != LittleEndian) && "It should be either big endian or "
8839                                         "little endian");
8840   return BigEndian;
8841 }
8842 
8843 static SDValue stripTruncAndExt(SDValue Value) {
8844   switch (Value.getOpcode()) {
8845   case ISD::TRUNCATE:
8846   case ISD::ZERO_EXTEND:
8847   case ISD::SIGN_EXTEND:
8848   case ISD::ANY_EXTEND:
8849     return stripTruncAndExt(Value.getOperand(0));
8850   }
8851   return Value;
8852 }
8853 
8854 /// Match a pattern where a wide type scalar value is stored by several narrow
8855 /// stores. Fold it into a single store or a BSWAP and a store if the target
8856 /// supports it.
8857 ///
8858 /// Assuming little endian target:
8859 ///  i8 *p = ...
8860 ///  i32 val = ...
8861 ///  p[0] = (val >> 0) & 0xFF;
8862 ///  p[1] = (val >> 8) & 0xFF;
8863 ///  p[2] = (val >> 16) & 0xFF;
8864 ///  p[3] = (val >> 24) & 0xFF;
8865 /// =>
8866 ///  *((i32)p) = val;
8867 ///
8868 ///  i8 *p = ...
8869 ///  i32 val = ...
8870 ///  p[0] = (val >> 24) & 0xFF;
8871 ///  p[1] = (val >> 16) & 0xFF;
8872 ///  p[2] = (val >> 8) & 0xFF;
8873 ///  p[3] = (val >> 0) & 0xFF;
8874 /// =>
8875 ///  *((i32)p) = BSWAP(val);
8876 SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
8877   // The matching looks for "store (trunc x)" patterns that appear early but are
8878   // likely to be replaced by truncating store nodes during combining.
8879   // TODO: If there is evidence that running this later would help, this
8880   //       limitation could be removed. Legality checks may need to be added
8881   //       for the created store and optional bswap/rotate.
8882   if (LegalOperations || OptLevel == CodeGenOptLevel::None)
8883     return SDValue();
8884 
8885   // We only handle merging simple stores of 1-4 bytes.
8886   // TODO: Allow unordered atomics when wider type is legal (see D66309)
8887   EVT MemVT = N->getMemoryVT();
8888   if (!(MemVT == MVT::i8 || MemVT == MVT::i16 || MemVT == MVT::i32) ||
8889       !N->isSimple() || N->isIndexed())
8890     return SDValue();
8891 
8892   // Collect all of the stores in the chain, up to the maximum store width (i64).
8893   SDValue Chain = N->getChain();
8894   SmallVector<StoreSDNode *, 8> Stores = {N};
8895   unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
8896   unsigned MaxWideNumBits = 64;
8897   unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
8898   while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
8899     // All stores must be the same size to ensure that we are writing all of the
8900     // bytes in the wide value.
8901     // This store should have exactly one use as a chain operand for another
8902     // store in the merging set. If there are other chain uses, then the
8903     // transform may not be safe because order of loads/stores outside of this
8904     // set may not be preserved.
8905     // TODO: We could allow multiple sizes by tracking each stored byte.
8906     if (Store->getMemoryVT() != MemVT || !Store->isSimple() ||
8907         Store->isIndexed() || !Store->hasOneUse())
8908       return SDValue();
8909     Stores.push_back(Store);
8910     Chain = Store->getChain();
8911     if (MaxStores < Stores.size())
8912       return SDValue();
8913   }
8914   // There is no reason to continue if we do not have at least a pair of stores.
8915   if (Stores.size() < 2)
8916     return SDValue();
8917 
8918   // Handle simple types only.
8919   LLVMContext &Context = *DAG.getContext();
8920   unsigned NumStores = Stores.size();
8921   unsigned WideNumBits = NumStores * NarrowNumBits;
8922   EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
8923   if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
8924     return SDValue();
8925 
8926   // Check if all bytes of the source value that we are looking at are stored
8927   // to the same base address. Collect offsets from Base address into OffsetMap.
8928   SDValue SourceValue;
8929   SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX);
8930   int64_t FirstOffset = INT64_MAX;
8931   StoreSDNode *FirstStore = nullptr;
8932   std::optional<BaseIndexOffset> Base;
8933   for (auto *Store : Stores) {
8934     // Each store holds a different part of the combined wide value. A truncate
8935     // is required to get the partial value.
8936     SDValue Trunc = Store->getValue();
8937     if (Trunc.getOpcode() != ISD::TRUNCATE)
8938       return SDValue();
8939     // Other than the first/last part, a shift operation is required to get the
8940     // offset.
8941     int64_t Offset = 0;
8942     SDValue WideVal = Trunc.getOperand(0);
8943     if ((WideVal.getOpcode() == ISD::SRL || WideVal.getOpcode() == ISD::SRA) &&
8944         isa<ConstantSDNode>(WideVal.getOperand(1))) {
8945       // The shift amount must be a constant multiple of the narrow type.
8946       // It is translated to the offset address in the wide source value "y".
8947       //
8948       // x = srl y, ShiftAmtC
8949       // i8 z = trunc x
8950       // store z, ...
8951       uint64_t ShiftAmtC = WideVal.getConstantOperandVal(1);
8952       if (ShiftAmtC % NarrowNumBits != 0)
8953         return SDValue();
8954 
8955       Offset = ShiftAmtC / NarrowNumBits;
8956       WideVal = WideVal.getOperand(0);
8957     }
8958 
8959     // Stores must share the same source value with different offsets.
8960     // Truncate and extends should be stripped to get the single source value.
8961     if (!SourceValue)
8962       SourceValue = WideVal;
8963     else if (stripTruncAndExt(SourceValue) != stripTruncAndExt(WideVal))
8964       return SDValue();
8965     else if (SourceValue.getValueType() != WideVT) {
8966       if (WideVal.getValueType() == WideVT ||
8967           WideVal.getScalarValueSizeInBits() >
8968               SourceValue.getScalarValueSizeInBits())
8969         SourceValue = WideVal;
8970       // Give up if the source value type is smaller than the store size.
8971       if (SourceValue.getScalarValueSizeInBits() < WideVT.getScalarSizeInBits())
8972         return SDValue();
8973     }
8974 
8975     // Stores must share the same base address.
8976     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
8977     int64_t ByteOffsetFromBase = 0;
8978     if (!Base)
8979       Base = Ptr;
8980     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
8981       return SDValue();
8982 
8983     // Remember the first store.
8984     if (ByteOffsetFromBase < FirstOffset) {
8985       FirstStore = Store;
8986       FirstOffset = ByteOffsetFromBase;
8987     }
8988     // Map the offset in the store and the offset in the combined value, and
8989     // early return if it has been set before.
8990     if (Offset < 0 || Offset >= NumStores || OffsetMap[Offset] != INT64_MAX)
8991       return SDValue();
8992     OffsetMap[Offset] = ByteOffsetFromBase;
8993   }
8994 
8995   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
8996   assert(FirstStore && "First store must be set");
8997 
8998   // Check that a store of the wide type is both allowed and fast on the target
8999   const DataLayout &Layout = DAG.getDataLayout();
9000   unsigned Fast = 0;
9001   bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT,
9002                                         *FirstStore->getMemOperand(), &Fast);
9003   if (!Allowed || !Fast)
9004     return SDValue();
9005 
9006   // Check if the pieces of the value are going to the expected places in memory
9007   // to merge the stores.
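       // E.g. four i8 pieces of an i32 on a little-endian target must map to
       // offsets {FirstOffset, FirstOffset + 1, FirstOffset + 2,
       // FirstOffset + 3}.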
9008   auto checkOffsets = [&](bool MatchLittleEndian) {
9009     if (MatchLittleEndian) {
9010       for (unsigned i = 0; i != NumStores; ++i)
9011         if (OffsetMap[i] != i * (NarrowNumBits / 8) + FirstOffset)
9012           return false;
9013     } else { // MatchBigEndian by reversing loop counter.
9014       for (unsigned i = 0, j = NumStores - 1; i != NumStores; ++i, --j)
9015         if (OffsetMap[j] != i * (NarrowNumBits / 8) + FirstOffset)
9016           return false;
9017     }
9018     return true;
9019   };
9020 
9021   // Check if the offsets line up for the native data layout of this target.
9022   bool NeedBswap = false;
9023   bool NeedRotate = false;
9024   if (!checkOffsets(Layout.isLittleEndian())) {
9025     // Special-case: check if byte offsets line up for the opposite endian.
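         // A fully byte-reversed value needs a BSWAP, while exactly two
         // swapped halves (e.g. two i16 pieces of an i32) only need a rotate
         // by half the width.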
9026     if (NarrowNumBits == 8 && checkOffsets(Layout.isBigEndian()))
9027       NeedBswap = true;
9028     else if (NumStores == 2 && checkOffsets(Layout.isBigEndian()))
9029       NeedRotate = true;
9030     else
9031       return SDValue();
9032   }
9033 
9034   SDLoc DL(N);
9035   if (WideVT != SourceValue.getValueType()) {
9036     assert(SourceValue.getValueType().getScalarSizeInBits() > WideNumBits &&
9037            "Unexpected store value to merge");
9038     SourceValue = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SourceValue);
9039   }
9040 
9041   // Before legalization we can introduce illegal bswaps/rotates which will
9042   // later be converted to an explicit bswap sequence. This way we end up with a
9043   // single store and byte shuffling instead of several stores and byte shuffling.
9044   if (NeedBswap) {
9045     SourceValue = DAG.getNode(ISD::BSWAP, DL, WideVT, SourceValue);
9046   } else if (NeedRotate) {
9047     assert(WideNumBits % 2 == 0 && "Unexpected type for rotate");
9048     SDValue RotAmt = DAG.getConstant(WideNumBits / 2, DL, WideVT);
9049     SourceValue = DAG.getNode(ISD::ROTR, DL, WideVT, SourceValue, RotAmt);
9050   }
9051 
9052   SDValue NewStore =
9053       DAG.getStore(Chain, DL, SourceValue, FirstStore->getBasePtr(),
9054                    FirstStore->getPointerInfo(), FirstStore->getAlign());
9055 
9056   // Rely on other DAG combine rules to remove the other individual stores.
9057   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
9058   return NewStore;
9059 }
9060 
9061 /// Match a pattern where a wide type scalar value is loaded by several narrow
9062 /// loads and combined by shifts and ors. Fold it into a single load or a load
9063 /// and a BSWAP if the target supports it.
9064 ///
9065 /// Assuming little endian target:
9066 ///  i8 *a = ...
9067 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
9068 /// =>
9069 ///  i32 val = *((i32)a)
9070 ///
9071 ///  i8 *a = ...
9072 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
9073 /// =>
9074 ///  i32 val = BSWAP(*((i32)a))
9075 ///
9076 /// TODO: This rule matches complex patterns with OR node roots and doesn't
9077 /// interact well with the worklist mechanism. When a part of the pattern is
9078 /// updated (e.g. one of the loads) its direct users are put into the worklist,
9079 /// but the root node of the pattern which triggers the load combine is not
9080 /// necessarily a direct user of the changed node. For example, once the address
9081 /// of t28 load is reassociated load combine won't be triggered:
9082 ///             t25: i32 = add t4, Constant:i32<2>
9083 ///           t26: i64 = sign_extend t25
9084 ///        t27: i64 = add t2, t26
9085 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
9086 ///     t29: i32 = zero_extend t28
9087 ///   t32: i32 = shl t29, Constant:i8<8>
9088 /// t33: i32 = or t23, t32
9089 /// As a possible fix visitLoad can check if the load can be a part of a load
9090 /// combine pattern and add corresponding OR roots to the worklist.
9091 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
9092   assert(N->getOpcode() == ISD::OR &&
9093          "Can only match load combining against OR nodes");
9094 
9095   // Handles simple types only
9096   EVT VT = N->getValueType(0);
9097   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
9098     return SDValue();
9099   unsigned ByteWidth = VT.getSizeInBits() / 8;
9100 
9101   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
9102   auto MemoryByteOffset = [&](SDByteProvider P) {
9103     assert(P.hasSrc() && "Must be a memory byte provider");
9104     auto *Load = cast<LoadSDNode>(P.Src.value());
9105 
9106     unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
9107 
9108     assert(LoadBitWidth % 8 == 0 &&
9109            "can only analyze providers for individual bytes, not bits");
9110     unsigned LoadByteWidth = LoadBitWidth / 8;
9111     return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
9112                              : littleEndianByteAt(LoadByteWidth, P.DestOffset);
9113   };
9114 
9115   std::optional<BaseIndexOffset> Base;
9116   SDValue Chain;
9117 
9118   SmallPtrSet<LoadSDNode *, 8> Loads;
9119   std::optional<SDByteProvider> FirstByteProvider;
9120   int64_t FirstOffset = INT64_MAX;
9121 
9122   // Check if all the bytes of the OR we are looking at are loaded from the same
9123   // base address. Collect byte offsets from the Base address in ByteOffsets.
9124   SmallVector<int64_t, 8> ByteOffsets(ByteWidth);
9125   unsigned ZeroExtendedBytes = 0;
9126   for (int i = ByteWidth - 1; i >= 0; --i) {
9127     auto P =
9128         calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt,
9129                               /*StartingIndex*/ i);
9130     if (!P)
9131       return SDValue();
9132 
9133     if (P->isConstantZero()) {
9134       // It's OK for the N most significant bytes to be 0; we can just
9135       // zero-extend the load.
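           // Walking from the most significant byte down, this check enforces
           // that the zero bytes form one contiguous run at the top.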
9136       if (++ZeroExtendedBytes != (ByteWidth - static_cast<unsigned>(i)))
9137         return SDValue();
9138       continue;
9139     }
9140     assert(P->hasSrc() && "provenance should either be memory or zero");
9141     auto *L = cast<LoadSDNode>(P->Src.value());
9142 
9143     // All loads must share the same chain
9144     SDValue LChain = L->getChain();
9145     if (!Chain)
9146       Chain = LChain;
9147     else if (Chain != LChain)
9148       return SDValue();
9149 
9150     // Loads must share the same base address
9151     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
9152     int64_t ByteOffsetFromBase = 0;
9153 
9154     // For vector loads, the expected load combine pattern will have an
9155     // ExtractElement for each index in the vector. While each of these
9156     // ExtractElements will be accessing the same base address as determined
9157     // by the load instruction, the actual bytes they interact with will differ
9158     // due to different ExtractElement indices. To accurately determine the
9159     // byte position of an ExtractElement, we offset the base load ptr with
9160     // the index multiplied by the byte size of each element in the vector.
9161     if (L->getMemoryVT().isVector()) {
9162       unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
9163       if (LoadWidthInBit % 8 != 0)
9164         return SDValue();
9165       unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
9166       Ptr.addToOffset(ByteOffsetFromVector);
9167     }
9168 
9169     if (!Base)
9170       Base = Ptr;
9172     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
9173       return SDValue();
9174 
9175     // Calculate the offset of the current byte from the base address
9176     ByteOffsetFromBase += MemoryByteOffset(*P);
9177     ByteOffsets[i] = ByteOffsetFromBase;
9178 
9179     // Remember the first byte load
9180     if (ByteOffsetFromBase < FirstOffset) {
9181       FirstByteProvider = P;
9182       FirstOffset = ByteOffsetFromBase;
9183     }
9184 
9185     Loads.insert(L);
9186   }
9187 
9188   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
9189          "memory, so there must be at least one load which produces the value");
9190   assert(Base && "Base address of the accessed memory location must be set");
9191   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
9192 
9193   bool NeedsZext = ZeroExtendedBytes > 0;
9194 
9195   EVT MemVT =
9196       EVT::getIntegerVT(*DAG.getContext(), (ByteWidth - ZeroExtendedBytes) * 8);
9197 
9198   if (!MemVT.isSimple())
9199     return SDValue();
9200 
9201   // Before legalization we can introduce overly wide illegal loads which will
9202   // later be split into legal sized loads. This enables us to combine i64-by-i8
9203   // load patterns into a couple of i32 loads on 32-bit targets.
9204   if (LegalOperations &&
9205       !TLI.isOperationLegal(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD,
9206                             MemVT))
9207     return SDValue();
9208 
9209   // Check if the bytes of the OR we are looking at match with either big or
9210   // little endian value load
9211   std::optional<bool> IsBigEndian = isBigEndian(
9212       ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset);
9213   if (!IsBigEndian)
9214     return SDValue();
9215 
9216   assert(FirstByteProvider && "must be set");
9217 
9218   // Ensure that the first byte is loaded from offset zero of the first load,
9219   // so that the combined value can be loaded from the first load's address.
9220   if (MemoryByteOffset(*FirstByteProvider) != 0)
9221     return SDValue();
9222   auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
9223 
9224   // The node we are looking at matches with the pattern, check if we can
9225   // replace it with a single (possibly zero-extended) load and bswap + shift if
9226   // needed.
9227 
9228   // If the load needs a byte swap, check if the target supports it.
9229   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
9230 
9231   // Before legalize we can introduce illegal bswaps which will be later
9232   // converted to an explicit bswap sequence. This way we end up with a single
9233   // load and byte shuffling instead of several loads and byte shuffling.
9234   // We do not introduce illegal bswaps when zero-extending as this tends to
9235   // introduce too many arithmetic instructions.
9236   if (NeedsBswap && (LegalOperations || NeedsZext) &&
9237       !TLI.isOperationLegal(ISD::BSWAP, VT))
9238     return SDValue();
9239 
9240   // If we need to bswap and zero extend, we have to insert a shift. Check that
9241   // it is legal.
9242   if (NeedsBswap && NeedsZext && LegalOperations &&
9243       !TLI.isOperationLegal(ISD::SHL, VT))
9244     return SDValue();
9245 
9246   // Check that a load of the wide type is both allowed and fast on the target
9247   unsigned Fast = 0;
9248   bool Allowed =
9249       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
9250                              *FirstLoad->getMemOperand(), &Fast);
9251   if (!Allowed || !Fast)
9252     return SDValue();
9253 
9254   SDValue NewLoad =
9255       DAG.getExtLoad(NeedsZext ? ISD::ZEXTLOAD : ISD::NON_EXTLOAD, SDLoc(N), VT,
9256                      Chain, FirstLoad->getBasePtr(),
9257                      FirstLoad->getPointerInfo(), MemVT, FirstLoad->getAlign());
9258 
9259   // Transfer chain users from old loads to the new load.
9260   for (LoadSDNode *L : Loads)
9261     DAG.makeEquivalentMemoryOrdering(L, NewLoad);
9262 
9263   if (!NeedsBswap)
9264     return NewLoad;
9265 
9266   SDValue ShiftedLoad =
9267       NeedsZext
9268           ? DAG.getNode(ISD::SHL, SDLoc(N), VT, NewLoad,
9269                         DAG.getShiftAmountConstant(ZeroExtendedBytes * 8, VT,
9270                                                    SDLoc(N), LegalOperations))
9271           : NewLoad;
9272   return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, ShiftedLoad);
9273 }
9274 
9275 // If the target has andn, bsl, or a similar bit-select instruction,
9276 // we want to unfold masked merge, with canonical pattern of:
9277 //   |        A  |  |B|
9278 //   ((x ^ y) & m) ^ y
9279 //    |  D  |
9280 // Into:
9281 //   (x & m) | (y & ~m)
9282 // If y is a constant, m is not a 'not', and the 'andn' does not work with
9283 // immediates, we unfold into a different pattern:
9284 //   ~(~x & m) & (m | y)
9285 // If x is a constant, m is a 'not', and the 'andn' does not work with
9286 // immediates, we unfold into a different pattern:
9287 //   (x | ~m) & ~(~m & ~y)
9288 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
9289 //       the very least that breaks andnpd / andnps patterns, and because those
//       patterns are simplified in IR and shouldn't be created in the DAG.
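// A concrete example of the bit-select semantics (values for illustration):
//   x = 0b1100, y = 0b0011, m = 0b1010
//   ((x ^ y) & m) ^ y  = (0b1111 & 0b1010) ^ 0b0011 = 0b1001
//   (x & m) | (y & ~m) = 0b1000 | 0b0001            = 0b1001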
9291 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
9292   assert(N->getOpcode() == ISD::XOR);
9293 
9294   // Don't touch 'not' (i.e. where y = -1).
9295   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
9296     return SDValue();
9297 
9298   EVT VT = N->getValueType(0);
9299 
9300   // There are 3 commutable operators in the pattern,
9301   // so we have to deal with 8 possible variants of the basic pattern.
9302   SDValue X, Y, M;
9303   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
9304     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
9305       return false;
9306     SDValue Xor = And.getOperand(XorIdx);
9307     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
9308       return false;
9309     SDValue Xor0 = Xor.getOperand(0);
9310     SDValue Xor1 = Xor.getOperand(1);
9311     // Don't touch 'not' (i.e. where y = -1).
9312     if (isAllOnesOrAllOnesSplat(Xor1))
9313       return false;
9314     if (Other == Xor0)
9315       std::swap(Xor0, Xor1);
9316     if (Other != Xor1)
9317       return false;
9318     X = Xor0;
9319     Y = Xor1;
9320     M = And.getOperand(XorIdx ? 0 : 1);
9321     return true;
9322   };
9323 
9324   SDValue N0 = N->getOperand(0);
9325   SDValue N1 = N->getOperand(1);
9326   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
9327       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
9328     return SDValue();
9329 
  // Don't do anything if the mask is constant. This should not be reachable:
  // InstCombine should have already unfolded this pattern, and DAGCombiner
  // probably shouldn't produce it either.
9333   if (isa<ConstantSDNode>(M.getNode()))
9334     return SDValue();
9335 
9336   // We can transform if the target has AndNot
9337   if (!TLI.hasAndNot(M))
9338     return SDValue();
9339 
9340   SDLoc DL(N);
9341 
  // If Y is a constant, check that 'andn' works with immediates, unless M is
  // a bitwise not, which would already allow ANDN to be used.
9344   if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) {
9345     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
9346     // If not, we need to do a bit more work to make sure andn is still used.
9347     SDValue NotX = DAG.getNOT(DL, X, VT);
9348     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
9349     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
9350     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
9351     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
9352   }
9353 
9354   // If X is a constant and M is a bitwise not, check that 'andn' works with
9355   // immediates.
9356   if (!TLI.hasAndNot(X) && isBitwiseNot(M)) {
9357     assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable.");
9358     // If not, we need to do a bit more work to make sure andn is still used.
9359     SDValue NotM = M.getOperand(0);
9360     SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM);
9361     SDValue NotY = DAG.getNOT(DL, Y, VT);
9362     SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY);
9363     SDValue NotRHS = DAG.getNOT(DL, RHS, VT);
9364     return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS);
9365   }
9366 
9367   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
9368   SDValue NotM = DAG.getNOT(DL, M, VT);
9369   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
9370 
9371   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
9372 }
9373 
9374 SDValue DAGCombiner::visitXOR(SDNode *N) {
9375   SDValue N0 = N->getOperand(0);
9376   SDValue N1 = N->getOperand(1);
9377   EVT VT = N0.getValueType();
9378   SDLoc DL(N);
9379 
9380   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
9381   if (N0.isUndef() && N1.isUndef())
9382     return DAG.getConstant(0, DL, VT);
9383 
9384   // fold (xor x, undef) -> undef
9385   if (N0.isUndef())
9386     return N0;
9387   if (N1.isUndef())
9388     return N1;
9389 
9390   // fold (xor c1, c2) -> c1^c2
9391   if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1}))
9392     return C;
9393 
9394   // canonicalize constant to RHS
9395   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
9396       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
9397     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
9398 
9399   // fold vector ops
9400   if (VT.isVector()) {
9401     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
9402       return FoldedVOp;
9403 
9404     // fold (xor x, 0) -> x, vector edition
9405     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
9406       return N0;
9407   }
9408 
9409   // fold (xor x, 0) -> x
9410   if (isNullConstant(N1))
9411     return N0;
9412 
9413   if (SDValue NewSel = foldBinOpIntoSelect(N))
9414     return NewSel;
9415 
9416   // reassociate xor
9417   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
9418     return RXOR;
9419 
9420   // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
9421   if (SDValue SD =
9422           reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
9423     return SD;
9424 
9425   // fold (a^b) -> (a|b) iff a and b share no bits.
9426   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
9427       DAG.haveNoCommonBitsSet(N0, N1))
9428     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
9429 
9430   // look for 'add-like' folds:
9431   // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE)
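  // For example, for i8: x ^ 0x80 and x + 0x80 both simply flip the sign bit,
  // since the carry out of bit 7 is discarded.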
9432   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
9433       isMinSignedConstant(N1))
9434     if (SDValue Combined = visitADDLike(N))
9435       return Combined;
9436 
9437   // fold !(x cc y) -> (x !cc y)
9438   unsigned N0Opcode = N0.getOpcode();
9439   SDValue LHS, RHS, CC;
9440   if (TLI.isConstTrueVal(N1) &&
9441       isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
9442     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
9443                                                LHS.getValueType());
9444     if (!LegalOperations ||
9445         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
9446       switch (N0Opcode) {
9447       default:
9448         llvm_unreachable("Unhandled SetCC Equivalent!");
9449       case ISD::SETCC:
9450         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
9451       case ISD::SELECT_CC:
9452         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
9453                                N0.getOperand(3), NotCC);
9454       case ISD::STRICT_FSETCC:
9455       case ISD::STRICT_FSETCCS: {
9456         if (N0.hasOneUse()) {
9457           // FIXME Can we handle multiple uses? Could we token factor the chain
9458           // results from the new/old setcc?
9459           SDValue SetCC =
9460               DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC,
9461                            N0.getOperand(0), N0Opcode == ISD::STRICT_FSETCCS);
9462           CombineTo(N, SetCC);
9463           DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1));
9464           recursivelyDeleteUnusedNodes(N0.getNode());
9465           return SDValue(N, 0); // Return N so it doesn't get rechecked!
9466         }
9467         break;
9468       }
9469       }
9470     }
9471   }
9472 
9473   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
9474   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9475       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
9476     SDValue V = N0.getOperand(0);
9477     SDLoc DL0(N0);
9478     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
9479                     DAG.getConstant(1, DL0, V.getValueType()));
9480     AddToWorklist(V.getNode());
9481     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
9482   }
9483 
9484   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
9485   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
9486       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9487     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9488     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
9489       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9490       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9491       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9492       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9493       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9494     }
9495   }
9496   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
9497   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
9498       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
9499     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
9500     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
9501       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
9502       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
9503       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
9504       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
9505       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
9506     }
9507   }
9508 
  // fold (not (neg X)) -> (add X, -1)
9510   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
9511   // Y is a constant or the subtract has a single use.
9512   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
9513       isNullConstant(N0.getOperand(0))) {
9514     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
9515                        DAG.getAllOnesConstant(DL, VT));
9516   }
9517 
9518   // fold (not (add X, -1)) -> (neg X)
9519   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD &&
9520       isAllOnesOrAllOnesSplat(N0.getOperand(1))) {
9521     return DAG.getNegative(N0.getOperand(0), DL, VT);
9522   }
9523 
9524   // fold (xor (and x, y), y) -> (and (not x), y)
9525   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
9526     SDValue X = N0.getOperand(0);
9527     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
9528     AddToWorklist(NotX.getNode());
9529     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
9530   }
9531 
9532   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
9533   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
9534     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
9535     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
9536     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
9537       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
9538       SDValue S0 = S.getOperand(0);
9539       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0))
9540         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
9541           if (C->getAPIntValue() == (VT.getScalarSizeInBits() - 1))
9542             return DAG.getNode(ISD::ABS, DL, VT, S0);
9543     }
9544   }
9545 
9546   // fold (xor x, x) -> 0
9547   if (N0 == N1)
9548     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
9549 
9550   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
9551   // Here is a concrete example of this equivalence:
9552   // i16   x ==  14
9553   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
9554   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
9555   //
9556   // =>
9557   //
9558   // i16     ~1      == 0b1111111111111110
9559   // i16 rol(~1, 14) == 0b1011111111111111
9560   //
9561   // Some additional tips to help conceptualize this transform:
9562   // - Try to see the operation as placing a single zero in a value of all ones.
9563   // - There exists no value for x which would allow the result to contain zero.
9564   // - Values of x larger than the bitwidth are undefined and do not require a
9565   //   consistent result.
  // - Pushing the zero left requires shifting one-bits in from the right.
9567   // A rotate left of ~1 is a nice way of achieving the desired result.
9568   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
9569       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
9570     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
9571                        N0.getOperand(1));
9572   }
9573 
9574   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
9575   if (N0Opcode == N1.getOpcode())
9576     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
9577       return V;
9578 
9579   if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG))
9580     return R;
9581   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
9582     return R;
9583   if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG))
9584     return R;
9585 
9586   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
9587   if (SDValue MM = unfoldMaskedMerge(N))
9588     return MM;
9589 
9590   // Simplify the expression using non-local knowledge.
9591   if (SimplifyDemandedBits(SDValue(N, 0)))
9592     return SDValue(N, 0);
9593 
9594   if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
9595     return Combined;
9596 
9597   return SDValue();
9598 }
9599 
9600 /// If we have a shift-by-constant of a bitwise logic op that itself has a
9601 /// shift-by-constant operand with identical opcode, we may be able to convert
9602 /// that into 2 independent shifts followed by the logic op. This is a
9603 /// throughput improvement.
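/// For example (assuming one-use operands):
///   srl (and (srl X, 2), Y), 3 --> and (srl X, 5), (srl Y, 3)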
9604 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
9605   // Match a one-use bitwise logic op.
9606   SDValue LogicOp = Shift->getOperand(0);
9607   if (!LogicOp.hasOneUse())
9608     return SDValue();
9609 
9610   unsigned LogicOpcode = LogicOp.getOpcode();
9611   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
9612       LogicOpcode != ISD::XOR)
9613     return SDValue();
9614 
9615   // Find a matching one-use shift by constant.
9616   unsigned ShiftOpcode = Shift->getOpcode();
9617   SDValue C1 = Shift->getOperand(1);
9618   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
9619   assert(C1Node && "Expected a shift with constant operand");
9620   const APInt &C1Val = C1Node->getAPIntValue();
9621   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
9622                              const APInt *&ShiftAmtVal) {
9623     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
9624       return false;
9625 
9626     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
9627     if (!ShiftCNode)
9628       return false;
9629 
9630     // Capture the shifted operand and shift amount value.
9631     ShiftOp = V.getOperand(0);
9632     ShiftAmtVal = &ShiftCNode->getAPIntValue();
9633 
9634     // Shift amount types do not have to match their operand type, so check that
9635     // the constants are the same width.
9636     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
9637       return false;
9638 
9639     // The fold is not valid if the sum of the shift values exceeds bitwidth.
9640     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
9641       return false;
9642 
9643     return true;
9644   };
9645 
9646   // Logic ops are commutative, so check each operand for a match.
9647   SDValue X, Y;
9648   const APInt *C0Val;
9649   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
9650     Y = LogicOp.getOperand(1);
9651   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
9652     Y = LogicOp.getOperand(0);
9653   else
9654     return SDValue();
9655 
9656   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
9657   SDLoc DL(Shift);
9658   EVT VT = Shift->getValueType(0);
9659   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
9660   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
9661   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
9662   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
9663   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
9664 }
9665 
9666 /// Handle transforms common to the three shifts, when the shift amount is a
9667 /// constant.
9668 /// We are looking for: (shift being one of shl/sra/srl)
9669 ///   shift (binop X, C0), C1
9670 /// And want to transform into:
9671 ///   binop (shift X, C1), (shift C0, C1)
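/// For example: shl (or X, 0xF0), 4 --> or (shl X, 4), 0xF00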
9672 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
9673   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
9674 
9675   // Do not turn a 'not' into a regular xor.
9676   if (isBitwiseNot(N->getOperand(0)))
9677     return SDValue();
9678 
9679   // The inner binop must be one-use, since we want to replace it.
9680   SDValue LHS = N->getOperand(0);
9681   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
9682     return SDValue();
9683 
9684   // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)).
9685   if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
9686     return R;
9687 
9688   // We want to pull some binops through shifts, so that we have (and (shift))
9689   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
9690   // thing happens with address calculations, so it's important to canonicalize
9691   // it.
9692   switch (LHS.getOpcode()) {
9693   default:
9694     return SDValue();
9695   case ISD::OR:
9696   case ISD::XOR:
9697   case ISD::AND:
9698     break;
9699   case ISD::ADD:
9700     if (N->getOpcode() != ISD::SHL)
9701       return SDValue(); // only shl(add) not sr[al](add).
9702     break;
9703   }
9704 
  // FIXME: disable this unless the input to the binop is a shift by a constant
  // or is copy/select. Enable this in other cases once we can determine that
  // it is exactly profitable.
9708   SDValue BinOpLHSVal = LHS.getOperand(0);
9709   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
9710                             BinOpLHSVal.getOpcode() == ISD::SRA ||
9711                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
9712                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
9713   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
9714                         BinOpLHSVal.getOpcode() == ISD::SELECT;
9715 
9716   if (!IsShiftByConstant && !IsCopyOrSelect)
9717     return SDValue();
9718 
9719   if (IsCopyOrSelect && N->hasOneUse())
9720     return SDValue();
9721 
9722   // Attempt to fold the constants, shifting the binop RHS by the shift amount.
9723   SDLoc DL(N);
9724   EVT VT = N->getValueType(0);
9725   if (SDValue NewRHS = DAG.FoldConstantArithmetic(
9726           N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) {
9727     SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
9728                                    N->getOperand(1));
9729     return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
9730   }
9731 
9732   return SDValue();
9733 }
9734 
9735 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
9736   assert(N->getOpcode() == ISD::TRUNCATE);
9737   assert(N->getOperand(0).getOpcode() == ISD::AND);
9738 
9739   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
9740   EVT TruncVT = N->getValueType(0);
9741   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
9742       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
9743     SDValue N01 = N->getOperand(0).getOperand(1);
9744     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
9745       SDLoc DL(N);
9746       SDValue N00 = N->getOperand(0).getOperand(0);
9747       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
9748       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
9749       AddToWorklist(Trunc00.getNode());
9750       AddToWorklist(Trunc01.getNode());
9751       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
9752     }
9753   }
9754 
9755   return SDValue();
9756 }
9757 
9758 SDValue DAGCombiner::visitRotate(SDNode *N) {
9759   SDLoc dl(N);
9760   SDValue N0 = N->getOperand(0);
9761   SDValue N1 = N->getOperand(1);
9762   EVT VT = N->getValueType(0);
9763   unsigned Bitsize = VT.getScalarSizeInBits();
9764 
9765   // fold (rot x, 0) -> x
9766   if (isNullOrNullSplat(N1))
9767     return N0;
9768 
9769   // fold (rot x, c) -> x iff (c % BitSize) == 0
9770   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
9771     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
9772     if (DAG.MaskedValueIsZero(N1, ModuloMask))
9773       return N0;
9774   }
9775 
9776   // fold (rot x, c) -> (rot x, c % BitSize)
9777   bool OutOfRange = false;
9778   auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) {
9779     OutOfRange |= C->getAPIntValue().uge(Bitsize);
9780     return true;
9781   };
9782   if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) {
9783     EVT AmtVT = N1.getValueType();
9784     SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT);
9785     if (SDValue Amt =
9786             DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits}))
9787       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
9788   }
9789 
9790   // rot i16 X, 8 --> bswap X
9791   auto *RotAmtC = isConstOrConstSplat(N1);
9792   if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
9793       VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
9794     return DAG.getNode(ISD::BSWAP, dl, VT, N0);
9795 
9796   // Simplify the operands using demanded-bits information.
9797   if (SimplifyDemandedBits(SDValue(N, 0)))
9798     return SDValue(N, 0);
9799 
9800   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
9801   if (N1.getOpcode() == ISD::TRUNCATE &&
9802       N1.getOperand(0).getOpcode() == ISD::AND) {
9803     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9804       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
9805   }
9806 
9807   unsigned NextOp = N0.getOpcode();
9808 
9809   // fold (rot* (rot* x, c2), c1)
9810   //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
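  // For example, for i8: rotl (rotr X, 2), 5 --> rotl X, (5 - 2 + 8) % 8
  //                                          --> rotl X, 3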
9811   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
9812     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
9813     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
9814     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
9815       EVT ShiftVT = C1->getValueType(0);
9816       bool SameSide = (N->getOpcode() == NextOp);
9817       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
9818       SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
9819       SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9820                                                  {N1, BitsizeC});
9821       SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT,
9822                                                  {N0.getOperand(1), BitsizeC});
9823       if (Norm1 && Norm2)
9824         if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
9825                 CombineOp, dl, ShiftVT, {Norm1, Norm2})) {
9826           CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT,
9827                                                      {CombinedShift, BitsizeC});
9828           SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
9829               ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC});
9830           return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
9831                              CombinedShiftNorm);
9832         }
9833     }
9834   }
9835   return SDValue();
9836 }
9837 
9838 SDValue DAGCombiner::visitSHL(SDNode *N) {
9839   SDValue N0 = N->getOperand(0);
9840   SDValue N1 = N->getOperand(1);
9841   if (SDValue V = DAG.simplifyShift(N0, N1))
9842     return V;
9843 
9844   EVT VT = N0.getValueType();
9845   EVT ShiftVT = N1.getValueType();
9846   unsigned OpSizeInBits = VT.getScalarSizeInBits();
9847 
9848   // fold (shl c1, c2) -> c1<<c2
9849   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1}))
9850     return C;
9851 
9852   // fold vector ops
9853   if (VT.isVector()) {
9854     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
9855       return FoldedVOp;
9856 
9857     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
9858     // If setcc produces all-one true value then:
9859     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
9860     if (N1CV && N1CV->isConstant()) {
9861       if (N0.getOpcode() == ISD::AND) {
9862         SDValue N00 = N0->getOperand(0);
9863         SDValue N01 = N0->getOperand(1);
9864         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
9865 
9866         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
9867             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
9868                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9869           if (SDValue C =
9870                   DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1}))
9871             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
9872         }
9873       }
9874     }
9875   }
9876 
9877   if (SDValue NewSel = foldBinOpIntoSelect(N))
9878     return NewSel;
9879 
9880   // if (shl x, c) is known to be zero, return 0
9881   if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
9882     return DAG.getConstant(0, SDLoc(N), VT);
9883 
9884   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
9885   if (N1.getOpcode() == ISD::TRUNCATE &&
9886       N1.getOperand(0).getOpcode() == ISD::AND) {
9887     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
9888       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
9889   }
9890 
9891   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
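  // For example, for i32: shl (shl x, 20), 16 --> 0 (everything shifted out),
  // while shl (shl x, 10), 16 --> shl x, 26.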
9892   if (N0.getOpcode() == ISD::SHL) {
9893     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
9894                                           ConstantSDNode *RHS) {
9895       APInt c1 = LHS->getAPIntValue();
9896       APInt c2 = RHS->getAPIntValue();
9897       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9898       return (c1 + c2).uge(OpSizeInBits);
9899     };
9900     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
9901       return DAG.getConstant(0, SDLoc(N), VT);
9902 
9903     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
9904                                        ConstantSDNode *RHS) {
9905       APInt c1 = LHS->getAPIntValue();
9906       APInt c2 = RHS->getAPIntValue();
9907       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9908       return (c1 + c2).ult(OpSizeInBits);
9909     };
9910     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
9911       SDLoc DL(N);
9912       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
9913       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
9914     }
9915   }
9916 
9917   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
9918   // For this to be valid, the second form must not preserve any of the bits
9919   // that are shifted out by the inner shift in the first form.  This means
9920   // the outer shift size must be >= the number of bits added by the ext.
9921   // As a corollary, we don't care what kind of ext it is.
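  // For example (i16 inner shift, i32 result; a sketch):
  //   shl (zext i16 (shl X, 2) to i32), 17 --> shl (zext i16 X to i32), 19
  // The two bits discarded by the inner shift would have been shifted above
  // bit 31 anyway, because 17 >= 32 - 16.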
9922   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
9923        N0.getOpcode() == ISD::ANY_EXTEND ||
9924        N0.getOpcode() == ISD::SIGN_EXTEND) &&
9925       N0.getOperand(0).getOpcode() == ISD::SHL) {
9926     SDValue N0Op0 = N0.getOperand(0);
9927     SDValue InnerShiftAmt = N0Op0.getOperand(1);
9928     EVT InnerVT = N0Op0.getValueType();
9929     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
9930 
9931     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9932                                                          ConstantSDNode *RHS) {
9933       APInt c1 = LHS->getAPIntValue();
9934       APInt c2 = RHS->getAPIntValue();
9935       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9936       return c2.uge(OpSizeInBits - InnerBitwidth) &&
9937              (c1 + c2).uge(OpSizeInBits);
9938     };
9939     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
9940                                   /*AllowUndefs*/ false,
9941                                   /*AllowTypeMismatch*/ true))
9942       return DAG.getConstant(0, SDLoc(N), VT);
9943 
9944     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
9945                                                       ConstantSDNode *RHS) {
9946       APInt c1 = LHS->getAPIntValue();
9947       APInt c2 = RHS->getAPIntValue();
9948       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
9949       return c2.uge(OpSizeInBits - InnerBitwidth) &&
9950              (c1 + c2).ult(OpSizeInBits);
9951     };
9952     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
9953                                   /*AllowUndefs*/ false,
9954                                   /*AllowTypeMismatch*/ true)) {
9955       SDLoc DL(N);
9956       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
9957       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
9958       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
9959       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
9960     }
9961   }
9962 
9963   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
9964   // Only fold this if the inner zext has no other uses to avoid increasing
9965   // the total number of instructions.
9966   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
9967       N0.getOperand(0).getOpcode() == ISD::SRL) {
9968     SDValue N0Op0 = N0.getOperand(0);
9969     SDValue InnerShiftAmt = N0Op0.getOperand(1);
9970 
9971     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
9972       APInt c1 = LHS->getAPIntValue();
9973       APInt c2 = RHS->getAPIntValue();
9974       zeroExtendToMatch(c1, c2);
9975       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
9976     };
9977     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
9978                                   /*AllowUndefs*/ false,
9979                                   /*AllowTypeMismatch*/ true)) {
9980       SDLoc DL(N);
9981       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
9982       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
9983       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
9984       AddToWorklist(NewSHL.getNode());
9985       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
9986     }
9987   }
9988 
9989   if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) {
9990     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
9991                                            ConstantSDNode *RHS) {
9992       const APInt &LHSC = LHS->getAPIntValue();
9993       const APInt &RHSC = RHS->getAPIntValue();
9994       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
9995              LHSC.getZExtValue() <= RHSC.getZExtValue();
9996     };
9997 
9998     SDLoc DL(N);
9999 
10000     // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
10001     // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2
10002     if (N0->getFlags().hasExact()) {
10003       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10004                                     /*AllowUndefs*/ false,
10005                                     /*AllowTypeMismatch*/ true)) {
10006         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10007         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10008         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10009       }
10010       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10011                                     /*AllowUndefs*/ false,
10012                                     /*AllowTypeMismatch*/ true)) {
10013         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10014         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10015         return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), Diff);
10016       }
10017     }
10018 
    // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
    //                               (and (srl x, (sub c1, c2)), MASK)
    // Only fold this if the inner shift has no other uses (or the two shift
    // amounts are equal) -- otherwise folding increases the instruction count.
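    // For example, for i32 with c1 = 4 and c2 = 6:
    //   shl (srl x, 4), 6 --> and (shl x, 2), 0xFFFFFFC0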
10023     if (N0.getOpcode() == ISD::SRL &&
10024         (N0.getOperand(1) == N1 || N0.hasOneUse()) &&
10025         TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10026       if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10027                                     /*AllowUndefs*/ false,
10028                                     /*AllowTypeMismatch*/ true)) {
10029         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10030         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10031         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10032         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N01);
10033         Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, Diff);
10034         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10035         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10036       }
10037       if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10038                                     /*AllowUndefs*/ false,
10039                                     /*AllowTypeMismatch*/ true)) {
10040         SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10041         SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10042         SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10043         Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, N1);
10044         SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10045         return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10046       }
10047     }
10048   }
10049 
10050   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
10051   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
10052       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
10053     SDLoc DL(N);
10054     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
10055     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
10056     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
10057   }
10058 
10059   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
10060   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
10061   // Variant of version done on multiply, except mul by a power of 2 is turned
10062   // into a shift.
10063   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
10064       N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) {
10065     SDValue N01 = N0.getOperand(1);
10066     if (SDValue Shl1 =
10067             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) {
10068       SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
10069       AddToWorklist(Shl0.getNode());
10070       SDNodeFlags Flags;
10071       // Preserve the disjoint flag for Or.
10072       if (N0.getOpcode() == ISD::OR && N0->getFlags().hasDisjoint())
10073         Flags.setDisjoint(true);
10074       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1, Flags);
10075     }
10076   }
10077 
10078   // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2)
10079   // TODO: Add zext/add_nuw variant with suitable test coverage
10080   // TODO: Should we limit this with isLegalAddImmediate?
10081   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
10082       N0.getOperand(0).getOpcode() == ISD::ADD &&
10083       N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() &&
10084       N0.getOperand(0)->hasOneUse() &&
10085       TLI.isDesirableToCommuteWithShift(N, Level)) {
10086     SDValue Add = N0.getOperand(0);
10087     SDLoc DL(N0);
10088     if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT,
10089                                                   {Add.getOperand(1)})) {
10090       if (SDValue ShlC =
10091               DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) {
10092         SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0));
10093         SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1);
10094         return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC);
10095       }
10096     }
10097   }
10098 
10099   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
10100   if (N0.getOpcode() == ISD::MUL && N0->hasOneUse()) {
10101     SDValue N01 = N0.getOperand(1);
10102     if (SDValue Shl =
10103             DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1}))
10104       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
10105   }
10106 
10107   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10108   if (N1C && !N1C->isOpaque())
10109     if (SDValue NewSHL = visitShiftByConstant(N))
10110       return NewSHL;
10111 
10112   if (SimplifyDemandedBits(SDValue(N, 0)))
10113     return SDValue(N, 0);
10114 
10115   // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)).
10116   if (N0.getOpcode() == ISD::VSCALE && N1C) {
10117     const APInt &C0 = N0.getConstantOperandAPInt(0);
10118     const APInt &C1 = N1C->getAPIntValue();
10119     return DAG.getVScale(SDLoc(N), VT, C0 << C1);
10120   }
10121 
10122   // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)).
10123   APInt ShlVal;
10124   if (N0.getOpcode() == ISD::STEP_VECTOR &&
10125       ISD::isConstantSplatVector(N1.getNode(), ShlVal)) {
10126     const APInt &C0 = N0.getConstantOperandAPInt(0);
10127     if (ShlVal.ult(C0.getBitWidth())) {
10128       APInt NewStep = C0 << ShlVal;
10129       return DAG.getStepVector(SDLoc(N), VT, NewStep);
10130     }
10131   }
10132 
10133   return SDValue();
10134 }
10135 
10136 // Transform a right shift of a multiply into a multiply-high.
10137 // Examples:
// (srl (mul (zext i32:$a to i64), (zext i32:$b to i64)), 32) -> (mulhu $a, $b)
// (sra (mul (sext i32:$a to i64), (sext i32:$b to i64)), 32) -> (mulhs $a, $b)
10140 static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
10141                                   const TargetLowering &TLI) {
10142   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
10143          "SRL or SRA node is required here!");
10144 
10145   // Check the shift amount. Proceed with the transformation if the shift
10146   // amount is constant.
10147   ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
10148   if (!ShiftAmtSrc)
10149     return SDValue();
10150 
10151   SDLoc DL(N);
10152 
10153   // The operation feeding into the shift must be a multiply.
10154   SDValue ShiftOperand = N->getOperand(0);
10155   if (ShiftOperand.getOpcode() != ISD::MUL)
10156     return SDValue();
10157 
10158   // Both operands must be equivalent extend nodes.
10159   SDValue LeftOp = ShiftOperand.getOperand(0);
10160   SDValue RightOp = ShiftOperand.getOperand(1);
10161 
10162   bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
10163   bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
10164 
10165   if (!IsSignExt && !IsZeroExt)
10166     return SDValue();
10167 
10168   EVT NarrowVT = LeftOp.getOperand(0).getValueType();
10169   unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
10170 
  // Return true if U may use the lower bits of its operands.
10172   auto UserOfLowerBits = [NarrowVTSize](SDNode *U) {
10173     if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) {
10174       return true;
10175     }
10176     ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1));
10177     if (!UShiftAmtSrc) {
10178       return true;
10179     }
10180     unsigned UShiftAmt = UShiftAmtSrc->getZExtValue();
10181     return UShiftAmt < NarrowVTSize;
10182   };
10183 
  // If the lower part of the MUL is also used and MUL_LOHI is supported, do
  // not introduce the MULH; keep the full multiply so it can become MUL_LOHI.
10186   unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
10187   if (!ShiftOperand.hasOneUse() &&
10188       TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) &&
10189       llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) {
10190     return SDValue();
10191   }
10192 
10193   SDValue MulhRightOp;
10194   if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
10195     unsigned ActiveBits = IsSignExt
10196                               ? Constant->getAPIntValue().getSignificantBits()
10197                               : Constant->getAPIntValue().getActiveBits();
10198     if (ActiveBits > NarrowVTSize)
10199       return SDValue();
10200     MulhRightOp = DAG.getConstant(
10201         Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
10202         NarrowVT);
10203   } else {
10204     if (LeftOp.getOpcode() != RightOp.getOpcode())
10205       return SDValue();
10206     // Check that the two extend nodes are the same type.
10207     if (NarrowVT != RightOp.getOperand(0).getValueType())
10208       return SDValue();
10209     MulhRightOp = RightOp.getOperand(0);
10210   }
10211 
10212   EVT WideVT = LeftOp.getValueType();
  // The wide types must match (a multiply's operands share a single type).
10214   assert((WideVT == RightOp.getValueType()) &&
10215          "Cannot have a multiply node with two different operand types.");
10216 
10217   // Proceed with the transformation if the wide type is twice as large
10218   // as the narrow type.
10219   if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
10220     return SDValue();
10221 
10222   // Check the shift amount with the narrow type size.
10223   // Proceed with the transformation if the shift amount is the width
10224   // of the narrow type.
10225   unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
10226   if (ShiftAmt != NarrowVTSize)
10227     return SDValue();
10228 
10229   // If the operation feeding into the MUL is a sign extend (sext),
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
10231   unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
10232 
10233   // Combine to mulh if mulh is legal/custom for the narrow type on the target
10234   // or if it is a vector type then we could transform to an acceptable type and
10235   // rely on legalization to split/combine the result.
10236   if (NarrowVT.isVector()) {
10237     EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
10238     if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
10239         !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
10240       return SDValue();
10241   } else {
10242     if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
10243       return SDValue();
10244   }
10245 
10246   SDValue Result =
10247       DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
10248   bool IsSigned = N->getOpcode() == ISD::SRA;
10249   return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
10250 }
10251 
10252 // fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
// This helper function accepts SDNodes with opcode ISD::BSWAP or
// ISD::BITREVERSE.
10254 static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
10255   unsigned Opcode = N->getOpcode();
10256   if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
10257     return SDValue();
10258 
10259   SDValue N0 = N->getOperand(0);
10260   EVT VT = N->getValueType(0);
10261   SDLoc DL(N);
10262   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
10263     SDValue OldLHS = N0.getOperand(0);
10264     SDValue OldRHS = N0.getOperand(1);
10265 
    // If both operands are bswap/bitreverse, ignore the multiple-use check;
    // otherwise, ensure that the logic_op and the bswap/bitreverse(x) operand
    // each have only one use.
10268     if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
10269       return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10270                          OldRHS.getOperand(0));
10271     }
10272 
10273     if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
10274       SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
10275       return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
10276                          NewBitReorder);
10277     }
10278 
10279     if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
10280       SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
10281       return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
10282                          OldRHS.getOperand(0));
10283     }
10284   }
10285   return SDValue();
10286 }
10287 
10288 SDValue DAGCombiner::visitSRA(SDNode *N) {
10289   SDValue N0 = N->getOperand(0);
10290   SDValue N1 = N->getOperand(1);
10291   if (SDValue V = DAG.simplifyShift(N0, N1))
10292     return V;
10293 
10294   EVT VT = N0.getValueType();
10295   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10296 
  // fold (sra c1, c2) -> c1 >>s c2
10298   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1}))
10299     return C;
10300 
10301   // Arithmetic shifting an all-sign-bit value is a no-op.
10302   // fold (sra 0, x) -> 0
10303   // fold (sra -1, x) -> -1
10304   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
10305     return N0;
10306 
10307   // fold vector ops
10308   if (VT.isVector())
10309     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10310       return FoldedVOp;
10311 
10312   if (SDValue NewSel = foldBinOpIntoSelect(N))
10313     return NewSel;
10314 
10315   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10316 
10317   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
10318   // clamp (add c1, c2) to max shift.
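  // For example, for i32: sra (sra x, 25), 25 --> sra x, 31; the sum (50) is
  // clamped to 31, which likewise leaves only copies of the sign bit.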
10319   if (N0.getOpcode() == ISD::SRA) {
10320     SDLoc DL(N);
10321     EVT ShiftVT = N1.getValueType();
10322     EVT ShiftSVT = ShiftVT.getScalarType();
10323     SmallVector<SDValue, 16> ShiftValues;
10324 
10325     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
10326       APInt c1 = LHS->getAPIntValue();
10327       APInt c2 = RHS->getAPIntValue();
10328       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10329       APInt Sum = c1 + c2;
10330       unsigned ShiftSum =
10331           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
10332       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
10333       return true;
10334     };
10335     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
10336       SDValue ShiftValue;
10337       if (N1.getOpcode() == ISD::BUILD_VECTOR)
10338         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
10339       else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
10340         assert(ShiftValues.size() == 1 &&
10341                "Expected matchBinaryPredicate to return one element for "
10342                "SPLAT_VECTORs");
10343         ShiftValue = DAG.getSplatVector(ShiftVT, DL, ShiftValues[0]);
10344       } else
10345         ShiftValue = ShiftValues[0];
10346       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
10347     }
10348   }
10349 
  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target, this is likely to result in better
  // code than the sra(shl) form.
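  // For example, for i32 with m = 8 and a shift amount of 24:
  //   sra (shl X, 8), 24 --> sext (trunc (srl X, 16) to i8) to i32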
10355   if (N0.getOpcode() == ISD::SHL && N1C) {
10356     // Get the two constants of the shifts, CN0 = m, CN = n.
10357     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
10358     if (N01C) {
10359       LLVMContext &Ctx = *DAG.getContext();
10360       // Determine what the truncate's result bitsize and type would be.
10361       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
10362 
10363       if (VT.isVector())
10364         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10365 
10366       // Determine the residual right-shift amount.
10367       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
10368 
      // If the shift is not a no-op (in which case this would already be just
      // a sign extend), the type we truncate to is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
10373       if ((ShiftAmt > 0) &&
10374           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
10375           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
10376           TLI.isTruncateFree(VT, TruncVT)) {
        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(
            ShiftAmt, DL, getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), Trunc);
10386       }
10387     }
10388   }
10389 
  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
  //   sra (add (shl X, N1C), AddC), N1C -->
  //   sext (add (trunc X to (width - N1C)), AddC')
  //   sra (sub AddC, (shl X, N1C)), N1C -->
  //   sext (sub AddC', (trunc X to (width - N1C)))
  // where AddC' is AddC >> N1C, truncated to the narrower type.
10395   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB) && N1C &&
10396       N0.hasOneUse()) {
10397     bool IsAdd = N0.getOpcode() == ISD::ADD;
10398     SDValue Shl = N0.getOperand(IsAdd ? 0 : 1);
10399     if (Shl.getOpcode() == ISD::SHL && Shl.getOperand(1) == N1 &&
10400         Shl.hasOneUse()) {
10401       // TODO: AddC does not need to be a splat.
10402       if (ConstantSDNode *AddC =
10403               isConstOrConstSplat(N0.getOperand(IsAdd ? 1 : 0))) {
10404         // Determine what the truncate's type would be and ask the target if
10405         // that is a free operation.
10406         LLVMContext &Ctx = *DAG.getContext();
10407         unsigned ShiftAmt = N1C->getZExtValue();
10408         EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
10409         if (VT.isVector())
10410           TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorElementCount());
10411 
10412         // TODO: The simple type check probably belongs in the default hook
10413         //       implementation and/or target-specific overrides (because
10414         //       non-simple types likely require masking when legalized), but
10415         //       that restriction may conflict with other transforms.
10416         if (TruncVT.isSimple() && isTypeLegal(TruncVT) &&
10417             TLI.isTruncateFree(VT, TruncVT)) {
10418           SDLoc DL(N);
10419           SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
10420           SDValue ShiftC =
10421               DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).trunc(
10422                                   TruncVT.getScalarSizeInBits()),
10423                               DL, TruncVT);
10424           SDValue Add;
10425           if (IsAdd)
10426             Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
10427           else
10428             Add = DAG.getNode(ISD::SUB, DL, TruncVT, ShiftC, Trunc);
10429           return DAG.getSExtOrTrunc(Add, DL, VT);
10430         }
10431       }
10432     }
10433   }
10434 
10435   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
10436   if (N1.getOpcode() == ISD::TRUNCATE &&
10437       N1.getOperand(0).getOpcode() == ISD::AND) {
10438     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10439       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
10440   }
10441 
10442   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
10443   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
10444   //      if c1 is equal to the number of bits the trunc removes
10445   // TODO - support non-uniform vector shift amounts.
10446   if (N0.getOpcode() == ISD::TRUNCATE &&
10447       (N0.getOperand(0).getOpcode() == ISD::SRL ||
10448        N0.getOperand(0).getOpcode() == ISD::SRA) &&
10449       N0.getOperand(0).hasOneUse() &&
10450       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
10451     SDValue N0Op0 = N0.getOperand(0);
10452     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
10453       EVT LargeVT = N0Op0.getValueType();
10454       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
10455       if (LargeShift->getAPIntValue() == TruncBits) {
10456         SDLoc DL(N);
10457         EVT LargeShiftVT = getShiftAmountTy(LargeVT);
10458         SDValue Amt = DAG.getZExtOrTrunc(N1, DL, LargeShiftVT);
10459         Amt = DAG.getNode(ISD::ADD, DL, LargeShiftVT, Amt,
10460                           DAG.getConstant(TruncBits, DL, LargeShiftVT));
10461         SDValue SRA =
10462             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
10463         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
10464       }
10465     }
10466   }
10467 
10468   // Simplify, based on bits shifted out of the LHS.
10469   if (SimplifyDemandedBits(SDValue(N, 0)))
10470     return SDValue(N, 0);
10471 
10472   // If the sign bit is known to be zero, switch this to a SRL.
10473   if (DAG.SignBitIsZero(N0))
10474     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
10475 
10476   if (N1C && !N1C->isOpaque())
10477     if (SDValue NewSRA = visitShiftByConstant(N))
10478       return NewSRA;
10479 
10480   // Try to transform this shift into a multiply-high if
10481   // it matches the appropriate pattern detected in combineShiftToMULH.
10482   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10483     return MULH;
10484 
10485   // Attempt to convert a sra of a load into a narrower sign-extending load.
10486   if (SDValue NarrowLoad = reduceLoadWidth(N))
10487     return NarrowLoad;
10488 
10489   return SDValue();
10490 }
10491 
10492 SDValue DAGCombiner::visitSRL(SDNode *N) {
10493   SDValue N0 = N->getOperand(0);
10494   SDValue N1 = N->getOperand(1);
10495   if (SDValue V = DAG.simplifyShift(N0, N1))
10496     return V;
10497 
10498   EVT VT = N0.getValueType();
10499   EVT ShiftVT = N1.getValueType();
10500   unsigned OpSizeInBits = VT.getScalarSizeInBits();
10501 
10502   // fold (srl c1, c2) -> c1 >>u c2
10503   if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1}))
10504     return C;
10505 
10506   // fold vector ops
10507   if (VT.isVector())
10508     if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
10509       return FoldedVOp;
10510 
10511   if (SDValue NewSel = foldBinOpIntoSelect(N))
10512     return NewSel;
10513 
10514   // if (srl x, c) is known to be zero, return 0
10515   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10516   if (N1C &&
10517       DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits)))
10518     return DAG.getConstant(0, SDLoc(N), VT);
10519 
10520   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
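  // e.g. with i8: srl (srl x, 3), 4 --> srl x, 7, but
  //               srl (srl x, 4), 5 --> 0 because 4 + 5 >= 8.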
10521   if (N0.getOpcode() == ISD::SRL) {
10522     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
10523                                           ConstantSDNode *RHS) {
10524       APInt c1 = LHS->getAPIntValue();
10525       APInt c2 = RHS->getAPIntValue();
10526       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10527       return (c1 + c2).uge(OpSizeInBits);
10528     };
10529     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
10530       return DAG.getConstant(0, SDLoc(N), VT);
10531 
10532     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
10533                                        ConstantSDNode *RHS) {
10534       APInt c1 = LHS->getAPIntValue();
10535       APInt c2 = RHS->getAPIntValue();
10536       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
10537       return (c1 + c2).ult(OpSizeInBits);
10538     };
10539     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
10540       SDLoc DL(N);
10541       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
10542       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
10543     }
10544   }
10545 
10546   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
10547       N0.getOperand(0).getOpcode() == ISD::SRL) {
10548     SDValue InnerShift = N0.getOperand(0);
10549     // TODO - support non-uniform vector shift amounts.
10550     if (auto *N001C = isConstOrConstSplat(InnerShift.getOperand(1))) {
10551       uint64_t c1 = N001C->getZExtValue();
10552       uint64_t c2 = N1C->getZExtValue();
10553       EVT InnerShiftVT = InnerShift.getValueType();
10554       EVT ShiftAmtVT = InnerShift.getOperand(1).getValueType();
10555       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
10556       // srl (trunc (srl x, c1)), c2 --> 0 or (trunc (srl x, (add c1, c2)))
      // This is only valid if OpSizeInBits + c1 equals the size of the
      // inner shift.
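      // e.g. i64 -> i32: srl (trunc (srl x, 32)), 5 --> trunc (srl x, 37).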
10558       if (c1 + OpSizeInBits == InnerShiftSize) {
10559         SDLoc DL(N);
10560         if (c1 + c2 >= InnerShiftSize)
10561           return DAG.getConstant(0, DL, VT);
10562         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10563         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10564                                        InnerShift.getOperand(0), NewShiftAmt);
10565         return DAG.getNode(ISD::TRUNCATE, DL, VT, NewShift);
10566       }
10567       // In the more general case, we can clear the high bits after the shift:
10568       // srl (trunc (srl x, c1)), c2 --> trunc (and (srl x, (c1+c2)), Mask)
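      // e.g. i64 -> i32: srl (trunc (srl x, 16)), 8
      //        --> trunc (and (srl x, 24), 0xffffff), keeping the low 24 bits.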
10569       if (N0.hasOneUse() && InnerShift.hasOneUse() &&
10570           c1 + c2 < InnerShiftSize) {
10571         SDLoc DL(N);
10572         SDValue NewShiftAmt = DAG.getConstant(c1 + c2, DL, ShiftAmtVT);
10573         SDValue NewShift = DAG.getNode(ISD::SRL, DL, InnerShiftVT,
10574                                        InnerShift.getOperand(0), NewShiftAmt);
10575         SDValue Mask = DAG.getConstant(APInt::getLowBitsSet(InnerShiftSize,
10576                                                             OpSizeInBits - c2),
10577                                        DL, InnerShiftVT);
10578         SDValue And = DAG.getNode(ISD::AND, DL, InnerShiftVT, NewShift, Mask);
10579         return DAG.getNode(ISD::TRUNCATE, DL, VT, And);
10580       }
10581     }
10582   }
10583 
  // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2)), MASK) or
  //                               (and (srl x, (sub c2, c1)), MASK)
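  // e.g. with i8: srl (shl x, 3), 1 --> and (shl x, 2), 0x7c.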
10586   if (N0.getOpcode() == ISD::SHL &&
10587       (N0.getOperand(1) == N1 || N0->hasOneUse()) &&
10588       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
10589     auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS,
10590                                            ConstantSDNode *RHS) {
10591       const APInt &LHSC = LHS->getAPIntValue();
10592       const APInt &RHSC = RHS->getAPIntValue();
10593       return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) &&
10594              LHSC.getZExtValue() <= RHSC.getZExtValue();
10595     };
10596     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount,
10597                                   /*AllowUndefs*/ false,
10598                                   /*AllowTypeMismatch*/ true)) {
10599       SDLoc DL(N);
10600       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10601       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1);
10602       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10603       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01);
10604       Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff);
10605       SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff);
10606       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10607     }
10608     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount,
10609                                   /*AllowUndefs*/ false,
10610                                   /*AllowTypeMismatch*/ true)) {
10611       SDLoc DL(N);
10612       SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT);
10613       SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01);
10614       SDValue Mask = DAG.getAllOnesConstant(DL, VT);
10615       Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1);
10616       SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff);
10617       return DAG.getNode(ISD::AND, DL, VT, Shift, Mask);
10618     }
10619   }
10620 
10621   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
10622   // TODO - support non-uniform vector shift amounts.
10623   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
10624     // Shifting in all undef bits?
10625     EVT SmallVT = N0.getOperand(0).getValueType();
10626     unsigned BitSize = SmallVT.getScalarSizeInBits();
10627     if (N1C->getAPIntValue().uge(BitSize))
10628       return DAG.getUNDEF(VT);
10629 
10630     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
10631       uint64_t ShiftAmt = N1C->getZExtValue();
10632       SDLoc DL0(N0);
10633       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
10634                                        N0.getOperand(0),
10635                           DAG.getConstant(ShiftAmt, DL0,
10636                                           getShiftAmountTy(SmallVT)));
10637       AddToWorklist(SmallShift.getNode());
10638       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
10639       SDLoc DL(N);
10640       return DAG.getNode(ISD::AND, DL, VT,
10641                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
10642                          DAG.getConstant(Mask, DL, VT));
10643     }
10644   }
10645 
10646   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
10647   // bit, which is unmodified by sra.
10648   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
10649     if (N0.getOpcode() == ISD::SRA)
10650       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
10651   }
10652 
  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and
  // the bitwidth of x is a power of two. The "5" is log2(bitwidth of x).
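  // e.g. with i32: if only bit 4 of x can possibly be set, (srl (ctlz x), 5)
  // is 1 iff x == 0, and the code below turns it into (xor (srl x, 4), 1).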
10655   if (N1C && N0.getOpcode() == ISD::CTLZ &&
10656       isPowerOf2_32(OpSizeInBits) &&
10657       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
10658     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
10659 
10660     // If any of the input bits are KnownOne, then the input couldn't be all
10661     // zeros, thus the result of the srl will always be zero.
10662     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
10663 
    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is the bitwidth (e.g. "32") and the result of
    // the shift is one.
10666     APInt UnknownBits = ~Known.Zero;
10667     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
10668 
10669     // Otherwise, check to see if there is exactly one bit input to the ctlz.
10670     if (UnknownBits.isPowerOf2()) {
      // Okay, we know that only the single bit specified by UnknownBits could
      // be set on the input to the CTLZ node. If this bit is set, the SRL
      // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
10675       unsigned ShAmt = UnknownBits.countr_zero();
10676       SDValue Op = N0.getOperand(0);
10677 
10678       if (ShAmt) {
10679         SDLoc DL(N0);
10680         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
10681                   DAG.getConstant(ShAmt, DL,
10682                                   getShiftAmountTy(Op.getValueType())));
10683         AddToWorklist(Op.getNode());
10684       }
10685 
10686       SDLoc DL(N);
10687       return DAG.getNode(ISD::XOR, DL, VT,
10688                          Op, DAG.getConstant(1, DL, VT));
10689     }
10690   }
10691 
10692   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
10693   if (N1.getOpcode() == ISD::TRUNCATE &&
10694       N1.getOperand(0).getOpcode() == ISD::AND) {
10695     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
10696       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
10697   }
10698 
10699   // fold operands of srl based on knowledge that the low bits are not
10700   // demanded.
10701   if (SimplifyDemandedBits(SDValue(N, 0)))
10702     return SDValue(N, 0);
10703 
10704   if (N1C && !N1C->isOpaque())
10705     if (SDValue NewSRL = visitShiftByConstant(N))
10706       return NewSRL;
10707 
10708   // Attempt to convert a srl of a load into a narrower zero-extending load.
10709   if (SDValue NarrowLoad = reduceLoadWidth(N))
10710     return NarrowLoad;
10711 
10712   // Here is a common situation. We want to optimize:
10713   //
10714   //   %a = ...
10715   //   %b = and i32 %a, 2
10716   //   %c = srl i32 %b, 1
10717   //   brcond i32 %c ...
10718   //
10719   // into
10720   //
10721   //   %a = ...
10722   //   %b = and %a, 2
10723   //   %c = setcc eq %b, 0
10724   //   brcond %c ...
10725   //
  // However, after the source operand of SRL is optimized into AND, the SRL
10727   // itself may not be optimized further. Look for it and add the BRCOND into
10728   // the worklist.
10729   //
  // This also tends to happen for binary operations when SimplifyDemandedBits
10731   // is involved.
10732   //
  // FIXME: This is unnecessary if we process the DAG in topological order,
10734   // which we plan to do. This workaround can be removed once the DAG is
10735   // processed in topological order.
10736   if (N->hasOneUse()) {
10737     SDNode *Use = *N->use_begin();
10738 
    // Look past the truncate.
10740     if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse())
10741       Use = *Use->use_begin();
10742 
10743     if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND ||
10744         Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR)
10745       AddToWorklist(Use);
10746   }
10747 
10748   // Try to transform this shift into a multiply-high if
10749   // it matches the appropriate pattern detected in combineShiftToMULH.
10750   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
10751     return MULH;
10752 
10753   return SDValue();
10754 }
10755 
10756 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
10757   EVT VT = N->getValueType(0);
10758   SDValue N0 = N->getOperand(0);
10759   SDValue N1 = N->getOperand(1);
10760   SDValue N2 = N->getOperand(2);
10761   bool IsFSHL = N->getOpcode() == ISD::FSHL;
10762   unsigned BitWidth = VT.getScalarSizeInBits();
10763 
10764   // fold (fshl N0, N1, 0) -> N0
10765   // fold (fshr N0, N1, 0) -> N1
10766   if (isPowerOf2_32(BitWidth))
10767     if (DAG.MaskedValueIsZero(
10768             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
10769       return IsFSHL ? N0 : N1;
10770 
10771   auto IsUndefOrZero = [](SDValue V) {
10772     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
10773   };
10774 
10775   // TODO - support non-uniform vector shift amounts.
10776   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
10777     EVT ShAmtTy = N2.getValueType();
10778 
10779     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
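    // e.g. with i8: (fshl a, b, 11) --> (fshl a, b, 3).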
10780     if (Cst->getAPIntValue().uge(BitWidth)) {
10781       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
10782       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
10783                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
10784     }
10785 
10786     unsigned ShAmt = Cst->getZExtValue();
10787     if (ShAmt == 0)
10788       return IsFSHL ? N0 : N1;
10789 
10790     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
10791     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
10792     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
10793     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
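    // e.g. with i8: fshl(0, b, 3) --> srl(b, 5), and
    //               fshr(a, 0, 3) --> shl(a, 5).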
10794     if (IsUndefOrZero(N0))
10795       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
10796                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
10797                                          SDLoc(N), ShAmtTy));
10798     if (IsUndefOrZero(N1))
10799       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
10800                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
10801                                          SDLoc(N), ShAmtTy));
10802 
10803     // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10804     // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive.
10805     // TODO - bigendian support once we have test coverage.
    // TODO - can we merge this with CombineConsecutiveLoads/MatchLoadCombine?
10807     // TODO - permit LHS EXTLOAD if extensions are shifted out.
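    // e.g. on a little-endian target with i32 loads: if ld0 loads [p] and ld1
    // loads [p + 4], then (fshr ld1, ld0, 8) is the i32 value at [p + 1].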
10808     if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() &&
10809         !DAG.getDataLayout().isBigEndian()) {
10810       auto *LHS = dyn_cast<LoadSDNode>(N0);
10811       auto *RHS = dyn_cast<LoadSDNode>(N1);
10812       if (LHS && RHS && LHS->isSimple() && RHS->isSimple() &&
10813           LHS->getAddressSpace() == RHS->getAddressSpace() &&
10814           (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) &&
10815           ISD::isNON_EXTLoad(LHS)) {
10816         if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) {
10817           SDLoc DL(RHS);
10818           uint64_t PtrOff =
10819               IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8);
10820           Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff);
10821           unsigned Fast = 0;
10822           if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10823                                      RHS->getAddressSpace(), NewAlign,
10824                                      RHS->getMemOperand()->getFlags(), &Fast) &&
10825               Fast) {
10826             SDValue NewPtr = DAG.getMemBasePlusOffset(
10827                 RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL);
10828             AddToWorklist(NewPtr.getNode());
10829             SDValue Load = DAG.getLoad(
10830                 VT, DL, RHS->getChain(), NewPtr,
10831                 RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10832                 RHS->getMemOperand()->getFlags(), RHS->getAAInfo());
10833             // Replace the old load's chain with the new load's chain.
10834             WorklistRemover DeadNodes(*this);
10835             DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1));
10836             return Load;
10837           }
10838         }
10839       }
10840     }
10841   }
10842 
10843   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
10844   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
  // iff we know the shift amount is in range.
10846   // TODO: when is it worth doing SUB(BW, N2) as well?
10847   if (isPowerOf2_32(BitWidth)) {
10848     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
10849     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10850       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
10851     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
10852       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
10853   }
10854 
10855   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
10856   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
  // TODO: Investigate flipping this rotate if only one is legal; if the funnel
  // shift is legal as well, we might be better off avoiding non-constant
  // (BW - N2).
10859   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
10860   if (N0 == N1 && hasOperation(RotOpc, VT))
10861     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
10862 
10863   // Simplify, based on bits shifted out of N0/N1.
10864   if (SimplifyDemandedBits(SDValue(N, 0)))
10865     return SDValue(N, 0);
10866 
10867   return SDValue();
10868 }
10869 
10870 SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
10871   SDValue N0 = N->getOperand(0);
10872   SDValue N1 = N->getOperand(1);
10873   if (SDValue V = DAG.simplifyShift(N0, N1))
10874     return V;
10875 
10876   EVT VT = N0.getValueType();
10877 
10878   // fold (*shlsat c1, c2) -> c1<<c2
10879   if (SDValue C =
10880           DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, {N0, N1}))
10881     return C;
10882 
10883   ConstantSDNode *N1C = isConstOrConstSplat(N1);
10884 
10885   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) {
10886     // fold (sshlsat x, c) -> (shl x, c)
10887     if (N->getOpcode() == ISD::SSHLSAT && N1C &&
10888         N1C->getAPIntValue().ult(DAG.ComputeNumSignBits(N0)))
10889       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10890 
10891     // fold (ushlsat x, c) -> (shl x, c)
10892     if (N->getOpcode() == ISD::USHLSAT && N1C &&
10893         N1C->getAPIntValue().ule(
10894             DAG.computeKnownBits(N0).countMinLeadingZeros()))
10895       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N1);
10896   }
10897 
10898   return SDValue();
10899 }
10900 
// Given an ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates a UABD/SABD instruction.
10905 SDValue DAGCombiner::foldABSToABD(SDNode *N) {
10906   EVT SrcVT = N->getValueType(0);
10907 
10908   if (N->getOpcode() == ISD::TRUNCATE)
10909     N = N->getOperand(0).getNode();
10910 
10911   if (N->getOpcode() != ISD::ABS)
10912     return SDValue();
10913 
10914   EVT VT = N->getValueType(0);
10915   SDValue AbsOp1 = N->getOperand(0);
10916   SDValue Op0, Op1;
10917   SDLoc DL(N);
10918 
10919   if (AbsOp1.getOpcode() != ISD::SUB)
10920     return SDValue();
10921 
10922   Op0 = AbsOp1.getOperand(0);
10923   Op1 = AbsOp1.getOperand(1);
10924 
10925   unsigned Opc0 = Op0.getOpcode();
10926 
10927   // Check if the operands of the sub are (zero|sign)-extended.
10928   // TODO: Should we use ValueTracking instead?
10929   if (Opc0 != Op1.getOpcode() ||
10930       (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
10931        Opc0 != ISD::SIGN_EXTEND_INREG)) {
10932     // fold (abs (sub nsw x, y)) -> abds(x, y)
10933     if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10934         TLI.preferABDSToABSWithNSW(VT)) {
10935       SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
10936       return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10937     }
10938     return SDValue();
10939   }
10940 
10941   EVT VT0, VT1;
10942   if (Opc0 == ISD::SIGN_EXTEND_INREG) {
10943     VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT();
10944     VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT();
10945   } else {
10946     VT0 = Op0.getOperand(0).getValueType();
10947     VT1 = Op1.getOperand(0).getValueType();
10948   }
10949   unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS;
10950 
10951   // fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
10952   // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
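  // e.g. with a, b : i8 and VT = i32: abs (sub (sext a), (sext b))
  //        --> zext (abds a, b), computing the difference at the narrow width.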
10953   EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
10954   if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10955       (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10956     SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
10957                               DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
10958                               DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
10959     ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
10960     return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10961   }
10962 
10963   // fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
10964   // fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10965   if (hasOperation(ABDOpcode, VT)) {
10966     SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
10967     return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
10968   }
10969 
10970   return SDValue();
10971 }
10972 
10973 SDValue DAGCombiner::visitABS(SDNode *N) {
10974   SDValue N0 = N->getOperand(0);
10975   EVT VT = N->getValueType(0);
10976 
10977   // fold (abs c1) -> c2
10978   if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
10979     return C;
10980   // fold (abs (abs x)) -> (abs x)
10981   if (N0.getOpcode() == ISD::ABS)
10982     return N0;
10983   // fold (abs x) -> x iff not-negative
10984   if (DAG.SignBitIsZero(N0))
10985     return N0;
10986 
10987   if (SDValue ABD = foldABSToABD(N))
10988     return ABD;
10989 
10990   // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x)))
10991   // iff zero_extend/truncate are free.
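  // e.g. with VT = i32 and ExtVT = i8:
  //   abs (sign_extend_inreg x, i8) --> zero_extend (abs (truncate x to i8)).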
10992   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
10993     EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT();
10994     if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) &&
10995         TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) &&
10996         hasOperation(ISD::ABS, ExtVT)) {
10997       SDLoc DL(N);
10998       return DAG.getNode(
10999           ISD::ZERO_EXTEND, DL, VT,
11000           DAG.getNode(ISD::ABS, DL, ExtVT,
11001                       DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0))));
11002     }
11003   }
11004 
11005   return SDValue();
11006 }
11007 
11008 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
11009   SDValue N0 = N->getOperand(0);
11010   EVT VT = N->getValueType(0);
11011   SDLoc DL(N);
11012 
11013   // fold (bswap c1) -> c2
11014   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
11015     return C;
11016   // fold (bswap (bswap x)) -> x
11017   if (N0.getOpcode() == ISD::BSWAP)
11018     return N0.getOperand(0);
11019 
11020   // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
11021   // isn't supported, it will be expanded to bswap followed by a manual reversal
11022   // of bits in each byte. By placing bswaps before bitreverse, we can remove
11023   // the two bswaps if the bitreverse gets expanded.
11024   if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
11025     SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11026     return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
11027   }
11028 
  // fold (bswap shl(x,c)) -> (zext(bswap(trunc(shl(x,sub(c,bw/2))))))
  // iff c >= bw/2 (i.e. the lower half is known zero)
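  // e.g. with i64: bswap (shl x, 48) --> zext (bswap (trunc (shl x, 16))),
  // where the trunc is to i32 so the bswap is performed at half width.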
11031   unsigned BW = VT.getScalarSizeInBits();
11032   if (BW >= 32 && N0.getOpcode() == ISD::SHL && N0.hasOneUse()) {
11033     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11034     EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), BW / 2);
11035     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11036         ShAmt->getZExtValue() >= (BW / 2) &&
11037         (ShAmt->getZExtValue() % 16) == 0 && TLI.isTypeLegal(HalfVT) &&
11038         TLI.isTruncateFree(VT, HalfVT) &&
11039         (!LegalOperations || hasOperation(ISD::BSWAP, HalfVT))) {
11040       SDValue Res = N0.getOperand(0);
11041       if (uint64_t NewShAmt = (ShAmt->getZExtValue() - (BW / 2)))
11042         Res = DAG.getNode(ISD::SHL, DL, VT, Res,
11043                           DAG.getConstant(NewShAmt, DL, getShiftAmountTy(VT)));
11044       Res = DAG.getZExtOrTrunc(Res, DL, HalfVT);
11045       Res = DAG.getNode(ISD::BSWAP, DL, HalfVT, Res);
11046       return DAG.getZExtOrTrunc(Res, DL, VT);
11047     }
11048   }
11049 
11050   // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
11051   // inverse-shift-of-bswap:
11052   // bswap (X u<< C) --> (bswap X) u>> C
11053   // bswap (X u>> C) --> (bswap X) u<< C
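  // e.g. with i32: bswap (shl x, 8) --> srl (bswap x), 8.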
11054   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
11055       N0.hasOneUse()) {
11056     auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
11057     if (ShAmt && ShAmt->getAPIntValue().ult(BW) &&
11058         ShAmt->getZExtValue() % 8 == 0) {
11059       SDValue NewSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
11060       unsigned InverseShift = N0.getOpcode() == ISD::SHL ? ISD::SRL : ISD::SHL;
11061       return DAG.getNode(InverseShift, DL, VT, NewSwap, N0.getOperand(1));
11062     }
11063   }
11064 
11065   if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
11066     return V;
11067 
11068   return SDValue();
11069 }
11070 
11071 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
11072   SDValue N0 = N->getOperand(0);
11073   EVT VT = N->getValueType(0);
11074   SDLoc DL(N);
11075 
11076   // fold (bitreverse c1) -> c2
11077   if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
11078     return C;
11079   // fold (bitreverse (bitreverse x)) -> x
11080   if (N0.getOpcode() == ISD::BITREVERSE)
11081     return N0.getOperand(0);
11082   return SDValue();
11083 }
11084 
11085 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
11086   SDValue N0 = N->getOperand(0);
11087   EVT VT = N->getValueType(0);
11088   SDLoc DL(N);
11089 
11090   // fold (ctlz c1) -> c2
11091   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
11092     return C;
11093 
11094   // If the value is known never to be zero, switch to the undef version.
11095   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
11096     if (DAG.isKnownNeverZero(N0))
11097       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
11098 
11099   return SDValue();
11100 }
11101 
11102 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
11103   SDValue N0 = N->getOperand(0);
11104   EVT VT = N->getValueType(0);
11105   SDLoc DL(N);
11106 
11107   // fold (ctlz_zero_undef c1) -> c2
11108   if (SDValue C =
11109           DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
11110     return C;
11111   return SDValue();
11112 }
11113 
11114 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
11115   SDValue N0 = N->getOperand(0);
11116   EVT VT = N->getValueType(0);
11117   SDLoc DL(N);
11118 
11119   // fold (cttz c1) -> c2
11120   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
11121     return C;
11122 
11123   // If the value is known never to be zero, switch to the undef version.
11124   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
11125     if (DAG.isKnownNeverZero(N0))
11126       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
11127 
11128   return SDValue();
11129 }
11130 
11131 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
11132   SDValue N0 = N->getOperand(0);
11133   EVT VT = N->getValueType(0);
11134   SDLoc DL(N);
11135 
11136   // fold (cttz_zero_undef c1) -> c2
11137   if (SDValue C =
11138           DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
11139     return C;
11140   return SDValue();
11141 }
11142 
11143 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
11144   SDValue N0 = N->getOperand(0);
11145   EVT VT = N->getValueType(0);
11146   SDLoc DL(N);
11147 
11148   // fold (ctpop c1) -> c2
11149   if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
11150     return C;
11151   return SDValue();
11152 }
11153 
// FIXME: This should be checking for no signed zeros on individual operands,
// as well as no NaNs.
11156 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
11157                                          SDValue RHS,
11158                                          const TargetLowering &TLI) {
11159   const TargetOptions &Options = DAG.getTarget().Options;
11160   EVT VT = LHS.getValueType();
11161 
11162   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
11163          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
11164          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
11165 }
11166 
11167 static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS,
11168                                        SDValue RHS, SDValue True, SDValue False,
11169                                        ISD::CondCode CC,
11170                                        const TargetLowering &TLI,
11171                                        SelectionDAG &DAG) {
11172   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
11173   switch (CC) {
11174   case ISD::SETOLT:
11175   case ISD::SETOLE:
11176   case ISD::SETLT:
11177   case ISD::SETLE:
11178   case ISD::SETULT:
11179   case ISD::SETULE: {
    // Since the operands are already known never to be NaN here, either
    // fminnum or fminnum_ieee is OK. Try the IEEE version first, since
    // fminnum is expanded in terms of it.
11183     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
11184     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11185       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11186 
11187     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
11188     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11189       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11190     return SDValue();
11191   }
11192   case ISD::SETOGT:
11193   case ISD::SETOGE:
11194   case ISD::SETGT:
11195   case ISD::SETGE:
11196   case ISD::SETUGT:
11197   case ISD::SETUGE: {
11198     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
11199     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
11200       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
11201 
11202     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
11203     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
11204       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
11205     return SDValue();
11206   }
11207   default:
11208     return SDValue();
11209   }
11210 }
11211 
11212 /// Generate Min/Max node
11213 SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
11214                                          SDValue RHS, SDValue True,
11215                                          SDValue False, ISD::CondCode CC) {
11216   if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
11217     return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG);
11218 
11219   // If we can't directly match this, try to see if we can pull an fneg out of
11220   // the select.
11221   SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression(
11222       True, DAG, LegalOperations, ForCodeSize);
11223   if (!NegTrue)
11224     return SDValue();
11225 
11226   HandleSDNode NegTrueHandle(NegTrue);
11227 
11228   // Try to unfold an fneg from the select if we are comparing the negated
11229   // constant.
11230   //
11231   // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K))
11232   //
11233   // TODO: Handle fabs
11234   if (LHS == NegTrue) {
    // See if we can also pull an fneg out of the compared operand (RHS).
11237     SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression(
11238         RHS, DAG, LegalOperations, ForCodeSize);
11239     if (NegRHS) {
11240       HandleSDNode NegRHSHandle(NegRHS);
11241       if (NegRHS == False) {
11242         SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
11243                                                    False, CC, TLI, DAG);
11244         if (Combined)
11245           return DAG.getNode(ISD::FNEG, DL, VT, Combined);
11246       }
11247     }
11248   }
11249 
11250   return SDValue();
11251 }
11252 
11253 /// If a (v)select has a condition value that is a sign-bit test, try to smear
11254 /// the condition operand sign-bit across the value width and use it as a mask.
11255 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
11256   SDValue Cond = N->getOperand(0);
11257   SDValue C1 = N->getOperand(1);
11258   SDValue C2 = N->getOperand(2);
11259   if (!isConstantOrConstantVector(C1) || !isConstantOrConstantVector(C2))
11260     return SDValue();
11261 
11262   EVT VT = N->getValueType(0);
11263   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
11264       VT != Cond.getOperand(0).getValueType())
11265     return SDValue();
11266 
11267   // The inverted-condition + commuted-select variants of these patterns are
11268   // canonicalized to these forms in IR.
11269   SDValue X = Cond.getOperand(0);
11270   SDValue CondC = Cond.getOperand(1);
11271   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11272   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
11273       isAllOnesOrAllOnesSplat(C2)) {
11274     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
11275     SDLoc DL(N);
11276     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11277     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11278     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
11279   }
11280   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
11281     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
11282     SDLoc DL(N);
11283     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
11284     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
11285     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
11286   }
11287   return SDValue();
11288 }
11289 
11290 static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT,
11291                                                  const TargetLowering &TLI) {
11292   if (!TLI.convertSelectOfConstantsToMath(VT))
11293     return false;
11294 
11295   if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse())
11296     return true;
11297   if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
11298     return true;
11299 
11300   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11301   if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1)))
11302     return true;
11303   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1)))
11304     return true;
11305 
11306   return false;
11307 }
11308 
11309 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
11310   SDValue Cond = N->getOperand(0);
11311   SDValue N1 = N->getOperand(1);
11312   SDValue N2 = N->getOperand(2);
11313   EVT VT = N->getValueType(0);
11314   EVT CondVT = Cond.getValueType();
11315   SDLoc DL(N);
11316 
11317   if (!VT.isInteger())
11318     return SDValue();
11319 
11320   auto *C1 = dyn_cast<ConstantSDNode>(N1);
11321   auto *C2 = dyn_cast<ConstantSDNode>(N2);
11322   if (!C1 || !C2)
11323     return SDValue();
11324 
11325   if (CondVT != MVT::i1 || LegalOperations) {
11326     // fold (select Cond, 0, 1) -> (xor Cond, 1)
    // We can't do this reliably if integer-based booleans have different
    // contents from floating-point-based booleans. This is because we can't
    // tell whether we have an integer-based boolean or a floating-point-based
    // boolean unless we can find the SETCC that produced it and inspect its
    // operands. This is fairly easy if Cond is the SETCC node, but it can
    // potentially be undiscoverable (or not reasonably discoverable). For
    // example, it could be in another basic block or it could require
    // searching a complicated expression.
11335     if (CondVT.isInteger() &&
11336         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
11337             TargetLowering::ZeroOrOneBooleanContent &&
11338         TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
11339             TargetLowering::ZeroOrOneBooleanContent &&
11340         C1->isZero() && C2->isOne()) {
11341       SDValue NotCond =
11342           DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
11343       if (VT.bitsEq(CondVT))
11344         return NotCond;
11345       return DAG.getZExtOrTrunc(NotCond, DL, VT);
11346     }
11347 
11348     return SDValue();
11349   }
11350 
11351   // Only do this before legalization to avoid conflicting with target-specific
11352   // transforms in the other direction (create a select from a zext/sext). There
11353   // is also a target-independent combine here in DAGCombiner in the other
11354   // direction for (select Cond, -1, 0) when the condition is not i1.
11355   assert(CondVT == MVT::i1 && !LegalOperations);
11356 
11357   // select Cond, 1, 0 --> zext (Cond)
11358   if (C1->isOne() && C2->isZero())
11359     return DAG.getZExtOrTrunc(Cond, DL, VT);
11360 
11361   // select Cond, -1, 0 --> sext (Cond)
11362   if (C1->isAllOnes() && C2->isZero())
11363     return DAG.getSExtOrTrunc(Cond, DL, VT);
11364 
11365   // select Cond, 0, 1 --> zext (!Cond)
11366   if (C1->isZero() && C2->isOne()) {
11367     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11368     NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT);
11369     return NotCond;
11370   }
11371 
11372   // select Cond, 0, -1 --> sext (!Cond)
11373   if (C1->isZero() && C2->isAllOnes()) {
11374     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11375     NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11376     return NotCond;
11377   }
11378 
11379   // Use a target hook because some targets may prefer to transform in the
11380   // other direction.
11381   if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI))
11382     return SDValue();
11383 
11384   // For any constants that differ by 1, we can transform the select into
11385   // an extend and add.
11386   const APInt &C1Val = C1->getAPIntValue();
11387   const APInt &C2Val = C2->getAPIntValue();
11388 
11389   // select Cond, C1, C1-1 --> add (zext Cond), C1-1
11390   if (C1Val - 1 == C2Val) {
11391     Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11392     return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11393   }
11394 
11395   // select Cond, C1, C1+1 --> add (sext Cond), C1+1
11396   if (C1Val + 1 == C2Val) {
11397     Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11398     return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
11399   }
11400 
11401   // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
11402   if (C1Val.isPowerOf2() && C2Val.isZero()) {
11403     Cond = DAG.getZExtOrTrunc(Cond, DL, VT);
11404     SDValue ShAmtC =
11405         DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL);
11406     return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
11407   }
11408 
11409   // select Cond, -1, C --> or (sext Cond), C
11410   if (C1->isAllOnes()) {
11411     Cond = DAG.getSExtOrTrunc(Cond, DL, VT);
11412     return DAG.getNode(ISD::OR, DL, VT, Cond, N2);
11413   }
11414 
11415   // select Cond, C, -1 --> or (sext (not Cond)), C
11416   if (C2->isAllOnes()) {
11417     SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
11418     NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT);
11419     return DAG.getNode(ISD::OR, DL, VT, NotCond, N1);
11420   }
11421 
11422   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
11423     return V;
11424 
11425   return SDValue();
11426 }
11427 
11428 static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
11429   assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
11430          "Expected a (v)select");
11431   SDValue Cond = N->getOperand(0);
11432   SDValue T = N->getOperand(1), F = N->getOperand(2);
11433   EVT VT = N->getValueType(0);
11434   if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
11435     return SDValue();
11436 
11437   // select Cond, Cond, F --> or Cond, F
11438   // select Cond, 1, F    --> or Cond, F
11439   if (Cond == T || isOneOrOneSplat(T, /* AllowUndefs */ true))
11440     return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
11441 
11442   // select Cond, T, Cond --> and Cond, T
11443   // select Cond, T, 0    --> and Cond, T
11444   if (Cond == F || isNullOrNullSplat(F, /* AllowUndefs */ true))
11445     return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
11446 
11447   // select Cond, T, 1 --> or (not Cond), T
11448   if (isOneOrOneSplat(F, /* AllowUndefs */ true)) {
11449     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
11450     return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
11451   }
11452 
11453   // select Cond, 0, F --> and (not Cond), F
11454   if (isNullOrNullSplat(T, /* AllowUndefs */ true)) {
11455     SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
11456     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
11457   }
11458 
11459   return SDValue();
11460 }
11461 
11462 static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) {
11463   SDValue N0 = N->getOperand(0);
11464   SDValue N1 = N->getOperand(1);
11465   SDValue N2 = N->getOperand(2);
11466   EVT VT = N->getValueType(0);
11467   if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse())
11468     return SDValue();
11469 
11470   SDValue Cond0 = N0.getOperand(0);
11471   SDValue Cond1 = N0.getOperand(1);
11472   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11473   if (VT != Cond0.getValueType())
11474     return SDValue();
11475 
11476   // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the
11477   // compare is inverted from that pattern ("Cond0 s> -1").
11478   if (CC == ISD::SETLT && isNullOrNullSplat(Cond1))
11479     ; // This is the pattern we are looking for.
11480   else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1))
11481     std::swap(N1, N2);
11482   else
11483     return SDValue();
11484 
11485   // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1
11486   if (isNullOrNullSplat(N2)) {
11487     SDLoc DL(N);
11488     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11489     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11490     return DAG.getNode(ISD::AND, DL, VT, Sra, N1);
11491   }
11492 
11493   // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2
11494   if (isAllOnesOrAllOnesSplat(N1)) {
11495     SDLoc DL(N);
11496     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11497     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11498     return DAG.getNode(ISD::OR, DL, VT, Sra, N2);
11499   }
11500 
11501   // If we have to invert the sign bit mask, only do that transform if the
11502   // target has a bitwise 'and not' instruction (the invert is free).
  // (Cond0 s< 0) ? 0 : N2 --> ~(Cond0 s>> BW-1) & N2
11504   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11505   if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) {
11506     SDLoc DL(N);
11507     SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
11508     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt);
11509     SDValue Not = DAG.getNOT(DL, Sra, VT);
11510     return DAG.getNode(ISD::AND, DL, VT, Not, N2);
11511   }
11512 
11513   // TODO: There's another pattern in this family, but it may require
11514   //       implementing hasOrNot() to check for profitability:
11515   //       (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2
11516 
11517   return SDValue();
11518 }
11519 
11520 SDValue DAGCombiner::visitSELECT(SDNode *N) {
11521   SDValue N0 = N->getOperand(0);
11522   SDValue N1 = N->getOperand(1);
11523   SDValue N2 = N->getOperand(2);
11524   EVT VT = N->getValueType(0);
11525   EVT VT0 = N0.getValueType();
11526   SDLoc DL(N);
11527   SDNodeFlags Flags = N->getFlags();
11528 
11529   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
11530     return V;
11531 
11532   if (SDValue V = foldBoolSelectToLogic(N, DAG))
11533     return V;
11534 
11535   // select (not Cond), N1, N2 -> select Cond, N2, N1
11536   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
11537     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
11538     SelectOp->setFlags(Flags);
11539     return SelectOp;
11540   }
11541 
11542   if (SDValue V = foldSelectOfConstants(N))
11543     return V;
11544 
11545   // If we can fold this based on the true/false value, do so.
11546   if (SimplifySelectOps(N, N1, N2))
11547     return SDValue(N, 0); // Don't revisit N.
11548 
11549   if (VT0 == MVT::i1) {
11550     // The code in this block deals with the following 2 equivalences:
11551     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
11552     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
11553     // The target can specify its preferred form with the
    // shouldNormalizeToSelectSequence() callback. However, we always transform
    // to the right-hand form if the inner select already exists in the DAG,
    // and we always transform to the left-hand form if we know that we can
    // further optimize the combination of the conditions.
11558     bool normalizeToSequence =
11559         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
11560     // select (and Cond0, Cond1), X, Y
11561     //   -> select Cond0, (select Cond1, X, Y), Y
11562     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
11563       SDValue Cond0 = N0->getOperand(0);
11564       SDValue Cond1 = N0->getOperand(1);
11565       SDValue InnerSelect =
11566           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
11567       if (normalizeToSequence || !InnerSelect.use_empty())
11568         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
11569                            InnerSelect, N2, Flags);
11570       // Cleanup on failure.
11571       if (InnerSelect.use_empty())
11572         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11573     }
11574     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
11575     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
11576       SDValue Cond0 = N0->getOperand(0);
11577       SDValue Cond1 = N0->getOperand(1);
11578       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
11579                                         Cond1, N1, N2, Flags);
11580       if (normalizeToSequence || !InnerSelect.use_empty())
11581         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
11582                            InnerSelect, Flags);
11583       // Cleanup on failure.
11584       if (InnerSelect.use_empty())
11585         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
11586     }
11587 
11588     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
11589     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
11590       SDValue N1_0 = N1->getOperand(0);
11591       SDValue N1_1 = N1->getOperand(1);
11592       SDValue N1_2 = N1->getOperand(2);
11593       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
11594         // Create the actual and node if we can generate good code for it.
11595         if (!normalizeToSequence) {
11596           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
11597           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
11598                              N2, Flags);
11599         }
11600         // Otherwise see if we can optimize the "and" to a better pattern.
11601         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
11602           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
11603                              N2, Flags);
11604         }
11605       }
11606     }
11607     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
11608     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
11609       SDValue N2_0 = N2->getOperand(0);
11610       SDValue N2_1 = N2->getOperand(1);
11611       SDValue N2_2 = N2->getOperand(2);
11612       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
11613         // Create the actual or node if we can generate good code for it.
11614         if (!normalizeToSequence) {
11615           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
11616           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
11617                              N2_2, Flags);
11618         }
11619         // Otherwise see if we can optimize to a better pattern.
11620         if (SDValue Combined = visitORLike(N0, N2_0, N))
11621           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
11622                              N2_2, Flags);
11623       }
11624     }
11625   }
11626 
11627   // Fold selects based on a setcc into other things, such as min/max/abs.
11628   if (N0.getOpcode() == ISD::SETCC) {
11629     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
11630     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
11631 
11632     // select (fcmp lt x, y), x, y -> fminnum x, y
11633     // select (fcmp gt x, y), x, y -> fmaxnum x, y
11634     //
11635     // This is OK if we don't care what happens if either operand is a NaN.
11636     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
11637       if (SDValue FMinMax =
11638               combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC))
11639         return FMinMax;
11640 
11641     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
11642     // This is conservatively limited to pre-legal-operations to give targets
11643     // a chance to reverse the transform if they want to do that. Also, it is
11644     // unlikely that the pattern would be formed late, so it's probably not
11645     // worth going through the other checks.
11646     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
11647         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
11648         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
11649       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
11650       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
11651       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
11652         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
11653         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
11654         //
11655         // The IR equivalent of this transform would have this form:
11656         //   %a = add %x, C
11657         //   %c = icmp ugt %x, ~C
11658         //   %r = select %c, -1, %a
11659         //   =>
11660         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
11661         //   %u0 = extractvalue %u, 0
11662         //   %u1 = extractvalue %u, 1
11663         //   %r = select %u1, -1, %u0
11664         SDVTList VTs = DAG.getVTList(VT, VT0);
11665         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
11666         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
11667       }
11668     }
11669 
11670     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
11671         (!LegalOperations &&
11672          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
      // Any flags available in a select/setcc fold will be on the setcc as
      // they migrated from fcmp.
11675       Flags = N0->getFlags();
11676       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
11677                                        N2, N0.getOperand(2));
11678       SelectNode->setFlags(Flags);
11679       return SelectNode;
11680     }
11681 
11682     if (SDValue NewSel = SimplifySelect(DL, N0, N1, N2))
11683       return NewSel;
11684   }
11685 
11686   if (!VT.isVector())
11687     if (SDValue BinOp = foldSelectOfBinops(N))
11688       return BinOp;
11689 
11690   if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG))
11691     return R;
11692 
11693   return SDValue();
11694 }
11695 
11696 // This function assumes all the vselect's arguments are CONCAT_VECTOR
11697 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
11698 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
11699   SDLoc DL(N);
11700   SDValue Cond = N->getOperand(0);
11701   SDValue LHS = N->getOperand(1);
11702   SDValue RHS = N->getOperand(2);
11703   EVT VT = N->getValueType(0);
11704   int NumElems = VT.getVectorNumElements();
11705   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
11706          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
11707          Cond.getOpcode() == ISD::BUILD_VECTOR);
11708 
  // CONCAT_VECTORS can take an arbitrary number of arguments. We only care
  // about binary ones here.
11711   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
11712     return SDValue();
11713 
11714   // We're sure we have an even number of elements due to the
11715   // concat_vectors we have as arguments to vselect.
  // Skip BV elements until we find one that's not an UNDEF. After we find a
  // non-UNDEF element, keep looping until we get to half the length of the
  // BV and check that all the non-undef elements are the same.
11719   ConstantSDNode *BottomHalf = nullptr;
11720   for (int i = 0; i < NumElems / 2; ++i) {
11721     if (Cond->getOperand(i)->isUndef())
11722       continue;
11723 
11724     if (BottomHalf == nullptr)
11725       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11726     else if (Cond->getOperand(i).getNode() != BottomHalf)
11727       return SDValue();
11728   }
11729 
11730   // Do the same for the second half of the BuildVector
11731   ConstantSDNode *TopHalf = nullptr;
11732   for (int i = NumElems / 2; i < NumElems; ++i) {
11733     if (Cond->getOperand(i)->isUndef())
11734       continue;
11735 
11736     if (TopHalf == nullptr)
11737       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
11738     else if (Cond->getOperand(i).getNode() != TopHalf)
11739       return SDValue();
11740   }
11741 
11742   assert(TopHalf && BottomHalf &&
11743          "One half of the selector was all UNDEFs and the other was all the "
11744          "same value. This should have been addressed before this function.");
11745   return DAG.getNode(
11746       ISD::CONCAT_VECTORS, DL, VT,
11747       BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0),
11748       TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1));
11749 }
11750 
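// Fold a uniform (splat) contribution of a gather/scatter index into the base
// pointer, so that BasePtr + splat(S) + I is addressed as (BasePtr + S) with
// index I, reusing the existing operands where possible.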
11751 bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled,
11752                        SelectionDAG &DAG, const SDLoc &DL) {
11753 
11754   // Only perform the transformation when existing operands can be reused.
11755   if (IndexIsScaled)
11756     return false;
11757 
11758   if (!isNullConstant(BasePtr) && !Index.hasOneUse())
11759     return false;
11760 
11761   EVT VT = BasePtr.getValueType();
11762 
11763   if (SDValue SplatVal = DAG.getSplatValue(Index);
11764       SplatVal && !isNullConstant(SplatVal) &&
11765       SplatVal.getValueType() == VT) {
11766     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11767     Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT));
11768     return true;
11769   }
11770 
11771   if (Index.getOpcode() != ISD::ADD)
11772     return false;
11773 
11774   if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0));
11775       SplatVal && SplatVal.getValueType() == VT) {
11776     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11777     Index = Index.getOperand(1);
11778     return true;
11779   }
11780   if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1));
11781       SplatVal && SplatVal.getValueType() == VT) {
11782     BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal);
11783     Index = Index.getOperand(0);
11784     return true;
11785   }
11786   return false;
11787 }
11788 
11789 // Fold sext/zext of index into index type.
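// For example (types illustrative): if the index is
// (zext nxv4i16 %idx to nxv4i32) and the target reports the extend as
// removable, the gather/scatter can use %idx directly with an unsigned
// index type.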
11790 bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
11791                      SelectionDAG &DAG) {
11792   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
11793 
11794   // It's always safe to look through zero extends.
11795   if (Index.getOpcode() == ISD::ZERO_EXTEND) {
11796     if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11797       IndexType = ISD::UNSIGNED_SCALED;
11798       Index = Index.getOperand(0);
11799       return true;
11800     }
11801     if (ISD::isIndexTypeSigned(IndexType)) {
11802       IndexType = ISD::UNSIGNED_SCALED;
11803       return true;
11804     }
11805   }
11806 
11807   // It's only safe to look through sign extends when Index is signed.
11808   if (Index.getOpcode() == ISD::SIGN_EXTEND &&
11809       ISD::isIndexTypeSigned(IndexType) &&
11810       TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
11811     Index = Index.getOperand(0);
11812     return true;
11813   }
11814 
11815   return false;
11816 }
11817 
11818 SDValue DAGCombiner::visitVPSCATTER(SDNode *N) {
11819   VPScatterSDNode *MSC = cast<VPScatterSDNode>(N);
11820   SDValue Mask = MSC->getMask();
11821   SDValue Chain = MSC->getChain();
11822   SDValue Index = MSC->getIndex();
11823   SDValue Scale = MSC->getScale();
11824   SDValue StoreVal = MSC->getValue();
11825   SDValue BasePtr = MSC->getBasePtr();
11826   SDValue VL = MSC->getVectorLength();
11827   ISD::MemIndexType IndexType = MSC->getIndexType();
11828   SDLoc DL(N);
11829 
11830   // Zap scatters with a zero mask.
11831   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11832     return Chain;
11833 
11834   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11835     SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11836     return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11837                             DL, Ops, MSC->getMemOperand(), IndexType);
11838   }
11839 
11840   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11841     SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL};
11842     return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11843                             DL, Ops, MSC->getMemOperand(), IndexType);
11844   }
11845 
11846   return SDValue();
11847 }
11848 
11849 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
11850   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
11851   SDValue Mask = MSC->getMask();
11852   SDValue Chain = MSC->getChain();
11853   SDValue Index = MSC->getIndex();
11854   SDValue Scale = MSC->getScale();
11855   SDValue StoreVal = MSC->getValue();
11856   SDValue BasePtr = MSC->getBasePtr();
11857   ISD::MemIndexType IndexType = MSC->getIndexType();
11858   SDLoc DL(N);
11859 
11860   // Zap scatters with a zero mask.
11861   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11862     return Chain;
11863 
11864   if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) {
11865     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11866     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11867                                 DL, Ops, MSC->getMemOperand(), IndexType,
11868                                 MSC->isTruncatingStore());
11869   }
11870 
11871   if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) {
11872     SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale};
11873     return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(),
11874                                 DL, Ops, MSC->getMemOperand(), IndexType,
11875                                 MSC->isTruncatingStore());
11876   }
11877 
11878   return SDValue();
11879 }
11880 
11881 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
11882   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
11883   SDValue Mask = MST->getMask();
11884   SDValue Chain = MST->getChain();
11885   SDValue Value = MST->getValue();
11886   SDValue Ptr = MST->getBasePtr();
11887   SDLoc DL(N);
11888 
11889   // Zap masked stores with a zero mask.
11890   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
11891     return Chain;
11892 
  // Remove an earlier masked store if this store to the same base pointer
  // completely overwrites it (same mask and store size, or an all-ones
  // mask that covers it).
11894   if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
11895     if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
11896         MST1->isSimple() && MST1->getBasePtr() == Ptr &&
11897         !MST->getBasePtr().isUndef() &&
11898         ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
11899                                          MST1->getMemoryVT().getStoreSize()) ||
11900          ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
11901         TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
11902                             MST->getMemoryVT().getStoreSize())) {
11903       CombineTo(MST1, MST1->getChain());
11904       if (N->getOpcode() != ISD::DELETED_NODE)
11905         AddToWorklist(N);
11906       return SDValue(N, 0);
11907     }
11908   }
11909 
  // If this is a masked store with an all-ones mask, we can use an unmasked
  // store.
11911   // FIXME: Can we do this for indexed, compressing, or truncating stores?
11912   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
11913       !MST->isCompressingStore() && !MST->isTruncatingStore())
11914     return DAG.getStore(MST->getChain(), SDLoc(N), MST->getValue(),
11915                         MST->getBasePtr(), MST->getPointerInfo(),
11916                         MST->getOriginalAlign(), MachineMemOperand::MOStore,
11917                         MST->getAAInfo());
11918 
11919   // Try transforming N to an indexed store.
11920   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
11921     return SDValue(N, 0);
11922 
11923   if (MST->isTruncatingStore() && MST->isUnindexed() &&
11924       Value.getValueType().isInteger() &&
11925       (!isa<ConstantSDNode>(Value) ||
11926        !cast<ConstantSDNode>(Value)->isOpaque())) {
11927     APInt TruncDemandedBits =
11928         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
11929                              MST->getMemoryVT().getScalarSizeInBits());
11930 
11931     // See if we can simplify the operation with
11932     // SimplifyDemandedBits, which only works if the value has a single use.
11933     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (N is deleted). SimplifyDemandedBits will
      // add Value's node back to the worklist if necessary, but we also
      // need to re-visit the Store node itself.
11938       if (N->getOpcode() != ISD::DELETED_NODE)
11939         AddToWorklist(N);
11940       return SDValue(N, 0);
11941     }
11942   }
11943 
11944   // If this is a TRUNC followed by a masked store, fold this into a masked
11945   // truncating store.  We can do this even if this is already a masked
11946   // truncstore.
  // TODO: Try combining to a masked compress store if possible.
11948   if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() &&
11949       MST->isUnindexed() && !MST->isCompressingStore() &&
11950       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
11951                                MST->getMemoryVT(), LegalOperations)) {
11952     auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(),
11953                                          Value.getOperand(0).getValueType());
11954     return DAG.getMaskedStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
11955                               MST->getOffset(), Mask, MST->getMemoryVT(),
11956                               MST->getMemOperand(), MST->getAddressingMode(),
11957                               /*IsTruncating=*/true);
11958   }
11959 
11960   return SDValue();
11961 }
11962 
11963 SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) {
11964   auto *SST = cast<VPStridedStoreSDNode>(N);
11965   EVT EltVT = SST->getValue().getValueType().getVectorElementType();
11966   // Combine strided stores with unit-stride to a regular VP store.
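  // For example (element type assumed): with f32 elements the store size is
  // 4 bytes, so a constant stride of 4 places the elements contiguously in
  // memory, which is exactly the layout of a regular VP store.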
11967   if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride());
11968       CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
11969     return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(),
11970                           SST->getBasePtr(), SST->getOffset(), SST->getMask(),
11971                           SST->getVectorLength(), SST->getMemoryVT(),
11972                           SST->getMemOperand(), SST->getAddressingMode(),
11973                           SST->isTruncatingStore(), SST->isCompressingStore());
11974   }
11975   return SDValue();
11976 }
11977 
11978 SDValue DAGCombiner::visitVPGATHER(SDNode *N) {
11979   VPGatherSDNode *MGT = cast<VPGatherSDNode>(N);
11980   SDValue Mask = MGT->getMask();
11981   SDValue Chain = MGT->getChain();
11982   SDValue Index = MGT->getIndex();
11983   SDValue Scale = MGT->getScale();
11984   SDValue BasePtr = MGT->getBasePtr();
11985   SDValue VL = MGT->getVectorLength();
11986   ISD::MemIndexType IndexType = MGT->getIndexType();
11987   SDLoc DL(N);
11988 
11989   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
11990     SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11991     return DAG.getGatherVP(
11992         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
11993         Ops, MGT->getMemOperand(), IndexType);
11994   }
11995 
11996   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
11997     SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL};
11998     return DAG.getGatherVP(
11999         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12000         Ops, MGT->getMemOperand(), IndexType);
12001   }
12002 
12003   return SDValue();
12004 }
12005 
12006 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
12007   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
12008   SDValue Mask = MGT->getMask();
12009   SDValue Chain = MGT->getChain();
12010   SDValue Index = MGT->getIndex();
12011   SDValue Scale = MGT->getScale();
12012   SDValue PassThru = MGT->getPassThru();
12013   SDValue BasePtr = MGT->getBasePtr();
12014   ISD::MemIndexType IndexType = MGT->getIndexType();
12015   SDLoc DL(N);
12016 
12017   // Zap gathers with a zero mask.
12018   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12019     return CombineTo(N, PassThru, MGT->getChain());
12020 
12021   if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) {
12022     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12023     return DAG.getMaskedGather(
12024         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12025         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12026   }
12027 
12028   if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) {
12029     SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale};
12030     return DAG.getMaskedGather(
12031         DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL,
12032         Ops, MGT->getMemOperand(), IndexType, MGT->getExtensionType());
12033   }
12034 
12035   return SDValue();
12036 }
12037 
12038 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
12039   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
12040   SDValue Mask = MLD->getMask();
12041   SDLoc DL(N);
12042 
12043   // Zap masked loads with a zero mask.
12044   if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
12045     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
12046 
  // If this is a masked load with an all-ones mask, we can use an unmasked
  // load.
12048   // FIXME: Can we do this for indexed, expanding, or extending loads?
12049   if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MLD->isUnindexed() &&
12050       !MLD->isExpandingLoad() && MLD->getExtensionType() == ISD::NON_EXTLOAD) {
12051     SDValue NewLd = DAG.getLoad(
12052         N->getValueType(0), SDLoc(N), MLD->getChain(), MLD->getBasePtr(),
12053         MLD->getPointerInfo(), MLD->getOriginalAlign(),
12054         MachineMemOperand::MOLoad, MLD->getAAInfo(), MLD->getRanges());
12055     return CombineTo(N, NewLd, NewLd.getValue(1));
12056   }
12057 
12058   // Try transforming N to an indexed load.
12059   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
12060     return SDValue(N, 0);
12061 
12062   return SDValue();
12063 }
12064 
12065 SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) {
12066   auto *SLD = cast<VPStridedLoadSDNode>(N);
12067   EVT EltVT = SLD->getValueType(0).getVectorElementType();
12068   // Combine strided loads with unit-stride to a regular VP load.
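  // As with strided stores above, a constant stride equal to the element
  // store size (e.g. 4 for f32) means the accesses are contiguous.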
12069   if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride());
12070       CStride && CStride->getZExtValue() == EltVT.getStoreSize()) {
12071     SDValue NewLd = DAG.getLoadVP(
12072         SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0),
12073         SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(),
12074         SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(),
12075         SLD->getMemOperand(), SLD->isExpandingLoad());
12076     return CombineTo(N, NewLd, NewLd.getValue(1));
12077   }
12078   return SDValue();
12079 }
12080 
12081 /// A vector select of 2 constant vectors can be simplified to math/logic to
12082 /// avoid a variable select instruction and possibly avoid constant loads.
12083 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
12084   SDValue Cond = N->getOperand(0);
12085   SDValue N1 = N->getOperand(1);
12086   SDValue N2 = N->getOperand(2);
12087   EVT VT = N->getValueType(0);
12088   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
12089       !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) ||
12090       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
12091       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
12092     return SDValue();
12093 
12094   // Check if we can use the condition value to increment/decrement a single
12095   // constant value. This simplifies a select to an add and removes a constant
12096   // load/materialization from the general case.
12097   bool AllAddOne = true;
12098   bool AllSubOne = true;
12099   unsigned Elts = VT.getVectorNumElements();
12100   for (unsigned i = 0; i != Elts; ++i) {
12101     SDValue N1Elt = N1.getOperand(i);
12102     SDValue N2Elt = N2.getOperand(i);
12103     if (N1Elt.isUndef() || N2Elt.isUndef())
12104       continue;
12105     if (N1Elt.getValueType() != N2Elt.getValueType())
12106       continue;
12107 
12108     const APInt &C1 = N1Elt->getAsAPIntVal();
12109     const APInt &C2 = N2Elt->getAsAPIntVal();
12110     if (C1 != C2 + 1)
12111       AllAddOne = false;
12112     if (C1 != C2 - 1)
12113       AllSubOne = false;
12114   }
12115 
12116   // Further simplifications for the extra-special cases where the constants are
12117   // all 0 or all -1 should be implemented as folds of these patterns.
12118   SDLoc DL(N);
12119   if (AllAddOne || AllSubOne) {
12120     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
12121     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
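    // For example (constants illustrative, v4i32):
    //   vselect Cond, <5,5,5,5>, <4,4,4,4> --> add (zext Cond), <4,4,4,4>
    // since the zext of an i1 lane is 0 or 1, this reproduces both arms.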
12122     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
12123     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
12124     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
12125   }
12126 
12127   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
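  // For example (constants illustrative): vselect Cond, <8,8,8,8>, zero
  // becomes shl (zext Cond), 3, since 8 == 1 << 3.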
12128   APInt Pow2C;
12129   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
12130       isNullOrNullSplat(N2)) {
12131     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
12132     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
12133     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
12134   }
12135 
12136   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
12137     return V;
12138 
12139   // The general case for select-of-constants:
12140   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
12141   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
12142   // leave that to a machine-specific pass.
12143   return SDValue();
12144 }
12145 
12146 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
12147   SDValue N0 = N->getOperand(0);
12148   SDValue N1 = N->getOperand(1);
12149   SDValue N2 = N->getOperand(2);
12150   EVT VT = N->getValueType(0);
12151   SDLoc DL(N);
12152 
12153   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
12154     return V;
12155 
12156   if (SDValue V = foldBoolSelectToLogic(N, DAG))
12157     return V;
12158 
12159   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
12160   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
12161     return DAG.getSelect(DL, VT, F, N2, N1);
12162 
12163   // Canonicalize integer abs.
12164   // vselect (setg[te] X,  0),  X, -X ->
12165   // vselect (setgt    X, -1),  X, -X ->
12166   // vselect (setl[te] X,  0), -X,  X ->
12167   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
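  // Worked example (values illustrative, i32): for X = -5, Y = sra(X, 31)
  // = -1, add(X, Y) = -6, and xor(-6, -1) = 5 = |X|. For X >= 0, Y = 0 and
  // both the add and the xor are no-ops.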
12168   if (N0.getOpcode() == ISD::SETCC) {
12169     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
12170     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
12171     bool isAbs = false;
12172     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
12173 
12174     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
12175          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
12176         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
12177       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
12178     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
12179              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
12180       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
12181 
12182     if (isAbs) {
12183       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
12184         return DAG.getNode(ISD::ABS, DL, VT, LHS);
12185 
12186       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
12187                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
12188                                                   DL, getShiftAmountTy(VT)));
12189       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
12190       AddToWorklist(Shift.getNode());
12191       AddToWorklist(Add.getNode());
12192       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
12193     }
12194 
    // vselect (fcmp lt x, y), x, y -> fminnum x, y
    // vselect (fcmp gt x, y), x, y -> fmaxnum x, y
12197     //
12198     // This is OK if we don't care about what happens if either operand is a
12199     // NaN.
12200     //
12201     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
12202       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC))
12203         return FMinMax;
12204     }
12205 
12206     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12207       return S;
12208     if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
12209       return S;
12210 
12211     // If this select has a condition (setcc) with narrower operands than the
12212     // select, try to widen the compare to match the select width.
12213     // TODO: This should be extended to handle any constant.
12214     // TODO: This could be extended to handle non-loading patterns, but that
12215     //       requires thorough testing to avoid regressions.
12216     if (isNullOrNullSplat(RHS)) {
12217       EVT NarrowVT = LHS.getValueType();
12218       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
12219       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
12220       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
12221       unsigned WideWidth = WideVT.getScalarSizeInBits();
12222       bool IsSigned = isSignedIntSetCC(CC);
12223       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12224       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
12225           SetCCWidth != 1 && SetCCWidth < WideWidth &&
12226           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
12227           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
12228         // Both compare operands can be widened for free. The LHS can use an
12229         // extended load, and the RHS is a constant:
12230         //   vselect (ext (setcc load(X), C)), N1, N2 -->
12231         //   vselect (setcc extload(X), C'), N1, N2
12232         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
12233         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
12234         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
12235         EVT WideSetCCVT = getSetCCResultType(WideVT);
12236         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
12237         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
12238       }
12239     }
12240 
12241     // Match VSELECTs with absolute difference patterns.
12242     // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
12243     // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
12244     // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
12245     // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
12246     if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
12247         N1.getOperand(0) == N2.getOperand(1) &&
12248         N1.getOperand(1) == N2.getOperand(0)) {
12249       bool IsSigned = isSignedIntSetCC(CC);
12250       unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12251       if (hasOperation(ABDOpc, VT)) {
12252         switch (CC) {
12253         case ISD::SETGT:
12254         case ISD::SETGE:
12255         case ISD::SETUGT:
12256         case ISD::SETUGE:
12257           if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
12258             return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12259           break;
12260         case ISD::SETLT:
12261         case ISD::SETLE:
12262         case ISD::SETULT:
12263         case ISD::SETULE:
          if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
12265             return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
12266           break;
12267         default:
12268           break;
12269         }
12270       }
12271     }
12272 
12273     // Match VSELECTs into add with unsigned saturation.
12274     if (hasOperation(ISD::UADDSAT, VT)) {
      // Check if one of the arms of the VSELECT is a vector with all bits
      // set. If it's on the left side invert the predicate to simplify logic
      // below.
12277       SDValue Other;
12278       ISD::CondCode SatCC = CC;
12279       if (ISD::isConstantSplatVectorAllOnes(N1.getNode())) {
12280         Other = N2;
12281         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12282       } else if (ISD::isConstantSplatVectorAllOnes(N2.getNode())) {
12283         Other = N1;
12284       }
12285 
12286       if (Other && Other.getOpcode() == ISD::ADD) {
12287         SDValue CondLHS = LHS, CondRHS = RHS;
12288         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12289 
12290         // Canonicalize condition operands.
12291         if (SatCC == ISD::SETUGE) {
12292           std::swap(CondLHS, CondRHS);
12293           SatCC = ISD::SETULE;
12294         }
12295 
12296         // We can test against either of the addition operands.
12297         // x <= x+y ? x+y : ~0 --> uaddsat x, y
12298         // x+y >= x ? x+y : ~0 --> uaddsat x, y
12299         if (SatCC == ISD::SETULE && Other == CondRHS &&
12300             (OpLHS == CondLHS || OpRHS == CondLHS))
12301           return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12302 
12303         if (OpRHS.getOpcode() == CondRHS.getOpcode() &&
12304             (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12305              OpRHS.getOpcode() == ISD::SPLAT_VECTOR) &&
12306             CondLHS == OpLHS) {
12307           // If the RHS is a constant we have to reverse the const
12308           // canonicalization.
          // x <= ~C ? x+C : ~0 --> uaddsat x, C
12310           auto MatchUADDSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12311             return Cond->getAPIntValue() == ~Op->getAPIntValue();
12312           };
12313           if (SatCC == ISD::SETULE &&
12314               ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUADDSAT))
12315             return DAG.getNode(ISD::UADDSAT, DL, VT, OpLHS, OpRHS);
12316         }
12317       }
12318     }
12319 
12320     // Match VSELECTs into sub with unsigned saturation.
12321     if (hasOperation(ISD::USUBSAT, VT)) {
12322       // Check if one of the arms of the VSELECT is a zero vector. If it's on
12323       // the left side invert the predicate to simplify logic below.
12324       SDValue Other;
12325       ISD::CondCode SatCC = CC;
12326       if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
12327         Other = N2;
12328         SatCC = ISD::getSetCCInverse(SatCC, VT.getScalarType());
12329       } else if (ISD::isConstantSplatVectorAllZeros(N2.getNode())) {
12330         Other = N1;
12331       }
12332 
12333       // zext(x) >= y ? trunc(zext(x) - y) : 0
12334       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12335       // zext(x) >  y ? trunc(zext(x) - y) : 0
12336       // --> usubsat(trunc(zext(x)),trunc(umin(y,SatLimit)))
12337       if (Other && Other.getOpcode() == ISD::TRUNCATE &&
12338           Other.getOperand(0).getOpcode() == ISD::SUB &&
12339           (SatCC == ISD::SETUGE || SatCC == ISD::SETUGT)) {
12340         SDValue OpLHS = Other.getOperand(0).getOperand(0);
12341         SDValue OpRHS = Other.getOperand(0).getOperand(1);
12342         if (LHS == OpLHS && RHS == OpRHS && LHS.getOpcode() == ISD::ZERO_EXTEND)
12343           if (SDValue R = getTruncatedUSUBSAT(VT, LHS.getValueType(), LHS, RHS,
12344                                               DAG, DL))
12345             return R;
12346       }
12347 
12348       if (Other && Other.getNumOperands() == 2) {
12349         SDValue CondRHS = RHS;
12350         SDValue OpLHS = Other.getOperand(0), OpRHS = Other.getOperand(1);
12351 
12352         if (OpLHS == LHS) {
12353           // Look for a general sub with unsigned saturation first.
12354           // x >= y ? x-y : 0 --> usubsat x, y
12355           // x >  y ? x-y : 0 --> usubsat x, y
12356           if ((SatCC == ISD::SETUGE || SatCC == ISD::SETUGT) &&
12357               Other.getOpcode() == ISD::SUB && OpRHS == CondRHS)
12358             return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12359 
12360           if (OpRHS.getOpcode() == ISD::BUILD_VECTOR ||
12361               OpRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12362             if (CondRHS.getOpcode() == ISD::BUILD_VECTOR ||
12363                 CondRHS.getOpcode() == ISD::SPLAT_VECTOR) {
12364               // If the RHS is a constant we have to reverse the const
12365               // canonicalization.
              // x > C-1 ? x+(-C) : 0 --> usubsat x, C
12367               auto MatchUSUBSAT = [](ConstantSDNode *Op, ConstantSDNode *Cond) {
12368                 return (!Op && !Cond) ||
12369                        (Op && Cond &&
12370                         Cond->getAPIntValue() == (-Op->getAPIntValue() - 1));
12371               };
12372               if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD &&
12373                   ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT,
12374                                             /*AllowUndefs*/ true)) {
12375                 OpRHS = DAG.getNegative(OpRHS, DL, VT);
12376                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12377               }
12378 
12379               // Another special case: If C was a sign bit, the sub has been
12380               // canonicalized into a xor.
12381               // FIXME: Would it be better to use computeKnownBits to
12382               // determine whether it's safe to decanonicalize the xor?
12383               // x s< 0 ? x^C : 0 --> usubsat x, C
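              // Worked check (values illustrative, i8): with C = 0x80,
              // x = 0x85 gives x^C = 0x05 = usubsat(0x85, 0x80), while
              // x = 0x05 is non-negative, so the select and
              // usubsat(0x05, 0x80) both yield 0.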
12384               APInt SplatValue;
12385               if (SatCC == ISD::SETLT && Other.getOpcode() == ISD::XOR &&
12386                   ISD::isConstantSplatVector(OpRHS.getNode(), SplatValue) &&
12387                   ISD::isConstantSplatVectorAllZeros(CondRHS.getNode()) &&
12388                   SplatValue.isSignMask()) {
12389                 // Note that we have to rebuild the RHS constant here to
12390                 // ensure we don't rely on particular values of undef lanes.
12391                 OpRHS = DAG.getConstant(SplatValue, DL, VT);
12392                 return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS);
12393               }
12394             }
12395           }
12396         }
12397       }
12398     }
12399   }
12400 
12401   if (SimplifySelectOps(N, N1, N2))
12402     return SDValue(N, 0);  // Don't revisit N.
12403 
12404   // Fold (vselect all_ones, N1, N2) -> N1
12405   if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
12406     return N1;
12407   // Fold (vselect all_zeros, N1, N2) -> N2
12408   if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
12409     return N2;
12410 
  // The ConvertSelectToConcatVector function assumes both of the above
  // checks for (vselect (build_vector all-ones/all-zeros) ...) have been
  // made and addressed.
12414   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
12415       N2.getOpcode() == ISD::CONCAT_VECTORS &&
12416       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
12417     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
12418       return CV;
12419   }
12420 
12421   if (SDValue V = foldVSelectOfConstants(N))
12422     return V;
12423 
12424   if (hasOperation(ISD::SRA, VT))
12425     if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG))
12426       return V;
12427 
12428   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
12429     return SDValue(N, 0);
12430 
12431   return SDValue();
12432 }
12433 
12434 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
12435   SDValue N0 = N->getOperand(0);
12436   SDValue N1 = N->getOperand(1);
12437   SDValue N2 = N->getOperand(2);
12438   SDValue N3 = N->getOperand(3);
12439   SDValue N4 = N->getOperand(4);
12440   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
12441 
12442   // fold select_cc lhs, rhs, x, x, cc -> x
12443   if (N2 == N3)
12444     return N2;
12445 
12446   // select_cc bool, 0, x, y, seteq -> select bool, y, x
12447   if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 &&
12448       isNullConstant(N1))
12449     return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2);
12450 
12451   // Determine if the condition we're dealing with is constant
12452   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
12453                                   CC, SDLoc(N), false)) {
12454     AddToWorklist(SCC.getNode());
12455 
12456     // cond always true -> true val
12457     // cond always false -> false val
12458     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode()))
12459       return SCCC->isZero() ? N3 : N2;
12460 
    // When the condition is UNDEF, just return the first operand. This is
    // coherent with DAG creation; no setcc node is created in this case.
12463     if (SCC->isUndef())
12464       return N2;
12465 
12466     // Fold to a simpler select_cc
12467     if (SCC.getOpcode() == ISD::SETCC) {
12468       SDValue SelectOp = DAG.getNode(
12469           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
12470           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
12471       SelectOp->setFlags(SCC->getFlags());
12472       return SelectOp;
12473     }
12474   }
12475 
12476   // If we can fold this based on the true/false value, do so.
12477   if (SimplifySelectOps(N, N2, N3))
12478     return SDValue(N, 0);  // Don't revisit N.
12479 
12480   // fold select_cc into other things, such as min/max/abs
12481   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
12482 }
12483 
12484 SDValue DAGCombiner::visitSETCC(SDNode *N) {
  // setcc is very commonly used as an argument to brcond. This pattern
  // also lends itself to numerous combines and, as a result, it is
  // desirable to keep the argument to a brcond as a setcc for as long as
  // possible.
12488   bool PreferSetCC =
12489       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
12490 
12491   ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
12492   EVT VT = N->getValueType(0);
12493   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
12494 
12495   SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC);
12496 
12497   if (Combined) {
12498     // If we prefer to have a setcc, and we don't, we'll try our best to
12499     // recreate one using rebuildSetCC.
12500     if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
12501       SDValue NewSetCC = rebuildSetCC(Combined);
12502 
12503       // We don't have anything interesting to combine to.
12504       if (NewSetCC.getNode() == N)
12505         return SDValue();
12506 
12507       if (NewSetCC)
12508         return NewSetCC;
12509     }
12510     return Combined;
12511   }
12512 
12513   // Optimize
12514   //    1) (icmp eq/ne (and X, C0), (shift X, C1))
12515   // or
12516   //    2) (icmp eq/ne X, (rotate X, C1))
  // If C0 is a mask or shifted mask and the shift amt (C1) isolates the
  // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`),
  // then:
  // If C1 is a power of 2, the rotate and shift+and versions are
  // equivalent, so we can interchange them depending on target preference.
  // Otherwise, if we have the shift+and version we can interchange srl/shl,
  // which in turn affects the constant C0. We can use this to get better
  // constants again determined by target preference.
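  // For instance (width illustrative, i64): `(x & UINT32_MAX) == (x >> 32)`
  // holds exactly when the low and high halves of x agree, which is the
  // same condition as `x == rot(x, 32)`.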
12525   if (Cond == ISD::SETNE || Cond == ISD::SETEQ) {
12526     auto IsAndWithShift = [](SDValue A, SDValue B) {
12527       return A.getOpcode() == ISD::AND &&
12528              (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) &&
12529              A.getOperand(0) == B.getOperand(0);
12530     };
12531     auto IsRotateWithOp = [](SDValue A, SDValue B) {
12532       return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) &&
12533              B.getOperand(0) == A;
12534     };
12535     SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue();
12536     bool IsRotate = false;
12537 
12538     // Find either shift+and or rotate pattern.
12539     if (IsAndWithShift(N0, N1)) {
12540       AndOrOp = N0;
12541       ShiftOrRotate = N1;
12542     } else if (IsAndWithShift(N1, N0)) {
12543       AndOrOp = N1;
12544       ShiftOrRotate = N0;
12545     } else if (IsRotateWithOp(N0, N1)) {
12546       IsRotate = true;
12547       AndOrOp = N0;
12548       ShiftOrRotate = N1;
12549     } else if (IsRotateWithOp(N1, N0)) {
12550       IsRotate = true;
12551       AndOrOp = N1;
12552       ShiftOrRotate = N0;
12553     }
12554 
12555     if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() &&
12556         (IsRotate || AndOrOp.hasOneUse())) {
12557       EVT OpVT = N0.getValueType();
      // Get the constant shift/rotate amount and possibly the mask (if it's
      // the shift+and variant).
12560       auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> {
12561         ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false,
12562                                                     /*AllowTrunc*/ false);
12563         if (CNode == nullptr)
12564           return std::nullopt;
12565         return CNode->getAPIntValue();
12566       };
12567       std::optional<APInt> AndCMask =
12568           IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1));
12569       std::optional<APInt> ShiftCAmt =
12570           GetAPIntValue(ShiftOrRotate.getOperand(1));
12571       unsigned NumBits = OpVT.getScalarSizeInBits();
12572 
12573       // We found constants.
12574       if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) {
12575         unsigned ShiftOpc = ShiftOrRotate.getOpcode();
12576         // Check that the constants meet the constraints.
12577         bool CanTransform = IsRotate;
12578         if (!CanTransform) {
          // Check that the mask and shift complement each other.
          CanTransform = *ShiftCAmt == (~*AndCMask).popcount();
          // Check that we are comparing all bits.
          CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits;
          // Check that the and mask is correct for the shift.
          CanTransform &=
              ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask();
12586         }
12587 
12588         // See if target prefers another shift/rotate opcode.
12589         unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand(
12590             OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask);
12591         // Transform is valid and we have a new preference.
12592         if (CanTransform && NewShiftOpc != ShiftOpc) {
12593           SDLoc DL(N);
12594           SDValue NewShiftOrRotate =
12595               DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0),
12596                           ShiftOrRotate.getOperand(1));
12597           SDValue NewAndOrOp = SDValue();
12598 
12599           if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) {
12600             APInt NewMask =
12601                 NewShiftOpc == ISD::SHL
12602                     ? APInt::getHighBitsSet(NumBits,
12603                                             NumBits - ShiftCAmt->getZExtValue())
12604                     : APInt::getLowBitsSet(NumBits,
12605                                            NumBits - ShiftCAmt->getZExtValue());
12606             NewAndOrOp =
12607                 DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0),
12608                             DAG.getConstant(NewMask, DL, OpVT));
12609           } else {
12610             NewAndOrOp = ShiftOrRotate.getOperand(0);
12611           }
12612 
12613           return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond);
12614         }
12615       }
12616     }
12617   }
12618   return SDValue();
12619 }
12620 
12621 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
12622   SDValue LHS = N->getOperand(0);
12623   SDValue RHS = N->getOperand(1);
12624   SDValue Carry = N->getOperand(2);
12625   SDValue Cond = N->getOperand(3);
12626 
12627   // If Carry is false, fold to a regular SETCC.
12628   if (isNullConstant(Carry))
12629     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
12630 
12631   return SDValue();
12632 }
12633 
/// Check if N satisfies:
///   N is used once.
///   N is a Load.
///   The load is compatible with ExtOpcode, meaning:
///     If the load has an explicit zero/sign extension, ExtOpcode must have
///     the same extension.
///     Otherwise the load is compatible.
12641 static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
12642   if (!N.hasOneUse())
12643     return false;
12644 
12645   if (!isa<LoadSDNode>(N))
12646     return false;
12647 
12648   LoadSDNode *Load = cast<LoadSDNode>(N);
12649   ISD::LoadExtType LoadExt = Load->getExtensionType();
12650   if (LoadExt == ISD::NON_EXTLOAD || LoadExt == ISD::EXTLOAD)
12651     return true;
12652 
12653   // Now LoadExt is either SEXTLOAD or ZEXTLOAD, ExtOpcode must have the same
12654   // extension.
12655   if ((LoadExt == ISD::SEXTLOAD && ExtOpcode != ISD::SIGN_EXTEND) ||
12656       (LoadExt == ISD::ZEXTLOAD && ExtOpcode != ISD::ZERO_EXTEND))
12657     return false;
12658 
12659   return true;
12660 }
12661 
12662 /// Fold
12663 ///   (sext (select c, load x, load y)) -> (select c, sextload x, sextload y)
12664 ///   (zext (select c, load x, load y)) -> (select c, zextload x, zextload y)
12665 ///   (aext (select c, load x, load y)) -> (select c, extload x, extload y)
12666 /// This function is called by the DAGCombiner when visiting sext/zext/aext
12667 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12668 static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
12669                                          SelectionDAG &DAG,
12670                                          CombineLevel Level) {
12671   unsigned Opcode = N->getOpcode();
12672   SDValue N0 = N->getOperand(0);
12673   EVT VT = N->getValueType(0);
12674   SDLoc DL(N);
12675 
12676   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
12677           Opcode == ISD::ANY_EXTEND) &&
12678          "Expected EXTEND dag node in input!");
12679 
12680   if (!(N0->getOpcode() == ISD::SELECT || N0->getOpcode() == ISD::VSELECT) ||
12681       !N0.hasOneUse())
12682     return SDValue();
12683 
12684   SDValue Op1 = N0->getOperand(1);
12685   SDValue Op2 = N0->getOperand(2);
12686   if (!isCompatibleLoad(Op1, Opcode) || !isCompatibleLoad(Op2, Opcode))
12687     return SDValue();
12688 
12689   auto ExtLoadOpcode = ISD::EXTLOAD;
12690   if (Opcode == ISD::SIGN_EXTEND)
12691     ExtLoadOpcode = ISD::SEXTLOAD;
12692   else if (Opcode == ISD::ZERO_EXTEND)
12693     ExtLoadOpcode = ISD::ZEXTLOAD;
12694 
  // An illegal VSELECT may cause ISel to fail if it occurs after
  // legalization (DAG Combine2), so we should conservatively check the
  // OperationAction.
12697   LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
12698   LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
12699   if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
12700       !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
12701       (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
12702        TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
12703     return SDValue();
12704 
12705   SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
12706   SDValue Ext2 = DAG.getNode(Opcode, DL, VT, Op2);
12707   return DAG.getSelect(DL, VT, N0->getOperand(0), Ext1, Ext2);
12708 }
12709 
12710 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
12711 /// a build_vector of constants.
12712 /// This function is called by the DAGCombiner when visiting sext/zext/aext
12713 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
12714 /// Vector extends are not folded if operations are legal; this is to
12715 /// avoid introducing illegal build_vector dag nodes.
12716 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
12717                                          SelectionDAG &DAG, bool LegalTypes) {
12718   unsigned Opcode = N->getOpcode();
12719   SDValue N0 = N->getOperand(0);
12720   EVT VT = N->getValueType(0);
12721   SDLoc DL(N);
12722 
12723   assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
12724          "Expected EXTEND dag node in input!");
12725 
12726   // fold (sext c1) -> c1
12727   // fold (zext c1) -> c1
12728   // fold (aext c1) -> c1
12729   if (isa<ConstantSDNode>(N0))
12730     return DAG.getNode(Opcode, DL, VT, N0);
12731 
12732   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12733   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
12734   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
12735   if (N0->getOpcode() == ISD::SELECT) {
12736     SDValue Op1 = N0->getOperand(1);
12737     SDValue Op2 = N0->getOperand(2);
12738     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
12739         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
      // For any_extend, choose sign extension of the constants to allow a
      // possible further transform to sign_extend_inreg, i.e.
12742       //
12743       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
12744       // t2: i64 = any_extend t1
12745       // -->
12746       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
12747       // -->
12748       // t4: i64 = sign_extend_inreg t3
12749       unsigned FoldOpc = Opcode;
12750       if (FoldOpc == ISD::ANY_EXTEND)
12751         FoldOpc = ISD::SIGN_EXTEND;
12752       return DAG.getSelect(DL, VT, N0->getOperand(0),
12753                            DAG.getNode(FoldOpc, DL, VT, Op1),
12754                            DAG.getNode(FoldOpc, DL, VT, Op2));
12755     }
12756   }
12757 
  // fold (sext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants)) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants)) -> (build_vector AllConstants)
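  // For example (values illustrative):
  //   (zext (v2i8 build_vector <1, -1>) to v2i16) --> build_vector <1, 255>
  //   (sext (v2i8 build_vector <1, -1>) to v2i16) --> build_vector <1, -1>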
12761   EVT SVT = VT.getScalarType();
12762   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
12763       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
12764     return SDValue();
12765 
12766   // We can fold this node into a build_vector.
12767   unsigned VTBits = SVT.getSizeInBits();
12768   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
12769   SmallVector<SDValue, 8> Elts;
12770   unsigned NumElts = VT.getVectorNumElements();
12771 
12772   for (unsigned i = 0; i != NumElts; ++i) {
12773     SDValue Op = N0.getOperand(i);
12774     if (Op.isUndef()) {
12775       if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG)
12776         Elts.push_back(DAG.getUNDEF(SVT));
12777       else
12778         Elts.push_back(DAG.getConstant(0, DL, SVT));
12779       continue;
12780     }
12781 
12782     SDLoc DL(Op);
12783     // Get the constant value and if needed trunc it to the size of the type.
12784     // Nodes like build_vector might have constants wider than the scalar type.
12785     APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
12786     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
12787       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
12788     else
12789       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
12790   }
12791 
12792   return DAG.getBuildVector(VT, DL, Elts);
12793 }
12794 
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable the
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// transformation is profitable.
12799 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
12800                                     unsigned ExtOpc,
12801                                     SmallVectorImpl<SDNode *> &ExtendNodes,
12802                                     const TargetLowering &TLI) {
12803   bool HasCopyToRegUses = false;
12804   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
12805   for (SDNode::use_iterator UI = N0->use_begin(), UE = N0->use_end(); UI != UE;
12806        ++UI) {
12807     SDNode *User = *UI;
12808     if (User == N)
12809       continue;
12810     if (UI.getUse().getResNo() != N0.getResNo())
12811       continue;
12812     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
12813     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
12814       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
12815       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
12816         // Sign bits will be lost after a zext.
12817         return false;
12818       bool Add = false;
12819       for (unsigned i = 0; i != 2; ++i) {
12820         SDValue UseOp = User->getOperand(i);
12821         if (UseOp == N0)
12822           continue;
12823         if (!isa<ConstantSDNode>(UseOp))
12824           return false;
12825         Add = true;
12826       }
12827       if (Add)
12828         ExtendNodes.push_back(User);
12829       continue;
12830     }
12831     // If truncates aren't free and there are users we can't
12832     // extend, it isn't worthwhile.
12833     if (!isTruncFree)
12834       return false;
12835     // Remember if this value is live-out.
12836     if (User->getOpcode() == ISD::CopyToReg)
12837       HasCopyToRegUses = true;
12838   }
12839 
12840   if (HasCopyToRegUses) {
12841     bool BothLiveOut = false;
12842     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
12843          UI != UE; ++UI) {
12844       SDUse &Use = UI.getUse();
12845       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
12846         BothLiveOut = true;
12847         break;
12848       }
12849     }
12850     if (BothLiveOut)
12851       // Both unextended and extended values are live out. There had better be
12852       // a good reason for the transformation.
12853       return !ExtendNodes.empty();
12854   }
12855   return true;
12856 }
12857 
12858 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
12859                                   SDValue OrigLoad, SDValue ExtLoad,
12860                                   ISD::NodeType ExtType) {
12861   // Extend SetCC uses if necessary.
12862   SDLoc DL(ExtLoad);
12863   for (SDNode *SetCC : SetCCs) {
12864     SmallVector<SDValue, 4> Ops;
12865 
12866     for (unsigned j = 0; j != 2; ++j) {
12867       SDValue SOp = SetCC->getOperand(j);
12868       if (SOp == OrigLoad)
12869         Ops.push_back(ExtLoad);
12870       else
12871         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
12872     }
12873 
12874     Ops.push_back(SetCC->getOperand(2));
12875     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
12876   }
12877 }
12878 
12879 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
12880 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
12881   SDValue N0 = N->getOperand(0);
12882   EVT DstVT = N->getValueType(0);
12883   EVT SrcVT = N0.getValueType();
12884 
12885   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
12886           N->getOpcode() == ISD::ZERO_EXTEND) &&
12887          "Unexpected node type (not an extend)!");
12888 
12889   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
12890   // For example, on a target with legal v4i32, but illegal v8i32, turn:
12891   //   (v8i32 (sext (v8i16 (load x))))
12892   // into:
12893   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
12894   //                          (v4i32 (sextload (x + 16)))))
12895   // Where uses of the original load, i.e.:
12896   //   (v8i16 (load x))
12897   // are replaced with:
12898   //   (v8i16 (truncate
12899   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
12900   //                            (v4i32 (sextload (x + 16)))))))
12901   //
12902   // This combine is only applicable to illegal, but splittable, vectors.
12903   // All legal types, and illegal non-vector types, are handled elsewhere.
12904   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
12905   //
12906   if (N0->getOpcode() != ISD::LOAD)
12907     return SDValue();
12908 
12909   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12910 
12911   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
12912       !N0.hasOneUse() || !LN0->isSimple() ||
12913       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
12914       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
12915     return SDValue();
12916 
12917   SmallVector<SDNode *, 4> SetCCs;
12918   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
12919     return SDValue();
12920 
12921   ISD::LoadExtType ExtType =
12922       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
12923 
12924   // Try to split the vector types to get down to legal types.
12925   EVT SplitSrcVT = SrcVT;
12926   EVT SplitDstVT = DstVT;
12927   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
12928          SplitSrcVT.getVectorNumElements() > 1) {
12929     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
12930     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
12931   }
12932 
12933   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
12934     return SDValue();
12935 
12936   assert(!DstVT.isScalableVector() && "Unexpected scalable vector type");
12937 
12938   SDLoc DL(N);
12939   const unsigned NumSplits =
12940       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
12941   const unsigned Stride = SplitSrcVT.getStoreSize();
12942   SmallVector<SDValue, 4> Loads;
12943   SmallVector<SDValue, 4> Chains;
12944 
12945   SDValue BasePtr = LN0->getBasePtr();
12946   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
12947     const unsigned Offset = Idx * Stride;
12948     const Align Align = commonAlignment(LN0->getAlign(), Offset);
12949 
12950     SDValue SplitLoad = DAG.getExtLoad(
12951         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
12952         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
12953         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
12954 
12955     BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL);
12956 
12957     Loads.push_back(SplitLoad.getValue(0));
12958     Chains.push_back(SplitLoad.getValue(1));
12959   }
12960 
12961   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
12962   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
12963 
12964   // Simplify TF.
12965   AddToWorklist(NewChain.getNode());
12966 
12967   CombineTo(N, NewValue);
12968 
  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated extended loads.
12971   SDValue Trunc =
12972       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
12973   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
12974   CombineTo(N0.getNode(), Trunc, NewChain);
12975   return SDValue(N, 0); // Return N so it doesn't get rechecked!
12976 }
12977 
12978 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
12979 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
12980 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
12981   assert(N->getOpcode() == ISD::ZERO_EXTEND);
12982   EVT VT = N->getValueType(0);
12983   EVT OrigVT = N->getOperand(0).getValueType();
12984   if (TLI.isZExtFree(OrigVT, VT))
12985     return SDValue();
12986 
12987   // and/or/xor
12988   SDValue N0 = N->getOperand(0);
12989   if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
12990       N0.getOperand(1).getOpcode() != ISD::Constant ||
12991       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
12992     return SDValue();
12993 
12994   // shl/shr
12995   SDValue N1 = N0->getOperand(0);
12996   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
12997       N1.getOperand(1).getOpcode() != ISD::Constant ||
12998       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
12999     return SDValue();
13000 
13001   // load
13002   if (!isa<LoadSDNode>(N1.getOperand(0)))
13003     return SDValue();
13004   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
13005   EVT MemVT = Load->getMemoryVT();
13006   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
13007       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();

  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong: in the narrow type, SHL discards bits shifted above the
  // top, but after the zextload those bits survive in the wide type. The
  // zero-extended AND mask clears them; OR/XOR would let them leak through.
13013   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
13014     return SDValue();
13015 
13016   if (!N0.hasOneUse() || !N1.hasOneUse())
13017     return SDValue();
13018 
13019   SmallVector<SDNode*, 4> SetCCs;
13020   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
13021                                ISD::ZERO_EXTEND, SetCCs, TLI))
13022     return SDValue();
13023 
13024   // Actually do the transformation.
13025   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
13026                                    Load->getChain(), Load->getBasePtr(),
13027                                    Load->getMemoryVT(), Load->getMemOperand());
13028 
13029   SDLoc DL1(N1);
13030   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
13031                               N1.getOperand(1));
13032 
13033   APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13034   SDLoc DL0(N0);
13035   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
13036                             DAG.getConstant(Mask, DL0, VT));
13037 
13038   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13039   CombineTo(N, And);
13040   if (SDValue(Load, 0).hasOneUse()) {
13041     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
13042   } else {
13043     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
13044                                 Load->getValueType(0), ExtLoad);
13045     CombineTo(Load, Trunc, ExtLoad.getValue(1));
13046   }
13047 
13048   // N0 is dead at this point.
13049   recursivelyDeleteUnusedNodes(N0.getNode());
13050 
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
13052 }
13053 
13054 /// If we're narrowing or widening the result of a vector select and the final
13055 /// size is the same size as a setcc (compare) feeding the select, then try to
13056 /// apply the cast operation to the select's operands because matching vector
13057 /// sizes for a select condition and other operands should be more efficient.
13058 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
13059   unsigned CastOpcode = Cast->getOpcode();
13060   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
13061           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
13062           CastOpcode == ISD::FP_ROUND) &&
13063          "Unexpected opcode for vector select narrowing/widening");
13064 
13065   // We only do this transform before legal ops because the pattern may be
13066   // obfuscated by target-specific operations after legalization. Do not create
13067   // an illegal select op, however, because that may be difficult to lower.
13068   EVT VT = Cast->getValueType(0);
13069   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
13070     return SDValue();
13071 
13072   SDValue VSel = Cast->getOperand(0);
13073   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
13074       VSel.getOperand(0).getOpcode() != ISD::SETCC)
13075     return SDValue();
13076 
13077   // Does the setcc have the same vector size as the casted select?
13078   SDValue SetCC = VSel.getOperand(0);
13079   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
13080   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
13081     return SDValue();
13082 
13083   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
13084   SDValue A = VSel.getOperand(1);
13085   SDValue B = VSel.getOperand(2);
13086   SDValue CastA, CastB;
13087   SDLoc DL(Cast);
13088   if (CastOpcode == ISD::FP_ROUND) {
13089     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
13090     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
13091     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
13092   } else {
13093     CastA = DAG.getNode(CastOpcode, DL, VT, A);
13094     CastB = DAG.getNode(CastOpcode, DL, VT, B);
13095   }
13096   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
13097 }
13098 
13099 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13100 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13101 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
13102                                      const TargetLowering &TLI, EVT VT,
13103                                      bool LegalOperations, SDNode *N,
13104                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
13105   SDNode *N0Node = N0.getNode();
13106   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
13107                                                    : ISD::isZEXTLoad(N0Node);
13108   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
13109       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
13110     return SDValue();
13111 
13112   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13113   EVT MemVT = LN0->getMemoryVT();
  if ((LegalOperations || !LN0->isSimple() || VT.isVector()) &&
      !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
13117     return SDValue();
13118 
13119   SDValue ExtLoad =
13120       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13121                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
13122   Combiner.CombineTo(N, ExtLoad);
13123   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13124   if (LN0->use_empty())
13125     Combiner.recursivelyDeleteUnusedNodes(LN0);
13126   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13127 }
13128 
13129 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
13130 // Only generate vector extloads when 1) they're legal, and 2) they are
13131 // deemed desirable by the target.
13132 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
13133                                   const TargetLowering &TLI, EVT VT,
13134                                   bool LegalOperations, SDNode *N, SDValue N0,
13135                                   ISD::LoadExtType ExtLoadType,
13136                                   ISD::NodeType ExtOpc) {
  // TODO: isFixedLengthVector() should be removed, and any negative effects on
  // code generation should instead be handled by the target's implementation
  // of isVectorLoadExtDesirable().
13140   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
13141       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
13142       ((LegalOperations || VT.isFixedLengthVector() ||
13143         !cast<LoadSDNode>(N0)->isSimple()) &&
13144        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
13145     return {};
13146 
13147   bool DoXform = true;
13148   SmallVector<SDNode *, 4> SetCCs;
13149   if (!N0.hasOneUse())
13150     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
13151   if (VT.isVector())
13152     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
13153   if (!DoXform)
13154     return {};
13155 
13156   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13157   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
13158                                    LN0->getBasePtr(), N0.getValueType(),
13159                                    LN0->getMemOperand());
13160   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
13161   // If the load value is used only by N, replace it via CombineTo N.
13162   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
13163   Combiner.CombineTo(N, ExtLoad);
13164   if (NoReplaceTrunc) {
13165     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13166     Combiner.recursivelyDeleteUnusedNodes(LN0);
13167   } else {
13168     SDValue Trunc =
13169         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
13170     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
13171   }
13172   return SDValue(N, 0); // Return N so it doesn't get rechecked!
13173 }
13174 
13175 static SDValue
13176 tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT,
13177                          bool LegalOperations, SDNode *N, SDValue N0,
13178                          ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) {
13179   if (!N0.hasOneUse())
13180     return SDValue();
13181 
13182   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
13183   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
13184     return SDValue();
13185 
  if ((LegalOperations || !Ld->isSimple()) &&
13187       !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0)))
13188     return SDValue();
13189 
13190   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
13191     return SDValue();
13192 
13193   SDLoc dl(Ld);
13194   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
13195   SDValue NewLoad = DAG.getMaskedLoad(
13196       VT, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(), Ld->getMask(),
13197       PassThru, Ld->getMemoryVT(), Ld->getMemOperand(), Ld->getAddressingMode(),
13198       ExtLoadType, Ld->isExpandingLoad());
13199   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
13200   return NewLoad;
13201 }
13202 
13203 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
13204                                        bool LegalOperations) {
13205   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
13206           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
13207 
13208   SDValue SetCC = N->getOperand(0);
13209   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
13210       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
13211     return SDValue();
13212 
13213   SDValue X = SetCC.getOperand(0);
13214   SDValue Ones = SetCC.getOperand(1);
13215   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
13216   EVT VT = N->getValueType(0);
13217   EVT XVT = X.getValueType();
13218   // setge X, C is canonicalized to setgt, so we do not need to match that
13219   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
13220   // not require the 'not' op.
13221   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
13222     // Invert and smear/shift the sign bit:
13223     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
13224     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
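    // Sanity check (illustrative, N = 32): if X is non-negative, (not X) has
    // its sign bit set, so sra/srl by 31 gives -1/1, matching sext/zext of
    // the true compare result; if X is negative, both shifts give 0.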
13225     SDLoc DL(N);
13226     unsigned ShCt = VT.getSizeInBits() - 1;
13227     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13228     if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
13229       SDValue NotX = DAG.getNOT(DL, X, VT);
13230       SDValue ShiftAmount = DAG.getConstant(ShCt, DL, VT);
13231       auto ShiftOpcode =
13232         N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
13233       return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
13234     }
13235   }
13236   return SDValue();
13237 }
13238 
13239 SDValue DAGCombiner::foldSextSetcc(SDNode *N) {
13240   SDValue N0 = N->getOperand(0);
13241   if (N0.getOpcode() != ISD::SETCC)
13242     return SDValue();
13243 
13244   SDValue N00 = N0.getOperand(0);
13245   SDValue N01 = N0.getOperand(1);
13246   ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
13247   EVT VT = N->getValueType(0);
13248   EVT N00VT = N00.getValueType();
13249   SDLoc DL(N);
13250 
13251   // Propagate fast-math-flags.
13252   SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13253 
  // On some targets (such as x86 with SSE or AArch64 with NEON) the SETCC
  // result type is the same size as the compared operands. Try to optimize
  // sext(setcc()) if this is the case.
13257   if (VT.isVector() && !LegalOperations &&
13258       TLI.getBooleanContents(N00VT) ==
13259           TargetLowering::ZeroOrNegativeOneBooleanContent) {
13260     EVT SVT = getSetCCResultType(N00VT);
13261 
13262     // If we already have the desired type, don't change it.
13263     if (SVT != N0.getValueType()) {
      // We know that the number of elements of the result is the same as the
      // number of elements of the compare (and the number of elements of the
      // compare result, for that matter). Check that they are the same size.
      // If so, we know that the element size of the sext'd result matches the
      // element size of the compare operands.
13269       if (VT.getSizeInBits() == SVT.getSizeInBits())
13270         return DAG.getSetCC(DL, VT, N00, N01, CC);
13271 
13272       // If the desired elements are smaller or larger than the source
13273       // elements, we can use a matching integer vector type and then
13274       // truncate/sign extend.
13275       EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
13276       if (SVT == MatchingVecType) {
13277         SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
13278         return DAG.getSExtOrTrunc(VsetCC, DL, VT);
13279       }
13280     }
13281 
13282     // Try to eliminate the sext of a setcc by zexting the compare operands.
13283     if (N0.hasOneUse() && TLI.isOperationLegalOrCustom(ISD::SETCC, VT) &&
13284         !TLI.isOperationLegalOrCustom(ISD::SETCC, SVT)) {
13285       bool IsSignedCmp = ISD::isSignedIntSetCC(CC);
13286       unsigned LoadOpcode = IsSignedCmp ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
13287       unsigned ExtOpcode = IsSignedCmp ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
13288 
13289       // We have an unsupported narrow vector compare op that would be legal
13290       // if extended to the destination type. See if the compare operands
13291       // can be freely extended to the destination type.
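      // For example (illustrative): sext of a one-use
      // (setult (load v4i8 p), C) could become
      // (setult (zextload p to v4i32), (zext C)), since unsigned compares are
      // preserved by zero-extending both operands.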
13292       auto IsFreeToExtend = [&](SDValue V) {
13293         if (isConstantOrConstantVector(V, /*NoOpaques*/ true))
13294           return true;
13295         // Match a simple, non-extended load that can be converted to a
13296         // legal {z/s}ext-load.
13297         // TODO: Allow widening of an existing {z/s}ext-load?
13298         if (!(ISD::isNON_EXTLoad(V.getNode()) &&
13299               ISD::isUNINDEXEDLoad(V.getNode()) &&
13300               cast<LoadSDNode>(V)->isSimple() &&
13301               TLI.isLoadExtLegal(LoadOpcode, VT, V.getValueType())))
13302           return false;
13303 
13304         // Non-chain users of this value must either be the setcc in this
13305         // sequence or extends that can be folded into the new {z/s}ext-load.
13306         for (SDNode::use_iterator UI = V->use_begin(), UE = V->use_end();
13307              UI != UE; ++UI) {
13308           // Skip uses of the chain and the setcc.
13309           SDNode *User = *UI;
13310           if (UI.getUse().getResNo() != 0 || User == N0.getNode())
13311             continue;
13312           // Extra users must have exactly the same cast we are about to create.
13313           // TODO: This restriction could be eased if ExtendUsesToFormExtLoad()
13314           //       is enhanced similarly.
13315           if (User->getOpcode() != ExtOpcode || User->getValueType(0) != VT)
13316             return false;
13317         }
13318         return true;
13319       };
13320 
13321       if (IsFreeToExtend(N00) && IsFreeToExtend(N01)) {
13322         SDValue Ext0 = DAG.getNode(ExtOpcode, DL, VT, N00);
13323         SDValue Ext1 = DAG.getNode(ExtOpcode, DL, VT, N01);
13324         return DAG.getSetCC(DL, VT, Ext0, Ext1, CC);
13325       }
13326     }
13327   }
13328 
13329   // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
13330   // Here, T can be 1 or -1, depending on the type of the setcc and
13331   // getBooleanContents().
13332   unsigned SetCCWidth = N0.getScalarValueSizeInBits();
13333 
13334   // To determine the "true" side of the select, we need to know the high bit
13335   // of the value returned by the setcc if it evaluates to true.
13336   // If the type of the setcc is i1, then the true case of the select is just
13337   // sext(i1 1), that is, -1.
13338   // If the type of the setcc is larger (say, i8) then the value of the high
13339   // bit depends on getBooleanContents(), so ask TLI for a real "true" value
13340   // of the appropriate width.
13341   SDValue ExtTrueVal = (SetCCWidth == 1)
13342                            ? DAG.getAllOnesConstant(DL, VT)
13343                            : DAG.getBoolConstant(true, DL, VT, N00VT);
13344   SDValue Zero = DAG.getConstant(0, DL, VT);
13345   if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
13346     return SCC;
13347 
13348   if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) {
13349     EVT SetCCVT = getSetCCResultType(N00VT);
13350     // Don't do this transform for i1 because there's a select transform
13351     // that would reverse it.
13352     // TODO: We should not do this transform at all without a target hook
13353     // because a sext is likely cheaper than a select?
13354     if (SetCCVT.getScalarSizeInBits() != 1 &&
13355         (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
13356       SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
13357       return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
13358     }
13359   }
13360 
13361   return SDValue();
13362 }
13363 
13364 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
13365   SDValue N0 = N->getOperand(0);
13366   EVT VT = N->getValueType(0);
13367   SDLoc DL(N);
13368 
13369   if (VT.isVector())
13370     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13371       return FoldedVOp;
13372 
13373   // sext(undef) = 0 because the top bit will all be the same.
13374   if (N0.isUndef())
13375     return DAG.getConstant(0, DL, VT);
13376 
13377   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13378     return Res;
13379 
13380   // fold (sext (sext x)) -> (sext x)
13381   // fold (sext (aext x)) -> (sext x)
13382   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13383     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
13384 
13385   // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13386   // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13387   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13388       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13389     return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
13390                        N0.getOperand(0));
13391 
13392   // fold (sext (sext_inreg x)) -> (sext (trunc x))
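  // e.g. (illustrative): (sext (sext_inreg i32 X, i8) to i64)
  //                        -> (sext (trunc X to i8) to i64)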
13393   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
13394     SDValue N00 = N0.getOperand(0);
13395     EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
13396     if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
13397         (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
13398       SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
13399       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
13400     }
13401   }
13402 
13403   if (N0.getOpcode() == ISD::TRUNCATE) {
13404     // fold (sext (truncate (load x))) -> (sext (smaller load x))
13405     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
13406     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13407       SDNode *oye = N0.getOperand(0).getNode();
13408       if (NarrowLoad.getNode() != N0.getNode()) {
13409         CombineTo(N0.getNode(), NarrowLoad);
13410         // CombineTo deleted the truncate, if needed, but not what's under it.
13411         AddToWorklist(oye);
13412       }
13413       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13414     }
13415 
13416     // See if the value being truncated is already sign extended.  If so, just
13417     // eliminate the trunc/sext pair.
13418     SDValue Op = N0.getOperand(0);
13419     unsigned OpBits   = Op.getScalarValueSizeInBits();
13420     unsigned MidBits  = N0.getScalarValueSizeInBits();
13421     unsigned DestBits = VT.getScalarSizeInBits();
13422     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
13423 
13424     if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, the trunc/sext pair is a no-op; use Op directly.
13427       if (NumSignBits > DestBits-MidBits)
13428         return Op;
13429     } else if (OpBits < DestBits) {
13430       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
13431       // bits, just sext from i32.
13432       if (NumSignBits > OpBits-MidBits)
13433         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
13434     } else {
13435       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
13436       // bits, just truncate to i32.
13437       if (NumSignBits > OpBits-MidBits)
13438         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
13439     }
13440 
13441     // fold (sext (truncate x)) -> (sextinreg x).
13442     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
13443                                                  N0.getValueType())) {
13444       if (OpBits < DestBits)
13445         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
13446       else if (OpBits > DestBits)
13447         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
13448       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
13449                          DAG.getValueType(N0.getValueType()));
13450     }
13451   }
13452 
13453   // Try to simplify (sext (load x)).
13454   if (SDValue foldedExt =
13455           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13456                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13457     return foldedExt;
13458 
13459   if (SDValue foldedExt =
13460           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13461                                    ISD::SEXTLOAD, ISD::SIGN_EXTEND))
13462     return foldedExt;
13463 
13464   // fold (sext (load x)) to multiple smaller sextloads.
13465   // Only on illegal but splittable vectors.
13466   if (SDValue ExtLoad = CombineExtLoad(N))
13467     return ExtLoad;
13468 
13469   // Try to simplify (sext (sextload x)).
13470   if (SDValue foldedExt = tryToFoldExtOfExtload(
13471           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
13472     return foldedExt;
13473 
13474   // fold (sext (and/or/xor (load x), cst)) ->
13475   //      (and/or/xor (sextload x), (sext cst))
13476   if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
13477       isa<LoadSDNode>(N0.getOperand(0)) &&
13478       N0.getOperand(1).getOpcode() == ISD::Constant &&
13479       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13480     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13481     EVT MemVT = LN00->getMemoryVT();
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
        LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
13484       SmallVector<SDNode*, 4> SetCCs;
13485       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13486                                              ISD::SIGN_EXTEND, SetCCs, TLI);
13487       if (DoXform) {
13488         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
13489                                          LN00->getChain(), LN00->getBasePtr(),
13490                                          LN00->getMemoryVT(),
13491                                          LN00->getMemOperand());
13492         APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits());
13493         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13494                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
13495         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
13496         bool NoReplaceTruncAnd = !N0.hasOneUse();
13497         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13498         CombineTo(N, And);
13499         // If N0 has multiple uses, change other uses as well.
13500         if (NoReplaceTruncAnd) {
13501           SDValue TruncAnd =
13502               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13503           CombineTo(N0.getNode(), TruncAnd);
13504         }
13505         if (NoReplaceTrunc) {
13506           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13507         } else {
13508           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13509                                       LN00->getValueType(0), ExtLoad);
13510           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13511         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
13513       }
13514     }
13515   }
13516 
13517   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13518     return V;
13519 
13520   if (SDValue V = foldSextSetcc(N))
13521     return V;
13522 
13523   // fold (sext x) -> (zext x) if the sign bit is known zero.
13524   if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) &&
13525       (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
13526       DAG.SignBitIsZero(N0)) {
13527     SDNodeFlags Flags;
13528     Flags.setNonNeg(true);
13529     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags);
13530   }
13531 
13532   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13533     return NewVSel;
13534 
13535   // Eliminate this sign extend by doing a negation in the destination type:
13536   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
13537   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
13538       isNullOrNullSplat(N0.getOperand(0)) &&
13539       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
13540       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
13541     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
13542     return DAG.getNegative(Zext, DL, VT);
13543   }
13544   // Eliminate this sign extend by doing a decrement in the destination type:
13545   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
13546   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
13547       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
13548       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
13549       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
13550     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13551     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13552   }
13553 
13554   // fold sext (not i1 X) -> add (zext i1 X), -1
13555   // TODO: This could be extended to handle bool vectors.
13556   if (N0.getValueType() == MVT::i1 && isBitwiseNot(N0) && N0.hasOneUse() &&
13557       (!LegalOperations || (TLI.isOperationLegal(ISD::ZERO_EXTEND, VT) &&
13558                             TLI.isOperationLegal(ISD::ADD, VT)))) {
13559     // If we can eliminate the 'not', the sext form should be better
13560     if (SDValue NewXor = visitXOR(N0.getNode())) {
13561       // Returning N0 is a form of in-visit replacement that may have
13562       // invalidated N0.
13563       if (NewXor.getNode() == N0.getNode()) {
13564         // Return SDValue here as the xor should have already been replaced in
13565         // this sext.
13566         return SDValue();
13567       }
13568 
13569       // Return a new sext with the new xor.
13570       return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NewXor);
13571     }
13572 
13573     SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13574     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
13575   }
13576 
13577   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13578     return Res;
13579 
13580   return SDValue();
13581 }
13582 
13583 /// Given an extending node with a pop-count operand, if the target does not
13584 /// support a pop-count in the narrow source type but does support it in the
13585 /// destination type, widen the pop-count to the destination type.
13586 static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) {
13587   assert((Extend->getOpcode() == ISD::ZERO_EXTEND ||
13588           Extend->getOpcode() == ISD::ANY_EXTEND) && "Expected extend op");
13589 
13590   SDValue CtPop = Extend->getOperand(0);
13591   if (CtPop.getOpcode() != ISD::CTPOP || !CtPop.hasOneUse())
13592     return SDValue();
13593 
13594   EVT VT = Extend->getValueType(0);
13595   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13596   if (TLI.isOperationLegalOrCustom(ISD::CTPOP, CtPop.getValueType()) ||
13597       !TLI.isOperationLegalOrCustom(ISD::CTPOP, VT))
13598     return SDValue();
13599 
13600   // zext (ctpop X) --> ctpop (zext X)
13601   SDLoc DL(Extend);
13602   SDValue NewZext = DAG.getZExtOrTrunc(CtPop.getOperand(0), DL, VT);
13603   return DAG.getNode(ISD::CTPOP, DL, VT, NewZext);
13604 }
13605 
13606 // If we have (zext (abs X)) where X is a type that will be promoted by type
13607 // legalization, convert to (abs (sext X)). But don't extend past a legal type.
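// For example (illustrative, assuming i16 is promoted to i32):
//   (zext (abs i16 X) to i64) -> (zext (abs (sext X to i32)) to i64)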
13608 static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
13609   assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend.");
13610 
13611   EVT VT = Extend->getValueType(0);
13612   if (VT.isVector())
13613     return SDValue();
13614 
13615   SDValue Abs = Extend->getOperand(0);
13616   if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse())
13617     return SDValue();
13618 
13619   EVT AbsVT = Abs.getValueType();
13620   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13621   if (TLI.getTypeAction(*DAG.getContext(), AbsVT) !=
13622       TargetLowering::TypePromoteInteger)
13623     return SDValue();
13624 
13625   EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT);
13626 
13627   SDValue SExt =
13628       DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0));
13629   SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt);
13630   return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT);
13631 }
13632 
13633 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
13634   SDValue N0 = N->getOperand(0);
13635   EVT VT = N->getValueType(0);
13636   SDLoc DL(N);
13637 
13638   if (VT.isVector())
13639     if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
13640       return FoldedVOp;
13641 
13642   // zext(undef) = 0
13643   if (N0.isUndef())
13644     return DAG.getConstant(0, DL, VT);
13645 
13646   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13647     return Res;
13648 
13649   // fold (zext (zext x)) -> (zext x)
13650   // fold (zext (aext x)) -> (zext x)
13651   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
13652     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
13653 
13654   // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13655   // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13656   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13657       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
13658     return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT,
13659                        N0.getOperand(0));
13660 
13661   // fold (zext (truncate x)) -> (zext x) or
13662   //      (zext (truncate x)) -> (truncate x)
13663   // This is valid when the truncated bits of x are already zero.
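  // e.g. (illustrative): (zext (trunc i32 X to i16) to i32) folds to X when
  // the top 16 bits of X are known zero.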
13664   SDValue Op;
13665   KnownBits Known;
13666   if (isTruncateOf(DAG, N0, Op, Known)) {
13667     APInt TruncatedBits =
13668       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
13669       APInt(Op.getScalarValueSizeInBits(), 0) :
13670       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
13671                         N0.getScalarValueSizeInBits(),
13672                         std::min(Op.getScalarValueSizeInBits(),
13673                                  VT.getScalarSizeInBits()));
13674     if (TruncatedBits.isSubsetOf(Known.Zero)) {
13675       SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13676       DAG.salvageDebugInfo(*N0.getNode());
13677 
13678       return ZExtOrTrunc;
13679     }
13680   }
13681 
13682   // fold (zext (truncate x)) -> (and x, mask)
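  // e.g. (illustrative): (zext (trunc i32 X to i8) to i32) -> (and X, 255)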
13683   if (N0.getOpcode() == ISD::TRUNCATE) {
13684     // fold (zext (truncate (load x))) -> (zext (smaller load x))
13685     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
13686     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13687       SDNode *oye = N0.getOperand(0).getNode();
13688       if (NarrowLoad.getNode() != N0.getNode()) {
13689         CombineTo(N0.getNode(), NarrowLoad);
13690         // CombineTo deleted the truncate, if needed, but not what's under it.
13691         AddToWorklist(oye);
13692       }
13693       return SDValue(N, 0); // Return N so it doesn't get rechecked!
13694     }
13695 
13696     EVT SrcVT = N0.getOperand(0).getValueType();
13697     EVT MinVT = N0.getValueType();
13698 
    // Try to mask before the extension to avoid having to generate a larger
    // mask, possibly over several sub-vectors.
13701     if (SrcVT.bitsLT(VT) && VT.isVector()) {
13702       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
13703                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
13704         SDValue Op = N0.getOperand(0);
13705         Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
13706         AddToWorklist(Op.getNode());
13707         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
13708         // Transfer the debug info; the new node is equivalent to N0.
13709         DAG.transferDbgValues(N0, ZExtOrTrunc);
13710         return ZExtOrTrunc;
13711       }
13712     }
13713 
13714     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
13715       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
13716       AddToWorklist(Op.getNode());
13717       SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
13718       // We may safely transfer the debug info describing the truncate node over
13719       // to the equivalent and operation.
13720       DAG.transferDbgValues(N0, And);
13721       return And;
13722     }
13723   }
13724 
13725   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
13726   // if either of the casts is not free.
13727   if (N0.getOpcode() == ISD::AND &&
13728       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13729       N0.getOperand(1).getOpcode() == ISD::Constant &&
13730       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType()) ||
13731        !TLI.isZExtFree(N0.getValueType(), VT))) {
13732     SDValue X = N0.getOperand(0).getOperand(0);
13733     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
13734     APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13735     return DAG.getNode(ISD::AND, DL, VT,
13736                        X, DAG.getConstant(Mask, DL, VT));
13737   }
13738 
13739   // Try to simplify (zext (load x)).
13740   if (SDValue foldedExt =
13741           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13742                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13743     return foldedExt;
13744 
13745   if (SDValue foldedExt =
13746           tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0,
13747                                    ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13748     return foldedExt;
13749 
13750   // fold (zext (load x)) to multiple smaller zextloads.
13751   // Only on illegal but splittable vectors.
13752   if (SDValue ExtLoad = CombineExtLoad(N))
13753     return ExtLoad;
13754 
13755   // fold (zext (and/or/xor (load x), cst)) ->
13756   //      (and/or/xor (zextload x), (zext cst))
13757   // Unless (and (load x) cst) will match as a zextload already and has
13758   // additional users, or the zext is already free.
13759   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) &&
13760       isa<LoadSDNode>(N0.getOperand(0)) &&
13761       N0.getOperand(1).getOpcode() == ISD::Constant &&
13762       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
13763     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
13764     EVT MemVT = LN00->getMemoryVT();
13765     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
13766         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
13767       bool DoXform = true;
13768       SmallVector<SDNode*, 4> SetCCs;
13769       if (!N0.hasOneUse()) {
13770         if (N0.getOpcode() == ISD::AND) {
13771           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
13772           EVT LoadResultTy = AndC->getValueType(0);
13773           EVT ExtVT;
13774           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
13775             DoXform = false;
13776         }
13777       }
13778       if (DoXform)
13779         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
13780                                           ISD::ZERO_EXTEND, SetCCs, TLI);
13781       if (DoXform) {
13782         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
13783                                          LN00->getChain(), LN00->getBasePtr(),
13784                                          LN00->getMemoryVT(),
13785                                          LN00->getMemOperand());
13786         APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
13787         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
13788                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
13789         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
13790         bool NoReplaceTruncAnd = !N0.hasOneUse();
13791         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
13792         CombineTo(N, And);
13793         // If N0 has multiple uses, change other uses as well.
13794         if (NoReplaceTruncAnd) {
13795           SDValue TruncAnd =
13796               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
13797           CombineTo(N0.getNode(), TruncAnd);
13798         }
13799         if (NoReplaceTrunc) {
13800           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
13801         } else {
13802           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
13803                                       LN00->getValueType(0), ExtLoad);
13804           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
13805         }
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
13807       }
13808     }
13809   }
13810 
13811   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
13812   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
13813   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
13814     return ZExtLoad;
13815 
13816   // Try to simplify (zext (zextload x)).
13817   if (SDValue foldedExt = tryToFoldExtOfExtload(
13818           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
13819     return foldedExt;
13820 
13821   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
13822     return V;
13823 
13824   if (N0.getOpcode() == ISD::SETCC) {
13825     // Propagate fast-math-flags.
13826     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
13827 
13828     // Only do this before legalize for now.
13829     if (!LegalOperations && VT.isVector() &&
13830         N0.getValueType().getVectorElementType() == MVT::i1) {
13831       EVT N00VT = N0.getOperand(0).getValueType();
13832       if (getSetCCResultType(N00VT) == N0.getValueType())
13833         return SDValue();
13834 
      // We know that the number of elements of the result is the same as the
      // number of elements of the compare (and the number of elements of the
      // compare result, for that matter). Check that they are the same size.
      // If so, we know that the element size of the extended result matches
      // the element size of the compare operands.
13840       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
13841         // zext(setcc) -> zext_in_reg(vsetcc) for vectors.
13842         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
13843                                      N0.getOperand(1), N0.getOperand(2));
13844         return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType());
13845       }
13846 
13847       // If the desired elements are smaller or larger than the source
13848       // elements we can use a matching integer vector type and then
13849       // truncate/any extend followed by zext_in_reg.
13850       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
13851       SDValue VsetCC =
13852           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
13853                       N0.getOperand(1), N0.getOperand(2));
13854       return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL,
13855                                     N0.getValueType());
13856     }
13857 
13858     // zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
13859     EVT N0VT = N0.getValueType();
13860     EVT N00VT = N0.getOperand(0).getValueType();
13861     if (SDValue SCC = SimplifySelectCC(
13862             DL, N0.getOperand(0), N0.getOperand(1),
13863             DAG.getBoolConstant(true, DL, N0VT, N00VT),
13864             DAG.getBoolConstant(false, DL, N0VT, N00VT),
13865             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
13866       return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, SCC);
13867   }
13868 
  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
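  // e.g. (illustrative): (zext (shl (zext i8 X to i32), 3) to i64)
  //                        -> (shl (zext i8 X to i64), 3)
  // This is safe because the shl shifts into only 3 of the 24 known-zero
  // high bits.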
13870   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
13871       !TLI.isZExtFree(N0, VT)) {
13872     SDValue ShVal = N0.getOperand(0);
13873     SDValue ShAmt = N0.getOperand(1);
13874     if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
13875       if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
13876         if (N0.getOpcode() == ISD::SHL) {
13877           // If the original shl may be shifting out bits, do not perform this
13878           // transformation.
13879           // TODO: Add MaskedValueIsZero check.
13880           unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
13881                                    ShVal.getOperand(0).getValueSizeInBits();
13882           if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
13883             return SDValue();
13884         }
13885 
13886         // Ensure that the shift amount is wide enough for the shifted value.
13887         if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
13888           ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
13889 
13890         return DAG.getNode(N0.getOpcode(), DL, VT,
13891                            DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
13892       }
13893     }
13894   }
13895 
13896   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13897     return NewVSel;
13898 
13899   if (SDValue NewCtPop = widenCtPop(N, DAG))
13900     return NewCtPop;
13901 
13902   if (SDValue V = widenAbs(N, DAG))
13903     return V;
13904 
13905   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
13906     return Res;
13907 
13908   return SDValue();
13909 }
13910 
13911 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
13912   SDValue N0 = N->getOperand(0);
13913   EVT VT = N->getValueType(0);
13914 
13915   // aext(undef) = undef
13916   if (N0.isUndef())
13917     return DAG.getUNDEF(VT);
13918 
13919   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
13920     return Res;
13921 
13922   // fold (aext (aext x)) -> (aext x)
13923   // fold (aext (zext x)) -> (zext x)
13924   // fold (aext (sext x)) -> (sext x)
13925   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
13926       N0.getOpcode() == ISD::ZERO_EXTEND ||
13927       N0.getOpcode() == ISD::SIGN_EXTEND)
13928     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13929 
13930   // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
13931   // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
13932   // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
13933   if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
13934       N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
13935       N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
13936     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
13937 
13938   // fold (aext (truncate (load x))) -> (aext (smaller load x))
13939   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
13940   if (N0.getOpcode() == ISD::TRUNCATE) {
13941     if (SDValue NarrowLoad = reduceLoadWidth(N0.getNode())) {
13942       SDNode *oye = N0.getOperand(0).getNode();
13943       if (NarrowLoad.getNode() != N0.getNode()) {
13944         CombineTo(N0.getNode(), NarrowLoad);
13945         // CombineTo deleted the truncate, if needed, but not what's under it.
13946         AddToWorklist(oye);
13947       }
13948       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13949     }
13950   }
13951 
13952   // fold (aext (truncate x))
13953   if (N0.getOpcode() == ISD::TRUNCATE)
13954     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
13955 
13956   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
13957   // if the trunc is not free.
13958   if (N0.getOpcode() == ISD::AND &&
13959       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
13960       N0.getOperand(1).getOpcode() == ISD::Constant &&
13961       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0), N0.getValueType())) {
13962     SDLoc DL(N);
13963     SDValue X = DAG.getAnyExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
13964     SDValue Y = DAG.getNode(ISD::ANY_EXTEND, DL, VT, N0.getOperand(1));
13965     assert(isa<ConstantSDNode>(Y) && "Expected constant to be folded!");
13966     return DAG.getNode(ISD::AND, DL, VT, X, Y);
13967   }
13968 
13969   // fold (aext (load x)) -> (aext (truncate (extload x)))
13970   // None of the supported targets knows how to perform load and any_ext
13971   // on vectors in one instruction, so attempt to fold to zext instead.
13972   if (VT.isVector()) {
13973     // Try to simplify (zext (load x)).
13974     if (SDValue foldedExt =
13975             tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
13976                                ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
13977       return foldedExt;
13978   } else if (ISD::isNON_EXTLoad(N0.getNode()) &&
13979              ISD::isUNINDEXEDLoad(N0.getNode()) &&
13980              TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13981     bool DoXform = true;
13982     SmallVector<SDNode *, 4> SetCCs;
13983     if (!N0.hasOneUse())
13984       DoXform =
13985           ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
13986     if (DoXform) {
13987       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13988       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13989                                        LN0->getChain(), LN0->getBasePtr(),
13990                                        N0.getValueType(), LN0->getMemOperand());
13991       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
13992       // If the load value is used only by N, replace it via CombineTo N.
13993       bool NoReplaceTrunc = N0.hasOneUse();
13994       CombineTo(N, ExtLoad);
13995       if (NoReplaceTrunc) {
13996         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
13997         recursivelyDeleteUnusedNodes(LN0);
13998       } else {
13999         SDValue Trunc =
14000             DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
14001         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
14002       }
14003       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14004     }
14005   }
14006 
14007   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
14008   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
14009   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
14010   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
14011       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
14012     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14013     ISD::LoadExtType ExtType = LN0->getExtensionType();
14014     EVT MemVT = LN0->getMemoryVT();
14015     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
14016       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
14017                                        VT, LN0->getChain(), LN0->getBasePtr(),
14018                                        MemVT, LN0->getMemOperand());
14019       CombineTo(N, ExtLoad);
14020       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
14021       recursivelyDeleteUnusedNodes(LN0);
14022       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14023     }
14024   }
14025 
14026   if (N0.getOpcode() == ISD::SETCC) {
14027     // Propagate fast-math-flags.
14028     SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags());
14029 
14030     // For vectors:
14031     // aext(setcc) -> vsetcc
14032     // aext(setcc) -> truncate(vsetcc)
14033     // aext(setcc) -> aext(vsetcc)
14034     // Only do this before legalize for now.
14035     if (VT.isVector() && !LegalOperations) {
14036       EVT N00VT = N0.getOperand(0).getValueType();
14037       if (getSetCCResultType(N00VT) == N0.getValueType())
14038         return SDValue();
14039 
      // We know that the number of elements of the result is the same as the
      // number of elements of the compare (and the number of elements of the
      // compare result, for that matter).  Check to see that they are the same
      // size.  If so, we know that the element size of the extended result
      // matches the element size of the compare operands.
14045       if (VT.getSizeInBits() == N00VT.getSizeInBits())
14046         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
14047                              N0.getOperand(1),
14048                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
14049 
14050       // If the desired elements are smaller or larger than the source
14051       // elements we can use a matching integer vector type and then
14052       // truncate/any extend
14053       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
14054       SDValue VsetCC =
14055         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
14056                       N0.getOperand(1),
14057                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
14058       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
14059     }
14060 
14061     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
14062     SDLoc DL(N);
14063     if (SDValue SCC = SimplifySelectCC(
14064             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
14065             DAG.getConstant(0, DL, VT),
14066             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
14067       return SCC;
14068   }
14069 
14070   if (SDValue NewCtPop = widenCtPop(N, DAG))
14071     return NewCtPop;
14072 
14073   if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
14074     return Res;
14075 
14076   return SDValue();
14077 }
14078 
14079 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
14080   unsigned Opcode = N->getOpcode();
14081   SDValue N0 = N->getOperand(0);
14082   SDValue N1 = N->getOperand(1);
14083   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
14084 
14085   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
14086   if (N0.getOpcode() == Opcode &&
14087       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
14088     return N0;
14089 
14090   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14091       N0.getOperand(0).getOpcode() == Opcode) {
    // We have an assert, truncate, assert sandwich. Make one stronger assert
    // by asserting the smaller of the two asserted types on the larger source
    // value. This eliminates the later assert:
    // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
    // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
14097     SDLoc DL(N);
14098     SDValue BigA = N0.getOperand(0);
14099     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14100     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
14101     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
14102     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14103                                     BigA.getOperand(0), MinAssertVTVal);
14104     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14105   }
14106 
  // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
  // than X, just move the AssertZext in front of the truncate and drop the
  // AssertSext.
14110   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
14111       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
14112       Opcode == ISD::AssertZext) {
14113     SDValue BigA = N0.getOperand(0);
14114     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
14115     if (AssertVT.bitsLT(BigA_AssertVT)) {
14116       SDLoc DL(N);
14117       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
14118                                       BigA.getOperand(0), N1);
14119       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
14120     }
14121   }
14122 
14123   return SDValue();
14124 }
14125 
14126 SDValue DAGCombiner::visitAssertAlign(SDNode *N) {
14127   SDLoc DL(N);
14128 
14129   Align AL = cast<AssertAlignSDNode>(N)->getAlign();
14130   SDValue N0 = N->getOperand(0);
14131 
14132   // Fold (assertalign (assertalign x, AL0), AL1) ->
14133   // (assertalign x, max(AL0, AL1))
14134   if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0))
14135     return DAG.getAssertAlign(DL, N0.getOperand(0),
14136                               std::max(AL, AAN->getAlign()));
14137 
  // In rare cases, there are trivial arithmetic ops in source operands. Sink
  // this assert down to source operands so that those arithmetic ops can be
  // exposed to the DAG combining.
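  // e.g. (illustrative): with AL = 8, (assertalign (add X, 16), 8) can become
  // (add (assertalign X, 8), 16), because the constant 16 already has enough
  // known trailing zero bits.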
14141   switch (N0.getOpcode()) {
14142   default:
14143     break;
14144   case ISD::ADD:
14145   case ISD::SUB: {
14146     unsigned AlignShift = Log2(AL);
14147     SDValue LHS = N0.getOperand(0);
14148     SDValue RHS = N0.getOperand(1);
14149     unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros();
14150     unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros();
14151     if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) {
14152       if (LHSAlignShift < AlignShift)
14153         LHS = DAG.getAssertAlign(DL, LHS, AL);
14154       if (RHSAlignShift < AlignShift)
14155         RHS = DAG.getAssertAlign(DL, RHS, AL);
14156       return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS);
14157     }
14158     break;
14159   }
14160   }
14161 
14162   return SDValue();
14163 }
14164 
14165 /// If the result of a load is shifted/masked/truncated to an effectively
14166 /// narrower type, try to transform the load to a narrower type and/or
14167 /// use an extending load.
14168 SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
14169   unsigned Opc = N->getOpcode();
14170 
14171   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
14172   SDValue N0 = N->getOperand(0);
14173   EVT VT = N->getValueType(0);
14174   EVT ExtVT = VT;
14175 
14176   // This transformation isn't valid for vector loads.
14177   if (VT.isVector())
14178     return SDValue();
14179 
  // The ShAmt variable is used to indicate that we've consumed a right shift,
  // i.e. we want to narrow the width of the load by not loading the ShAmt
  // least significant bits.
14183   unsigned ShAmt = 0;
14184   // A special case is when the least significant bits from the load are masked
14185   // away, but using an AND rather than a right shift. HasShiftedOffset is used
14186   // to indicate that the narrowed load should be left-shifted ShAmt bits to get
14187   // the result.
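  // e.g. (illustrative, little-endian): (and (load i32 p), 0xFF00) can become
  // an i8 zextload of the byte at p+1, with the result shifted left by 8.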
14188   bool HasShiftedOffset = false;
14189   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
14190   // extended to VT.
14191   if (Opc == ISD::SIGN_EXTEND_INREG) {
14192     ExtType = ISD::SEXTLOAD;
14193     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
14194   } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
    // Another special case: SRL/SRA is basically zero/sign-extending a
    // narrower value, or it may be shifting a higher subword, half or byte
    // into the lowest bits.
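    //
    // For example (illustrative, assuming a little-endian target and a
    // non-extending load):
    //   (i32 (srl (load i32 p), 16)) -> (i32 (zextload (p + 2) from i16))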
14198 
14199     // Only handle shift with constant shift amount, and the shiftee must be a
14200     // load.
14201     auto *LN = dyn_cast<LoadSDNode>(N0);
14202     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14203     if (!N1C || !LN)
14204       return SDValue();
14205     // If the shift amount is larger than the memory type then we're not
14206     // accessing any of the loaded bytes.
14207     ShAmt = N1C->getZExtValue();
14208     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
14209     if (MemoryWidth <= ShAmt)
14210       return SDValue();
14211     // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
14212     ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
14213     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
    // If the original load is a SEXTLOAD then we can't simply replace it by a
    // ZEXTLOAD (we could potentially replace it by a narrower SEXTLOAD
    // followed by a ZEXT, but that is not handled at the moment). Similarly if
    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
14218     if ((LN->getExtensionType() == ISD::SEXTLOAD ||
14219          LN->getExtensionType() == ISD::ZEXTLOAD) &&
14220         LN->getExtensionType() != ExtType)
14221       return SDValue();
14222   } else if (Opc == ISD::AND) {
14223     // An AND with a constant mask is the same as a truncate + zero-extend.
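    // For example (illustrative, assuming a little-endian target):
    //   (and (load i32 p), 255)    -> (zextload p from i8)
    // and, with a shifted mask (the HasShiftedOffset case):
    //   (and (load i32 p), 0xFF00) -> (shl (zextload (p + 1) from i8), 8)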
14224     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
14225     if (!AndC)
14226       return SDValue();
14227 
14228     const APInt &Mask = AndC->getAPIntValue();
14229     unsigned ActiveBits = 0;
14230     if (Mask.isMask()) {
14231       ActiveBits = Mask.countr_one();
14232     } else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
14233       HasShiftedOffset = true;
14234     } else {
14235       return SDValue();
14236     }
14237 
14238     ExtType = ISD::ZEXTLOAD;
14239     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
14240   }
14241 
14242   // In case Opc==SRL we've already prepared ExtVT/ExtType/ShAmt based on doing
14243   // a right shift. Here we redo some of those checks, to possibly adjust the
14244   // ExtVT even further based on "a masking AND". We could also end up here for
14245   // other reasons (e.g. based on Opc==TRUNCATE) and that is why some checks
14246   // need to be done here as well.
14247   if (Opc == ISD::SRL || N0.getOpcode() == ISD::SRL) {
14248     SDValue SRL = Opc == ISD::SRL ? SDValue(N, 0) : N0;
    // Bail out when the SRL has more than one use. This is done for historical
    // (undocumented) reasons. Maybe the intent was to guard the AND-masking
    // check below? And maybe it could be non-profitable to do the transform in
    // case the SRL has multiple uses and we get here with Opc!=ISD::SRL?
    // FIXME: Can't we just skip this check for the Opc==ISD::SRL case?
14254     if (!SRL.hasOneUse())
14255       return SDValue();
14256 
14257     // Only handle shift with constant shift amount, and the shiftee must be a
14258     // load.
14259     auto *LN = dyn_cast<LoadSDNode>(SRL.getOperand(0));
14260     auto *SRL1C = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
14261     if (!SRL1C || !LN)
14262       return SDValue();
14263 
14264     // If the shift amount is larger than the input type then we're not
14265     // accessing any of the loaded bytes.  If the load was a zextload/extload
14266     // then the result of the shift+trunc is zero/undef (handled elsewhere).
14267     ShAmt = SRL1C->getZExtValue();
14268     uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
14269     if (ShAmt >= MemoryWidth)
14270       return SDValue();
14271 
14272     // Because a SRL must be assumed to *need* to zero-extend the high bits
14273     // (as opposed to anyext the high bits), we can't combine the zextload
14274     // lowering of SRL and an sextload.
14275     if (LN->getExtensionType() == ISD::SEXTLOAD)
14276       return SDValue();
14277 
    // Avoid reading outside the memory accessed by the original load (could
    // happen if we only adjusted the load base pointer by ShAmt). Instead we
    // try to narrow the load even further. The typical scenario here is:
14281     //   (i64 (truncate (i96 (srl (load x), 64)))) ->
14282     //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
14283     if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
14284       // Don't replace sextload by zextload.
14285       if (ExtType == ISD::SEXTLOAD)
14286         return SDValue();
14287       // Narrow the load.
14288       ExtType = ISD::ZEXTLOAD;
14289       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
14290     }
14291 
14292     // If the SRL is only used by a masking AND, we may be able to adjust
14293     // the ExtVT to make the AND redundant.
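    // For example (illustrative), if the SRL is only used by:
    //   (and (srl (load i64 p), 32), 0xFFFF)
    // then ExtVT can be shrunk from i32 to i16, provided the target supports
    // the corresponding extending load.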
14294     SDNode *Mask = *(SRL->use_begin());
14295     if (SRL.hasOneUse() && Mask->getOpcode() == ISD::AND &&
14296         isa<ConstantSDNode>(Mask->getOperand(1))) {
14297       const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
14298       if (ShiftMask.isMask()) {
14299         EVT MaskedVT =
14300             EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
14301         // If the mask is smaller, recompute the type.
14302         if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
14303             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
14304           ExtVT = MaskedVT;
14305       }
14306     }
14307 
14308     N0 = SRL.getOperand(0);
14309   }
14310 
14311   // If the load is shifted left (and the result isn't shifted back right), we
14312   // can fold a truncate through the shift. The typical scenario is that N
14313   // points at a TRUNCATE here so the attempted fold is:
  //   (truncate (shl (load x), c)) -> (shl (narrow load x), c)
14315   // ShLeftAmt will indicate how much a narrowed load should be shifted left.
14316   unsigned ShLeftAmt = 0;
14317   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14318       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
14319     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
14320       ShLeftAmt = N01->getZExtValue();
14321       N0 = N0.getOperand(0);
14322     }
14323   }
14324 
14325   // If we haven't found a load, we can't narrow it.
14326   if (!isa<LoadSDNode>(N0))
14327     return SDValue();
14328 
14329   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14330   // Reducing the width of a volatile load is illegal.  For atomics, we may be
14331   // able to reduce the width provided we never widen again. (see D66309)
14332   if (!LN0->isSimple() ||
14333       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
14334     return SDValue();
14335 
14336   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
14337     unsigned LVTStoreBits =
14338         LN0->getMemoryVT().getStoreSizeInBits().getFixedValue();
14339     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue();
14340     return LVTStoreBits - EVTStoreBits - ShAmt;
14341   };
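  // Illustrative example: on a big-endian target, narrowing an i32 load to
  // an i8 with ShAmt == 8 gives 32 - 8 - 8 == 16, i.e. the requested byte
  // lives two bytes past the base pointer.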
14342 
14343   // We need to adjust the pointer to the load by ShAmt bits in order to load
14344   // the correct bytes.
14345   unsigned PtrAdjustmentInBits =
14346       DAG.getDataLayout().isBigEndian() ? AdjustBigEndianShift(ShAmt) : ShAmt;
14347 
14348   uint64_t PtrOff = PtrAdjustmentInBits / 8;
14349   Align NewAlign = commonAlignment(LN0->getAlign(), PtrOff);
14350   SDLoc DL(LN0);
14351   // The original load itself didn't wrap, so an offset within it doesn't.
14352   SDNodeFlags Flags;
14353   Flags.setNoUnsignedWrap(true);
14354   SDValue NewPtr = DAG.getMemBasePlusOffset(
14355       LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags);
14356   AddToWorklist(NewPtr.getNode());
14357 
14358   SDValue Load;
14359   if (ExtType == ISD::NON_EXTLOAD)
14360     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
14361                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14362                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
14363   else
14364     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
14365                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
14366                           NewAlign, LN0->getMemOperand()->getFlags(),
14367                           LN0->getAAInfo());
14368 
14369   // Replace the old load's chain with the new load's chain.
14370   WorklistRemover DeadNodes(*this);
14371   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
14372 
14373   // Shift the result left, if we've swallowed a left shift.
14374   SDValue Result = Load;
14375   if (ShLeftAmt != 0) {
14376     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
14377     if (!isUIntN(ShImmTy.getScalarSizeInBits(), ShLeftAmt))
14378       ShImmTy = VT;
14379     // If the shift amount is as large as the result size (but, presumably,
14380     // no larger than the source) then the useful bits of the result are
14381     // zero; we can't simply return the shortened shift, because the result
14382     // of that operation is undefined.
14383     if (ShLeftAmt >= VT.getScalarSizeInBits())
14384       Result = DAG.getConstant(0, DL, VT);
14385     else
14386       Result = DAG.getNode(ISD::SHL, DL, VT,
14387                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
14388   }
14389 
14390   if (HasShiftedOffset) {
    // We're using a shifted mask, so the load now has an offset. This means
    // the data has been loaded into lower bytes of the register than it
    // occupied before, so we need to shl the loaded data into the correct
    // position.
14395     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
14396     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
14397     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
14398   }
14399 
14400   // Return the new loaded value.
14401   return Result;
14402 }
14403 
14404 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
14405   SDValue N0 = N->getOperand(0);
14406   SDValue N1 = N->getOperand(1);
14407   EVT VT = N->getValueType(0);
14408   EVT ExtVT = cast<VTSDNode>(N1)->getVT();
14409   unsigned VTBits = VT.getScalarSizeInBits();
14410   unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
14411 
  // sext_in_reg(undef) = 0 because the top bits will all be the same.
14413   if (N0.isUndef())
14414     return DAG.getConstant(0, SDLoc(N), VT);
14415 
14416   // fold (sext_in_reg c1) -> c1
14417   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
14418     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
14419 
14420   // If the input is already sign extended, just drop the extension.
14421   if (ExtVTBits >= DAG.ComputeMaxSignificantBits(N0))
14422     return N0;
14423 
14424   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
14425   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14426       ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
14427     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0),
14428                        N1);
14429 
14430   // fold (sext_in_reg (sext x)) -> (sext x)
14431   // fold (sext_in_reg (aext x)) -> (sext x)
14432   // if x is small enough or if we know that x has more than 1 sign bit and the
14433   // sign_extend_inreg is extending from one of them.
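  // For example (illustrative):
  //   (sext_in_reg (aext (i8 x) to i32), i8) -> (sext (i8 x) to i32)
  // since every bit of x lies within the i8 being sign-extended from.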
14434   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
14435     SDValue N00 = N0.getOperand(0);
14436     unsigned N00Bits = N00.getScalarValueSizeInBits();
14437     if ((N00Bits <= ExtVTBits ||
14438          DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits) &&
14439         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14440       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14441   }
14442 
14443   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
14444   // if x is small enough or if we know that x has more than 1 sign bit and the
14445   // sign_extend_inreg is extending from one of them.
14446   if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
14447     SDValue N00 = N0.getOperand(0);
14448     unsigned N00Bits = N00.getScalarValueSizeInBits();
14449     unsigned DstElts = N0.getValueType().getVectorMinNumElements();
14450     unsigned SrcElts = N00.getValueType().getVectorMinNumElements();
14451     bool IsZext = N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
14452     APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts);
14453     if ((N00Bits == ExtVTBits ||
14454          (!IsZext && (N00Bits < ExtVTBits ||
14455                       DAG.ComputeMaxSignificantBits(N00) <= ExtVTBits))) &&
14456         (!LegalOperations ||
14457          TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)))
14458       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00);
14459   }
14460 
14461   // fold (sext_in_reg (zext x)) -> (sext x)
14462   // iff we are extending the source sign bit.
14463   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
14464     SDValue N00 = N0.getOperand(0);
14465     if (N00.getScalarValueSizeInBits() == ExtVTBits &&
14466         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
14467       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
14468   }
14469 
14470   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
14471   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1)))
14472     return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT);
14473 
14474   // fold operands of sext_in_reg based on knowledge that the top bits are not
14475   // demanded.
14476   if (SimplifyDemandedBits(SDValue(N, 0)))
14477     return SDValue(N, 0);
14478 
14479   // fold (sext_in_reg (load x)) -> (smaller sextload x)
14480   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
14481   if (SDValue NarrowLoad = reduceLoadWidth(N))
14482     return NarrowLoad;
14483 
14484   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
14485   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
14486   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
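  // Illustrative reasoning for VT == i32 and ExtVT == i8: a shift amount of
  // 23 qualifies when X has at least 2 sign bits, because bit 7 of the SRA
  // result is then a copy of X's bit 30, which matches the sign bits above
  // it.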
14487   if (N0.getOpcode() == ISD::SRL) {
14488     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
14489       if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) {
14490         // We can turn this into an SRA iff the input to the SRL is already sign
14491         // extended enough.
14492         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
14493         if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits)
14494           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
14495                              N0.getOperand(1));
14496       }
14497   }
14498 
14499   // fold (sext_inreg (extload x)) -> (sextload x)
  // If sextload is not supported by the target, we can only do the combine
  // when the load has one use. Doing otherwise can block folding the extload
  // with other extends that the target does support.
14503   if (ISD::isEXTLoad(N0.getNode()) &&
14504       ISD::isUNINDEXEDLoad(N0.getNode()) &&
14505       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14506       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
14507         N0.hasOneUse()) ||
14508        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14509     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14510     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14511                                      LN0->getChain(),
14512                                      LN0->getBasePtr(), ExtVT,
14513                                      LN0->getMemOperand());
14514     CombineTo(N, ExtLoad);
14515     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14516     AddToWorklist(ExtLoad.getNode());
14517     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14518   }
14519 
14520   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
14521   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
14522       N0.hasOneUse() &&
14523       ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
14524       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
14525        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) {
14526     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
14527     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
14528                                      LN0->getChain(),
14529                                      LN0->getBasePtr(), ExtVT,
14530                                      LN0->getMemOperand());
14531     CombineTo(N, ExtLoad);
14532     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14533     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14534   }
14535 
14536   // fold (sext_inreg (masked_load x)) -> (sext_masked_load x)
14537   // ignore it if the masked load is already sign extended
14538   if (MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0)) {
14539     if (ExtVT == Ld->getMemoryVT() && N0.hasOneUse() &&
14540         Ld->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD &&
14541         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT)) {
14542       SDValue ExtMaskedLoad = DAG.getMaskedLoad(
14543           VT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), Ld->getOffset(),
14544           Ld->getMask(), Ld->getPassThru(), ExtVT, Ld->getMemOperand(),
14545           Ld->getAddressingMode(), ISD::SEXTLOAD, Ld->isExpandingLoad());
14546       CombineTo(N, ExtMaskedLoad);
14547       CombineTo(N0.getNode(), ExtMaskedLoad, ExtMaskedLoad.getValue(1));
14548       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14549     }
14550   }
14551 
14552   // fold (sext_inreg (masked_gather x)) -> (sext_masked_gather x)
14553   if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) {
    if (SDValue(GN0, 0).hasOneUse() && ExtVT == GN0->getMemoryVT() &&
        TLI.isVectorLoadExtDesirable(SDValue(GN0, 0))) {
14557       SDValue Ops[] = {GN0->getChain(),   GN0->getPassThru(), GN0->getMask(),
14558                        GN0->getBasePtr(), GN0->getIndex(),    GN0->getScale()};
14559 
14560       SDValue ExtLoad = DAG.getMaskedGather(
14561           DAG.getVTList(VT, MVT::Other), ExtVT, SDLoc(N), Ops,
14562           GN0->getMemOperand(), GN0->getIndexType(), ISD::SEXTLOAD);
14563 
14564       CombineTo(N, ExtLoad);
14565       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
14566       AddToWorklist(ExtLoad.getNode());
14567       return SDValue(N, 0); // Return N so it doesn't get rechecked!
14568     }
14569   }
14570 
14571   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
14572   if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) {
14573     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
14574                                            N0.getOperand(1), false))
14575       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1);
14576   }
14577 
14578   // Fold (iM_signext_inreg
14579   //        (extract_subvector (zext|anyext|sext iN_v to _) _)
14580   //        from iN)
14581   //      -> (extract_subvector (signext iN_v to iM))
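  // For example (illustrative):
  //   (v4i32 (sext_in_reg (extract_subvector (zext v8i16:x to v8i32), 4),
  //                       i16))
  //     -> (extract_subvector (sext v8i16:x to v8i32), 4)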
14582   if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() &&
14583       ISD::isExtOpcode(N0.getOperand(0).getOpcode())) {
14584     SDValue InnerExt = N0.getOperand(0);
14585     EVT InnerExtVT = InnerExt->getValueType(0);
14586     SDValue Extendee = InnerExt->getOperand(0);
14587 
14588     if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() &&
14589         (!LegalOperations ||
14590          TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) {
14591       SDValue SignExtExtendee =
14592           DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee);
14593       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee,
14594                          N0.getOperand(1));
14595     }
14596   }
14597 
14598   return SDValue();
14599 }
14600 
14601 static SDValue
14602 foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
14603                                          SelectionDAG &DAG,
14604                                          bool LegalOperations) {
14605   unsigned InregOpcode = N->getOpcode();
14606   unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode);
14607 
14608   SDValue Src = N->getOperand(0);
14609   EVT VT = N->getValueType(0);
14610   EVT SrcVT = EVT::getVectorVT(*DAG.getContext(),
14611                                Src.getValueType().getVectorElementType(),
14612                                VT.getVectorElementCount());
14613 
14614   assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
14615          "Expected EXTEND_VECTOR_INREG dag node in input!");
14616 
  // Profitability check: our operand must be a one-use CONCAT_VECTORS.
14618   // FIXME: one-use check may be overly restrictive
14619   if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS)
14620     return SDValue();
14621 
  // Profitability check: we must be extending exactly one of its operands.
14623   // FIXME: this is probably overly restrictive.
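  // For example (illustrative):
  //   (v4i32 (zero_extend_vector_inreg (concat_vectors v4i16:a, v4i16:b)))
  //     -> (v4i32 (zero_extend v4i16:a))
  // since only the lanes of the first concat operand get extended.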
14624   Src = Src.getOperand(0);
14625   if (Src.getValueType() != SrcVT)
14626     return SDValue();
14627 
14628   if (LegalOperations && !TLI.isOperationLegal(Opcode, VT))
14629     return SDValue();
14630 
14631   return DAG.getNode(Opcode, SDLoc(N), VT, Src);
14632 }
14633 
14634 SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) {
14635   SDValue N0 = N->getOperand(0);
14636   EVT VT = N->getValueType(0);
14637 
14638   if (N0.isUndef()) {
14639     // aext_vector_inreg(undef) = undef because the top bits are undefined.
14640     // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same.
14641     return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG
14642                ? DAG.getUNDEF(VT)
14643                : DAG.getConstant(0, SDLoc(N), VT);
14644   }
14645 
14646   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
14647     return Res;
14648 
14649   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
14650     return SDValue(N, 0);
14651 
14652   if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG,
14653                                                            LegalOperations))
14654     return R;
14655 
14656   return SDValue();
14657 }
14658 
14659 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
14660   SDValue N0 = N->getOperand(0);
14661   EVT VT = N->getValueType(0);
14662   EVT SrcVT = N0.getValueType();
14663   bool isLE = DAG.getDataLayout().isLittleEndian();
14664 
14665   // trunc(undef) = undef
14666   if (N0.isUndef())
14667     return DAG.getUNDEF(VT);
14668 
14669   // fold (truncate (truncate x)) -> (truncate x)
14670   if (N0.getOpcode() == ISD::TRUNCATE)
14671     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
14672 
14673   // fold (truncate c1) -> c1
14674   if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
14675     return C;
14676 
14677   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
14678   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
14679       N0.getOpcode() == ISD::SIGN_EXTEND ||
14680       N0.getOpcode() == ISD::ANY_EXTEND) {
14681     // if the source is smaller than the dest, we still need an extend.
14682     if (N0.getOperand(0).getValueType().bitsLT(VT))
14683       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, then we just need the truncate.
14685     if (N0.getOperand(0).getValueType().bitsGT(VT))
14686       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
14687     // if the source and dest are the same type, we can drop both the extend
14688     // and the truncate.
14689     return N0.getOperand(0);
14690   }
14691 
14692   // Try to narrow a truncate-of-sext_in_reg to the destination type:
14693   // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM
14694   if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
14695       N0.hasOneUse()) {
14696     SDValue X = N0.getOperand(0);
14697     SDValue ExtVal = N0.getOperand(1);
14698     EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT();
14699     if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) {
14700       SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
14701       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal);
14702     }
14703   }
14704 
  // If this truncate's only use is an anyext (i.e. anyext(trunc)), don't fold
  // it here; allow ourselves to be folded by the anyext combine instead.
14706   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
14707     return SDValue();
14708 
14709   // Fold extract-and-trunc into a narrow extract. For example:
14710   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
14711   //   i32 y = TRUNCATE(i64 x)
14712   //        -- becomes --
14713   //   v16i8 b = BITCAST (v2i64 val)
14714   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
14715   //
14716   // Note: We only run this optimization after type legalization (which often
14717   // creates this pattern) and before operation legalization after which
14718   // we need to be more careful about the vector instructions that we generate.
14719   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
14720       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
14721     EVT VecTy = N0.getOperand(0).getValueType();
14722     EVT ExTy = N0.getValueType();
14723     EVT TrTy = N->getValueType(0);
14724 
14725     auto EltCnt = VecTy.getVectorElementCount();
14726     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
14727     auto NewEltCnt = EltCnt * SizeRatio;
14728 
14729     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, NewEltCnt);
14730     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
14731 
14732     SDValue EltNo = N0->getOperand(1);
14733     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
14734       int Elt = EltNo->getAsZExtVal();
14735       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
14736 
14737       SDLoc DL(N);
14738       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
14739                          DAG.getBitcast(NVT, N0.getOperand(0)),
14740                          DAG.getVectorIdxConstant(Index, DL));
14741     }
14742   }
14743 
14744   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
14745   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
14746     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
14747         TLI.isTruncateFree(SrcVT, VT)) {
14748       SDLoc SL(N0);
14749       SDValue Cond = N0.getOperand(0);
14750       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
14751       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
14752       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
14753     }
14754   }
14755 
  // trunc (shl x, K) -> shl (trunc x), K, provided K < VT.getScalarSizeInBits()
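  // For example (illustrative):
  //   (i32 (trunc (shl (i64 x), 4))) -> (shl (i32 (trunc x)), 4)
  // since the shift amount 4 is provably less than 32.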
14757   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
14758       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
14759       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
14760     SDValue Amt = N0.getOperand(1);
14761     KnownBits Known = DAG.computeKnownBits(Amt);
14762     unsigned Size = VT.getScalarSizeInBits();
14763     if (Known.countMaxActiveBits() <= Log2_32(Size)) {
14764       SDLoc SL(N);
14765       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
14766 
14767       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
14768       if (AmtVT != Amt.getValueType()) {
14769         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
14770         AddToWorklist(Amt.getNode());
14771       }
14772       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
14773     }
14774   }
14775 
14776   if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
14777     return V;
14778 
14779   if (SDValue ABD = foldABSToABD(N))
14780     return ABD;
14781 
14782   // Attempt to pre-truncate BUILD_VECTOR sources.
14783   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
14784       N0.hasOneUse() &&
14785       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
14786       // Avoid creating illegal types if running after type legalizer.
14787       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) {
14788     SDLoc DL(N);
14789     EVT SVT = VT.getScalarType();
14790     SmallVector<SDValue, 8> TruncOps;
14791     for (const SDValue &Op : N0->op_values()) {
14792       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
14793       TruncOps.push_back(TruncOp);
14794     }
14795     return DAG.getBuildVector(VT, DL, TruncOps);
14796   }
14797 
14798   // trunc (splat_vector x) -> splat_vector (trunc x)
14799   if (N0.getOpcode() == ISD::SPLAT_VECTOR &&
14800       (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) &&
14801       (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) {
14802     SDLoc DL(N);
14803     EVT SVT = VT.getScalarType();
14804     return DAG.getSplatVector(
14805         VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0)));
14806   }
14807 
14808   // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example, fold
  //   (v2i32 (trunc (v2i64 (bitcast (v4i32 (buildvector x, x, y, y)))))) to
  //   (v2i32 (buildvector x, y)).
14812   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
14813       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
14814       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
14815       N0.getOperand(0).hasOneUse()) {
14816     SDValue BuildVect = N0.getOperand(0);
14817     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
14818     EVT TruncVecEltTy = VT.getVectorElementType();
14819 
14820     // Check that the element types match.
14821     if (BuildVectEltTy == TruncVecEltTy) {
14822       // Now we only need to compute the offset of the truncated elements.
14823       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
14824       unsigned TruncVecNumElts = VT.getVectorNumElements();
14825       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
14826 
14827       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
14828              "Invalid number of elements");
14829 
14830       SmallVector<SDValue, 8> Opnds;
14831       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
14832         Opnds.push_back(BuildVect.getOperand(i));
14833 
14834       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
14835     }
14836   }
14837 
14838   // fold (truncate (load x)) -> (smaller load x)
14839   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
14840   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
14841     if (SDValue Reduced = reduceLoadWidth(N))
14842       return Reduced;
14843 
14844     // Handle the case where the truncated result is at least as wide as the
14845     // loaded type.
14846     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
14847       auto *LN0 = cast<LoadSDNode>(N0);
14848       if (LN0->isSimple() && LN0->getMemoryVT().bitsLE(VT)) {
14849         SDValue NewLoad = DAG.getExtLoad(
14850             LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(),
14851             LN0->getBasePtr(), LN0->getMemoryVT(), LN0->getMemOperand());
14852         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
14853         return NewLoad;
14854       }
14855     }
14856   }
14857 
14858   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
14859   // where ... are all 'undef'.
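  // For example (illustrative):
  //   (v4i16 (trunc (concat_vectors (v2i32 undef), v2i32:x)))
  //     -> (concat_vectors (v2i16 undef), (v2i16 (trunc x)))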
14860   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
14861     SmallVector<EVT, 8> VTs;
14862     SDValue V;
14863     unsigned Idx = 0;
14864     unsigned NumDefs = 0;
14865 
14866     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
14867       SDValue X = N0.getOperand(i);
14868       if (!X.isUndef()) {
14869         V = X;
14870         Idx = i;
14871         NumDefs++;
14872       }
      // Stop if more than one member is non-undef.
14874       if (NumDefs > 1)
14875         break;
14876 
14877       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
14878                                      VT.getVectorElementType(),
14879                                      X.getValueType().getVectorElementCount()));
14880     }
14881 
14882     if (NumDefs == 0)
14883       return DAG.getUNDEF(VT);
14884 
14885     if (NumDefs == 1) {
14886       assert(V.getNode() && "The single defined operand is empty!");
14887       SmallVector<SDValue, 8> Opnds;
14888       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
14889         if (i != Idx) {
14890           Opnds.push_back(DAG.getUNDEF(VTs[i]));
14891           continue;
14892         }
14893         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
14894         AddToWorklist(NV.getNode());
14895         Opnds.push_back(NV);
14896       }
14897       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
14898     }
14899   }
14900 
14901   // Fold truncate of a bitcast of a vector to an extract of the low vector
14902   // element.
14903   //
14904   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
14905   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
14906     SDValue VecSrc = N0.getOperand(0);
14907     EVT VecSrcVT = VecSrc.getValueType();
14908     if (VecSrcVT.isVector() && VecSrcVT.getScalarType() == VT &&
14909         (!LegalOperations ||
14910          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) {
14911       SDLoc SL(N);
14912 
14913       unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1;
14914       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc,
14915                          DAG.getVectorIdxConstant(Idx, SL));
14916     }
14917   }
14918 
14919   // Simplify the operands using demanded-bits information.
14920   if (SimplifyDemandedBits(SDValue(N, 0)))
14921     return SDValue(N, 0);
14922 
14923   // fold (truncate (extract_subvector(ext x))) ->
14924   //      (extract_subvector x)
14925   // TODO: This can be generalized to cover cases where the truncate and extract
14926   // do not fully cancel each other out.
14927   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
14928     SDValue N00 = N0.getOperand(0);
14929     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
14930         N00.getOpcode() == ISD::ZERO_EXTEND ||
14931         N00.getOpcode() == ISD::ANY_EXTEND) {
14932       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
14933           VT.getVectorElementType())
14934         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
14935                            N00.getOperand(0), N0.getOperand(1));
14936     }
14937   }
14938 
14939   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
14940     return NewVSel;
14941 
14942   // Narrow a suitable binary operation with a non-opaque constant operand by
14943   // moving it ahead of the truncate. This is limited to pre-legalization
14944   // because targets may prefer a wider type during later combines and invert
14945   // this transform.
14946   switch (N0.getOpcode()) {
14947   case ISD::ADD:
14948   case ISD::SUB:
14949   case ISD::MUL:
14950   case ISD::AND:
14951   case ISD::OR:
14952   case ISD::XOR:
14953     if (!LegalOperations && N0.hasOneUse() &&
14954         (isConstantOrConstantVector(N0.getOperand(0), true) ||
14955          isConstantOrConstantVector(N0.getOperand(1), true))) {
14956       // TODO: We already restricted this to pre-legalization, but for vectors
14957       // we are extra cautious to not create an unsupported operation.
14958       // Target-specific changes are likely needed to avoid regressions here.
14959       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
14960         SDLoc DL(N);
14961         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14962         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14963         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
14964       }
14965     }
14966     break;
14967   case ISD::ADDE:
14968   case ISD::UADDO_CARRY:
14969     // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
14970     // (trunc uaddo_carry(X, Y, Carry)) ->
14971     //     (uaddo_carry trunc(X), trunc(Y), Carry)
14972     // When the adde's carry is not used.
14973     // We only do for uaddo_carry before legalize operation
14974     if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
14975          TLI.isOperationLegal(N0.getOpcode(), VT)) &&
14976         N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
14977       SDLoc DL(N);
14978       SDValue X = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
14979       SDValue Y = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
14980       SDVTList VTs = DAG.getVTList(VT, N0->getValueType(1));
14981       return DAG.getNode(N0.getOpcode(), DL, VTs, X, Y, N0.getOperand(2));
14982     }
14983     break;
14984   case ISD::USUBSAT:
    // Truncate the USUBSAT only if LHS is a known zero-extension; it's not
    // enough to know that the upper bits are zero, we must also ensure that
    // we don't introduce an extra truncate.
14988     if (!LegalOperations && N0.hasOneUse() &&
14989         N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
14990         N0.getOperand(0).getOperand(0).getScalarValueSizeInBits() <=
14991             VT.getScalarSizeInBits() &&
14992         hasOperation(N0.getOpcode(), VT)) {
14993       return getTruncatedUSUBSAT(VT, SrcVT, N0.getOperand(0), N0.getOperand(1),
14994                                  DAG, SDLoc(N));
14995     }
14996     break;
14997   }
14998 
14999   return SDValue();
15000 }
15001 
15002 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
15003   SDValue Elt = N->getOperand(i);
15004   if (Elt.getOpcode() != ISD::MERGE_VALUES)
15005     return Elt.getNode();
15006   return Elt.getOperand(Elt.getResNo()).getNode();
15007 }
15008 
15009 /// build_pair (load, load) -> load
15010 /// if load locations are consecutive.
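/// For example (illustrative, assuming a little-endian target):
///   (i64 (build_pair (i32 load [p]), (i32 load [p + 4]))) -> (i64 load [p])
/// On big-endian targets the two loads swap roles.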
15011 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
15012   assert(N->getOpcode() == ISD::BUILD_PAIR);
15013 
15014   auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
15015   auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
15016 
  // A BUILD_PAIR always has the least significant part in elt 0 and the most
  // significant part in elt 1, so when combining into one large load we need
  // to consider the endianness.
15020   if (DAG.getDataLayout().isBigEndian())
15021     std::swap(LD1, LD2);
15022 
15023   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) ||
15024       !LD1->hasOneUse() || !LD2->hasOneUse() ||
15025       LD1->getAddressSpace() != LD2->getAddressSpace())
15026     return SDValue();
15027 
15028   unsigned LD1Fast = 0;
15029   EVT LD1VT = LD1->getValueType(0);
15030   unsigned LD1Bytes = LD1VT.getStoreSize();
15031   if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
15032       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) &&
15033       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
15034                              *LD1->getMemOperand(), &LD1Fast) && LD1Fast)
15035     return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
15036                        LD1->getPointerInfo(), LD1->getAlign());
15037 
15038   return SDValue();
15039 }
15040 
15041 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
15042   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
15043   // and Lo parts; on big-endian machines it doesn't.
15044   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
15045 }
15046 
15047 SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
15048                                           const TargetLowering &TLI) {
15049   // If this is not a bitcast to an FP type or if the target doesn't have
15050   // IEEE754-compliant FP logic, we're done.
15051   EVT VT = N->getValueType(0);
15052   SDValue N0 = N->getOperand(0);
15053   EVT SourceVT = N0.getValueType();
15054 
15055   if (!VT.isFloatingPoint())
15056     return SDValue();
15057 
15058   // TODO: Handle cases where the integer constant is a different scalar
15059   // bitwidth to the FP.
15060   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
15061     return SDValue();
15062 
15063   unsigned FPOpcode;
15064   APInt SignMask;
15065   switch (N0.getOpcode()) {
15066   case ISD::AND:
15067     FPOpcode = ISD::FABS;
15068     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
15069     break;
15070   case ISD::XOR:
15071     FPOpcode = ISD::FNEG;
15072     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15073     break;
15074   case ISD::OR:
15075     FPOpcode = ISD::FABS;
15076     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
15077     break;
15078   default:
15079     return SDValue();
15080   }
15081 
15082   if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
15083     return SDValue();
15084 
15085   // This needs to be the inverse of logic in foldSignChangeInBitcast.
15086   // FIXME: I don't think looking for bitcast intrinsically makes sense, but
15087   // removing this would require more changes.
15088   auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
15089     if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
15090       return true;
15091 
15092     return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
15093   };
15094 
15095   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
15096   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
15097   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
15098   //   fneg (fabs X)
15099   SDValue LogicOp0 = N0.getOperand(0);
15100   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
15101   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
15102       IsBitCastOrFree(LogicOp0, VT)) {
15103     SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
15104     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
15105     NumFPLogicOpsConv++;
15106     if (N0.getOpcode() == ISD::OR)
15107       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
15108     return FPOp;
15109   }
15110 
15111   return SDValue();
15112 }
15113 
15114 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
15115   SDValue N0 = N->getOperand(0);
15116   EVT VT = N->getValueType(0);
15117 
15118   if (N0.isUndef())
15119     return DAG.getUNDEF(VT);
15120 
15121   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
15122   // Only do this before legalize types, unless both types are integer and the
15123   // scalar type is legal. Only do this before legalize ops, since the target
  // may be depending on the bitcast.
15125   // First check to see if this is all constant.
15126   // TODO: Support FP bitcasts after legalize types.
15127   if (VT.isVector() &&
15128       (!LegalTypes ||
15129        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
15130         TLI.isTypeLegal(VT.getVectorElementType()))) &&
15131       N0.getOpcode() == ISD::BUILD_VECTOR && N0->hasOneUse() &&
15132       cast<BuildVectorSDNode>(N0)->isConstant())
15133     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
15134                                              VT.getVectorElementType());
15135 
15136   // If the input is a constant, let getNode fold it.
15137   if (isIntOrFPConstant(N0)) {
    // If we can't allow illegal operations, we need to check that this is just
    // an fp -> int or int -> fp conversion and that the resulting operation
    // will be legal.
15141     if (!LegalOperations ||
15142         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
15143          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
15144         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
15145          TLI.isOperationLegal(ISD::Constant, VT))) {
15146       SDValue C = DAG.getBitcast(VT, N0);
15147       if (C.getNode() != N)
15148         return C;
15149     }
15150   }
15151 
15152   // (conv (conv x, t1), t2) -> (conv x, t2)
15153   if (N0.getOpcode() == ISD::BITCAST)
15154     return DAG.getBitcast(VT, N0.getOperand(0));
15155 
15156   // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
15157   // iff the current bitwise logicop type isn't legal
15158   if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
15159       !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
15160     auto IsFreeBitcast = [VT](SDValue V) {
15161       return (V.getOpcode() == ISD::BITCAST &&
15162               V.getOperand(0).getValueType() == VT) ||
15163              (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
15164               V->hasOneUse());
15165     };
15166     if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
15167       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
15168                          DAG.getBitcast(VT, N0.getOperand(0)),
15169                          DAG.getBitcast(VT, N0.getOperand(1)));
15170   }
15171 
15172   // fold (conv (load x)) -> (load (conv*)x)
15173   // If the resultant load doesn't need a higher alignment than the original!
15174   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
15175       // Do not remove the cast if the types differ in endian layout.
15176       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
15177           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
15178       // If the load is volatile, we only want to change the load type if the
15179       // resulting load is legal. Otherwise we might increase the number of
15180       // memory accesses. We don't care if the original type was legal or not
15181       // as we assume software couldn't rely on the number of accesses of an
15182       // illegal type.
15183       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
15184        TLI.isOperationLegal(ISD::LOAD, VT))) {
15185     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15186 
15187     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
15188                                     *LN0->getMemOperand())) {
15189       SDValue Load =
15190           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
15191                       LN0->getMemOperand());
15192       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15193       return Load;
15194     }
15195   }
15196 
15197   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
15198     return V;
15199 
15200   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
15201   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
15202   //
15203   // For ppc_fp128:
15204   // fold (bitcast (fneg x)) ->
15205   //     flipbit = signbit
15206   //     (xor (bitcast x) (build_pair flipbit, flipbit))
15207   //
15208   // fold (bitcast (fabs x)) ->
15209   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
15210   //     (xor (bitcast x) (build_pair flipbit, flipbit))
15211   // This often reduces constant pool loads.
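  // For example (illustrative, for f32/i32):
  //   (i32 (bitcast (fneg f32:x))) -> (xor (i32 (bitcast x)), 0x80000000)
  //   (i32 (bitcast (fabs f32:x))) -> (and (i32 (bitcast x)), 0x7fffffff)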
15212   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
15213        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
15214       N0->hasOneUse() && VT.isInteger() && !VT.isVector() &&
15215       !N0.getValueType().isVector()) {
15216     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
15217     AddToWorklist(NewConv.getNode());
15218 
15219     SDLoc DL(N);
15220     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15221       assert(VT.getSizeInBits() == 128);
15222       SDValue SignBit = DAG.getConstant(
15223           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
15224       SDValue FlipBit;
15225       if (N0.getOpcode() == ISD::FNEG) {
15226         FlipBit = SignBit;
15227         AddToWorklist(FlipBit.getNode());
15228       } else {
15229         assert(N0.getOpcode() == ISD::FABS);
15230         SDValue Hi =
15231             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
15232                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15233                                               SDLoc(NewConv)));
15234         AddToWorklist(Hi.getNode());
15235         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
15236         AddToWorklist(FlipBit.getNode());
15237       }
15238       SDValue FlipBits =
15239           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15240       AddToWorklist(FlipBits.getNode());
15241       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
15242     }
15243     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15244     if (N0.getOpcode() == ISD::FNEG)
15245       return DAG.getNode(ISD::XOR, DL, VT,
15246                          NewConv, DAG.getConstant(SignBit, DL, VT));
15247     assert(N0.getOpcode() == ISD::FABS);
15248     return DAG.getNode(ISD::AND, DL, VT,
15249                        NewConv, DAG.getConstant(~SignBit, DL, VT));
15250   }
15251 
15252   // fold (bitconvert (fcopysign cst, x)) ->
15253   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
15254   // Note that we don't handle (copysign x, cst) because this can always be
15255   // folded to an fneg or fabs.
15256   //
15257   // For ppc_fp128:
15258   // fold (bitcast (fcopysign cst, x)) ->
15259   //     flipbit = (and (extract_element
15260   //                     (xor (bitcast cst), (bitcast x)), 0),
15261   //                    signbit)
15262   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
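  // For example (illustrative, for f32/i32):
  //   (i32 (bitcast (fcopysign f32:cst, f32:x)))
  //     -> (or (and (i32 (bitcast x)), 0x80000000),
  //            (and (i32 (bitcast cst)), 0x7fffffff))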
15263   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
15264       isa<ConstantFPSDNode>(N0.getOperand(0)) && VT.isInteger() &&
15265       !VT.isVector()) {
15266     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
15267     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
15268     if (isTypeLegal(IntXVT)) {
15269       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
15270       AddToWorklist(X.getNode());
15271 
15272       // If X has a different width than the result/lhs, sext it or truncate it.
15273       unsigned VTWidth = VT.getSizeInBits();
15274       if (OrigXWidth < VTWidth) {
15275         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
15276         AddToWorklist(X.getNode());
15277       } else if (OrigXWidth > VTWidth) {
15278         // To get the sign bit in the right place, we have to shift it right
15279         // before truncating.
15280         SDLoc DL(X);
15281         X = DAG.getNode(ISD::SRL, DL,
15282                         X.getValueType(), X,
15283                         DAG.getConstant(OrigXWidth-VTWidth, DL,
15284                                         X.getValueType()));
15285         AddToWorklist(X.getNode());
15286         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
15287         AddToWorklist(X.getNode());
15288       }
15289 
15290       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
15291         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
15292         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15293         AddToWorklist(Cst.getNode());
15294         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
15295         AddToWorklist(X.getNode());
15296         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
15297         AddToWorklist(XorResult.getNode());
15298         SDValue XorResult64 = DAG.getNode(
15299             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
15300             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
15301                                   SDLoc(XorResult)));
15302         AddToWorklist(XorResult64.getNode());
15303         SDValue FlipBit =
15304             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
15305                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
15306         AddToWorklist(FlipBit.getNode());
15307         SDValue FlipBits =
15308             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
15309         AddToWorklist(FlipBits.getNode());
15310         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
15311       }
15312       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
15313       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
15314                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
15315       AddToWorklist(X.getNode());
15316 
15317       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
15318       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
15319                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
15320       AddToWorklist(Cst.getNode());
15321 
15322       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
15323     }
15324   }
15325 
15326   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
15327   if (N0.getOpcode() == ISD::BUILD_PAIR)
15328     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
15329       return CombineLD;
15330 
15331   // Remove double bitcasts from shuffles - this is often a legacy of
15332   // XformToShuffleWithZero being used to combine bitmaskings (of
15333   // float vectors bitcast to integer vectors) into shuffles.
15334   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
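  // For example (illustrative), with MaskScale == 2:
  //   (v4i32 (bitcast (v2i64 (shuffle (bitcast v4i32:a to v2i64), undef,
  //                                   <1,0>))))
  //     -> (v4i32 (shuffle a, undef, <2,3,0,1>))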
15335   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
15336       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
15337       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
15338       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
15339     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
15340 
15341     // If operands are a bitcast, peek through if it casts the original VT.
15342     // If operands are a constant, just bitcast back to original VT.
15343     auto PeekThroughBitcast = [&](SDValue Op) {
15344       if (Op.getOpcode() == ISD::BITCAST &&
15345           Op.getOperand(0).getValueType() == VT)
        return Op.getOperand(0);
15347       if (Op.isUndef() || isAnyConstantBuildVector(Op))
15348         return DAG.getBitcast(VT, Op);
15349       return SDValue();
15350     };
15351 
15352     // FIXME: If either input vector is bitcast, try to convert the shuffle to
15353     // the result type of this bitcast. This would eliminate at least one
15354     // bitcast. See the transform in InstCombine.
15355     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
15356     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
15357     if (!(SV0 && SV1))
15358       return SDValue();
15359 
15360     int MaskScale =
15361         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
15362     SmallVector<int, 8> NewMask;
15363     for (int M : SVN->getMask())
15364       for (int i = 0; i != MaskScale; ++i)
15365         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
15366 
15367     SDValue LegalShuffle =
15368         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
15369     if (LegalShuffle)
15370       return LegalShuffle;
15371   }
15372 
15373   return SDValue();
15374 }
15375 
15376 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
15377   EVT VT = N->getValueType(0);
15378   return CombineConsecutiveLoads(N, VT);
15379 }
15380 
15381 SDValue DAGCombiner::visitFREEZE(SDNode *N) {
15382   SDValue N0 = N->getOperand(0);
15383 
15384   if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false))
15385     return N0;
15386 
15387   // Fold freeze(op(x, ...)) -> op(freeze(x), ...).
15388   // Try to push freeze through instructions that propagate but don't produce
  // poison as far as possible. If an operand of the freeze satisfies three
  // conditions: 1) it has one use, 2) it does not produce poison, and 3) all
  // but one of its operands are guaranteed non-poison (or it is a BUILD_VECTOR
  // or similar), then push the freeze through to the operands that are not
  // guaranteed non-poison.
15393   // NOTE: we will strip poison-generating flags, so ignore them here.
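  // E.g. freeze(add x, 1) -> add (freeze x), 1: the constant operand is
  // guaranteed non-poison, and any nuw/nsw flags on the add are stripped.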
15394   if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false,
15395                                  /*ConsiderFlags*/ false) ||
15396       N0->getNumValues() != 1 || !N0->hasOneUse())
15397     return SDValue();
15398 
15399   bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
15400                                           N0.getOpcode() == ISD::BUILD_PAIR ||
15401                                           N0.getOpcode() == ISD::CONCAT_VECTORS;
15402 
15403   SmallSetVector<SDValue, 8> MaybePoisonOperands;
15404   for (SDValue Op : N0->ops()) {
15405     if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false,
15406                                              /*Depth*/ 1))
15407       continue;
15408     bool HadMaybePoisonOperands = !MaybePoisonOperands.empty();
15409     bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op);
15410     if (!HadMaybePoisonOperands)
15411       continue;
15412     if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) {
15413       // Multiple maybe-poison ops when not allowed - bail out.
15414       return SDValue();
15415     }
15416   }
  // NOTE: the whole op may not be guaranteed to not be undef or poison because
  // it could create undef or poison due to its poison-generating flags.
15419   // So not finding any maybe-poison operands is fine.
15420 
15421   for (SDValue MaybePoisonOperand : MaybePoisonOperands) {
15422     // Don't replace every single UNDEF everywhere with frozen UNDEF, though.
15423     if (MaybePoisonOperand.getOpcode() == ISD::UNDEF)
15424       continue;
15425     // First, freeze each offending operand.
15426     SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand);
15427     // Then, change all other uses of unfrozen operand to use frozen operand.
15428     DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand);
15429     if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE &&
15430         FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) {
15431       // But, that also updated the use in the freeze we just created, thus
      // creating a cycle in the DAG. Let's undo that by mutating the freeze.
15433       DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(),
15434                              MaybePoisonOperand);
15435     }
15436   }
15437 
15438   // This node has been merged with another.
15439   if (N->getOpcode() == ISD::DELETED_NODE)
15440     return SDValue(N, 0);
15441 
15442   // The whole node may have been updated, so the value we were holding
15443   // may no longer be valid. Re-fetch the operand we're `freeze`ing.
15444   N0 = N->getOperand(0);
15445 
  // Finally, recreate the node; its operands were updated to use
  // frozen operands, so we just need to use its "original" operands.
  SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end());
  // Special-handle ISD::UNDEF: each single use of it can be its own distinct
  // value, so freeze each one individually.
15450   for (SDValue &Op : Ops) {
15451     if (Op.getOpcode() == ISD::UNDEF)
15452       Op = DAG.getFreeze(Op);
15453   }
15454   // NOTE: this strips poison generating flags.
15455   SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops);
15456   assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) &&
15457          "Can't create node that may be undef/poison!");
15458   return R;
15459 }
15460 
15461 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
15462 /// operands. DstEltVT indicates the destination element value type.
15463 SDValue DAGCombiner::
15464 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
15465   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
15466 
15467   // If this is already the right type, we're done.
15468   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
15469 
15470   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
15471   unsigned DstBitSize = DstEltVT.getSizeInBits();
15472 
15473   // If this is a conversion of N elements of one type to N elements of another
15474   // type, convert each element.  This handles FP<->INT cases.
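  // E.g. a v4i32 -> v4f32 bitcast becomes four scalar i32 -> f32 bitcasts.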
15475   if (SrcBitSize == DstBitSize) {
15476     SmallVector<SDValue, 8> Ops;
15477     for (SDValue Op : BV->op_values()) {
15478       // If the vector element type is not legal, the BUILD_VECTOR operands
15479       // are promoted and implicitly truncated.  Make that explicit here.
15480       if (Op.getValueType() != SrcEltVT)
15481         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
15482       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
15483       AddToWorklist(Ops.back().getNode());
15484     }
15485     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
15486                               BV->getValueType(0).getVectorNumElements());
15487     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
15488   }
15489 
15490   // Otherwise, we're growing or shrinking the elements.  To avoid having to
15491   // handle annoying details of growing/shrinking FP values, we convert them to
15492   // int first.
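  // E.g. for a v2f64 -> v4i32 bitcast, the f64 operands are first bitcast to
  // i64, and the integer-only code below then splits the raw bits.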
15493   if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to an int vector whose elements are the
    // same size.
15496     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
15497     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
15498     SrcEltVT = IntVT;
15499   }
15500 
  // Now we know the input is an integer vector.  If the output is an FP type,
15502   // convert to integer first, then to FP of the right size.
15503   if (DstEltVT.isFloatingPoint()) {
15504     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
15505     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
15506 
15507     // Next, convert to FP elements of the same size.
15508     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
15509   }
15510 
  // Okay, we know the src/dst types are both integers of differing widths.
15512   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
15513 
15514   // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a
15515   // BuildVectorSDNode?
15516   auto *BVN = cast<BuildVectorSDNode>(BV);
15517 
15518   // Extract the constant raw bit data.
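  // E.g. on a little-endian target, bitcasting <i32 1, i32 2> to v1i64 packs
  // the raw bits into the single constant 0x0000000200000001.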
15519   BitVector UndefElements;
15520   SmallVector<APInt> RawBits;
15521   bool IsLE = DAG.getDataLayout().isLittleEndian();
15522   if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements))
15523     return SDValue();
15524 
15525   SDLoc DL(BV);
15526   SmallVector<SDValue, 8> Ops;
15527   for (unsigned I = 0, E = RawBits.size(); I != E; ++I) {
15528     if (UndefElements[I])
15529       Ops.push_back(DAG.getUNDEF(DstEltVT));
15530     else
15531       Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT));
15532   }
15533 
15534   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
15535   return DAG.getBuildVector(VT, DL, Ops);
15536 }
15537 
// Returns true if floating-point contraction is allowed on the FMUL SDValue
// `N`.
15540 static bool isContractableFMUL(const TargetOptions &Options, SDValue N) {
15541   assert(N.getOpcode() == ISD::FMUL);
15542 
15543   return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
15544          N->getFlags().hasAllowContract();
15545 }
15546 
// Returns true if `N` may assume that no infinities are involved in its
// computation.
15548 static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
15549   return Options.NoInfsFPMath || N->getFlags().hasNoInfs();
15550 }
15551 
15552 /// Try to perform FMA combining on a given FADD node.
15553 template <class MatchContextClass>
15554 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
15555   SDValue N0 = N->getOperand(0);
15556   SDValue N1 = N->getOperand(1);
15557   EVT VT = N->getValueType(0);
15558   SDLoc SL(N);
15559   MatchContextClass matcher(DAG, TLI, N);
15560   const TargetOptions &Options = DAG.getTarget().Options;
15561 
15562   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15563 
15564   // Floating-point multiply-add with intermediate rounding.
15565   // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15566   // FIXME: Add VP_FMAD opcode.
15567   bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15568 
15569   // Floating-point multiply-add without intermediate rounding.
15570   bool HasFMA =
15571       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15572       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15573 
15574   // No valid opcode, do not combine.
15575   if (!HasFMAD && !HasFMA)
15576     return SDValue();
15577 
15578   bool CanReassociate =
15579       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15580   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15581                               Options.UnsafeFPMath || HasFMAD);
15582   // If the addition is not contractable, do not combine.
15583   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15584     return SDValue();
15585 
15586   // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
15587   // beneficial. It does not reduce latency. It increases register pressure. It
15588   // replaces an fadd with an fma which is a more complex instruction, so is
15589   // likely to have a larger encoding, use more functional units, etc.
15590   if (N0 == N1)
15591     return SDValue();
15592 
15593   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15594     return SDValue();
15595 
15596   // Always prefer FMAD to FMA for precision.
15597   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15598   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15599 
15600   auto isFusedOp = [&](SDValue N) {
15601     return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15602   };
15603 
  // Returns true if the node is an FMUL that is contractable, either due to
  // global flags or its SDNodeFlags.
15606   auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15607     if (!matcher.match(N, ISD::FMUL))
15608       return false;
15609     return AllowFusionGlobally || N->getFlags().hasAllowContract();
15610   };
15611   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
15612   // prefer to fold the multiply with fewer uses.
15613   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
15614     if (N0->use_size() > N1->use_size())
15615       std::swap(N0, N1);
15616   }
15617 
15618   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
15619   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
15620     return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
15621                            N0.getOperand(1), N1);
15622   }
15623 
15624   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
15625   // Note: Commutes FADD operands.
15626   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
15627     return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
15628                            N1.getOperand(1), N0);
15629   }
15630 
15631   // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
15632   // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E)
15633   // This also works with nested fma instructions:
  // fadd (fma A, B, (fma C, D, (fmul E, F))), G -->
  //   fma A, B, (fma C, D, (fma E, F, G))
  // fadd G, (fma A, B, (fma C, D, (fmul E, F))) -->
  //   fma A, B, (fma C, D, (fma E, F, G)).
15638   // This requires reassociation because it changes the order of operations.
15639   if (CanReassociate) {
15640     SDValue FMA, E;
15641     if (isFusedOp(N0) && N0.hasOneUse()) {
15642       FMA = N0;
15643       E = N1;
15644     } else if (isFusedOp(N1) && N1.hasOneUse()) {
15645       FMA = N1;
15646       E = N0;
15647     }
15648 
15649     SDValue TmpFMA = FMA;
15650     while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
15651       SDValue FMul = TmpFMA->getOperand(2);
15652       if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
15653         SDValue C = FMul.getOperand(0);
15654         SDValue D = FMul.getOperand(1);
15655         SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
15656         DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
15657         // Replacing the inner FMul could cause the outer FMA to be simplified
15658         // away.
15659         return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue(N, 0) : FMA;
15660       }
15661 
15662       TmpFMA = TmpFMA->getOperand(2);
15663     }
15664   }
15665 
15666   // Look through FP_EXTEND nodes to do more combining.
15667 
15668   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
15669   if (matcher.match(N0, ISD::FP_EXTEND)) {
15670     SDValue N00 = N0.getOperand(0);
15671     if (isContractableFMUL(N00) &&
15672         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15673                             N00.getValueType())) {
15674       return matcher.getNode(
15675           PreferredFusedOpcode, SL, VT,
15676           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15677           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
15678     }
15679   }
15680 
15681   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
15682   // Note: Commutes FADD operands.
15683   if (matcher.match(N1, ISD::FP_EXTEND)) {
15684     SDValue N10 = N1.getOperand(0);
15685     if (isContractableFMUL(N10) &&
15686         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15687                             N10.getValueType())) {
15688       return matcher.getNode(
15689           PreferredFusedOpcode, SL, VT,
15690           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
15691           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15692     }
15693   }
15694 
15695   // More folding opportunities when target permits.
15696   if (Aggressive) {
15697     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
15698     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
15699     auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15700                                     SDValue Z) {
15701       return matcher.getNode(
15702           PreferredFusedOpcode, SL, VT, X, Y,
15703           matcher.getNode(PreferredFusedOpcode, SL, VT,
15704                           matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15705                           matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15706     };
15707     if (isFusedOp(N0)) {
15708       SDValue N02 = N0.getOperand(2);
15709       if (matcher.match(N02, ISD::FP_EXTEND)) {
15710         SDValue N020 = N02.getOperand(0);
15711         if (isContractableFMUL(N020) &&
15712             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15713                                 N020.getValueType())) {
15714           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
15715                                       N020.getOperand(0), N020.getOperand(1),
15716                                       N1);
15717         }
15718       }
15719     }
15720 
15721     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
15722     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
15723     // FIXME: This turns two single-precision and one double-precision
15724     // operation into two double-precision operations, which might not be
15725     // interesting for all targets, especially GPUs.
15726     auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
15727                                     SDValue Z) {
15728       return matcher.getNode(
15729           PreferredFusedOpcode, SL, VT,
15730           matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
15731           matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
15732           matcher.getNode(PreferredFusedOpcode, SL, VT,
15733                           matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
15734                           matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
15735     };
15736     if (N0.getOpcode() == ISD::FP_EXTEND) {
15737       SDValue N00 = N0.getOperand(0);
15738       if (isFusedOp(N00)) {
15739         SDValue N002 = N00.getOperand(2);
15740         if (isContractableFMUL(N002) &&
15741             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15742                                 N00.getValueType())) {
15743           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
15744                                       N002.getOperand(0), N002.getOperand(1),
15745                                       N1);
15746         }
15747       }
15748     }
15749 
    // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
15751     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
15752     if (isFusedOp(N1)) {
15753       SDValue N12 = N1.getOperand(2);
15754       if (N12.getOpcode() == ISD::FP_EXTEND) {
15755         SDValue N120 = N12.getOperand(0);
15756         if (isContractableFMUL(N120) &&
15757             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15758                                 N120.getValueType())) {
15759           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
15760                                       N120.getOperand(0), N120.getOperand(1),
15761                                       N0);
15762         }
15763       }
15764     }
15765 
    // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
15767     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
15768     // FIXME: This turns two single-precision and one double-precision
15769     // operation into two double-precision operations, which might not be
15770     // interesting for all targets, especially GPUs.
15771     if (N1.getOpcode() == ISD::FP_EXTEND) {
15772       SDValue N10 = N1.getOperand(0);
15773       if (isFusedOp(N10)) {
15774         SDValue N102 = N10.getOperand(2);
15775         if (isContractableFMUL(N102) &&
15776             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15777                                 N10.getValueType())) {
15778           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
15779                                       N102.getOperand(0), N102.getOperand(1),
15780                                       N0);
15781         }
15782       }
15783     }
15784   }
15785 
15786   return SDValue();
15787 }
15788 
15789 /// Try to perform FMA combining on a given FSUB node.
15790 template <class MatchContextClass>
15791 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
15792   SDValue N0 = N->getOperand(0);
15793   SDValue N1 = N->getOperand(1);
15794   EVT VT = N->getValueType(0);
15795   SDLoc SL(N);
15796   MatchContextClass matcher(DAG, TLI, N);
15797   const TargetOptions &Options = DAG.getTarget().Options;
15798 
15799   bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
15800 
15801   // Floating-point multiply-add with intermediate rounding.
15802   // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
15803   // FIXME: Add VP_FMAD opcode.
15804   bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
15805 
15806   // Floating-point multiply-add without intermediate rounding.
15807   bool HasFMA =
15808       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
15809       (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
15810 
15811   // No valid opcode, do not combine.
15812   if (!HasFMAD && !HasFMA)
15813     return SDValue();
15814 
15815   const SDNodeFlags Flags = N->getFlags();
15816   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
15817                               Options.UnsafeFPMath || HasFMAD);
15818 
15819   // If the subtraction is not contractable, do not combine.
15820   if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
15821     return SDValue();
15822 
15823   if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
15824     return SDValue();
15825 
15826   // Always prefer FMAD to FMA for precision.
15827   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
15828   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
15829   bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros();
15830 
  // Returns true if the node is an FMUL that is contractable, either due to
  // global flags or its SDNodeFlags.
15833   auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
15834     if (!matcher.match(N, ISD::FMUL))
15835       return false;
15836     return AllowFusionGlobally || N->getFlags().hasAllowContract();
15837   };
15838 
15839   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15840   auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
15841     if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
15842       return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
15843                              XY.getOperand(1),
15844                              matcher.getNode(ISD::FNEG, SL, VT, Z));
15845     }
15846     return SDValue();
15847   };
15848 
15849   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15850   // Note: Commutes FSUB operands.
15851   auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
15852     if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
15853       return matcher.getNode(
15854           PreferredFusedOpcode, SL, VT,
15855           matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
15856           YZ.getOperand(1), X);
15857     }
15858     return SDValue();
15859   };
15860 
15861   // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)),
15862   // prefer to fold the multiply with fewer uses.
15863   if (isContractableFMUL(N0) && isContractableFMUL(N1) &&
15864       (N0->use_size() > N1->use_size())) {
15865     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b))
15866     if (SDValue V = tryToFoldXSubYZ(N0, N1))
15867       return V;
15868     // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d)))
15869     if (SDValue V = tryToFoldXYSubZ(N0, N1))
15870       return V;
15871   } else {
15872     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
15873     if (SDValue V = tryToFoldXYSubZ(N0, N1))
15874       return V;
15875     // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
15876     if (SDValue V = tryToFoldXSubYZ(N0, N1))
15877       return V;
15878   }
15879 
  // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
15881   if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
15882       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
15883     SDValue N00 = N0.getOperand(0).getOperand(0);
15884     SDValue N01 = N0.getOperand(0).getOperand(1);
15885     return matcher.getNode(PreferredFusedOpcode, SL, VT,
15886                            matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
15887                            matcher.getNode(ISD::FNEG, SL, VT, N1));
15888   }
15889 
15890   // Look through FP_EXTEND nodes to do more combining.
15891 
15892   // fold (fsub (fpext (fmul x, y)), z)
15893   //   -> (fma (fpext x), (fpext y), (fneg z))
15894   if (matcher.match(N0, ISD::FP_EXTEND)) {
15895     SDValue N00 = N0.getOperand(0);
15896     if (isContractableFMUL(N00) &&
15897         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15898                             N00.getValueType())) {
15899       return matcher.getNode(
15900           PreferredFusedOpcode, SL, VT,
15901           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
15902           matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
15903           matcher.getNode(ISD::FNEG, SL, VT, N1));
15904     }
15905   }
15906 
15907   // fold (fsub x, (fpext (fmul y, z)))
15908   //   -> (fma (fneg (fpext y)), (fpext z), x)
15909   // Note: Commutes FSUB operands.
15910   if (matcher.match(N1, ISD::FP_EXTEND)) {
15911     SDValue N10 = N1.getOperand(0);
15912     if (isContractableFMUL(N10) &&
15913         TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15914                             N10.getValueType())) {
15915       return matcher.getNode(
15916           PreferredFusedOpcode, SL, VT,
15917           matcher.getNode(
15918               ISD::FNEG, SL, VT,
15919               matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
15920           matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
15921     }
15922   }
15923 
  // fold (fsub (fpext (fneg (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
15930   if (matcher.match(N0, ISD::FP_EXTEND)) {
15931     SDValue N00 = N0.getOperand(0);
15932     if (matcher.match(N00, ISD::FNEG)) {
15933       SDValue N000 = N00.getOperand(0);
15934       if (isContractableFMUL(N000) &&
15935           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15936                               N00.getValueType())) {
15937         return matcher.getNode(
15938             ISD::FNEG, SL, VT,
15939             matcher.getNode(
15940                 PreferredFusedOpcode, SL, VT,
15941                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15942                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15943                 N1));
15944       }
15945     }
15946   }
15947 
  // fold (fsub (fneg (fpext (fmul x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
15954   if (matcher.match(N0, ISD::FNEG)) {
15955     SDValue N00 = N0.getOperand(0);
15956     if (matcher.match(N00, ISD::FP_EXTEND)) {
15957       SDValue N000 = N00.getOperand(0);
15958       if (isContractableFMUL(N000) &&
15959           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
15960                               N000.getValueType())) {
15961         return matcher.getNode(
15962             ISD::FNEG, SL, VT,
15963             matcher.getNode(
15964                 PreferredFusedOpcode, SL, VT,
15965                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
15966                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
15967                 N1));
15968       }
15969     }
15970   }
15971 
15972   auto isReassociable = [&Options](SDNode *N) {
15973     return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
15974   };
15975 
15976   auto isContractableAndReassociableFMUL = [&isContractableFMUL,
15977                                             &isReassociable](SDValue N) {
15978     return isContractableFMUL(N) && isReassociable(N.getNode());
15979   };
15980 
15981   auto isFusedOp = [&](SDValue N) {
15982     return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
15983   };
15984 
15985   // More folding opportunities when target permits.
15986   if (Aggressive && isReassociable(N)) {
15987     bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract();
15988     // fold (fsub (fma x, y, (fmul u, v)), z)
    //   -> (fma x, y, (fma u, v, (fneg z)))
15990     if (CanFuse && isFusedOp(N0) &&
15991         isContractableAndReassociableFMUL(N0.getOperand(2)) &&
15992         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
15993       return matcher.getNode(
15994           PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
15995           matcher.getNode(PreferredFusedOpcode, SL, VT,
15996                           N0.getOperand(2).getOperand(0),
15997                           N0.getOperand(2).getOperand(1),
15998                           matcher.getNode(ISD::FNEG, SL, VT, N1)));
15999     }
16000 
16001     // fold (fsub x, (fma y, z, (fmul u, v)))
16002     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
16003     if (CanFuse && isFusedOp(N1) &&
16004         isContractableAndReassociableFMUL(N1.getOperand(2)) &&
16005         N1->hasOneUse() && NoSignedZero) {
16006       SDValue N20 = N1.getOperand(2).getOperand(0);
16007       SDValue N21 = N1.getOperand(2).getOperand(1);
16008       return matcher.getNode(
16009           PreferredFusedOpcode, SL, VT,
16010           matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16011           N1.getOperand(1),
16012           matcher.getNode(PreferredFusedOpcode, SL, VT,
16013                           matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
16014     }
16015 
16016     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
    //   -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
16018     if (isFusedOp(N0) && N0->hasOneUse()) {
16019       SDValue N02 = N0.getOperand(2);
16020       if (matcher.match(N02, ISD::FP_EXTEND)) {
16021         SDValue N020 = N02.getOperand(0);
16022         if (isContractableAndReassociableFMUL(N020) &&
16023             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16024                                 N020.getValueType())) {
16025           return matcher.getNode(
16026               PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
16027               matcher.getNode(
16028                   PreferredFusedOpcode, SL, VT,
16029                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
16030                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
16031                   matcher.getNode(ISD::FNEG, SL, VT, N1)));
16032         }
16033       }
16034     }
16035 
16036     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
16037     //   -> (fma (fpext x), (fpext y),
16038     //           (fma (fpext u), (fpext v), (fneg z)))
16039     // FIXME: This turns two single-precision and one double-precision
16040     // operation into two double-precision operations, which might not be
16041     // interesting for all targets, especially GPUs.
16042     if (matcher.match(N0, ISD::FP_EXTEND)) {
16043       SDValue N00 = N0.getOperand(0);
16044       if (isFusedOp(N00)) {
16045         SDValue N002 = N00.getOperand(2);
16046         if (isContractableAndReassociableFMUL(N002) &&
16047             TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16048                                 N00.getValueType())) {
16049           return matcher.getNode(
16050               PreferredFusedOpcode, SL, VT,
16051               matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
16052               matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
16053               matcher.getNode(
16054                   PreferredFusedOpcode, SL, VT,
16055                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
16056                   matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
16057                   matcher.getNode(ISD::FNEG, SL, VT, N1)));
16058         }
16059       }
16060     }
16061 
16062     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
16063     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
16064     if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
16065         N1->hasOneUse()) {
16066       SDValue N120 = N1.getOperand(2).getOperand(0);
16067       if (isContractableAndReassociableFMUL(N120) &&
16068           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16069                               N120.getValueType())) {
16070         SDValue N1200 = N120.getOperand(0);
16071         SDValue N1201 = N120.getOperand(1);
16072         return matcher.getNode(
16073             PreferredFusedOpcode, SL, VT,
16074             matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
16075             N1.getOperand(1),
16076             matcher.getNode(
16077                 PreferredFusedOpcode, SL, VT,
16078                 matcher.getNode(ISD::FNEG, SL, VT,
16079                                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
16080                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
16081       }
16082     }
16083 
16084     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
16085     //   -> (fma (fneg (fpext y)), (fpext z),
16086     //           (fma (fneg (fpext u)), (fpext v), x))
16087     // FIXME: This turns two single-precision and one double-precision
16088     // operation into two double-precision operations, which might not be
16089     // interesting for all targets, especially GPUs.
16090     if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
16091       SDValue CvtSrc = N1.getOperand(0);
16092       SDValue N100 = CvtSrc.getOperand(0);
16093       SDValue N101 = CvtSrc.getOperand(1);
16094       SDValue N102 = CvtSrc.getOperand(2);
16095       if (isContractableAndReassociableFMUL(N102) &&
16096           TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
16097                               CvtSrc.getValueType())) {
16098         SDValue N1020 = N102.getOperand(0);
16099         SDValue N1021 = N102.getOperand(1);
16100         return matcher.getNode(
16101             PreferredFusedOpcode, SL, VT,
16102             matcher.getNode(ISD::FNEG, SL, VT,
16103                             matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
16104             matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
16105             matcher.getNode(
16106                 PreferredFusedOpcode, SL, VT,
16107                 matcher.getNode(ISD::FNEG, SL, VT,
16108                                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
16109                 matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
16110       }
16111     }
16112   }
16113 
16114   return SDValue();
16115 }
16116 
16117 /// Try to perform FMA combining on a given FMUL node based on the distributive
16118 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
16119 /// subtraction instead of addition).
16120 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
16121   SDValue N0 = N->getOperand(0);
16122   SDValue N1 = N->getOperand(1);
16123   EVT VT = N->getValueType(0);
16124   SDLoc SL(N);
16125 
16126   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
16127 
16128   const TargetOptions &Options = DAG.getTarget().Options;
16129 
16130   // The transforms below are incorrect when x == 0 and y == inf, because the
16131   // intermediate multiplication produces a nan.
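  // E.g. (fmul (fadd 0.0, 1.0), inf) is inf, but the transformed
  // (fma 0.0, inf, inf) computes 0.0 * inf -> nan and yields nan.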
16132   SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1;
16133   if (!hasNoInfs(Options, FAdd))
16134     return SDValue();
16135 
16136   // Floating-point multiply-add without intermediate rounding.
16137   bool HasFMA =
16138       isContractableFMUL(Options, SDValue(N, 0)) &&
16139       TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
16140       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
16141 
16142   // Floating-point multiply-add with intermediate rounding. This can result
16143   // in a less precise result due to the changed rounding order.
16144   bool HasFMAD = Options.UnsafeFPMath &&
16145                  (LegalOperations && TLI.isFMADLegal(DAG, N));
16146 
16147   // No valid opcode, do not combine.
16148   if (!HasFMAD && !HasFMA)
16149     return SDValue();
16150 
16151   // Always prefer FMAD to FMA for precision.
16152   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
16153   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
16154 
16155   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
16156   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
16157   auto FuseFADD = [&](SDValue X, SDValue Y) {
16158     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
16159       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
16160         if (C->isExactlyValue(+1.0))
16161           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16162                              Y);
16163         if (C->isExactlyValue(-1.0))
16164           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16165                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16166       }
16167     }
16168     return SDValue();
16169   };
16170 
16171   if (SDValue FMA = FuseFADD(N0, N1))
16172     return FMA;
16173   if (SDValue FMA = FuseFADD(N1, N0))
16174     return FMA;
16175 
16176   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
16177   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
16178   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
16179   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
16180   auto FuseFSUB = [&](SDValue X, SDValue Y) {
16181     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
16182       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
16183         if (C0->isExactlyValue(+1.0))
16184           return DAG.getNode(PreferredFusedOpcode, SL, VT,
16185                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16186                              Y);
16187         if (C0->isExactlyValue(-1.0))
16188           return DAG.getNode(PreferredFusedOpcode, SL, VT,
16189                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
16190                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16191       }
16192       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
16193         if (C1->isExactlyValue(+1.0))
16194           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16195                              DAG.getNode(ISD::FNEG, SL, VT, Y));
16196         if (C1->isExactlyValue(-1.0))
16197           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
16198                              Y);
16199       }
16200     }
16201     return SDValue();
16202   };
16203 
16204   if (SDValue FMA = FuseFSUB(N0, N1))
16205     return FMA;
16206   if (SDValue FMA = FuseFSUB(N1, N0))
16207     return FMA;
16208 
16209   return SDValue();
16210 }
16211 
16212 SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
16213   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16214 
16215   // FADD -> FMA combines:
16216   if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
16217     if (Fused.getOpcode() != ISD::DELETED_NODE)
16218       AddToWorklist(Fused.getNode());
16219     return Fused;
16220   }
16221   return SDValue();
16222 }
16223 
16224 SDValue DAGCombiner::visitFADD(SDNode *N) {
16225   SDValue N0 = N->getOperand(0);
16226   SDValue N1 = N->getOperand(1);
16227   SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0);
16228   SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1);
16229   EVT VT = N->getValueType(0);
16230   SDLoc DL(N);
16231   const TargetOptions &Options = DAG.getTarget().Options;
16232   SDNodeFlags Flags = N->getFlags();
16233   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16234 
16235   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16236     return R;
16237 
16238   // fold (fadd c1, c2) -> c1 + c2
16239   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FADD, DL, VT, {N0, N1}))
16240     return C;
16241 
16242   // canonicalize constant to RHS
16243   if (N0CFP && !N1CFP)
16244     return DAG.getNode(ISD::FADD, DL, VT, N1, N0);
16245 
16246   // fold vector ops
16247   if (VT.isVector())
16248     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16249       return FoldedVOp;
16250 
16251   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
16252   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
16253   if (N1C && N1C->isZero())
    if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
        Flags.hasNoSignedZeros())
16255       return N0;
16256 
16257   if (SDValue NewSel = foldBinOpIntoSelect(N))
16258     return NewSel;
16259 
16260   // fold (fadd A, (fneg B)) -> (fsub A, B)
16261   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16262     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16263             N1, DAG, LegalOperations, ForCodeSize))
16264       return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1);
16265 
16266   // fold (fadd (fneg A), B) -> (fsub B, A)
16267   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
16268     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16269             N0, DAG, LegalOperations, ForCodeSize))
16270       return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0);
16271 
16272   auto isFMulNegTwo = [](SDValue FMul) {
16273     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
16274       return false;
16275     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
16276     return C && C->isExactlyValue(-2.0);
16277   };
16278 
16279   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
16280   if (isFMulNegTwo(N0)) {
16281     SDValue B = N0.getOperand(0);
16282     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16283     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add);
16284   }
16285   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
16286   if (isFMulNegTwo(N1)) {
16287     SDValue B = N1.getOperand(0);
16288     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B);
16289     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add);
16290   }
16291 
  // No FP constant should be created after legalization, as the Instruction
  // Selection pass has a hard time dealing with FP constants.
16294   bool AllowNewConst = (Level < AfterLegalizeDAG);
16295 
16296   // If nnan is enabled, fold lots of things.
16297   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
16298     // If allowed, fold (fadd (fneg x), x) -> 0.0
16299     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
16300       return DAG.getConstantFP(0.0, DL, VT);
16301 
16302     // If allowed, fold (fadd x, (fneg x)) -> 0.0
16303     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
16304       return DAG.getConstantFP(0.0, DL, VT);
16305   }
16306 
16307   // If 'unsafe math' or reassoc and nsz, fold lots of things.
16308   // TODO: break out portions of the transformations below for which Unsafe is
16309   //       considered and which do not require both nsz and reassoc
16310   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16311        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16312       AllowNewConst) {
16313     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
16314     if (N1CFP && N0.getOpcode() == ISD::FADD &&
16315         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16316       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1);
16317       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC);
16318     }
16319 
16320     // We can fold chains of FADD's of the same value into multiplications.
16321     // This transform is not safe in general because we are reducing the number
16322     // of rounding steps.
16323     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
16324       if (N0.getOpcode() == ISD::FMUL) {
16325         SDNode *CFP00 =
16326             DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16327         SDNode *CFP01 =
16328             DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
16329 
16330         // (fadd (fmul x, c), x) -> (fmul x, c+1)
16331         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
16332           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16333                                        DAG.getConstantFP(1.0, DL, VT));
16334           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP);
16335         }
16336 
16337         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
16338         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
16339             N1.getOperand(0) == N1.getOperand(1) &&
16340             N0.getOperand(0) == N1.getOperand(0)) {
16341           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
16342                                        DAG.getConstantFP(2.0, DL, VT));
16343           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP);
16344         }
16345       }
16346 
16347       if (N1.getOpcode() == ISD::FMUL) {
16348         SDNode *CFP10 =
16349             DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16350         SDNode *CFP11 =
16351             DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
16352 
16353         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
16354         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
16355           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16356                                        DAG.getConstantFP(1.0, DL, VT));
16357           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP);
16358         }
16359 
16360         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
16361         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
16362             N0.getOperand(0) == N0.getOperand(1) &&
16363             N1.getOperand(0) == N0.getOperand(0)) {
16364           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
16365                                        DAG.getConstantFP(2.0, DL, VT));
16366           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP);
16367         }
16368       }
16369 
16370       if (N0.getOpcode() == ISD::FADD) {
16371         SDNode *CFP00 =
16372             DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
16373         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
16374         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
16375             (N0.getOperand(0) == N1)) {
16376           return DAG.getNode(ISD::FMUL, DL, VT, N1,
16377                              DAG.getConstantFP(3.0, DL, VT));
16378         }
16379       }
16380 
16381       if (N1.getOpcode() == ISD::FADD) {
16382         SDNode *CFP10 =
16383             DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
16384         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
16385         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
16386             N1.getOperand(0) == N0) {
16387           return DAG.getNode(ISD::FMUL, DL, VT, N0,
16388                              DAG.getConstantFP(3.0, DL, VT));
16389         }
16390       }
16391 
16392       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
16393       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
16394           N0.getOperand(0) == N0.getOperand(1) &&
16395           N1.getOperand(0) == N1.getOperand(1) &&
16396           N0.getOperand(0) == N1.getOperand(0)) {
16397         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
16398                            DAG.getConstantFP(4.0, DL, VT));
16399       }
16400     }
16401 
16402     // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
16403     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
16404                                           VT, N0, N1, Flags))
16405       return SD;
16406   } // enable-unsafe-fp-math
16407 
16408   // FADD -> FMA combines:
16409   if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
16410     if (Fused.getOpcode() != ISD::DELETED_NODE)
16411       AddToWorklist(Fused.getNode());
16412     return Fused;
16413   }
16414   return SDValue();
16415 }
16416 
16417 SDValue DAGCombiner::visitSTRICT_FADD(SDNode *N) {
16418   SDValue Chain = N->getOperand(0);
16419   SDValue N0 = N->getOperand(1);
16420   SDValue N1 = N->getOperand(2);
16421   EVT VT = N->getValueType(0);
16422   EVT ChainVT = N->getValueType(1);
16423   SDLoc DL(N);
16424   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16425 
16426   // fold (strict_fadd A, (fneg B)) -> (strict_fsub A, B)
16427   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16428     if (SDValue NegN1 = TLI.getCheaperNegatedExpression(
16429             N1, DAG, LegalOperations, ForCodeSize)) {
16430       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16431                          {Chain, N0, NegN1});
16432     }
16433 
16434   // fold (strict_fadd (fneg A), B) -> (strict_fsub B, A)
16435   if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::STRICT_FSUB, VT))
16436     if (SDValue NegN0 = TLI.getCheaperNegatedExpression(
16437             N0, DAG, LegalOperations, ForCodeSize)) {
16438       return DAG.getNode(ISD::STRICT_FSUB, DL, DAG.getVTList(VT, ChainVT),
16439                          {Chain, N1, NegN0});
16440     }
16441   return SDValue();
16442 }
16443 
16444 SDValue DAGCombiner::visitFSUB(SDNode *N) {
16445   SDValue N0 = N->getOperand(0);
16446   SDValue N1 = N->getOperand(1);
16447   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
16448   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16449   EVT VT = N->getValueType(0);
16450   SDLoc DL(N);
16451   const TargetOptions &Options = DAG.getTarget().Options;
16452   const SDNodeFlags Flags = N->getFlags();
16453   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16454 
16455   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16456     return R;
16457 
16458   // fold (fsub c1, c2) -> c1-c2
16459   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FSUB, DL, VT, {N0, N1}))
16460     return C;
16461 
16462   // fold vector ops
16463   if (VT.isVector())
16464     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16465       return FoldedVOp;
16466 
16467   if (SDValue NewSel = foldBinOpIntoSelect(N))
16468     return NewSel;
16469 
16470   // (fsub A, 0) -> A
16471   if (N1CFP && N1CFP->isZero()) {
16472     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
16473         Flags.hasNoSignedZeros()) {
16474       return N0;
16475     }
16476   }
16477 
16478   if (N0 == N1) {
16479     // (fsub x, x) -> 0.0
16480     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
16481       return DAG.getConstantFP(0.0f, DL, VT);
16482   }
16483 
16484   // (fsub -0.0, N1) -> -N1
16485   if (N0CFP && N0CFP->isZero()) {
16486     if (N0CFP->isNegative() ||
16487         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
16488       // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
16489       // flushed to zero, unless all users treat denorms as zero (DAZ).
16490       // FIXME: This transform will change the sign of a NaN and the behavior
16491       // of a signaling NaN. It is only valid when a NoNaN flag is present.
16492       DenormalMode DenormMode = DAG.getDenormalMode(VT);
16493       if (DenormMode == DenormalMode::getIEEE()) {
16494         if (SDValue NegN1 =
16495                 TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16496           return NegN1;
16497         if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
16498           return DAG.getNode(ISD::FNEG, DL, VT, N1);
16499       }
16500     }
16501   }
16502 
16503   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
16504        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
16505       N1.getOpcode() == ISD::FADD) {
16506     // X - (X + Y) -> -Y
16507     if (N0 == N1->getOperand(0))
16508       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
16509     // X - (Y + X) -> -Y
16510     if (N0 == N1->getOperand(1))
16511       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
16512   }
16513 
16514   // fold (fsub A, (fneg B)) -> (fadd A, B)
16515   if (SDValue NegN1 =
16516           TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
16517     return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
16518 
16519   // FSUB -> FMA combines:
16520   if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
16521     AddToWorklist(Fused.getNode());
16522     return Fused;
16523   }
16524 
16525   return SDValue();
16526 }
16527 
16528 // Transform IEEE Floats:
16529 //      (fmul C, (uitofp Pow2))
16530 //          -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
16531 //      (fdiv C, (uitofp Pow2))
16532 //          -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
16533 //
// The rationale is that fmul/fdiv by a power of 2 just changes the exponent,
// so there is no need for more than an add/sub.
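//
// For example, with f32 (23 mantissa bits): (fmul C, (uitofp 8)) with
// Log2(8) == 3 becomes (add (bitcast_to_INT C), (3 << 23)), which bumps the
// biased-exponent field of C by 3, i.e. multiplies C by 8.0.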
16536 //
16537 // This is valid under the following circumstances:
16538 // 1) We are dealing with IEEE floats
16539 // 2) C is normal
16540 // 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds.
// TODO: Much of this could also be used for generating `ldexp` on targets that
// prefer it.
16543 SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) {
16544   EVT VT = N->getValueType(0);
16545   SDValue ConstOp, Pow2Op;
16546 
16547   std::optional<int> Mantissa;
16548   auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) {
16549     if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV)
16550       return false;
16551 
16552     ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx));
16553     Pow2Op = N->getOperand(1 - ConstOpIdx);
16554     if (Pow2Op.getOpcode() != ISD::UINT_TO_FP &&
16555         (Pow2Op.getOpcode() != ISD::SINT_TO_FP ||
16556          !DAG.computeKnownBits(Pow2Op).isNonNegative()))
16557       return false;
16558 
16559     Pow2Op = Pow2Op.getOperand(0);
16560 
16561     // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`.
16562     // TODO: We could use knownbits to make this bound more precise.
16563     int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits();
16564 
16565     auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) {
16566       if (CFP == nullptr)
16567         return false;
16568 
16569       const APFloat &APF = CFP->getValueAPF();
16570 
      // Make sure we have a normal/IEEE constant.
16572       if (!APF.isNormal() || !APF.isIEEE())
16573         return false;
16574 
      // Make sure the float's exponent is within the bounds for which this
      // transform produces a bitwise-equal value.
16577       int CurExp = ilogb(APF);
16578       // FMul by pow2 will only increase exponent.
16579       int MinExp =
16580           N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange);
16581       // FDiv by pow2 will only decrease exponent.
16582       int MaxExp =
16583           N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange);
16584       if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) ||
16585           MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics()))
16586         return false;
16587 
16588       // Finally make sure we actually know the mantissa for the float type.
16589       int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1;
16590       if (!Mantissa)
16591         Mantissa = ThisMantissa;
16592 
16593       return *Mantissa == ThisMantissa && ThisMantissa > 0;
16594     };
16595 
16596     // TODO: We may be able to include undefs.
16597     return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
16598   };
16599 
16600   if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
16601     return SDValue();
16602 
16603   if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
16604     return SDValue();
16605 
16606   // Get log2 after all other checks have taken place. This is because
16607   // BuildLogBase2 may create a new node.
16608   SDLoc DL(N);
16609   // Get Log2 type with same bitwidth as the float type (VT).
16610   EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
16611   if (VT.isVector())
16612     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
16613                                 VT.getVectorElementCount());
16614 
16615   SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
16616                                /*InexpensiveOnly*/ true, NewIntVT);
16617   if (!Log2)
16618     return SDValue();
16619 
16620   // Perform actual transform.
16621   SDValue MantissaShiftCnt =
16622       DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
  // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should
  // fold to `(X << C1) + (C << C1)`, but that isn't always the case because
  // of the cast. We could implement that here by handling the casts.
16626   SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
16627   SDValue ResAsInt =
16628       DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
16629                   NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
16630   SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
16631   return ResAsFP;
16632 }
16633 
16634 SDValue DAGCombiner::visitFMUL(SDNode *N) {
16635   SDValue N0 = N->getOperand(0);
16636   SDValue N1 = N->getOperand(1);
16637   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
16638   EVT VT = N->getValueType(0);
16639   SDLoc DL(N);
16640   const TargetOptions &Options = DAG.getTarget().Options;
16641   const SDNodeFlags Flags = N->getFlags();
16642   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16643 
16644   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
16645     return R;
16646 
16647   // fold (fmul c1, c2) -> c1*c2
16648   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FMUL, DL, VT, {N0, N1}))
16649     return C;
16650 
16651   // canonicalize constant to RHS
16652   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16653      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16654     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0);
16655 
16656   // fold vector ops
16657   if (VT.isVector())
16658     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
16659       return FoldedVOp;
16660 
16661   if (SDValue NewSel = foldBinOpIntoSelect(N))
16662     return NewSel;
16663 
16664   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
16665     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
16666     if (DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16667         N0.getOpcode() == ISD::FMUL) {
16668       SDValue N00 = N0.getOperand(0);
16669       SDValue N01 = N0.getOperand(1);
16670       // Avoid an infinite loop by making sure that N00 is not a constant
16671       // (the inner multiply has not been constant folded yet).
16672       if (DAG.isConstantFPBuildVectorOrConstantFP(N01) &&
16673           !DAG.isConstantFPBuildVectorOrConstantFP(N00)) {
16674         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1);
16675         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts);
16676       }
16677     }
16678 
    // Match a special case: we convert X * 2.0 into fadd.
16680     // fmul (fadd X, X), C -> fmul X, 2.0 * C
16681     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
16682         N0.getOperand(0) == N0.getOperand(1)) {
16683       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
16684       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
16685       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
16686     }
16687 
16688     // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
16689     if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
16690                                           VT, N0, N1, Flags))
16691       return SD;
16692   }
16693 
16694   // fold (fmul X, 2.0) -> (fadd X, X)
16695   if (N1CFP && N1CFP->isExactlyValue(+2.0))
16696     return DAG.getNode(ISD::FADD, DL, VT, N0, N0);
16697 
16698   // fold (fmul X, -1.0) -> (fsub -0.0, X)
16699   if (N1CFP && N1CFP->isExactlyValue(-1.0)) {
16700     if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) {
16701       return DAG.getNode(ISD::FSUB, DL, VT,
16702                          DAG.getConstantFP(-0.0, DL, VT), N0, Flags);
16703     }
16704   }
16705 
16706   // -N0 * -N1 --> N0 * N1
16707   TargetLowering::NegatibleCost CostN0 =
16708       TargetLowering::NegatibleCost::Expensive;
16709   TargetLowering::NegatibleCost CostN1 =
16710       TargetLowering::NegatibleCost::Expensive;
16711   SDValue NegN0 =
16712       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16713   if (NegN0) {
16714     HandleSDNode NegN0Handle(NegN0);
16715     SDValue NegN1 =
16716         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16717     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16718                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
16719       return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1);
16720   }
16721 
16722   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
16723   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
16724   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
16725       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
16726       TLI.isOperationLegal(ISD::FABS, VT)) {
16727     SDValue Select = N0, X = N1;
16728     if (Select.getOpcode() != ISD::SELECT)
16729       std::swap(Select, X);
16730 
16731     SDValue Cond = Select.getOperand(0);
16732     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
16733     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
16734 
16735     if (TrueOpnd && FalseOpnd &&
16736         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
16737         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
16738         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
16739       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16740       switch (CC) {
16741       default: break;
16742       case ISD::SETOLT:
16743       case ISD::SETULT:
16744       case ISD::SETOLE:
16745       case ISD::SETULE:
16746       case ISD::SETLT:
16747       case ISD::SETLE:
16748         std::swap(TrueOpnd, FalseOpnd);
16749         [[fallthrough]];
16750       case ISD::SETOGT:
16751       case ISD::SETUGT:
16752       case ISD::SETOGE:
16753       case ISD::SETUGE:
16754       case ISD::SETGT:
16755       case ISD::SETGE:
16756         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
16757             TLI.isOperationLegal(ISD::FNEG, VT))
16758           return DAG.getNode(ISD::FNEG, DL, VT,
16759                    DAG.getNode(ISD::FABS, DL, VT, X));
16760         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
16761           return DAG.getNode(ISD::FABS, DL, VT, X);
16762 
16763         break;
16764       }
16765     }
16766   }
16767 
16768   // FMUL -> FMA combines:
16769   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
16770     AddToWorklist(Fused.getNode());
16771     return Fused;
16772   }
16773 
  // Don't run `combineFMulOrFDivWithIntPow2` until the FMUL -> FMA combine
  // has had a chance to run.
16776   if (SDValue R = combineFMulOrFDivWithIntPow2(N))
16777     return R;
16778 
16779   return SDValue();
16780 }
16781 
16782 template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
16783   SDValue N0 = N->getOperand(0);
16784   SDValue N1 = N->getOperand(1);
16785   SDValue N2 = N->getOperand(2);
16786   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
16787   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
16788   EVT VT = N->getValueType(0);
16789   SDLoc DL(N);
16790   const TargetOptions &Options = DAG.getTarget().Options;
16791   // FMA nodes have flags that propagate to the created nodes.
16792   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
16793   MatchContextClass matcher(DAG, TLI, N);
16794 
16795   bool CanReassociate =
16796       Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
16797 
16798   // Constant fold FMA.
16799   if (isa<ConstantFPSDNode>(N0) &&
16800       isa<ConstantFPSDNode>(N1) &&
16801       isa<ConstantFPSDNode>(N2)) {
16802     return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
16803   }
16804 
16805   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
16806   TargetLowering::NegatibleCost CostN0 =
16807       TargetLowering::NegatibleCost::Expensive;
16808   TargetLowering::NegatibleCost CostN1 =
16809       TargetLowering::NegatibleCost::Expensive;
16810   SDValue NegN0 =
16811       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
16812   if (NegN0) {
16813     HandleSDNode NegN0Handle(NegN0);
16814     SDValue NegN1 =
16815         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
16816     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
16817                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
16818       return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
16819   }
16820 
16821   // FIXME: use fast math flags instead of Options.UnsafeFPMath
16822   if (Options.UnsafeFPMath) {
16823     if (N0CFP && N0CFP->isZero())
16824       return N2;
16825     if (N1CFP && N1CFP->isZero())
16826       return N2;
16827   }
16828 
16829   // FIXME: Support splat of constant.
16830   if (N0CFP && N0CFP->isExactlyValue(1.0))
16831     return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
16832   if (N1CFP && N1CFP->isExactlyValue(1.0))
16833     return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
16834 
16835   // Canonicalize (fma c, x, y) -> (fma x, c, y)
16836   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
16837      !DAG.isConstantFPBuildVectorOrConstantFP(N1))
16838     return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
16839 
16840   if (CanReassociate) {
16841     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
16842     if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
16843         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16844         DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
16845       return matcher.getNode(
16846           ISD::FMUL, DL, VT, N0,
16847           matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
16848     }
16849 
16850     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
16851     if (matcher.match(N0, ISD::FMUL) &&
16852         DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
16853         DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
16854       return matcher.getNode(
16855           ISD::FMA, DL, VT, N0.getOperand(0),
16856           matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
16857     }
16858   }
16859 
  // (fma x, 1, y)  -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
16861   // FIXME: Support splat of constant.
16862   if (N1CFP) {
16863     if (N1CFP->isExactlyValue(1.0))
16864       return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
16865 
16866     if (N1CFP->isExactlyValue(-1.0) &&
16867         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
16868       SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
16869       AddToWorklist(RHSNeg.getNode());
16870       return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
16871     }
16872 
    // fma (fneg x), K, y -> fma x, -K, y
16874     if (matcher.match(N0, ISD::FNEG) &&
16875         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
16876          (N1.hasOneUse() &&
16877           !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
16878       return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
16879                              matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
16880     }
16881   }
16882 
16883   // FIXME: Support splat of constant.
16884   if (CanReassociate) {
16885     // (fma x, c, x) -> (fmul x, (c+1))
16886     if (N1CFP && N0 == N2) {
16887       return matcher.getNode(ISD::FMUL, DL, VT, N0,
16888                              matcher.getNode(ISD::FADD, DL, VT, N1,
16889                                              DAG.getConstantFP(1.0, DL, VT)));
16890     }
16891 
16892     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
16893     if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
16894       return matcher.getNode(ISD::FMUL, DL, VT, N0,
16895                              matcher.getNode(ISD::FADD, DL, VT, N1,
16896                                              DAG.getConstantFP(-1.0, DL, VT)));
16897     }
16898   }
16899 
16900   // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z))
16901   // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z))
16902   if (!TLI.isFNegFree(VT))
16903     if (SDValue Neg = TLI.getCheaperNegatedExpression(
16904             SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
16905       return matcher.getNode(ISD::FNEG, DL, VT, Neg);
16906   return SDValue();
16907 }
16908 
16909 SDValue DAGCombiner::visitFMAD(SDNode *N) {
16910   SDValue N0 = N->getOperand(0);
16911   SDValue N1 = N->getOperand(1);
16912   SDValue N2 = N->getOperand(2);
16913   EVT VT = N->getValueType(0);
16914   SDLoc DL(N);
16915 
16916   // Constant fold FMAD.
16917   if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
16918       isa<ConstantFPSDNode>(N2))
16919     return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
16920 
16921   return SDValue();
16922 }
16923 
16924 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
16925 // reciprocal.
16926 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is that different
// targets may have different costs for FDIV and FMUL, so sometimes the cost
// of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
// reason is that the critical path is increased from "one FDIV" to "one FDIV
// + one FMUL".
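//
// Illustrative IR-level sketch of the rewrite (assuming the arcp flag):
//   %x = fdiv arcp float %a, %d
//   %y = fdiv arcp float %b, %d
// becomes:
//   %r = fdiv arcp float 1.0, %d
//   %x = fmul arcp float %a, %r
//   %y = fmul arcp float %b, %r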
16931 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
16932   // TODO: Limit this transform based on optsize/minsize - it always creates at
16933   //       least 1 extra instruction. But the perf win may be substantial enough
16934   //       that only minsize should restrict this.
16935   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
16936   const SDNodeFlags Flags = N->getFlags();
16937   if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal()))
16938     return SDValue();
16939 
16940   // Skip if current node is a reciprocal/fneg-reciprocal.
16941   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
16942   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
16943   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
16944     return SDValue();
16945 
16946   // Exit early if the target does not want this transform or if there can't
16947   // possibly be enough uses of the divisor to make the transform worthwhile.
16948   unsigned MinUses = TLI.combineRepeatedFPDivisors();
16949 
16950   // For splat vectors, scale the number of uses by the splat factor. If we can
16951   // convert the division into a scalar op, that will likely be much faster.
16952   unsigned NumElts = 1;
16953   EVT VT = N->getValueType(0);
16954   if (VT.isVector() && DAG.isSplatValue(N1))
16955     NumElts = VT.getVectorMinNumElements();
16956 
16957   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
16958     return SDValue();
16959 
16960   // Find all FDIV users of the same divisor.
16961   // Use a set because duplicates may be present in the user list.
16962   SetVector<SDNode *> Users;
16963   for (auto *U : N1->uses()) {
16964     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
16965       // Skip X/sqrt(X) that has not been simplified to sqrt(X) yet.
16966       if (U->getOperand(1).getOpcode() == ISD::FSQRT &&
16967           U->getOperand(0) == U->getOperand(1).getOperand(0) &&
16968           U->getFlags().hasAllowReassociation() &&
16969           U->getFlags().hasNoSignedZeros())
16970         continue;
16971 
16972       // This division is eligible for optimization only if global unsafe math
16973       // is enabled or if this division allows reciprocal formation.
16974       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
16975         Users.insert(U);
16976     }
16977   }
16978 
16979   // Now that we have the actual number of divisor uses, make sure it meets
16980   // the minimum threshold specified by the target.
16981   if ((Users.size() * NumElts) < MinUses)
16982     return SDValue();
16983 
16984   SDLoc DL(N);
16985   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
16986   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
16987 
16988   // Dividend / Divisor -> Dividend * Reciprocal
16989   for (auto *U : Users) {
16990     SDValue Dividend = U->getOperand(0);
16991     if (Dividend != FPOne) {
16992       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
16993                                     Reciprocal, Flags);
16994       CombineTo(U, NewNode);
16995     } else if (U != Reciprocal.getNode()) {
16996       // In the absence of fast-math-flags, this user node is always the
16997       // same node as Reciprocal, but with FMF they may be different nodes.
16998       CombineTo(U, Reciprocal);
16999     }
17000   }
17001   return SDValue(N, 0);  // N was replaced.
17002 }
17003 
17004 SDValue DAGCombiner::visitFDIV(SDNode *N) {
17005   SDValue N0 = N->getOperand(0);
17006   SDValue N1 = N->getOperand(1);
17007   EVT VT = N->getValueType(0);
17008   SDLoc DL(N);
17009   const TargetOptions &Options = DAG.getTarget().Options;
17010   SDNodeFlags Flags = N->getFlags();
17011   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17012 
17013   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17014     return R;
17015 
17016   // fold (fdiv c1, c2) -> c1/c2
17017   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FDIV, DL, VT, {N0, N1}))
17018     return C;
17019 
17020   // fold vector ops
17021   if (VT.isVector())
17022     if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
17023       return FoldedVOp;
17024 
17025   if (SDValue NewSel = foldBinOpIntoSelect(N))
17026     return NewSel;
17027 
17028   if (SDValue V = combineRepeatedFPDivisors(N))
17029     return V;
17030 
17031   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
17032     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
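    // For example (illustrative): X / 4.0 -> X * 0.25 is exact (opOK), while
    // X / 3.0 -> X * (1.0 / 3.0) is merely inexact (opInexact), which the
    // reciprocal flags permit.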
17033     if (auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1)) {
17034       // Compute the reciprocal 1.0 / c2.
17035       const APFloat &N1APF = N1CFP->getValueAPF();
17036       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
17037       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
17038       // Only do the transform if the reciprocal is a legal fp immediate that
17039       // isn't too nasty (eg NaN, denormal, ...).
17040       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
17041           (!LegalOperations ||
17042            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
17043            // backend)... we should handle this gracefully after Legalize.
17044            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
17045            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
17046            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
17047         return DAG.getNode(ISD::FMUL, DL, VT, N0,
17048                            DAG.getConstantFP(Recip, DL, VT));
17049     }
17050 
17051     // If this FDIV is part of a reciprocal square root, it may be folded
17052     // into a target-specific square root estimate instruction.
17053     if (N1.getOpcode() == ISD::FSQRT) {
17054       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
17055         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17056     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
17057                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17058       if (SDValue RV =
17059               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17060         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
17061         AddToWorklist(RV.getNode());
17062         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17063       }
17064     } else if (N1.getOpcode() == ISD::FP_ROUND &&
17065                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17066       if (SDValue RV =
17067               buildRsqrtEstimate(N1.getOperand(0).getOperand(0), Flags)) {
17068         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
17069         AddToWorklist(RV.getNode());
17070         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV);
17071       }
17072     } else if (N1.getOpcode() == ISD::FMUL) {
17073       // Look through an FMUL. Even though this won't remove the FDIV directly,
17074       // it's still worthwhile to get rid of the FSQRT if possible.
17075       SDValue Sqrt, Y;
17076       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
17077         Sqrt = N1.getOperand(0);
17078         Y = N1.getOperand(1);
17079       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
17080         Sqrt = N1.getOperand(1);
17081         Y = N1.getOperand(0);
17082       }
17083       if (Sqrt.getNode()) {
17084         // If the other multiply operand is known positive, pull it into the
17085         // sqrt. That will eliminate the division if we convert to an estimate.
17086         if (Flags.hasAllowReassociation() && N1.hasOneUse() &&
17087             N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse()) {
17088           SDValue A;
17089           if (Y.getOpcode() == ISD::FABS && Y.hasOneUse())
17090             A = Y.getOperand(0);
17091           else if (Y == Sqrt.getOperand(0))
17092             A = Y;
17093           if (A) {
17094             // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z)
17095             // X / (A * sqrt(A))       --> X / sqrt(A*A*A) --> X * rsqrt(A*A*A)
17096             SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, A, A);
17097             SDValue AAZ =
17098                 DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0));
17099             if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags))
17100               return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt);
17101 
17102             // Estimate creation failed. Clean up speculatively created nodes.
17103             recursivelyDeleteUnusedNodes(AAZ.getNode());
17104           }
17105         }
17106 
        // We found an FSQRT, so try to make this fold:
17108         // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y)
17109         if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) {
17110           SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y);
17111           AddToWorklist(Div.getNode());
17112           return DAG.getNode(ISD::FMUL, DL, VT, N0, Div);
17113         }
17114       }
17115     }
17116 
17117     // Fold into a reciprocal estimate and multiply instead of a real divide.
17118     if (Options.NoInfsFPMath || Flags.hasNoInfs())
17119       if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
17120         return RV;
17121   }
17122 
17123   // Fold X/Sqrt(X) -> Sqrt(X)
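  // (Algebraically X/sqrt(X) == sqrt(X) for positive X; the nsz/reassoc
  // requirements below cover the signed-zero and rounding caveats.)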
17124   if ((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
17125       (Options.UnsafeFPMath || Flags.hasAllowReassociation()))
17126     if (N1.getOpcode() == ISD::FSQRT && N0 == N1.getOperand(0))
17127       return N1;
17128 
17129   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
17130   TargetLowering::NegatibleCost CostN0 =
17131       TargetLowering::NegatibleCost::Expensive;
17132   TargetLowering::NegatibleCost CostN1 =
17133       TargetLowering::NegatibleCost::Expensive;
17134   SDValue NegN0 =
17135       TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0);
17136   if (NegN0) {
17137     HandleSDNode NegN0Handle(NegN0);
17138     SDValue NegN1 =
17139         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
17140     if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
17141                   CostN1 == TargetLowering::NegatibleCost::Cheaper))
17142       return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1);
17143   }
17144 
17145   if (SDValue R = combineFMulOrFDivWithIntPow2(N))
17146     return R;
17147 
17148   return SDValue();
17149 }
17150 
17151 SDValue DAGCombiner::visitFREM(SDNode *N) {
17152   SDValue N0 = N->getOperand(0);
17153   SDValue N1 = N->getOperand(1);
17154   EVT VT = N->getValueType(0);
17155   SDNodeFlags Flags = N->getFlags();
17156   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17157 
17158   if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags))
17159     return R;
17160 
17161   // fold (frem c1, c2) -> fmod(c1,c2)
17162   if (SDValue C = DAG.FoldConstantArithmetic(ISD::FREM, SDLoc(N), VT, {N0, N1}))
17163     return C;
17164 
17165   if (SDValue NewSel = foldBinOpIntoSelect(N))
17166     return NewSel;
17167 
17168   return SDValue();
17169 }
17170 
17171 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
17172   SDNodeFlags Flags = N->getFlags();
17173   const TargetOptions &Options = DAG.getTarget().Options;
17174 
17175   // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as:
17176   // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN
17177   if (!Flags.hasApproximateFuncs() ||
17178       (!Options.NoInfsFPMath && !Flags.hasNoInfs()))
17179     return SDValue();
17180 
17181   SDValue N0 = N->getOperand(0);
17182   if (TLI.isFsqrtCheap(N0, DAG))
17183     return SDValue();
17184 
17185   // FSQRT nodes have flags that propagate to the created nodes.
17186   // TODO: If this is N0/sqrt(N0), and we reach this node before trying to
17187   //       transform the fdiv, we may produce a sub-optimal estimate sequence
17188   //       because the reciprocal calculation may not have to filter out a
17189   //       0.0 input.
17190   return buildSqrtEstimate(N0, Flags);
17191 }
17192 
17193 /// copysign(x, fp_extend(y)) -> copysign(x, y)
17194 /// copysign(x, fp_round(y)) -> copysign(x, y)
/// Operands to the function are the types of X and Y, respectively.
17196 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
17197   // Always fold no-op FP casts.
17198   if (XTy == YTy)
17199     return true;
17200 
  // Do not optimize out type conversions of the f128 type yet.
  // For some targets like x86_64, the configuration keeps one f128
  // value in one SSE register, but instruction selection cannot handle
  // FCOPYSIGN on SSE registers yet.
17205   if (YTy == MVT::f128)
17206     return false;
17207 
17208   return !YTy.isVector() || EnableVectorFCopySignExtendRound;
17209 }
17210 
17211 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
17212   SDValue N1 = N->getOperand(1);
17213   if (N1.getOpcode() != ISD::FP_EXTEND &&
17214       N1.getOpcode() != ISD::FP_ROUND)
17215     return false;
17216   EVT N1VT = N1->getValueType(0);
17217   EVT N1Op0VT = N1->getOperand(0).getValueType();
17218   return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
17219 }
17220 
17221 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
17222   SDValue N0 = N->getOperand(0);
17223   SDValue N1 = N->getOperand(1);
17224   EVT VT = N->getValueType(0);
17225 
17226   // fold (fcopysign c1, c2) -> fcopysign(c1,c2)
17227   if (SDValue C =
17228           DAG.FoldConstantArithmetic(ISD::FCOPYSIGN, SDLoc(N), VT, {N0, N1}))
17229     return C;
17230 
17231   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
17232     const APFloat &V = N1C->getValueAPF();
17233     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
17234     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
17235     if (!V.isNegative()) {
17236       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
17237         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17238     } else {
17239       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
17240         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
17241                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
17242     }
17243   }
17244 
17245   // copysign(fabs(x), y) -> copysign(x, y)
17246   // copysign(fneg(x), y) -> copysign(x, y)
17247   // copysign(copysign(x,z), y) -> copysign(x, y)
17248   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
17249       N0.getOpcode() == ISD::FCOPYSIGN)
17250     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
17251 
  // copysign(x, fabs(y)) -> fabs(x)
17253   if (N1.getOpcode() == ISD::FABS)
17254     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17255 
17256   // copysign(x, copysign(y,z)) -> copysign(x, z)
17257   if (N1.getOpcode() == ISD::FCOPYSIGN)
17258     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
17259 
17260   // copysign(x, fp_extend(y)) -> copysign(x, y)
17261   // copysign(x, fp_round(y)) -> copysign(x, y)
17262   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
17263     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
17264 
17265   return SDValue();
17266 }
17267 
17268 SDValue DAGCombiner::visitFPOW(SDNode *N) {
17269   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
17270   if (!ExponentC)
17271     return SDValue();
17272   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17273 
17274   // Try to convert x ** (1/3) into cube root.
17275   // TODO: Handle the various flavors of long double.
17276   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
17277   //       Some range near 1/3 should be fine.
17278   EVT VT = N->getValueType(0);
17279   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
17280       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
17281     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
17282     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
    // pow(-val, 1/3) =  nan; cbrt(-val) = -cbrt(val).
17284     // For regular numbers, rounding may cause the results to differ.
17285     // Therefore, we require { nsz ninf nnan afn } for this transform.
17286     // TODO: We could select out the special cases if we don't have nsz/ninf.
17287     SDNodeFlags Flags = N->getFlags();
17288     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
17289         !Flags.hasApproximateFuncs())
17290       return SDValue();
17291 
17292     // Do not create a cbrt() libcall if the target does not have it, and do not
17293     // turn a pow that has lowering support into a cbrt() libcall.
17294     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
17295         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
17296          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
17297       return SDValue();
17298 
17299     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0));
17300   }
17301 
17302   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
17303   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
17304   // TODO: This could be extended (using a target hook) to handle smaller
17305   // power-of-2 fractional exponents.
17306   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
17307   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
17308   if (ExponentIs025 || ExponentIs075) {
17309     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
17310     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
17311     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
17312     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
17313     // For regular numbers, rounding may cause the results to differ.
17314     // Therefore, we require { nsz ninf afn } for this transform.
17315     // TODO: We could select out the special cases if we don't have nsz/ninf.
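    //
    // Illustrative: pow(16.0, 0.25) = sqrt(sqrt(16.0)) = sqrt(4.0) = 2.0, and
    // pow(16.0, 0.75) = sqrt(16.0) * sqrt(sqrt(16.0)) = 4.0 * 2.0 = 8.0.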
17316     SDNodeFlags Flags = N->getFlags();
17317 
17318     // We only need no signed zeros for the 0.25 case.
17319     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
17320         !Flags.hasApproximateFuncs())
17321       return SDValue();
17322 
17323     // Don't double the number of libcalls. We are trying to inline fast code.
17324     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
17325       return SDValue();
17326 
17327     // Assume that libcalls are the smallest code.
17328     // TODO: This restriction should probably be lifted for vectors.
17329     if (ForCodeSize)
17330       return SDValue();
17331 
17332     // pow(X, 0.25) --> sqrt(sqrt(X))
17333     SDLoc DL(N);
17334     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0));
17335     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt);
17336     if (ExponentIs025)
17337       return SqrtSqrt;
17338     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
17339     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt);
17340   }
17341 
17342   return SDValue();
17343 }
17344 
17345 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
17346                                const TargetLowering &TLI) {
17347   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
17348   // replacing casts with a libcall. We also must be allowed to ignore -0.0
17349   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
17350   // conversions would return +0.0.
17351   // FIXME: We should be able to use node-level FMF here.
17352   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
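  //
  // Illustrative: sitofp (fptosi 3.7) = 3.0 = ftrunc(3.7), but for -0.7 the
  // integer round trip yields +0.0 while ftrunc(-0.7) = -0.0, hence the
  // NoSignedZerosFPMath requirement below.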
17353   EVT VT = N->getValueType(0);
17354   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
17355       !DAG.getTarget().Options.NoSignedZerosFPMath)
17356     return SDValue();
17357 
17358   // fptosi/fptoui round towards zero, so converting from FP to integer and
17359   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
17360   SDValue N0 = N->getOperand(0);
17361   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
17362       N0.getOperand(0).getValueType() == VT)
17363     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17364 
17365   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
17366       N0.getOperand(0).getValueType() == VT)
17367     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
17368 
17369   return SDValue();
17370 }
17371 
17372 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
17373   SDValue N0 = N->getOperand(0);
17374   EVT VT = N->getValueType(0);
17375   EVT OpVT = N0.getValueType();
17376 
17377   // [us]itofp(undef) = 0, because the result value is bounded.
17378   if (N0.isUndef())
17379     return DAG.getConstantFP(0.0, SDLoc(N), VT);
17380 
17381   // fold (sint_to_fp c1) -> c1fp
17382   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17383       // ...but only if the target supports immediate floating-point values
17384       (!LegalOperations ||
17385        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17386     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17387 
17388   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
17389   // but UINT_TO_FP is legal on this target, try to convert.
17390   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
17391       hasOperation(ISD::UINT_TO_FP, OpVT)) {
17392     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
17393     if (DAG.SignBitIsZero(N0))
17394       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17395   }
17396 
17397   // The next optimizations are desirable only if SELECT_CC can be lowered.
17398   // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0)
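  // (For an i1 input, 'true' is the signed value -1, which is why the select
  // arms below are -1.0 and 0.0.)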
17399   if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
17400       !VT.isVector() &&
17401       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17402     SDLoc DL(N);
17403     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT),
17404                          DAG.getConstantFP(0.0, DL, VT));
17405   }
17406 
17407   // fold (sint_to_fp (zext (setcc x, y, cc))) ->
17408   //      (select (setcc x, y, cc), 1.0, 0.0)
17409   if (N0.getOpcode() == ISD::ZERO_EXTEND &&
17410       N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
17411       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17412     SDLoc DL(N);
17413     return DAG.getSelect(DL, VT, N0.getOperand(0),
17414                          DAG.getConstantFP(1.0, DL, VT),
17415                          DAG.getConstantFP(0.0, DL, VT));
17416   }
17417 
17418   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17419     return FTrunc;
17420 
17421   return SDValue();
17422 }
17423 
17424 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
17425   SDValue N0 = N->getOperand(0);
17426   EVT VT = N->getValueType(0);
17427   EVT OpVT = N0.getValueType();
17428 
17429   // [us]itofp(undef) = 0, because the result value is bounded.
17430   if (N0.isUndef())
17431     return DAG.getConstantFP(0.0, SDLoc(N), VT);
17432 
17433   // fold (uint_to_fp c1) -> c1fp
17434   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
17435       // ...but only if the target supports immediate floating-point values
17436       (!LegalOperations ||
17437        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
17438     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
17439 
17440   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
17441   // but SINT_TO_FP is legal on this target, try to convert.
17442   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
17443       hasOperation(ISD::SINT_TO_FP, OpVT)) {
17444     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
17445     if (DAG.SignBitIsZero(N0))
17446       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
17447   }
17448 
17449   // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0)
17450   if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
17451       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
17452     SDLoc DL(N);
17453     return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT),
17454                          DAG.getConstantFP(0.0, DL, VT));
17455   }
17456 
17457   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
17458     return FTrunc;
17459 
17460   return SDValue();
17461 }
17462 
// Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
17464 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
17465   SDValue N0 = N->getOperand(0);
17466   EVT VT = N->getValueType(0);
17467 
17468   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
17469     return SDValue();
17470 
17471   SDValue Src = N0.getOperand(0);
17472   EVT SrcVT = Src.getValueType();
17473   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
17474   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
17475 
17476   // We can safely assume the conversion won't overflow the output range,
17477   // because (for example) (uint8_t)18293.f is undefined behavior.
17478 
17479   // Since we can assume the conversion won't overflow, our decision as to
17480   // whether the input will fit in the float should depend on the minimum
17481   // of the input range and output range.
17482 
17483   // This means this is also safe for a signed input and unsigned output, since
17484   // a negative input would lead to undefined behavior.
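  //
  // Illustrative: sitofp i16 -> f32 -> fptosi i32 uses InputSize = 15, which
  // f32's 24-bit precision covers, so it folds to sign_extend; an i32 round
  // trip through f32 (InputSize = 31 signed, 32 unsigned) does not fit and is
  // left alone.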
17485   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
17486   unsigned OutputSize = (int)VT.getScalarSizeInBits();
17487   unsigned ActualSize = std::min(InputSize, OutputSize);
17488   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
17489 
17490   // We can only fold away the float conversion if the input range can be
17491   // represented exactly in the float range.
17492   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
17493     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
17494       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
17495                                                        : ISD::ZERO_EXTEND;
17496       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
17497     }
17498     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
17499       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
17500     return DAG.getBitcast(VT, Src);
17501   }
17502   return SDValue();
17503 }
17504 
17505 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
17506   SDValue N0 = N->getOperand(0);
17507   EVT VT = N->getValueType(0);
17508 
17509   // fold (fp_to_sint undef) -> undef
17510   if (N0.isUndef())
17511     return DAG.getUNDEF(VT);
17512 
17513   // fold (fp_to_sint c1fp) -> c1
17514   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17515     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
17516 
17517   return FoldIntToFPToInt(N, DAG);
17518 }
17519 
17520 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
17521   SDValue N0 = N->getOperand(0);
17522   EVT VT = N->getValueType(0);
17523 
17524   // fold (fp_to_uint undef) -> undef
17525   if (N0.isUndef())
17526     return DAG.getUNDEF(VT);
17527 
17528   // fold (fp_to_uint c1fp) -> c1
17529   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17530     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
17531 
17532   return FoldIntToFPToInt(N, DAG);
17533 }
17534 
17535 SDValue DAGCombiner::visitXRINT(SDNode *N) {
17536   SDValue N0 = N->getOperand(0);
17537   EVT VT = N->getValueType(0);
17538 
17539   // fold (lrint|llrint undef) -> undef
17540   if (N0.isUndef())
17541     return DAG.getUNDEF(VT);
17542 
17543   // fold (lrint|llrint c1fp) -> c1
17544   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17545     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0);
17546 
17547   return SDValue();
17548 }
17549 
17550 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
17551   SDValue N0 = N->getOperand(0);
17552   SDValue N1 = N->getOperand(1);
17553   EVT VT = N->getValueType(0);
17554 
17555   // fold (fp_round c1fp) -> c1fp
17556   if (SDValue C =
17557           DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1}))
17558     return C;
17559 
17560   // fold (fp_round (fp_extend x)) -> x
17561   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
17562     return N0.getOperand(0);
17563 
17564   // fold (fp_round (fp_round x)) -> (fp_round x)
17565   if (N0.getOpcode() == ISD::FP_ROUND) {
17566     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
17567     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
17568 
17569     // Avoid folding legal fp_rounds into non-legal ones.
17570     if (!hasOperation(ISD::FP_ROUND, VT))
17571       return SDValue();
17572 
17573     // Skip this folding if it results in an fp_round from f80 to f16.
17574     //
17575     // f80 to f16 always generates an expensive (and as yet, unimplemented)
17576     // libcall to __truncxfhf2 instead of selecting native f16 conversion
17577     // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP on platforms
    // like x86.
17580     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
17581       return SDValue();
17582 
    // If the first fp_round isn't a value-preserving truncation, it might
    // introduce a tie in the second fp_round that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding once.
17587     // Also, this is a value preserving truncation iff both fp_round's are.
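    //
    // Decimal analogy (illustrative): rounding 0.0149 to three digits gives
    // 0.015, and rounding that to two digits gives 0.02, whereas rounding
    // 0.0149 to two digits directly gives 0.01.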
17588     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
17589       SDLoc DL(N);
17590       return DAG.getNode(
17591           ISD::FP_ROUND, DL, VT, N0.getOperand(0),
17592           DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true));
17593     }
17594   }
17595 
17596   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Note: From a legality perspective, this is a two-step transform.  First,
17598   // we duplicate the fp_round to the arguments of the copysign, then we
17599   // eliminate the fp_round on Y.  The second step requires an additional
17600   // predicate to match the implementation above.
17601   if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
17602       CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
17603                                        N0.getValueType())) {
17604     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
17605                               N0.getOperand(0), N1);
17606     AddToWorklist(Tmp.getNode());
17607     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
17608                        Tmp, N0.getOperand(1));
17609   }
17610 
17611   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17612     return NewVSel;
17613 
17614   return SDValue();
17615 }
17616 
17617 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
17618   SDValue N0 = N->getOperand(0);
17619   EVT VT = N->getValueType(0);
17620 
17621   if (VT.isVector())
17622     if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
17623       return FoldedVOp;
17624 
  // If this is fp_round(fp_extend x), don't fold it; allow ourselves to be
  // folded instead.
17626   if (N->hasOneUse() &&
17627       N->use_begin()->getOpcode() == ISD::FP_ROUND)
17628     return SDValue();
17629 
17630   // fold (fp_extend c1fp) -> c1fp
17631   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17632     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
17633 
17634   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
17635   if (N0.getOpcode() == ISD::FP16_TO_FP &&
17636       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
17637     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
17638 
  // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
  // value of X.
17641   if (N0.getOpcode() == ISD::FP_ROUND
17642       && N0.getConstantOperandVal(1) == 1) {
17643     SDValue In = N0.getOperand(0);
17644     if (In.getValueType() == VT) return In;
17645     if (VT.bitsLT(In.getValueType()))
17646       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
17647                          In, N0.getOperand(1));
17648     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
17649   }
17650 
17651   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
17652   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
17653       TLI.isLoadExtLegalOrCustom(ISD::EXTLOAD, VT, N0.getValueType())) {
17654     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
17655     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
17656                                      LN0->getChain(),
17657                                      LN0->getBasePtr(), N0.getValueType(),
17658                                      LN0->getMemOperand());
17659     CombineTo(N, ExtLoad);
17660     CombineTo(
17661         N0.getNode(),
17662         DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad,
17663                     DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)),
17664         ExtLoad.getValue(1));
17665     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
17666   }
17667 
17668   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
17669     return NewVSel;
17670 
17671   return SDValue();
17672 }
17673 
17674 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
17675   SDValue N0 = N->getOperand(0);
17676   EVT VT = N->getValueType(0);
17677 
17678   // fold (fceil c1) -> fceil(c1)
17679   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17680     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
17681 
17682   return SDValue();
17683 }
17684 
17685 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
17686   SDValue N0 = N->getOperand(0);
17687   EVT VT = N->getValueType(0);
17688 
17689   // fold (ftrunc c1) -> ftrunc(c1)
17690   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17691     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
17692 
17693   // fold ftrunc (known rounded int x) -> x
  // ftrunc is a part of the fptosi/fptoui expansion on some targets, so it is
  // likely to be generated when extracting an integer from a rounded
  // floating-point value.
17696   switch (N0.getOpcode()) {
17697   default: break;
17698   case ISD::FRINT:
17699   case ISD::FTRUNC:
17700   case ISD::FNEARBYINT:
17701   case ISD::FROUNDEVEN:
17702   case ISD::FFLOOR:
17703   case ISD::FCEIL:
17704     return N0;
17705   }
17706 
17707   return SDValue();
17708 }
17709 
17710 SDValue DAGCombiner::visitFFREXP(SDNode *N) {
17711   SDValue N0 = N->getOperand(0);
17712 
17713   // fold (ffrexp c1) -> ffrexp(c1)
17714   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17715     return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
17716   return SDValue();
17717 }
17718 
17719 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
17720   SDValue N0 = N->getOperand(0);
17721   EVT VT = N->getValueType(0);
17722 
17723   // fold (ffloor c1) -> ffloor(c1)
17724   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17725     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
17726 
17727   return SDValue();
17728 }
17729 
17730 SDValue DAGCombiner::visitFNEG(SDNode *N) {
17731   SDValue N0 = N->getOperand(0);
17732   EVT VT = N->getValueType(0);
17733   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17734 
17735   // Constant fold FNEG.
17736   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17737     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
17738 
17739   if (SDValue NegN0 =
17740           TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize))
17741     return NegN0;
17742 
17743   // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
17744   // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
17745   // know it was called from a context with a nsz flag if the input fsub does
17746   // not.
17747   if (N0.getOpcode() == ISD::FSUB &&
17748       (DAG.getTarget().Options.NoSignedZerosFPMath ||
17749        N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) {
17750     return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
17751                        N0.getOperand(0));
17752   }
17753 
17754   if (SDValue Cast = foldSignChangeInBitcast(N))
17755     return Cast;
17756 
17757   return SDValue();
17758 }
17759 
17760 SDValue DAGCombiner::visitFMinMax(SDNode *N) {
17761   SDValue N0 = N->getOperand(0);
17762   SDValue N1 = N->getOperand(1);
17763   EVT VT = N->getValueType(0);
17764   const SDNodeFlags Flags = N->getFlags();
17765   unsigned Opc = N->getOpcode();
17766   bool PropagatesNaN = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
17767   bool IsMin = Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM;
17768   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
17769 
17770   // Constant fold.
17771   if (SDValue C = DAG.FoldConstantArithmetic(Opc, SDLoc(N), VT, {N0, N1}))
17772     return C;
17773 
17774   // Canonicalize to constant on RHS.
17775   if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
17776       !DAG.isConstantFPBuildVectorOrConstantFP(N1))
17777     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
17778 
17779   if (const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1)) {
17780     const APFloat &AF = N1CFP->getValueAPF();
17781 
17782     // minnum(X, nan) -> X
17783     // maxnum(X, nan) -> X
17784     // minimum(X, nan) -> nan
17785     // maximum(X, nan) -> nan
17786     if (AF.isNaN())
17787       return PropagatesNaN ? N->getOperand(1) : N->getOperand(0);
17788 
17789     // In the following folds, inf can be replaced with the largest finite
17790     // float, if the ninf flag is set.
17791     if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
17792       // minnum(X, -inf) -> -inf
17793       // maxnum(X, +inf) -> +inf
17794       // minimum(X, -inf) -> -inf if nnan
17795       // maximum(X, +inf) -> +inf if nnan
17796       if (IsMin == AF.isNegative() && (!PropagatesNaN || Flags.hasNoNaNs()))
17797         return N->getOperand(1);
17798 
17799       // minnum(X, +inf) -> X if nnan
17800       // maxnum(X, -inf) -> X if nnan
17801       // minimum(X, +inf) -> X
17802       // maximum(X, -inf) -> X
17803       if (IsMin != AF.isNegative() && (PropagatesNaN || Flags.hasNoNaNs()))
17804         return N->getOperand(0);
17805     }
17806   }
17807 
17808   if (SDValue SD = reassociateReduction(
17809           PropagatesNaN
17810               ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
17811               : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
17812           Opc, SDLoc(N), VT, N0, N1, Flags))
17813     return SD;
17814 
17815   return SDValue();
17816 }
17817 
17818 SDValue DAGCombiner::visitFABS(SDNode *N) {
17819   SDValue N0 = N->getOperand(0);
17820   EVT VT = N->getValueType(0);
17821 
17822   // fold (fabs c1) -> fabs(c1)
17823   if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
17824     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
17825 
17826   // fold (fabs (fabs x)) -> (fabs x)
17827   if (N0.getOpcode() == ISD::FABS)
17828     return N->getOperand(0);
17829 
17830   // fold (fabs (fneg x)) -> (fabs x)
17831   // fold (fabs (fcopysign x, y)) -> (fabs x)
17832   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
17833     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
17834 
17835   if (SDValue Cast = foldSignChangeInBitcast(N))
17836     return Cast;
17837 
17838   return SDValue();
17839 }
17840 
17841 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
17842   SDValue Chain = N->getOperand(0);
17843   SDValue N1 = N->getOperand(1);
17844   SDValue N2 = N->getOperand(2);
17845 
17846   // BRCOND(FREEZE(cond)) is equivalent to BRCOND(cond) (both are
17847   // nondeterministic jumps).
17848   if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse()) {
17849     return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17850                        N1->getOperand(0), N2);
17851   }
17852 
17853   // Variant of the previous fold where there is a SETCC in between:
17854   //   BRCOND(SETCC(FREEZE(X), CONST, Cond))
17855   // =>
17856   //   BRCOND(FREEZE(SETCC(X, CONST, Cond)))
17857   // =>
17858   //   BRCOND(SETCC(X, CONST, Cond))
17859   // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
17860   // isn't equivalent to true or false.
17861   // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
17862   // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
17863   if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
17864     SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
17865     ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
17866     ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
17867     ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
17868     bool Updated = false;
17869 
17870     // Is 'X Cond C' always true or false?
17871     auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
17872       bool False = (Cond == ISD::SETULT && C->isZero()) ||
17873                    (Cond == ISD::SETLT && C->isMinSignedValue()) ||
17874                    (Cond == ISD::SETUGT && C->isAllOnes()) ||
17875                    (Cond == ISD::SETGT && C->isMaxSignedValue());
17876       bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
17877                   (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
17878                   (Cond == ISD::SETUGE && C->isZero()) ||
17879                   (Cond == ISD::SETGE && C->isMinSignedValue());
17880       return True || False;
17881     };
17882 
17883     if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
17884       if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
17885         S0 = S0->getOperand(0);
17886         Updated = true;
17887       }
17888     }
17889     if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
17890       if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
17891         S1 = S1->getOperand(0);
17892         Updated = true;
17893       }
17894     }
17895 
17896     if (Updated)
17897       return DAG.getNode(
17898           ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
17899           DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
17900   }
17901 
  // If the condition is a constant we could fold this into a fallthrough or
  // unconditional branch. However, that doesn't happen very often in normal
  // code, because Instcombine/SimplifyCFG should have handled the available
  // opportunities.
17905   // If we did this folding here, it would be necessary to update the
17906   // MachineBasicBlock CFG, which is awkward.
17907 
17908   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
17909   // on the target.
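  // For example, (brcond (setcc x, y, lt), dest) becomes
  // (br_cc lt, x, y, dest).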
17910   if (N1.getOpcode() == ISD::SETCC &&
17911       TLI.isOperationLegalOrCustom(ISD::BR_CC,
17912                                    N1.getOperand(0).getValueType())) {
17913     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
17914                        Chain, N1.getOperand(2),
17915                        N1.getOperand(0), N1.getOperand(1), N2);
17916   }
17917 
17918   if (N1.hasOneUse()) {
17919     // rebuildSetCC calls visitXor which may change the Chain when there is a
17920     // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes.
17921     HandleSDNode ChainHandle(Chain);
17922     if (SDValue NewN1 = rebuildSetCC(N1))
17923       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other,
17924                          ChainHandle.getValue(), NewN1, N2);
17925   }
17926 
17927   return SDValue();
17928 }
17929 
17930 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
17931   if (N.getOpcode() == ISD::SRL ||
17932       (N.getOpcode() == ISD::TRUNCATE &&
17933        (N.getOperand(0).hasOneUse() &&
17934         N.getOperand(0).getOpcode() == ISD::SRL))) {
    // Look past the truncate.
17936     if (N.getOpcode() == ISD::TRUNCATE)
17937       N = N.getOperand(0);
17938 
17939     // Match this pattern so that we can generate simpler code:
17940     //
17941     //   %a = ...
17942     //   %b = and i32 %a, 2
17943     //   %c = srl i32 %b, 1
17944     //   brcond i32 %c ...
17945     //
17946     // into
17947     //
17948     //   %a = ...
17949     //   %b = and i32 %a, 2
17950     //   %c = setcc eq %b, 0
17951     //   brcond %c ...
17952     //
17953     // This applies only when the AND constant value has one bit set and the
17954     // SRL constant is equal to the log2 of the AND constant. The back-end is
17955     // smart enough to convert the result into a TEST/JMP sequence.
17956     SDValue Op0 = N.getOperand(0);
17957     SDValue Op1 = N.getOperand(1);
17958 
17959     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
17960       SDValue AndOp1 = Op0.getOperand(1);
17961 
17962       if (AndOp1.getOpcode() == ISD::Constant) {
17963         const APInt &AndConst = AndOp1->getAsAPIntVal();
17964 
17965         if (AndConst.isPowerOf2() &&
17966             Op1->getAsAPIntVal() == AndConst.logBase2()) {
17967           SDLoc DL(N);
17968           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
17969                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
17970                               ISD::SETNE);
17971         }
17972       }
17973     }
17974   }
17975 
  // Transform (brcond (xor x, y)) -> (brcond (setcc x, y, ne))
  // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
17978   if (N.getOpcode() == ISD::XOR) {
17979     // Because we may call this on a speculatively constructed
17980     // SimplifiedSetCC Node, we need to simplify this node first.
17981     // Ideally this should be folded into SimplifySetCC and not
17982     // here. For now, grab a handle to N so we don't lose it from
    // replacements internal to the visit.
17984     HandleSDNode XORHandle(N);
17985     while (N.getOpcode() == ISD::XOR) {
17986       SDValue Tmp = visitXOR(N.getNode());
17987       // No simplification done.
17988       if (!Tmp.getNode())
17989         break;
      // Returning N is a form of in-visit replacement that may have
      // invalidated N. Grab the value from the handle.
17992       if (Tmp.getNode() == N.getNode())
17993         N = XORHandle.getValue();
17994       else // Node simplified. Try simplifying again.
17995         N = Tmp;
17996     }
17997 
17998     if (N.getOpcode() != ISD::XOR)
17999       return N;
18000 
18001     SDValue Op0 = N->getOperand(0);
18002     SDValue Op1 = N->getOperand(1);
18003 
18004     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
18005       bool Equal = false;
18006       // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq))
18007       if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR &&
18008           Op0.getValueType() == MVT::i1) {
18009         N = Op0;
18010         Op0 = N->getOperand(0);
18011         Op1 = N->getOperand(1);
18012         Equal = true;
18013       }
18014 
18015       EVT SetCCVT = N.getValueType();
18016       if (LegalTypes)
18017         SetCCVT = getSetCCResultType(SetCCVT);
18018       // Replace the uses of XOR with SETCC
18019       return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1,
18020                           Equal ? ISD::SETEQ : ISD::SETNE);
18021     }
18022   }
18023 
18024   return SDValue();
18025 }
18026 
18027 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
18028 //
18029 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
18030   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
18031   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
18032 
  // If the condition is a constant we could fold this into a fallthrough or
  // unconditional branch. However, that doesn't happen very often in normal
  // code, because Instcombine/SimplifyCFG should have handled the available
  // opportunities.
18036   // If we did this folding here, it would be necessary to update the
18037   // MachineBasicBlock CFG, which is awkward.
18038 
18039   // Use SimplifySetCC to simplify SETCC's.
18040   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
18041                                CondLHS, CondRHS, CC->get(), SDLoc(N),
18042                                false);
18043   if (Simp.getNode()) AddToWorklist(Simp.getNode());
18044 
18045   // fold to a simpler setcc
18046   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
18047     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
18048                        N->getOperand(0), Simp.getOperand(2),
18049                        Simp.getOperand(0), Simp.getOperand(1),
18050                        N->getOperand(4));
18051 
18052   return SDValue();
18053 }
18054 
18055 static bool getCombineLoadStoreParts(SDNode *N, unsigned Inc, unsigned Dec,
18056                                      bool &IsLoad, bool &IsMasked, SDValue &Ptr,
18057                                      const TargetLowering &TLI) {
18058   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
18059     if (LD->isIndexed())
18060       return false;
18061     EVT VT = LD->getMemoryVT();
18062     if (!TLI.isIndexedLoadLegal(Inc, VT) && !TLI.isIndexedLoadLegal(Dec, VT))
18063       return false;
18064     Ptr = LD->getBasePtr();
18065   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
18066     if (ST->isIndexed())
18067       return false;
18068     EVT VT = ST->getMemoryVT();
18069     if (!TLI.isIndexedStoreLegal(Inc, VT) && !TLI.isIndexedStoreLegal(Dec, VT))
18070       return false;
18071     Ptr = ST->getBasePtr();
18072     IsLoad = false;
18073   } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
18074     if (LD->isIndexed())
18075       return false;
18076     EVT VT = LD->getMemoryVT();
18077     if (!TLI.isIndexedMaskedLoadLegal(Inc, VT) &&
18078         !TLI.isIndexedMaskedLoadLegal(Dec, VT))
18079       return false;
18080     Ptr = LD->getBasePtr();
18081     IsMasked = true;
18082   } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
18083     if (ST->isIndexed())
18084       return false;
18085     EVT VT = ST->getMemoryVT();
18086     if (!TLI.isIndexedMaskedStoreLegal(Inc, VT) &&
18087         !TLI.isIndexedMaskedStoreLegal(Dec, VT))
18088       return false;
18089     Ptr = ST->getBasePtr();
18090     IsLoad = false;
18091     IsMasked = true;
18092   } else {
18093     return false;
18094   }
18095   return true;
18096 }
18097 
18098 /// Try turning a load/store into a pre-indexed load/store when the base
18099 /// pointer is an add or subtract and it has other uses besides the load/store.
18100 /// After the transformation, the new indexed load/store has effectively folded
18101 /// the add/subtract in and all of its other uses are redirected to the
18102 /// new load/store.
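///
/// For example, on a target with pre-indexed addressing such as AArch64,
///   add x1, x0, #8
///   ldr w2, [x1]
/// can become the single pre-indexed load "ldr w2, [x0, #8]!", which also
/// updates the base register.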
18103 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
18104   if (Level < AfterLegalizeDAG)
18105     return false;
18106 
18107   bool IsLoad = true;
18108   bool IsMasked = false;
18109   SDValue Ptr;
18110   if (!getCombineLoadStoreParts(N, ISD::PRE_INC, ISD::PRE_DEC, IsLoad, IsMasked,
18111                                 Ptr, TLI))
18112     return false;
18113 
18114   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
18115   // out.  There is no reason to make this a preinc/predec.
18116   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
18117       Ptr->hasOneUse())
18118     return false;
18119 
18120   // Ask the target to do addressing mode selection.
18121   SDValue BasePtr;
18122   SDValue Offset;
18123   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18124   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
18125     return false;
18126 
18127   // Backends without true r+i pre-indexed forms may need to pass a
18128   // constant base with a variable offset so that constant coercion
18129   // will work with the patterns in canonical form.
18130   bool Swapped = false;
18131   if (isa<ConstantSDNode>(BasePtr)) {
18132     std::swap(BasePtr, Offset);
18133     Swapped = true;
18134   }
18135 
  // Don't create an indexed load / store with zero offset.
18137   if (isNullConstant(Offset))
18138     return false;
18139 
18140   // Try turning it into a pre-indexed load / store except when:
18141   // 1) The new base ptr is a frame index.
18142   // 2) If N is a store and the new base ptr is either the same as or is a
18143   //    predecessor of the value being stored.
  // 3) Another use of the old base ptr is a predecessor of N. If Ptr is
  //    folded, that would create a cycle.
18146   // 4) All uses are load / store ops that use it as old base ptr.
18147 
18148   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
18149   // (plus the implicit offset) to a register to preinc anyway.
18150   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18151     return false;
18152 
18153   // Check #2.
18154   if (!IsLoad) {
18155     SDValue Val = IsMasked ? cast<MaskedStoreSDNode>(N)->getValue()
18156                            : cast<StoreSDNode>(N)->getValue();
18157 
18158     // Would require a copy.
18159     if (Val == BasePtr)
18160       return false;
18161 
18162     // Would create a cycle.
18163     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
18164       return false;
18165   }
18166 
18167   // Caches for hasPredecessorHelper.
18168   SmallPtrSet<const SDNode *, 32> Visited;
18169   SmallVector<const SDNode *, 16> Worklist;
18170   Worklist.push_back(N);
18171 
18172   // If the offset is a constant, there may be other adds of constants that
18173   // can be folded with this one. We should do this to avoid having to keep
18174   // a copy of the original base pointer.
18175   SmallVector<SDNode *, 16> OtherUses;
18176   constexpr unsigned int MaxSteps = 8192;
18177   if (isa<ConstantSDNode>(Offset))
18178     for (SDNode::use_iterator UI = BasePtr->use_begin(),
18179                               UE = BasePtr->use_end();
18180          UI != UE; ++UI) {
18181       SDUse &Use = UI.getUse();
18182       // Skip the use that is Ptr and uses of other results from BasePtr's
18183       // node (important for nodes that return multiple results).
18184       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
18185         continue;
18186 
18187       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist,
18188                                        MaxSteps))
18189         continue;
18190 
18191       if (Use.getUser()->getOpcode() != ISD::ADD &&
18192           Use.getUser()->getOpcode() != ISD::SUB) {
18193         OtherUses.clear();
18194         break;
18195       }
18196 
18197       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
18198       if (!isa<ConstantSDNode>(Op1)) {
18199         OtherUses.clear();
18200         break;
18201       }
18202 
18203       // FIXME: In some cases, we can be smarter about this.
18204       if (Op1.getValueType() != Offset.getValueType()) {
18205         OtherUses.clear();
18206         break;
18207       }
18208 
18209       OtherUses.push_back(Use.getUser());
18210     }
18211 
18212   if (Swapped)
18213     std::swap(BasePtr, Offset);
18214 
18215   // Now check for #3 and #4.
18216   bool RealUse = false;
18217 
18218   for (SDNode *Use : Ptr->uses()) {
18219     if (Use == N)
18220       continue;
18221     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps))
18222       return false;
18223 
18224     // If Ptr may be folded in addressing mode of other use, then it's
18225     // not profitable to do this transformation.
18226     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
18227       RealUse = true;
18228   }
18229 
18230   if (!RealUse)
18231     return false;
18232 
18233   SDValue Result;
18234   if (!IsMasked) {
18235     if (IsLoad)
18236       Result = DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18237     else
18238       Result =
18239           DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM);
18240   } else {
18241     if (IsLoad)
18242       Result = DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18243                                         Offset, AM);
18244     else
18245       Result = DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr,
18246                                          Offset, AM);
18247   }
18248   ++PreIndexedNodes;
18249   ++NodesCombined;
18250   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
18251              Result.dump(&DAG); dbgs() << '\n');
18252   WorklistRemover DeadNodes(*this);
18253   if (IsLoad) {
18254     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18255     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18256   } else {
18257     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18258   }
18259 
18260   // Finally, since the node is now dead, remove it from the graph.
18261   deleteAndRecombine(N);
18262 
18263   if (Swapped)
18264     std::swap(BasePtr, Offset);
18265 
18266   // Replace other uses of BasePtr that can be updated to use Ptr
18267   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
18268     unsigned OffsetIdx = 1;
18269     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
18270       OffsetIdx = 0;
18271     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
18272            BasePtr.getNode() && "Expected BasePtr operand");
18273 
18274     // We need to replace ptr0 in the following expression:
18275     //   x0 * offset0 + y0 * ptr0 = t0
18276     // knowing that
18277     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
18278     //
18279     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
18280     // indexed load/store and the expression that needs to be re-written.
18281     //
18282     // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
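    //
    // For example, if the indexed store is a pre-increment by 4
    // (t1 = ptr0 + 4, i.e. x1 = y1 = 1 and offset1 = 4) and the other use is
    // t0 = ptr0 + 8 (x0 = y0 = 1, offset0 = 8), the formula gives
    //   t0 = (8 - 4) + t1 = t1 + 4.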
18284 
18285     auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
18286     const APInt &Offset0 = CN->getAPIntValue();
18287     const APInt &Offset1 = Offset->getAsAPIntVal();
18288     int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
18289     int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
18290     int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
18291     int Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
18292 
18293     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
18294 
18295     APInt CNV = Offset0;
18296     if (X0 < 0) CNV = -CNV;
18297     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
18298     else CNV = CNV - Offset1;
18299 
18300     SDLoc DL(OtherUses[i]);
18301 
18302     // We can now generate the new expression.
18303     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
18304     SDValue NewOp2 = Result.getValue(IsLoad ? 1 : 0);
18305 
18306     SDValue NewUse = DAG.getNode(Opcode,
18307                                  DL,
18308                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
18309     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
18310     deleteAndRecombine(OtherUses[i]);
18311   }
18312 
18313   // Replace the uses of Ptr with uses of the updated base value.
18314   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(IsLoad ? 1 : 0));
18315   deleteAndRecombine(Ptr.getNode());
18316   AddToWorklist(Result.getNode());
18317 
18318   return true;
18319 }
18320 
18321 static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse,
18322                                    SDValue &BasePtr, SDValue &Offset,
18323                                    ISD::MemIndexedMode &AM,
18324                                    SelectionDAG &DAG,
18325                                    const TargetLowering &TLI) {
18326   if (PtrUse == N ||
18327       (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB))
18328     return false;
18329 
18330   if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG))
18331     return false;
18332 
  // Don't create an indexed load / store with zero offset.
18334   if (isNullConstant(Offset))
18335     return false;
18336 
18337   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
18338     return false;
18339 
18340   SmallPtrSet<const SDNode *, 32> Visited;
18341   for (SDNode *Use : BasePtr->uses()) {
18342     if (Use == Ptr.getNode())
18343       continue;
18344 
    // Bail out if there's a later user which could perform the indexing
    // instead.
18346     if (isa<MemSDNode>(Use)) {
18347       bool IsLoad = true;
18348       bool IsMasked = false;
18349       SDValue OtherPtr;
18350       if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18351                                    IsMasked, OtherPtr, TLI)) {
18352         SmallVector<const SDNode *, 2> Worklist;
18353         Worklist.push_back(Use);
18354         if (SDNode::hasPredecessorHelper(N, Visited, Worklist))
18355           return false;
18356       }
18357     }
18358 
18359     // If all the uses are load / store addresses, then don't do the
18360     // transformation.
18361     if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) {
18362       for (SDNode *UseUse : Use->uses())
18363         if (canFoldInAddressingMode(Use, UseUse, DAG, TLI))
18364           return false;
18365     }
18366   }
18367   return true;
18368 }
18369 
18370 static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad,
18371                                          bool &IsMasked, SDValue &Ptr,
18372                                          SDValue &BasePtr, SDValue &Offset,
18373                                          ISD::MemIndexedMode &AM,
18374                                          SelectionDAG &DAG,
18375                                          const TargetLowering &TLI) {
18376   if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad,
18377                                 IsMasked, Ptr, TLI) ||
18378       Ptr->hasOneUse())
18379     return nullptr;
18380 
18381   // Try turning it into a post-indexed load / store except when
  // 1) All uses are load / store ops that use it as the base ptr (and
  //    it may be folded into the addressing mode).
18384   // 2) Op must be independent of N, i.e. Op is neither a predecessor
18385   //    nor a successor of N. Otherwise, if Op is folded that would
18386   //    create a cycle.
18387   for (SDNode *Op : Ptr->uses()) {
18388     // Check for #1.
18389     if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI))
18390       continue;
18391 
18392     // Check for #2.
18393     SmallPtrSet<const SDNode *, 32> Visited;
18394     SmallVector<const SDNode *, 8> Worklist;
18395     constexpr unsigned int MaxSteps = 8192;
18396     // Ptr is predecessor to both N and Op.
18397     Visited.insert(Ptr.getNode());
18398     Worklist.push_back(N);
18399     Worklist.push_back(Op);
18400     if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) &&
18401         !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps))
18402       return Op;
18403   }
18404   return nullptr;
18405 }
18406 
/// Try to combine a load/store with an add/sub of the base pointer node into
/// a post-indexed load/store. The transformation effectively folds the
/// add/subtract into the new indexed load/store, and all of the other uses of
/// the add/subtract are redirected to the new load/store.
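///
/// For example, on a target with post-indexed addressing such as AArch64,
///   ldr w2, [x0]
///   add x0, x0, #4
/// can become the single post-indexed load "ldr w2, [x0], #4".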
18411 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
18412   if (Level < AfterLegalizeDAG)
18413     return false;
18414 
18415   bool IsLoad = true;
18416   bool IsMasked = false;
18417   SDValue Ptr;
18418   SDValue BasePtr;
18419   SDValue Offset;
18420   ISD::MemIndexedMode AM = ISD::UNINDEXED;
18421   SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr,
18422                                          Offset, AM, DAG, TLI);
18423   if (!Op)
18424     return false;
18425 
18426   SDValue Result;
18427   if (!IsMasked)
18428     Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr,
18429                                          Offset, AM)
18430                     : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N),
18431                                           BasePtr, Offset, AM);
18432   else
18433     Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N),
18434                                                BasePtr, Offset, AM)
18435                     : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N),
18436                                                 BasePtr, Offset, AM);
18437   ++PostIndexedNodes;
18438   ++NodesCombined;
18439   LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); dbgs() << "\nWith: ";
18440              Result.dump(&DAG); dbgs() << '\n');
18441   WorklistRemover DeadNodes(*this);
18442   if (IsLoad) {
18443     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
18444     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
18445   } else {
18446     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
18447   }
18448 
18449   // Finally, since the node is now dead, remove it from the graph.
18450   deleteAndRecombine(N);
18451 
  // Replace the uses of Op with uses of the updated base value.
18453   DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
18454                                 Result.getValue(IsLoad ? 1 : 0));
18455   deleteAndRecombine(Op);
18456   return true;
18457 }
18458 
18459 /// Return the base-pointer arithmetic from an indexed \p LD.
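/// For example, for a PRE_INC load with base pointer BP and increment Inc,
/// this returns (add BP, Inc).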
18460 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
18461   ISD::MemIndexedMode AM = LD->getAddressingMode();
18462   assert(AM != ISD::UNINDEXED);
18463   SDValue BP = LD->getOperand(1);
18464   SDValue Inc = LD->getOperand(2);
18465 
18466   // Some backends use TargetConstants for load offsets, but don't expect
18467   // TargetConstants in general ADD nodes. We can convert these constants into
18468   // regular Constants (if the constant is not opaque).
18469   assert((Inc.getOpcode() != ISD::TargetConstant ||
18470           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
18471          "Cannot split out indexing using opaque target constants");
18472   if (Inc.getOpcode() == ISD::TargetConstant) {
18473     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
18474     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
18475                           ConstInc->getValueType(0));
18476   }
18477 
18478   unsigned Opc =
18479       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
18480   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
18481 }
18482 
18483 static inline ElementCount numVectorEltsOrZero(EVT T) {
18484   return T.isVector() ? T.getVectorElementCount() : ElementCount::getFixed(0);
18485 }
18486 
18487 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
18488   EVT STType = Val.getValueType();
18489   EVT STMemType = ST->getMemoryVT();
18490   if (STType == STMemType)
18491     return true;
18492   if (isTypeLegal(STMemType))
18493     return false; // fail.
18494   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
18495       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
18496     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
18497     return true;
18498   }
18499   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
18500       STType.isInteger() && STMemType.isInteger()) {
18501     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
18502     return true;
18503   }
18504   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
18505     Val = DAG.getBitcast(STMemType, Val);
18506     return true;
18507   }
18508   return false; // fail.
18509 }
18510 
18511 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
18512   EVT LDMemType = LD->getMemoryVT();
18513   EVT LDType = LD->getValueType(0);
18514   assert(Val.getValueType() == LDMemType &&
18515          "Attempting to extend value of non-matching type");
18516   if (LDType == LDMemType)
18517     return true;
18518   if (LDMemType.isInteger() && LDType.isInteger()) {
18519     switch (LD->getExtensionType()) {
18520     case ISD::NON_EXTLOAD:
18521       Val = DAG.getBitcast(LDType, Val);
18522       return true;
18523     case ISD::EXTLOAD:
18524       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
18525       return true;
18526     case ISD::SEXTLOAD:
18527       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
18528       return true;
18529     case ISD::ZEXTLOAD:
18530       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
18531       return true;
18532     }
18533   }
18534   return false;
18535 }
18536 
18537 StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
18538                                                 int64_t &Offset) {
18539   SDValue Chain = LD->getOperand(0);
18540 
18541   // Look through CALLSEQ_START.
18542   if (Chain.getOpcode() == ISD::CALLSEQ_START)
18543     Chain = Chain->getOperand(0);
18544 
18545   StoreSDNode *ST = nullptr;
18546   SmallVector<SDValue, 8> Aliases;
18547   if (Chain.getOpcode() == ISD::TokenFactor) {
18548     // Look for unique store within the TokenFactor.
18549     for (SDValue Op : Chain->ops()) {
18550       StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
18551       if (!Store)
18552         continue;
18553       BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18554       BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18555       if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18556         continue;
18557       // Make sure the store is not aliased with any nodes in TokenFactor.
18558       GatherAllAliases(Store, Chain, Aliases);
18559       if (Aliases.empty() ||
18560           (Aliases.size() == 1 && Aliases.front().getNode() == Store))
18561         ST = Store;
18562       break;
18563     }
18564   } else {
18565     StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
18566     if (Store) {
18567       BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
18568       BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
18569       if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
18570         ST = Store;
18571     }
18572   }
18573 
18574   return ST;
18575 }
18576 
18577 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
18578   if (OptLevel == CodeGenOptLevel::None || !LD->isSimple())
18579     return SDValue();
18580   SDValue Chain = LD->getOperand(0);
18581   int64_t Offset;
18582 
18583   StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
18584   // TODO: Relax this restriction for unordered atomics (see D66309)
18585   if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
18586     return SDValue();
18587 
18588   EVT LDType = LD->getValueType(0);
18589   EVT LDMemType = LD->getMemoryVT();
18590   EVT STMemType = ST->getMemoryVT();
18591   EVT STType = ST->getValue().getValueType();
18592 
18593   // There are two cases to consider here:
18594   //  1. The store is fixed width and the load is scalable. In this case we
18595   //     don't know at compile time if the store completely envelops the load
18596   //     so we abandon the optimisation.
18597   //  2. The store is scalable and the load is fixed width. We could
18598   //     potentially support a limited number of cases here, but there has been
18599   //     no cost-benefit analysis to prove it's worth it.
18600   bool LdStScalable = LDMemType.isScalableVT();
18601   if (LdStScalable != STMemType.isScalableVT())
18602     return SDValue();
18603 
18604   // If we are dealing with scalable vectors on a big endian platform the
18605   // calculation of offsets below becomes trickier, since we do not know at
18606   // compile time the absolute size of the vector. Until we've done more
18607   // analysis on big-endian platforms it seems better to bail out for now.
18608   if (LdStScalable && DAG.getDataLayout().isBigEndian())
18609     return SDValue();
18610 
  // Normalize for endianness. After this, Offset == 0 will denote that the
  // least significant bit in the loaded value maps to the least significant
  // bit in the stored value. With Offset == n (for n > 0) the loaded value
  // starts at the n-th least significant byte of the stored value.
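  //
  // For example, when forwarding from an i32 store to an i16 load on a
  // big-endian target, a byte offset of 0 from address analysis becomes
  // Offset = (32 - 16) / 8 - 0 = 2 in value terms.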
18615   int64_t OrigOffset = Offset;
18616   if (DAG.getDataLayout().isBigEndian())
18617     Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() -
18618               (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) /
18619                  8 -
18620              Offset;
18621 
  // Check that the stored value covers all bits that are loaded.
18623   bool STCoversLD;
18624 
18625   TypeSize LdMemSize = LDMemType.getSizeInBits();
18626   TypeSize StMemSize = STMemType.getSizeInBits();
18627   if (LdStScalable)
18628     STCoversLD = (Offset == 0) && LdMemSize == StMemSize;
18629   else
18630     STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <=
18631                                    StMemSize.getFixedValue());
18632 
18633   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
18634     if (LD->isIndexed()) {
18635       // Cannot handle opaque target constants and we must respect the user's
18636       // request not to split indexes from loads.
18637       if (!canSplitIdx(LD))
18638         return SDValue();
18639       SDValue Idx = SplitIndexingFromLoad(LD);
18640       SDValue Ops[] = {Val, Idx, Chain};
18641       return CombineTo(LD, Ops, 3);
18642     }
18643     return CombineTo(LD, Val, Chain);
18644   };
18645 
18646   if (!STCoversLD)
18647     return SDValue();
18648 
18649   // Memory as copy space (potentially masked).
18650   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
18651     // Simple case: Direct non-truncating forwarding
18652     if (LDType.getSizeInBits() == LdMemSize)
18653       return ReplaceLd(LD, ST->getValue(), Chain);
18654     // Can we model the truncate and extension with an and mask?
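    // For example, a truncating i32->i16 store forwarded to an i16
    // zext-load of the same address can be modeled as
    // (and stored_value, 0xFFFF).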
18655     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
18656         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
18657       // Mask to size of LDMemType
18658       auto Mask =
18659           DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(),
18660                                                StMemSize.getFixedValue()),
18661                           SDLoc(ST), STType);
18662       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
18663       return ReplaceLd(LD, Val, Chain);
18664     }
18665   }
18666 
18667   // Handle some cases for big-endian that would be Offset 0 and handled for
18668   // little-endian.
18669   SDValue Val = ST->getValue();
18670   if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) {
18671     if (STType.isInteger() && !STType.isVector() && LDType.isInteger() &&
18672         !LDType.isVector() && isTypeLegal(STType) &&
18673         TLI.isOperationLegal(ISD::SRL, STType)) {
18674       Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val,
18675                         DAG.getConstant(Offset * 8, SDLoc(LD), STType));
18676       Offset = 0;
18677     }
18678   }
18679 
18680   // TODO: Deal with nonzero offset.
18681   if (LD->getBasePtr().isUndef() || Offset != 0)
18682     return SDValue();
  // Model necessary truncations / extensions.
  // Truncate the value to the stored memory size.
18685   do {
18686     if (!getTruncatedStoreValue(ST, Val))
18687       continue;
18688     if (!isTypeLegal(LDMemType))
18689       continue;
18690     if (STMemType != LDMemType) {
18691       // TODO: Support vectors? This requires extract_subvector/bitcast.
18692       if (!STMemType.isVector() && !LDMemType.isVector() &&
18693           STMemType.isInteger() && LDMemType.isInteger())
18694         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
18695       else
18696         continue;
18697     }
18698     if (!extendLoadedValueToExtension(LD, Val))
18699       continue;
18700     return ReplaceLd(LD, Val, Chain);
18701   } while (false);
18702 
18703   // On failure, cleanup dead nodes we may have created.
18704   if (Val->use_empty())
18705     deleteAndRecombine(Val.getNode());
18706   return SDValue();
18707 }
18708 
18709 SDValue DAGCombiner::visitLOAD(SDNode *N) {
18710   LoadSDNode *LD  = cast<LoadSDNode>(N);
18711   SDValue Chain = LD->getChain();
18712   SDValue Ptr   = LD->getBasePtr();
18713 
18714   // If load is not volatile and there are no uses of the loaded value (and
18715   // the updated indexed value in case of indexed loads), change uses of the
18716   // chain value into uses of the chain input (i.e. delete the dead load).
18717   // TODO: Allow this for unordered atomics (see D66309)
18718   if (LD->isSimple()) {
18719     if (N->getValueType(1) == MVT::Other) {
18720       // Unindexed loads.
18721       if (!N->hasAnyUseOfValue(0)) {
18722         // It's not safe to use the two value CombineTo variant here. e.g.
18723         // v1, chain2 = load chain1, loc
18724         // v2, chain3 = load chain2, loc
18725         // v3         = add v2, c
18726         // Now we replace use of chain2 with chain1.  This makes the second load
18727         // isomorphic to the one we are deleting, and thus makes this load live.
18728         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
18729                    dbgs() << "\nWith chain: "; Chain.dump(&DAG);
18730                    dbgs() << "\n");
18731         WorklistRemover DeadNodes(*this);
18732         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
18733         AddUsersToWorklist(Chain.getNode());
18734         if (N->use_empty())
18735           deleteAndRecombine(N);
18736 
18737         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
18738       }
18739     } else {
18740       // Indexed loads.
18741       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
18742 
18743       // If this load has an opaque TargetConstant offset, then we cannot split
18744       // the indexing into an add/sub directly (that TargetConstant may not be
18745       // valid for a different type of node, and we cannot convert an opaque
18746       // target constant into a regular constant).
18747       bool CanSplitIdx = canSplitIdx(LD);
18748 
18749       if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) {
18750         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
18751         SDValue Index;
18752         if (N->hasAnyUseOfValue(1) && CanSplitIdx) {
18753           Index = SplitIndexingFromLoad(LD);
18754           // Try to fold the base pointer arithmetic into subsequent loads and
18755           // stores.
18756           AddUsersToWorklist(N);
18757         } else
18758           Index = DAG.getUNDEF(N->getValueType(1));
18759         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
18760                    dbgs() << "\nWith: "; Undef.dump(&DAG);
18761                    dbgs() << " and 2 other values\n");
18762         WorklistRemover DeadNodes(*this);
18763         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
18764         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
18765         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
18766         deleteAndRecombine(N);
18767         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
18768       }
18769     }
18770   }
18771 
18772   // If this load is directly stored, replace the load value with the stored
18773   // value.
18774   if (auto V = ForwardStoreValueToDirectLoad(LD))
18775     return V;
18776 
18777   // Try to infer better alignment information than the load already has.
18778   if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
18779       !LD->isAtomic()) {
18780     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
18781       if (*Alignment > LD->getAlign() &&
18782           isAligned(*Alignment, LD->getSrcValueOffset())) {
18783         SDValue NewLoad = DAG.getExtLoad(
18784             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
18785             LD->getPointerInfo(), LD->getMemoryVT(), *Alignment,
18786             LD->getMemOperand()->getFlags(), LD->getAAInfo());
18787         // NewLoad will always be N as we are only refining the alignment
18788         assert(NewLoad.getNode() == N);
18789         (void)NewLoad;
18790       }
18791     }
18792   }
18793 
18794   if (LD->isUnindexed()) {
18795     // Walk up chain skipping non-aliasing memory nodes.
18796     SDValue BetterChain = FindBetterChain(LD, Chain);
18797 
18798     // If there is a better chain.
18799     if (Chain != BetterChain) {
18800       SDValue ReplLoad;
18801 
      // Replace the chain to avoid the dependency.
18803       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
18804         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
18805                                BetterChain, Ptr, LD->getMemOperand());
18806       } else {
18807         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
18808                                   LD->getValueType(0),
18809                                   BetterChain, Ptr, LD->getMemoryVT(),
18810                                   LD->getMemOperand());
18811       }
18812 
18813       // Create token factor to keep old chain connected.
18814       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
18815                                   MVT::Other, Chain, ReplLoad.getValue(1));
18816 
18817       // Replace uses with load result and token factor
18818       return CombineTo(N, ReplLoad.getValue(0), Token);
18819     }
18820   }
18821 
18822   // Try transforming N to an indexed load.
18823   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
18824     return SDValue(N, 0);
18825 
18826   // Try to slice up N to more direct loads if the slices are mapped to
18827   // different register banks or pairing can take place.
18828   if (SliceUpLoad(N))
18829     return SDValue(N, 0);
18830 
18831   return SDValue();
18832 }
18833 
18834 namespace {
18835 
18836 /// Helper structure used to slice a load in smaller loads.
18837 /// Basically a slice is obtained from the following sequence:
18838 /// Origin = load Ty1, Base
18839 /// Shift = srl Ty1 Origin, CstTy Amount
18840 /// Inst = trunc Shift to Ty2
18841 ///
18842 /// Then, it will be rewritten into:
18843 /// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy != Ty2
18845 ///
18846 /// SliceTy is deduced from the number of bits that are actually used to
18847 /// build Inst.
18848 struct LoadedSlice {
18849   /// Helper structure used to compute the cost of a slice.
18850   struct Cost {
18851     /// Are we optimizing for code size.
18852     bool ForCodeSize = false;
18853 
    /// Various costs.
18855     unsigned Loads = 0;
18856     unsigned Truncates = 0;
18857     unsigned CrossRegisterBanksCopies = 0;
18858     unsigned ZExts = 0;
18859     unsigned Shift = 0;
18860 
18861     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
18862 
18863     /// Get the cost of one isolated slice.
18864     Cost(const LoadedSlice &LS, bool ForCodeSize)
18865         : ForCodeSize(ForCodeSize), Loads(1) {
18866       EVT TruncType = LS.Inst->getValueType(0);
18867       EVT LoadedType = LS.getLoadedType();
18868       if (TruncType != LoadedType &&
18869           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
18870         ZExts = 1;
18871     }
18872 
18873     /// Account for slicing gain in the current cost.
    /// Slicing provides a few gains, like removing a shift or a
    /// truncate. This method allows the cost of the original load to
    /// grow with the gain from this slice.
18877     void addSliceGain(const LoadedSlice &LS) {
18878       // Each slice saves a truncate.
18879       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
18880       if (!TLI.isTruncateFree(LS.Inst->getOperand(0), LS.Inst->getValueType(0)))
18881         ++Truncates;
18882       // If there is a shift amount, this slice gets rid of it.
18883       if (LS.Shift)
18884         ++Shift;
18885       // If this slice can merge a cross register bank copy, account for it.
18886       if (LS.canMergeExpensiveCrossRegisterBankCopy())
18887         ++CrossRegisterBanksCopies;
18888     }
18889 
18890     Cost &operator+=(const Cost &RHS) {
18891       Loads += RHS.Loads;
18892       Truncates += RHS.Truncates;
18893       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
18894       ZExts += RHS.ZExts;
18895       Shift += RHS.Shift;
18896       return *this;
18897     }
18898 
18899     bool operator==(const Cost &RHS) const {
18900       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
18901              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
18902              ZExts == RHS.ZExts && Shift == RHS.Shift;
18903     }
18904 
18905     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
18906 
18907     bool operator<(const Cost &RHS) const {
18908       // Assume cross register banks copies are as expensive as loads.
18909       // FIXME: Do we want some more target hooks?
18910       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
18911       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
18912       // Unless we are optimizing for code size, consider the
18913       // expensive operation first.
18914       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
18915         return ExpensiveOpsLHS < ExpensiveOpsRHS;
18916       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
18917              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
18918     }
18919 
18920     bool operator>(const Cost &RHS) const { return RHS < *this; }
18921 
18922     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
18923 
18924     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
18925   };
18926 
  // The last instruction that represents the slice. This should be a
18928   // truncate instruction.
18929   SDNode *Inst;
18930 
18931   // The original load instruction.
18932   LoadSDNode *Origin;
18933 
18934   // The right shift amount in bits from the original load.
18935   unsigned Shift;
18936 
  // The DAG from which Origin came.
18938   // This is used to get some contextual information about legal types, etc.
18939   SelectionDAG *DAG;
18940 
18941   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
18942               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
18943       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
18944 
  /// Get the bits of the original loaded value that are used by this slice.
  /// \return Result has the bit width of the original loaded value; used
  ///         bits are set to 1 and unused bits to 0.
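  /// For example, for i8 (trunc (srl (load i32), 16)), the used bits are
  /// 0x00FF0000.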
18948   APInt getUsedBits() const {
18949     // Reproduce the trunc(lshr) sequence:
18950     // - Start from the truncated value.
18951     // - Zero extend to the desired bit width.
18952     // - Shift left.
18953     assert(Origin && "No original load to compare against.");
18954     unsigned BitWidth = Origin->getValueSizeInBits(0);
18955     assert(Inst && "This slice is not bound to an instruction");
18956     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
18957            "Extracted slice is bigger than the whole type!");
18958     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
18959     UsedBits.setAllBits();
18960     UsedBits = UsedBits.zext(BitWidth);
18961     UsedBits <<= Shift;
18962     return UsedBits;
18963   }
18964 
18965   /// Get the size of the slice to be loaded in bytes.
18966   unsigned getLoadedSize() const {
18967     unsigned SliceSize = getUsedBits().popcount();
18968     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
18969     return SliceSize / 8;
18970   }
18971 
18972   /// Get the type that will be loaded for this slice.
18973   /// Note: This may not be the final type for the slice.
18974   EVT getLoadedType() const {
18975     assert(DAG && "Missing context");
18976     LLVMContext &Ctxt = *DAG->getContext();
18977     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
18978   }
18979 
18980   /// Get the alignment of the load used for this slice.
18981   Align getAlign() const {
18982     Align Alignment = Origin->getAlign();
18983     uint64_t Offset = getOffsetFromBase();
18984     if (Offset != 0)
18985       Alignment = commonAlignment(Alignment, Alignment.value() + Offset);
18986     return Alignment;
18987   }
18988 
18989   /// Check if this slice can be rewritten with legal operations.
18990   bool isLegal() const {
18991     // An invalid slice is not legal.
18992     if (!Origin || !Inst || !DAG)
18993       return false;
18994 
    // Offsets are for indexed loads only; we do not handle that.
18996     if (!Origin->getOffset().isUndef())
18997       return false;
18998 
18999     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19000 
19001     // Check that the type is legal.
19002     EVT SliceType = getLoadedType();
19003     if (!TLI.isTypeLegal(SliceType))
19004       return false;
19005 
19006     // Check that the load is legal for this type.
19007     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
19008       return false;
19009 
19010     // Check that the offset can be computed.
19011     // 1. Check its type.
19012     EVT PtrType = Origin->getBasePtr().getValueType();
19013     if (PtrType == MVT::Untyped || PtrType.isExtended())
19014       return false;
19015 
19016     // 2. Check that it fits in the immediate.
19017     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
19018       return false;
19019 
19020     // 3. Check that the computation is legal.
19021     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
19022       return false;
19023 
19024     // Check that the zext is legal if it needs one.
19025     EVT TruncateType = Inst->getValueType(0);
19026     if (TruncateType != SliceType &&
19027         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
19028       return false;
19029 
19030     return true;
19031   }
19032 
19033   /// Get the offset in bytes of this slice in the original chunk of
19034   /// bits.
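  /// For example, a one-byte slice with Shift == 16 of an i32 load is at
  /// byte offset 2 on a little-endian target and byte offset 1 on a
  /// big-endian target.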
19035   /// \pre DAG != nullptr.
19036   uint64_t getOffsetFromBase() const {
19037     assert(DAG && "Missing context.");
19038     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
19039     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
19040     uint64_t Offset = Shift / 8;
19041     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
19042     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
19043            "The size of the original loaded type is not a multiple of a"
19044            " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized away earlier in the pipeline.
19047     assert(TySizeInBytes > Offset &&
19048            "Invalid shift amount for given loaded size");
19049     if (IsBigEndian)
19050       Offset = TySizeInBytes - Offset - getLoadedSize();
19051     return Offset;
19052   }
19053 
19054   /// Generate the sequence of instructions to load the slice
19055   /// represented by this object and redirect the uses of this slice to
19056   /// this new sequence of instructions.
19057   /// \pre this->Inst && this->Origin are valid Instructions and this
19058   /// object passed the legal check: LoadedSlice::isLegal returned true.
19059   /// \return The last instruction of the sequence used to load the slice.
19060   SDValue loadSlice() const {
19061     assert(Inst && Origin && "Unable to replace a non-existing slice.");
19062     const SDValue &OldBaseAddr = Origin->getBasePtr();
19063     SDValue BaseAddr = OldBaseAddr;
19064     // Get the offset in that chunk of bytes w.r.t. the endianness.
19065     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
19066     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
19067     if (Offset) {
19068       // BaseAddr = BaseAddr + Offset.
19069       EVT ArithType = BaseAddr.getValueType();
19070       SDLoc DL(Origin);
19071       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
19072                               DAG->getConstant(Offset, DL, ArithType));
19073     }
19074 
19075     // Create the type of the loaded slice according to its size.
19076     EVT SliceType = getLoadedType();
19077 
19078     // Create the load for the slice.
19079     SDValue LastInst =
19080         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
19081                      Origin->getPointerInfo().getWithOffset(Offset), getAlign(),
19082                      Origin->getMemOperand()->getFlags());
19083     // If the final type is not the same as the loaded type, this means that
19084     // we have to pad with zero. Create a zero extend for that.
19085     EVT FinalType = Inst->getValueType(0);
19086     if (SliceType != FinalType)
19087       LastInst =
19088           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
19089     return LastInst;
19090   }
19091 
19092   /// Check if this slice can be merged with an expensive cross register
19093   /// bank copy. E.g.,
19094   /// i = load i32
19095   /// f = bitcast i32 i to float
19096   bool canMergeExpensiveCrossRegisterBankCopy() const {
19097     if (!Inst || !Inst->hasOneUse())
19098       return false;
19099     SDNode *Use = *Inst->use_begin();
19100     if (Use->getOpcode() != ISD::BITCAST)
19101       return false;
19102     assert(DAG && "Missing context");
19103     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
19104     EVT ResVT = Use->getValueType(0);
19105     const TargetRegisterClass *ResRC =
19106         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
19107     const TargetRegisterClass *ArgRC =
19108         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
19109                            Use->getOperand(0)->isDivergent());
19110     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
19111       return false;
19112 
19113     // At this point, we know that we perform a cross-register-bank copy.
19114     // Check if it is expensive.
19115     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
19116     // Assume bitcasts are cheap, unless both register classes do not
19117     // explicitly share a common sub class.
19118     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
19119       return false;
19120 
19121     // Check if it will be merged with the load.
19122     // 1. Check the alignment / fast memory access constraint.
19123     unsigned IsFast = 0;
19124     if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT,
19125                                 Origin->getAddressSpace(), getAlign(),
19126                                 Origin->getMemOperand()->getFlags(), &IsFast) ||
19127         !IsFast)
19128       return false;
19129 
19130     // 2. Check that the load is a legal operation for that type.
19131     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
19132       return false;
19133 
19134     // 3. Check that we do not have a zext in the way.
19135     if (Inst->getValueType(0) != getLoadedType())
19136       return false;
19137 
19138     return true;
19139   }
19140 };
19141 
19142 } // end anonymous namespace
19143 
19144 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
19145 /// \p UsedBits looks like 0..0 1..1 0..0.
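/// For example, 0x0FF0 is dense while 0x0F0F is not.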
19146 static bool areUsedBitsDense(const APInt &UsedBits) {
19147   // If all the bits are one, this is dense!
19148   if (UsedBits.isAllOnes())
19149     return true;
19150 
19151   // Get rid of the unused bits on the right.
19152   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
19153   // Get rid of the unused bits on the left.
19154   if (NarrowedUsedBits.countl_zero())
19155     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
19156   // Check that the chunk of bits is completely used.
19157   return NarrowedUsedBits.isAllOnes();
19158 }
19159 
19160 /// Check whether or not \p First and \p Second are next to each other
19161 /// in memory. This means that there is no hole between the bits loaded
19162 /// by \p First and the bits loaded by \p Second.
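/// For example, slices covering bytes 0-1 and 2-3 of the same i32 load are
/// next to each other.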
19163 static bool areSlicesNextToEachOther(const LoadedSlice &First,
19164                                      const LoadedSlice &Second) {
19165   assert(First.Origin == Second.Origin && First.Origin &&
19166          "Unable to match different memory origins.");
19167   APInt UsedBits = First.getUsedBits();
19168   assert((UsedBits & Second.getUsedBits()) == 0 &&
19169          "Slices are not supposed to overlap.");
19170   UsedBits |= Second.getUsedBits();
19171   return areUsedBitsDense(UsedBits);
19172 }
19173 
19174 /// Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
19176 /// \pre \p GlobalLSCost should account for at least as many loads as
19177 /// there is in the slices in \p LoadedSlices.
19178 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19179                                  LoadedSlice::Cost &GlobalLSCost) {
19180   unsigned NumberOfSlices = LoadedSlices.size();
  // If there are fewer than two elements, no pairing is possible.
19182   if (NumberOfSlices < 2)
19183     return;
19184 
19185   // Sort the slices so that elements that are likely to be next to each
19186   // other in memory are next to each other in the list.
19187   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
19188     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
19189     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
19190   });
19191   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. second) potential candidate
  // to be placed in a paired load.
19194   const LoadedSlice *First = nullptr;
19195   const LoadedSlice *Second = nullptr;
19196   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
19197                 // Set the beginning of the pair.
19198                                                            First = Second) {
19199     Second = &LoadedSlices[CurrSlice];
19200 
19201     // If First is NULL, it means we start a new pair.
19202     // Get to the next slice.
19203     if (!First)
19204       continue;
19205 
19206     EVT LoadedType = First->getLoadedType();
19207 
19208     // If the types of the slices are different, we cannot pair them.
19209     if (LoadedType != Second->getLoadedType())
19210       continue;
19211 
19212     // Check if the target supplies paired loads for this type.
19213     Align RequiredAlignment;
19214     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // Move to the next pair; this type is hopeless.
19216       Second = nullptr;
19217       continue;
19218     }
19219     // Check if we meet the alignment requirement.
19220     if (First->getAlign() < RequiredAlignment)
19221       continue;
19222 
19223     // Check that both loads are next to each other in memory.
19224     if (!areSlicesNextToEachOther(*First, *Second))
19225       continue;
19226 
19227     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
19228     --GlobalLSCost.Loads;
19229     // Move to the next pair.
19230     Second = nullptr;
19231   }
19232 }
19233 
19234 /// Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there are exactly two
19236 /// involved slices (1) which are (2) next to each other in memory, and
19237 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
19238 ///
19239 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
19240 /// the elements themselves.
19241 ///
/// FIXME: When the cost model is mature enough, we can relax
19243 /// constraints (1) and (2).
19244 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
19245                                 const APInt &UsedBits, bool ForCodeSize) {
19246   unsigned NumberOfSlices = LoadedSlices.size();
19247   if (StressLoadSlicing)
19248     return NumberOfSlices > 1;
19249 
19250   // Check (1).
19251   if (NumberOfSlices != 2)
19252     return false;
19253 
19254   // Check (2).
19255   if (!areUsedBitsDense(UsedBits))
19256     return false;
19257 
19258   // Check (3).
19259   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
19260   // The original code has one big load.
19261   OrigCost.Loads = 1;
19262   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
19263     const LoadedSlice &LS = LoadedSlices[CurrSlice];
19264     // Accumulate the cost of all the slices.
19265     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
19266     GlobalSlicingCost += SliceCost;
19267 
    // Account in the original configuration's cost for the gain obtained
    // with the current slices.
19270     OrigCost.addSliceGain(LS);
19271   }
19272 
19273   // If the target supports paired load, adjust the cost accordingly.
19274   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
19275   return OrigCost > GlobalSlicingCost;
19276 }
19277 
/// If the given load, \p N, is used only by trunc or trunc(lshr)
/// operations, split it into the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre \p N is a simple load (i.e., not an atomic or volatile load).
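///
/// For illustration (hypothetical values): an i32 load whose only uses are
/// (trunc LD to i16) and (trunc (srl LD, 16) to i16) may be rewritten as
/// two independent i16 loads at offsets 0 and 2 (on a little-endian
/// target), when the cost model below deems it profitable.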
19284 bool DAGCombiner::SliceUpLoad(SDNode *N) {
19285   if (Level < AfterLegalizeDAG)
19286     return false;
19287 
19288   LoadSDNode *LD = cast<LoadSDNode>(N);
19289   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
19290       !LD->getValueType(0).isInteger())
19291     return false;
19292 
19293   // The algorithm to split up a load of a scalable vector into individual
19294   // elements currently requires knowing the length of the loaded type,
19295   // so will need adjusting to work on scalable vectors.
19296   if (LD->getValueType(0).isScalableVector())
19297     return false;
19298 
19299   // Keep track of already used bits to detect overlapping values.
19300   // In that case, we will just abort the transformation.
19301   APInt UsedBits(LD->getValueSizeInBits(0), 0);
19302 
19303   SmallVector<LoadedSlice, 4> LoadedSlices;
19304 
19305   // Check if this load is used as several smaller chunks of bits.
19306   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
19307   // of computation for each trunc.
19308   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
19309        UI != UIEnd; ++UI) {
19310     // Skip the uses of the chain.
19311     if (UI.getUse().getResNo() != 0)
19312       continue;
19313 
19314     SDNode *User = *UI;
19315     unsigned Shift = 0;
19316 
19317     // Check if this is a trunc(lshr).
19318     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
19319         isa<ConstantSDNode>(User->getOperand(1))) {
19320       Shift = User->getConstantOperandVal(1);
19321       User = *User->use_begin();
19322     }
19323 
    // At this point, User must be a truncate; i.e., we matched either a
    // plain trunc or a trunc(lshr).
19326     if (User->getOpcode() != ISD::TRUNCATE)
19327       return false;
19328 
    // The width of the type must be a power of 2 and at least 8 bits.
    // Otherwise the load cannot be represented in LLVM IR.
    // Moreover, if we shifted by an amount that is not a multiple of 8 bits,
    // the slice would not be byte-aligned in memory. We do not support that.
19333     unsigned Width = User->getValueSizeInBits(0);
19334     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
19335       return false;
19336 
19337     // Build the slice for this chain of computations.
19338     LoadedSlice LS(User, LD, Shift, &DAG);
19339     APInt CurrentUsedBits = LS.getUsedBits();
19340 
19341     // Check if this slice overlaps with another.
19342     if ((CurrentUsedBits & UsedBits) != 0)
19343       return false;
19344     // Update the bits used globally.
19345     UsedBits |= CurrentUsedBits;
19346 
19347     // Check if the new slice would be legal.
19348     if (!LS.isLegal())
19349       return false;
19350 
19351     // Record the slice.
19352     LoadedSlices.push_back(LS);
19353   }
19354 
19355   // Abort slicing if it does not seem to be profitable.
19356   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
19357     return false;
19358 
19359   ++SlicedLoads;
19360 
19361   // Rewrite each chain to use an independent load.
19362   // By construction, each chain can be represented by a unique load.
19363 
  // Prepare the arguments for the new token factor for all the slices.
19365   SmallVector<SDValue, 8> ArgChains;
19366   for (const LoadedSlice &LS : LoadedSlices) {
19367     SDValue SliceInst = LS.loadSlice();
19368     CombineTo(LS.Inst, SliceInst, true);
19369     if (SliceInst.getOpcode() != ISD::LOAD)
19370       SliceInst = SliceInst.getOperand(0);
19371     assert(SliceInst->getOpcode() == ISD::LOAD &&
19372            "It takes more than a zext to get to the loaded slice!!");
19373     ArgChains.push_back(SliceInst.getValue(1));
19374   }
19375 
19376   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
19377                               ArgChains);
19378   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
19379   AddToWorklist(Chain.getNode());
19380   return true;
19381 }
19382 
/// Check to see if V is (and load (ptr), imm), where the load has
/// specific bytes cleared out.  If so, return the byte size being masked out
19385 /// and the shift amount.
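///
/// For illustration (hypothetical values): with
/// V = (and (load Ptr), 0xFFFF00FF) on i32, exactly one byte at byte
/// offset 1 is cleared, so the result is {1, 1}.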
19386 static std::pair<unsigned, unsigned>
19387 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
19388   std::pair<unsigned, unsigned> Result(0, 0);
19389 
19390   // Check for the structure we're looking for.
19391   if (V->getOpcode() != ISD::AND ||
19392       !isa<ConstantSDNode>(V->getOperand(1)) ||
19393       !ISD::isNormalLoad(V->getOperand(0).getNode()))
19394     return Result;
19395 
19396   // Check the chain and pointer.
19397   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
19398   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
19399 
19400   // This only handles simple types.
19401   if (V.getValueType() != MVT::i16 &&
19402       V.getValueType() != MVT::i32 &&
19403       V.getValueType() != MVT::i64)
19404     return Result;
19405 
19406   // Check the constant mask.  Invert it so that the bits being masked out are
19407   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
19408   // follow the sign bit for uniformity.
19409   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
19410   unsigned NotMaskLZ = llvm::countl_zero(NotMask);
19411   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
19412   unsigned NotMaskTZ = llvm::countr_zero(NotMask);
19413   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
19414   if (NotMaskLZ == 64) return Result;  // All zero mask.
19415 
  // See if we have a contiguous run of bits.  If so, we have 0*1+0*
19417   if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
19418     return Result;
19419 
19420   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
19421   if (V.getValueType() != MVT::i64 && NotMaskLZ)
19422     NotMaskLZ -= 64-V.getValueSizeInBits();
19423 
19424   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
19425   switch (MaskedBytes) {
19426   case 1:
19427   case 2:
19428   case 4: break;
19429   default: return Result; // All one mask, or 5-byte mask.
19430   }
19431 
  // Verify that the masked region starts at a byte offset that is a multiple
  // of its width, so that the access is aligned the same as the access width.
19434   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
19435 
  // For narrowing to be valid, the load must be the memory operation
  // immediately preceding the store.
19438   if (LD == Chain.getNode())
19439     ; // ok.
19440   else if (Chain->getOpcode() == ISD::TokenFactor &&
19441            SDValue(LD, 1).hasOneUse()) {
    // LD has only 1 chain use, so there are no indirect dependencies.
19443     if (!LD->isOperandOf(Chain.getNode()))
19444       return Result;
19445   } else
19446     return Result; // Fail.
19447 
19448   Result.first = MaskedBytes;
19449   Result.second = NotMaskTZ/8;
19450   return Result;
19451 }
19452 
19453 /// Check to see if IVal is something that provides a value as specified by
19454 /// MaskInfo. If so, replace the specified store with a narrower store of
19455 /// truncated IVal.
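///
/// For illustration (hypothetical values): given
///   (store (or (and (load Ptr), 0xFFFFFF00), IVal), Ptr)
/// where IVal is known to be zero outside its low byte, the wide store may
/// be replaced by an i8 store of (trunc IVal), subject to the legality
/// checks below.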
19456 static SDValue
19457 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
19458                                 SDValue IVal, StoreSDNode *St,
19459                                 DAGCombiner *DC) {
19460   unsigned NumBytes = MaskInfo.first;
19461   unsigned ByteShift = MaskInfo.second;
19462   SelectionDAG &DAG = DC->getDAG();
19463 
19464   // Check to see if IVal is all zeros in the part being masked in by the 'or'
19465   // that uses this.  If not, this is not a replacement.
19466   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
19467                                   ByteShift*8, (ByteShift+NumBytes)*8);
19468   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
19469 
19470   // Check that it is legal on the target to do this.  It is legal if the new
19471   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
19472   // legalization. If the source type is legal, but the store type isn't, see
19473   // if we can use a truncating store.
19474   MVT VT = MVT::getIntegerVT(NumBytes * 8);
19475   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19476   bool UseTruncStore;
19477   if (DC->isTypeLegal(VT))
19478     UseTruncStore = false;
19479   else if (TLI.isTypeLegal(IVal.getValueType()) &&
19480            TLI.isTruncStoreLegal(IVal.getValueType(), VT))
19481     UseTruncStore = true;
19482   else
19483     return SDValue();
19484 
19485   // Can't do this for indexed stores.
19486   if (St->isIndexed())
19487     return SDValue();
19488 
19489   // Check that the target doesn't think this is a bad idea.
19490   if (St->getMemOperand() &&
19491       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
19492                               *St->getMemOperand()))
19493     return SDValue();
19494 
19495   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
19496   // shifted by ByteShift and truncated down to NumBytes.
19497   if (ByteShift) {
19498     SDLoc DL(IVal);
19499     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
19500                        DAG.getConstant(ByteShift*8, DL,
19501                                     DC->getShiftAmountTy(IVal.getValueType())));
19502   }
19503 
19504   // Figure out the offset for the store and the alignment of the access.
19505   unsigned StOffset;
19506   if (DAG.getDataLayout().isLittleEndian())
19507     StOffset = ByteShift;
19508   else
19509     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
19510 
19511   SDValue Ptr = St->getBasePtr();
19512   if (StOffset) {
19513     SDLoc DL(IVal);
19514     Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
19515   }
19516 
19517   ++OpsNarrowed;
19518   if (UseTruncStore)
19519     return DAG.getTruncStore(St->getChain(), SDLoc(St), IVal, Ptr,
19520                              St->getPointerInfo().getWithOffset(StOffset),
19521                              VT, St->getOriginalAlign());
19522 
19523   // Truncate down to the new size.
19524   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
19525 
19526   return DAG
19527       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
19528                 St->getPointerInfo().getWithOffset(StOffset),
19529                 St->getOriginalAlign());
19530 }
19531 
19532 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
19533 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
19534 /// narrowing the load and store if it would end up being a win for performance
19535 /// or code size.
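///
/// For illustration (hypothetical values): on a little-endian target,
///   (store (or (load P), 0x00FF0000), P)
/// may be narrowed to an i8 store of 0xFF at P+2, subject to the legality
/// and profitability checks below.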
19536 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
19537   StoreSDNode *ST  = cast<StoreSDNode>(N);
19538   if (!ST->isSimple())
19539     return SDValue();
19540 
19541   SDValue Chain = ST->getChain();
19542   SDValue Value = ST->getValue();
19543   SDValue Ptr   = ST->getBasePtr();
19544   EVT VT = Value.getValueType();
19545 
19546   if (ST->isTruncatingStore() || VT.isVector())
19547     return SDValue();
19548 
19549   unsigned Opc = Value.getOpcode();
19550 
19551   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
19552       !Value.hasOneUse())
19553     return SDValue();
19554 
19555   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
19556   // is a byte mask indicating a consecutive number of bytes, check to see if
19557   // Y is known to provide just those bytes.  If so, we try to replace the
19558   // load + replace + store sequence with a single (narrower) store, which makes
19559   // the load dead.
19560   if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) {
19561     std::pair<unsigned, unsigned> MaskedLoad;
19562     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
19563     if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(
              MaskedLoad, Value.getOperand(1), ST, this))
19566         return NewST;
19567 
19568     // Or is commutative, so try swapping X and Y.
19569     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
19570     if (MaskedLoad.first)
      if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(
              MaskedLoad, Value.getOperand(0), ST, this))
19573         return NewST;
19574   }
19575 
19576   if (!EnableReduceLoadOpStoreWidth)
19577     return SDValue();
19578 
19579   if (Value.getOperand(1).getOpcode() != ISD::Constant)
19580     return SDValue();
19581 
19582   SDValue N0 = Value.getOperand(0);
19583   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
19584       Chain == SDValue(N0.getNode(), 1)) {
19585     LoadSDNode *LD = cast<LoadSDNode>(N0);
19586     if (LD->getBasePtr() != Ptr ||
19587         LD->getPointerInfo().getAddrSpace() !=
19588         ST->getPointerInfo().getAddrSpace())
19589       return SDValue();
19590 
    // Find the type to which we can narrow the load / op / store.
19592     SDValue N1 = Value.getOperand(1);
19593     unsigned BitWidth = N1.getValueSizeInBits();
19594     APInt Imm = N1->getAsAPIntVal();
19595     if (Opc == ISD::AND)
19596       Imm ^= APInt::getAllOnes(BitWidth);
19597     if (Imm == 0 || Imm.isAllOnes())
19598       return SDValue();
19599     unsigned ShAmt = Imm.countr_zero();
19600     unsigned MSB = BitWidth - Imm.countl_zero() - 1;
19601     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
19602     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19603     // The narrowing should be profitable, the load/store operation should be
19604     // legal (or custom) and the store size should be equal to the NewVT width.
19605     while (NewBW < BitWidth &&
19606            (NewVT.getStoreSizeInBits() != NewBW ||
19607             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
19608             !TLI.isNarrowingProfitable(VT, NewVT))) {
19609       NewBW = NextPowerOf2(NewBW);
19610       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
19611     }
19612     if (NewBW >= BitWidth)
19613       return SDValue();
19614 
    // If the least significant changed bit does not fall on a NewBW-bit
    // boundary, start at the previous boundary.
19617     if (ShAmt % NewBW)
19618       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
19619     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
19620                                    std::min(BitWidth, ShAmt + NewBW));
19621     if ((Imm & Mask) == Imm) {
19622       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
19623       if (Opc == ISD::AND)
19624         NewImm ^= APInt::getAllOnes(NewBW);
19625       uint64_t PtrOff = ShAmt / 8;
19626       // For big endian targets, we need to adjust the offset to the pointer to
19627       // load the correct bytes.
19628       if (DAG.getDataLayout().isBigEndian())
19629         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
19630 
19631       unsigned IsFast = 0;
19632       Align NewAlign = commonAlignment(LD->getAlign(), PtrOff);
19633       if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT,
19634                                   LD->getAddressSpace(), NewAlign,
19635                                   LD->getMemOperand()->getFlags(), &IsFast) ||
19636           !IsFast)
19637         return SDValue();
19638 
19639       SDValue NewPtr =
19640           DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
19641       SDValue NewLD =
19642           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
19643                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
19644                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
19645       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
19646                                    DAG.getConstant(NewImm, SDLoc(Value),
19647                                                    NewVT));
19648       SDValue NewST =
19649           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
19650                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
19651 
19652       AddToWorklist(NewPtr.getNode());
19653       AddToWorklist(NewLD.getNode());
19654       AddToWorklist(NewVal.getNode());
19655       WorklistRemover DeadNodes(*this);
19656       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
19657       ++OpsNarrowed;
19658       return NewST;
19659     }
19660   }
19661 
19662   return SDValue();
19663 }
19664 
19665 /// For a given floating point load / store pair, if the load value isn't used
19666 /// by any other operations, then consider transforming the pair to integer
19667 /// load / store operations if the target deems the transformation profitable.
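///
/// For illustration, (store (f32 (load P1)), P2) may become
/// (store (i32 (load P1)), P2) when the target reports the integer load and
/// store as legal and the transformation as desirable.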
19668 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
19669   StoreSDNode *ST  = cast<StoreSDNode>(N);
19670   SDValue Value = ST->getValue();
19671   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
19672       Value.hasOneUse()) {
19673     LoadSDNode *LD = cast<LoadSDNode>(Value);
19674     EVT VT = LD->getMemoryVT();
19675     if (!VT.isFloatingPoint() ||
19676         VT != ST->getMemoryVT() ||
19677         LD->isNonTemporal() ||
19678         ST->isNonTemporal() ||
19679         LD->getPointerInfo().getAddrSpace() != 0 ||
19680         ST->getPointerInfo().getAddrSpace() != 0)
19681       return SDValue();
19682 
19683     TypeSize VTSize = VT.getSizeInBits();
19684 
19685     // We don't know the size of scalable types at compile time so we cannot
19686     // create an integer of the equivalent size.
19687     if (VTSize.isScalable())
19688       return SDValue();
19689 
19690     unsigned FastLD = 0, FastST = 0;
19691     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue());
19692     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
19693         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
19694         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
19695         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) ||
19696         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19697                                 *LD->getMemOperand(), &FastLD) ||
19698         !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT,
19699                                 *ST->getMemOperand(), &FastST) ||
19700         !FastLD || !FastST)
19701       return SDValue();
19702 
19703     SDValue NewLD =
19704         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
19705                     LD->getPointerInfo(), LD->getAlign());
19706 
19707     SDValue NewST =
19708         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
19709                      ST->getPointerInfo(), ST->getAlign());
19710 
19711     AddToWorklist(NewLD.getNode());
19712     AddToWorklist(NewST.getNode());
19713     WorklistRemover DeadNodes(*this);
19714     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
19715     ++LdStFP2Int;
19716     return NewST;
19717   }
19718 
19719   return SDValue();
19720 }
19721 
19722 // This is a helper function for visitMUL to check the profitability
19723 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
19724 // MulNode is the original multiply, AddNode is (add x, c1),
19725 // and ConstNode is c2.
19726 //
19727 // If the (add x, c1) has multiple uses, we could increase
19728 // the number of adds if we make this transformation.
19729 // It would only be worth doing this if we can remove a
19730 // multiply in the process. Check for that here.
19731 // To illustrate:
19732 //     (A + c1) * c3
19733 //     (A + c2) * c3
19734 // We're checking for cases where we have common "c3 * A" expressions.
19735 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue AddNode,
19736                                               SDValue ConstNode) {
19737   APInt Val;
19738 
19739   // If the add only has one use, and the target thinks the folding is
19740   // profitable or does not lead to worse code, this would be OK to do.
19741   if (AddNode->hasOneUse() &&
19742       TLI.isMulAddWithConstProfitable(AddNode, ConstNode))
19743     return true;
19744 
19745   // Walk all the users of the constant with which we're multiplying.
19746   for (SDNode *Use : ConstNode->uses()) {
19747     if (Use == MulNode) // This use is the one we're on right now. Skip it.
19748       continue;
19749 
19750     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
19751       SDNode *OtherOp;
19752       SDNode *MulVar = AddNode.getOperand(0).getNode();
19753 
19754       // OtherOp is what we're multiplying against the constant.
19755       if (Use->getOperand(0) == ConstNode)
19756         OtherOp = Use->getOperand(1).getNode();
19757       else
19758         OtherOp = Use->getOperand(0).getNode();
19759 
19760       // Check to see if multiply is with the same operand of our "add".
19761       //
19762       //     ConstNode  = CONST
19763       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
19764       //     ...
19765       //     AddNode  = (A + c1)  <-- MulVar is A.
19766       //         = AddNode * ConstNode   <-- current visiting instruction.
19767       //
19768       // If we make this transformation, we will have a common
19769       // multiply (ConstNode * A) that we can save.
19770       if (OtherOp == MulVar)
19771         return true;
19772 
19773       // Now check to see if a future expansion will give us a common
19774       // multiply.
19775       //
19776       //     ConstNode  = CONST
19777       //     AddNode    = (A + c1)
19778       //     ...   = AddNode * ConstNode <-- current visiting instruction.
19779       //     ...
19780       //     OtherOp = (A + c2)
19781       //     Use     = OtherOp * ConstNode <-- visiting Use.
19782       //
19783       // If we make this transformation, we will have a common
19784       // multiply (CONST * A) after we also do the same transformation
      // to the "Use" instruction.
19786       if (OtherOp->getOpcode() == ISD::ADD &&
19787           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
19788           OtherOp->getOperand(0).getNode() == MulVar)
19789         return true;
19790     }
19791   }
19792 
19793   // Didn't find a case where this would be profitable.
19794   return false;
19795 }
19796 
19797 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
19798                                          unsigned NumStores) {
19799   SmallVector<SDValue, 8> Chains;
19800   SmallPtrSet<const SDNode *, 8> Visited;
19801   SDLoc StoreDL(StoreNodes[0].MemNode);
19802 
19803   for (unsigned i = 0; i < NumStores; ++i) {
19804     Visited.insert(StoreNodes[i].MemNode);
19805   }
19806 
  // Don't include nodes that are children (other candidate stores) or
  // repeated nodes.
19808   for (unsigned i = 0; i < NumStores; ++i) {
19809     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
19810       Chains.push_back(StoreNodes[i].MemNode->getChain());
19811   }
19812 
19813   assert(!Chains.empty() && "Chain should have generated a chain");
19814   return DAG.getTokenFactor(StoreDL, Chains);
19815 }
19816 
19817 bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
19818   const Value *UnderlyingObj = nullptr;
19819   for (const auto &MemOp : StoreNodes) {
19820     const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
    // A pseudo value such as a stack frame has its own frame index and size;
    // we should not use the first store's frame index for other frames.
19823     if (MMO->getPseudoValue())
19824       return false;
19825 
19826     if (!MMO->getValue())
19827       return false;
19828 
19829     const Value *Obj = getUnderlyingObject(MMO->getValue());
19830 
19831     if (UnderlyingObj && UnderlyingObj != Obj)
19832       return false;
19833 
19834     if (!UnderlyingObj)
19835       UnderlyingObj = Obj;
19836   }
19837 
19838   return true;
19839 }
19840 
19841 bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
19842     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
19843     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
19844   // Make sure we have something to merge.
19845   if (NumStores < 2)
19846     return false;
19847 
19848   assert((!UseTrunc || !UseVector) &&
19849          "This optimization cannot emit a vector truncating store");
19850 
19851   // The latest Node in the DAG.
19852   SDLoc DL(StoreNodes[0].MemNode);
19853 
19854   TypeSize ElementSizeBits = MemVT.getStoreSizeInBits();
19855   unsigned SizeInBits = NumStores * ElementSizeBits;
19856   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
19857 
19858   std::optional<MachineMemOperand::Flags> Flags;
19859   AAMDNodes AAInfo;
19860   for (unsigned I = 0; I != NumStores; ++I) {
19861     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19862     if (!Flags) {
19863       Flags = St->getMemOperand()->getFlags();
19864       AAInfo = St->getAAInfo();
19865       continue;
19866     }
19867     // Skip merging if there's an inconsistent flag.
19868     if (Flags != St->getMemOperand()->getFlags())
19869       return false;
19870     // Concatenate AA metadata.
19871     AAInfo = AAInfo.concat(St->getAAInfo());
19872   }
19873 
19874   EVT StoreTy;
19875   if (UseVector) {
19876     unsigned Elts = NumStores * NumMemElts;
19877     // Get the type for the merged vector store.
19878     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
19879   } else
19880     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
19881 
19882   SDValue StoredVal;
19883   if (UseVector) {
19884     if (IsConstantSrc) {
19885       SmallVector<SDValue, 8> BuildVector;
19886       for (unsigned I = 0; I != NumStores; ++I) {
19887         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
19888         SDValue Val = St->getValue();
19889         // If constant is of the wrong type, convert it now.  This comes up
19890         // when one of our stores was truncating.
19891         if (MemVT != Val.getValueType()) {
19892           Val = peekThroughBitcasts(Val);
19893           // Deal with constants of wrong size.
19894           if (ElementSizeBits != Val.getValueSizeInBits()) {
19895             auto *C = dyn_cast<ConstantSDNode>(Val);
19896             if (!C)
19897               // Not clear how to truncate FP values.
19898               // TODO: Handle truncation of build_vector constants
19899               return false;
19900 
19901             EVT IntMemVT =
19902                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
19903             Val = DAG.getConstant(C->getAPIntValue()
19904                                       .zextOrTrunc(Val.getValueSizeInBits())
19905                                       .zextOrTrunc(ElementSizeBits),
19906                                   SDLoc(C), IntMemVT);
19907           }
          // Make sure the correctly sized type is used (bitcast if needed).
19909           Val = DAG.getBitcast(MemVT, Val);
19910         }
19911         BuildVector.push_back(Val);
19912       }
19913       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19914                                                : ISD::BUILD_VECTOR,
19915                               DL, StoreTy, BuildVector);
19916     } else {
19917       SmallVector<SDValue, 8> Ops;
19918       for (unsigned i = 0; i < NumStores; ++i) {
19919         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
19920         SDValue Val = peekThroughBitcasts(St->getValue());
19921         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
19922         // type MemVT. If the underlying value is not the correct
19923         // type, but it is an extraction of an appropriate vector we
19924         // can recast Val to be of the correct type. This may require
19925         // converting between EXTRACT_VECTOR_ELT and
19926         // EXTRACT_SUBVECTOR.
19927         if ((MemVT != Val.getValueType()) &&
19928             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
19929              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
19930           EVT MemVTScalarTy = MemVT.getScalarType();
19931           // We may need to add a bitcast here to get types to line up.
19932           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
19933             Val = DAG.getBitcast(MemVT, Val);
19934           } else if (MemVT.isVector() &&
19935                      Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19936             Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val);
19937           } else {
19938             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
19939                                             : ISD::EXTRACT_VECTOR_ELT;
19940             SDValue Vec = Val.getOperand(0);
19941             SDValue Idx = Val.getOperand(1);
19942             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
19943           }
19944         }
19945         Ops.push_back(Val);
19946       }
19947 
19948       // Build the extracted vector elements back into a vector.
19949       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
19950                                                : ISD::BUILD_VECTOR,
19951                               DL, StoreTy, Ops);
19952     }
19953   } else {
19954     // We should always use a vector store when merging extracted vector
19955     // elements, so this path implies a store of constants.
19956     assert(IsConstantSrc && "Merged vector elements should use vector store");
19957 
19958     APInt StoreInt(SizeInBits, 0);
19959 
19960     // Construct a single integer constant which is made of the smaller
19961     // constant inputs.
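    // For illustration (hypothetical values): merging two i16 stores of
    // 0x1111 (lower address) and 0x2222 on a little-endian target yields
    // the i32 constant 0x22221111, which stores the same bytes in the same
    // order.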
19962     bool IsLE = DAG.getDataLayout().isLittleEndian();
19963     for (unsigned i = 0; i < NumStores; ++i) {
19964       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
19965       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
19966 
19967       SDValue Val = St->getValue();
19968       Val = peekThroughBitcasts(Val);
19969       StoreInt <<= ElementSizeBits;
19970       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
19971         StoreInt |= C->getAPIntValue()
19972                         .zextOrTrunc(ElementSizeBits)
19973                         .zextOrTrunc(SizeInBits);
19974       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
19975         StoreInt |= C->getValueAPF()
19976                         .bitcastToAPInt()
19977                         .zextOrTrunc(ElementSizeBits)
19978                         .zextOrTrunc(SizeInBits);
19979         // If fp truncation is necessary give up for now.
19980         if (MemVT.getSizeInBits() != ElementSizeBits)
19981           return false;
19982       } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) ||
19983                  ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) {
19984         // Not yet handled
19985         return false;
19986       } else {
19987         llvm_unreachable("Invalid constant element type");
19988       }
19989     }
19990 
19991     // Create the new Load and Store operations.
19992     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
19993   }
19994 
19995   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
19996   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
19997   bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
19998 
  // Make sure we use a trunc store if it's necessary to be legal.
  // When generating the new widened store, if the first store's pointer info
  // cannot be reused, discard the pointer info except for the address space,
  // because the widened store can no longer be represented by the original
  // pointer info, which is for the narrower memory object.
20004   SDValue NewStore;
20005   if (!UseTrunc) {
20006     NewStore = DAG.getStore(
20007         NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
20008         CanReusePtrInfo
20009             ? FirstInChain->getPointerInfo()
20010             : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20011         FirstInChain->getAlign(), *Flags, AAInfo);
20012   } else { // Must be realized as a trunc store
20013     EVT LegalizedStoredValTy =
20014         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
20015     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
20016     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
20017     SDValue ExtendedStoreVal =
20018         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
20019                         LegalizedStoredValTy);
20020     NewStore = DAG.getTruncStore(
20021         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
20022         CanReusePtrInfo
20023             ? FirstInChain->getPointerInfo()
20024             : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
20025         StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
20026         AAInfo);
20027   }
20028 
20029   // Replace all merged stores with the new store.
20030   for (unsigned i = 0; i < NumStores; ++i)
20031     CombineTo(StoreNodes[i].MemNode, NewStore);
20032 
20033   AddToWorklist(NewChain.getNode());
20034   return true;
20035 }
20036 
20037 void DAGCombiner::getStoreMergeCandidates(
20038     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
20039     SDNode *&RootNode) {
20040   // This holds the base pointer, index, and the offset in bytes from the base
20041   // pointer. We must have a base and an offset. Do not handle stores to undef
20042   // base pointers.
20043   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20044   if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef())
20045     return;
20046 
20047   SDValue Val = peekThroughBitcasts(St->getValue());
20048   StoreSource StoreSrc = getStoreSource(Val);
20049   assert(StoreSrc != StoreSource::Unknown && "Expected known source for store");
20050 
20051   // Match on loadbaseptr if relevant.
20052   EVT MemVT = St->getMemoryVT();
20053   BaseIndexOffset LBasePtr;
20054   EVT LoadVT;
20055   if (StoreSrc == StoreSource::Load) {
20056     auto *Ld = cast<LoadSDNode>(Val);
20057     LBasePtr = BaseIndexOffset::match(Ld, DAG);
20058     LoadVT = Ld->getMemoryVT();
20059     // Load and store should be the same type.
20060     if (MemVT != LoadVT)
20061       return;
20062     // Loads must only have one use.
20063     if (!Ld->hasNUsesOfValue(1, 0))
20064       return;
20065     // The memory operands must not be volatile/indexed/atomic.
20066     // TODO: May be able to relax for unordered atomics (see D66309)
20067     if (!Ld->isSimple() || Ld->isIndexed())
20068       return;
20069   }
20070   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
20071                             int64_t &Offset) -> bool {
20072     // The memory operands must not be volatile/indexed/atomic.
20073     // TODO: May be able to relax for unordered atomics (see D66309)
20074     if (!Other->isSimple() || Other->isIndexed())
20075       return false;
20076     // Don't mix temporal stores with non-temporal stores.
20077     if (St->isNonTemporal() != Other->isNonTemporal())
20078       return false;
20079     if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
20080       return false;
20081     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
20082     // Allow merging constants of different types as integers.
20083     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
20084                                            : Other->getMemoryVT() != MemVT;
20085     switch (StoreSrc) {
20086     case StoreSource::Load: {
20087       if (NoTypeMatch)
20088         return false;
20089       // The Load's Base Ptr must also match.
20090       auto *OtherLd = dyn_cast<LoadSDNode>(OtherBC);
20091       if (!OtherLd)
20092         return false;
20093       BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
20094       if (LoadVT != OtherLd->getMemoryVT())
20095         return false;
20096       // Loads must only have one use.
20097       if (!OtherLd->hasNUsesOfValue(1, 0))
20098         return false;
20099       // The memory operands must not be volatile/indexed/atomic.
20100       // TODO: May be able to relax for unordered atomics (see D66309)
20101       if (!OtherLd->isSimple() || OtherLd->isIndexed())
20102         return false;
20103       // Don't mix temporal loads with non-temporal loads.
20104       if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
20105         return false;
20106       if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
20107                                                    *OtherLd))
20108         return false;
20109       if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
20110         return false;
20111       break;
20112     }
20113     case StoreSource::Constant:
20114       if (NoTypeMatch)
20115         return false;
20116       if (getStoreSource(OtherBC) != StoreSource::Constant)
20117         return false;
20118       break;
20119     case StoreSource::Extract:
20120       // Do not merge truncated stores here.
20121       if (Other->isTruncatingStore())
20122         return false;
20123       if (!MemVT.bitsEq(OtherBC.getValueType()))
20124         return false;
20125       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
20126           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
20127         return false;
20128       break;
20129     default:
20130       llvm_unreachable("Unhandled store source for merging");
20131     }
20132     Ptr = BaseIndexOffset::match(Other, DAG);
20133     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
20134   };
20135 
  // Check whether the pair of StoreNode and RootNode has already bailed out
  // more times than the limit in the dependence check.
20138   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
20139                                         SDNode *RootNode) -> bool {
20140     auto RootCount = StoreRootCountMap.find(StoreNode);
20141     return RootCount != StoreRootCountMap.end() &&
20142            RootCount->second.first == RootNode &&
20143            RootCount->second.second > StoreMergeDependenceLimit;
20144   };
20145 
20146   auto TryToAddCandidate = [&](SDNode::use_iterator UseIter) {
20147     // This must be a chain use.
20148     if (UseIter.getOperandNo() != 0)
20149       return;
20150     if (auto *OtherStore = dyn_cast<StoreSDNode>(*UseIter)) {
20151       BaseIndexOffset Ptr;
20152       int64_t PtrDiff;
20153       if (CandidateMatch(OtherStore, Ptr, PtrDiff) &&
20154           !OverLimitInDependenceCheck(OtherStore, RootNode))
20155         StoreNodes.push_back(MemOpLink(OtherStore, PtrDiff));
20156     }
20157   };
20158 
  // We are looking for a root node that is an ancestor to all mergeable
  // stores. We search up through a load, to our root, and then down
  // through all children. For instance, we will find Store{1,2,3} if
  // St is Store1, Store2, or Store3 where the root is not a load,
  // which is always true for non-volatile ops. TODO: Expand
  // the search to find all valid candidates through multiple layers of loads.
20165   //
20166   // Root
20167   // |-------|-------|
20168   // Load    Load    Store3
20169   // |       |
20170   // Store1   Store2
20171   //
20172   // FIXME: We should be able to climb and
20173   // descend TokenFactors to find candidates as well.
20174 
20175   RootNode = St->getChain().getNode();
20176 
20177   unsigned NumNodesExplored = 0;
20178   const unsigned MaxSearchNodes = 1024;
20179   if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
20180     RootNode = Ldn->getChain().getNode();
20181     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20182          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) {
20183       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) { // walk down chain
20184         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
20185           TryToAddCandidate(I2);
20186       }
20187       // Check stores that depend on the root (e.g. Store 3 in the chart above).
20188       if (I.getOperandNo() == 0 && isa<StoreSDNode>(*I)) {
20189         TryToAddCandidate(I);
20190       }
20191     }
20192   } else {
20193     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
20194          I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored)
20195       TryToAddCandidate(I);
20196   }
20197 }
20198 
20199 // We need to check that merging these stores does not cause a loop in the
20200 // DAG. Any store candidate may depend on another candidate indirectly through
20201 // its operands. Check in parallel by searching up from operands of candidates.
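// For illustration: if one candidate's stored value is computed (through a
// load) from another candidate's chain result, merging the two into a single
// node would make that node a predecessor of itself.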
20202 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
20203     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
20204     SDNode *RootNode) {
  // FIXME: We should be able to truncate a full search of
  // predecessors by doing a BFS and keeping tabs on the originating
  // stores from which worklist nodes come, in a similar way to
  // TokenFactor simplification.
20209 
20210   SmallPtrSet<const SDNode *, 32> Visited;
20211   SmallVector<const SDNode *, 8> Worklist;
20212 
20213   // RootNode is a predecessor to all candidates so we need not search
20214   // past it. Add RootNode (peeking through TokenFactors). Do not count
  // these towards the size check.
20216 
20217   Worklist.push_back(RootNode);
20218   while (!Worklist.empty()) {
20219     auto N = Worklist.pop_back_val();
20220     if (!Visited.insert(N).second)
20221       continue; // Already present in Visited.
20222     if (N->getOpcode() == ISD::TokenFactor) {
20223       for (SDValue Op : N->ops())
20224         Worklist.push_back(Op.getNode());
20225     }
20226   }
20227 
20228   // Don't count pruning nodes towards max.
20229   unsigned int Max = 1024 + Visited.size();
20230   // Search Ops of store candidates.
20231   for (unsigned i = 0; i < NumStores; ++i) {
20232     SDNode *N = StoreNodes[i].MemNode;
20233     // Of the 4 Store Operands:
20234     //   * Chain (Op 0) -> We have already considered these
20235     //                     in candidate selection, but only by following the
20236     //                     chain dependencies. We could still have a chain
20237     //                     dependency to a load, that has a non-chain dep to
20238     //                     another load, that depends on a store, etc. So it is
20239     //                     possible to have dependencies that consist of a mix
20240     //                     of chain and non-chain deps, and we need to include
    //                     chain operands in the analysis here.
20242     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
20243     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
    //                       but aren't necessarily from the same base node, so
    //                       cycles are possible (e.g. via indexed store).
20246     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
20247     //               non-indexed stores). Not constant on all targets (e.g. ARM)
20248     //               and so can participate in a cycle.
20249     for (unsigned j = 0; j < N->getNumOperands(); ++j)
20250       Worklist.push_back(N->getOperand(j).getNode());
20251   }
20252   // Search through DAG. We can stop early if we find a store node.
20253   for (unsigned i = 0; i < NumStores; ++i)
20254     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
20255                                      Max)) {
      // If the search bails out, record the StoreNode and RootNode in the
      // StoreRootCountMap. If we have seen the pair more times than the
      // limit, we won't add the StoreNode into the StoreNodes set again.
20259       if (Visited.size() >= Max) {
20260         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
20261         if (RootCount.first == RootNode)
20262           RootCount.second++;
20263         else
20264           RootCount = {RootNode, 1};
20265       }
20266       return false;
20267     }
20268   return true;
20269 }
20270 
20271 unsigned
20272 DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes,
20273                                   int64_t ElementSizeBytes) const {
20274   while (true) {
20275     // Find a store past the width of the first store.
20276     size_t StartIdx = 0;
20277     while ((StartIdx + 1 < StoreNodes.size()) &&
20278            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
20279               StoreNodes[StartIdx + 1].OffsetFromBase)
20280       ++StartIdx;
20281 
20282     // Bail if we don't have enough candidates to merge.
20283     if (StartIdx + 1 >= StoreNodes.size())
20284       return 0;
20285 
20286     // Trim stores that overlapped with the first store.
20287     if (StartIdx)
20288       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
20289 
20290     // Scan the memory operations on the chain and find the first
20291     // non-consecutive store memory address.
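    // For illustration (hypothetical values): with 4-byte elements and
    // offsets {0, 4, 8, 20}, the run {0, 4, 8} yields
    // NumConsecutiveStores == 3.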
20292     unsigned NumConsecutiveStores = 1;
20293     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
20294     // Check that the addresses are consecutive starting from the second
20295     // element in the list of stores.
20296     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
20297       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
20298       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20299         break;
20300       NumConsecutiveStores = i + 1;
20301     }
20302     if (NumConsecutiveStores > 1)
20303       return NumConsecutiveStores;
20304 
20305     // There are no consecutive stores at the start of the list.
20306     // Remove the first store and try again.
20307     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
20308   }
20309 }
20310 
20311 bool DAGCombiner::tryStoreMergeOfConstants(
20312     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20313     EVT MemVT, SDNode *RootNode, bool AllowVectors) {
20314   LLVMContext &Context = *DAG.getContext();
20315   const DataLayout &DL = DAG.getDataLayout();
20316   int64_t ElementSizeBytes = MemVT.getStoreSize();
20317   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20318   bool MadeChange = false;
20319 
20320   // Store the constants into memory as one consecutive store.
20321   while (NumConsecutiveStores >= 2) {
20322     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20323     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20324     Align FirstStoreAlign = FirstInChain->getAlign();
20325     unsigned LastLegalType = 1;
20326     unsigned LastLegalVectorType = 1;
20327     bool LastIntegerTrunc = false;
20328     bool NonZero = false;
20329     unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
20330     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20331       StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
20332       SDValue StoredVal = ST->getValue();
20333       bool IsElementZero = false;
20334       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
20335         IsElementZero = C->isZero();
20336       else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
20337         IsElementZero = C->getConstantFPValue()->isNullValue();
20338       else if (ISD::isBuildVectorAllZeros(StoredVal.getNode()))
20339         IsElementZero = true;
20340       if (IsElementZero) {
20341         if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
20342           FirstZeroAfterNonZero = i;
20343       }
20344       NonZero |= !IsElementZero;
20345 
20346       // Find a legal type for the constant store.
20347       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20348       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20349       unsigned IsFast = 0;
20350 
20351       // Break early when size is too large to be legal.
20352       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20353         break;
20354 
20355       if (TLI.isTypeLegal(StoreTy) &&
20356           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20357                                DAG.getMachineFunction()) &&
20358           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20359                                  *FirstInChain->getMemOperand(), &IsFast) &&
20360           IsFast) {
20361         LastIntegerTrunc = false;
20362         LastLegalType = i + 1;
20363         // Or check whether a truncstore is legal.
20364       } else if (TLI.getTypeAction(Context, StoreTy) ==
20365                  TargetLowering::TypePromoteInteger) {
20366         EVT LegalizedStoredValTy =
20367             TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
20368         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20369             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20370                                  DAG.getMachineFunction()) &&
20371             TLI.allowsMemoryAccess(Context, DL, StoreTy,
20372                                    *FirstInChain->getMemOperand(), &IsFast) &&
20373             IsFast) {
20374           LastIntegerTrunc = true;
20375           LastLegalType = i + 1;
20376         }
20377       }
20378 
20379       // We only use vectors if the target allows it and the function is not
20380       // marked with the noimplicitfloat attribute.
      if (AllowVectors &&
          TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1,
                                           FirstStoreAS)) {
20383         // Find a legal type for the vector store.
20384         unsigned Elts = (i + 1) * NumMemElts;
20385         EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20386         if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
20387             TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20388             TLI.allowsMemoryAccess(Context, DL, Ty,
20389                                    *FirstInChain->getMemOperand(), &IsFast) &&
20390             IsFast)
20391           LastLegalVectorType = i + 1;
20392       }
20393     }
20394 
20395     bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors;
20396     unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
20397     bool UseTrunc = LastIntegerTrunc && !UseVector;
20398 
20399     // Check if we found a legal integer type that creates a meaningful
20400     // merge.
20401     if (NumElem < 2) {
20402       // We know that candidate stores are in order and of correct
20403       // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
20405       // reason a merge of size N could have failed where another of
20406       // the same size would not have, is if the alignment has
20407       // improved or we've dropped a non-zero value. Drop as many
20408       // candidates as we can here.
20409       unsigned NumSkip = 1;
20410       while ((NumSkip < NumConsecutiveStores) &&
20411              (NumSkip < FirstZeroAfterNonZero) &&
20412              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20413         NumSkip++;
20414 
20415       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20416       NumConsecutiveStores -= NumSkip;
20417       continue;
20418     }
20419 
20420     // Check that we can merge these candidates without causing a cycle.
20421     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20422                                                   RootNode)) {
20423       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20424       NumConsecutiveStores -= NumElem;
20425       continue;
20426     }
20427 
20428     MadeChange |= mergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
20429                                                   /*IsConstantSrc*/ true,
20430                                                   UseVector, UseTrunc);
20431 
20432     // Remove merged stores for next iteration.
20433     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20434     NumConsecutiveStores -= NumElem;
20435   }
20436   return MadeChange;
20437 }
20438 
20439 bool DAGCombiner::tryStoreMergeOfExtracts(
20440     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores,
20441     EVT MemVT, SDNode *RootNode) {
20442   LLVMContext &Context = *DAG.getContext();
20443   const DataLayout &DL = DAG.getDataLayout();
20444   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20445   bool MadeChange = false;
20446 
20447   // Loop on Consecutive Stores on success.
20448   while (NumConsecutiveStores >= 2) {
20449     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20450     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20451     Align FirstStoreAlign = FirstInChain->getAlign();
20452     unsigned NumStoresToMerge = 1;
20453     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20454       // Find a legal type for the vector store.
20455       unsigned Elts = (i + 1) * NumMemElts;
      EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20457       unsigned IsFast = 0;
20458 
20459       // Break early when size is too large to be legal.
20460       if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
20461         break;
20462 
20463       if (TLI.isTypeLegal(Ty) &&
20464           TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) &&
20465           TLI.allowsMemoryAccess(Context, DL, Ty,
20466                                  *FirstInChain->getMemOperand(), &IsFast) &&
20467           IsFast)
20468         NumStoresToMerge = i + 1;
20469     }
20470 
    // Check if we found a legal vector type that creates a meaningful
    // merge.
20473     if (NumStoresToMerge < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment has
      // improved. Drop as many candidates as we can here.
20480       unsigned NumSkip = 1;
20481       while ((NumSkip < NumConsecutiveStores) &&
20482              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20483         NumSkip++;
20484 
20485       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20486       NumConsecutiveStores -= NumSkip;
20487       continue;
20488     }
20489 
20490     // Check that we can merge these candidates without causing a cycle.
20491     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge,
20492                                                   RootNode)) {
20493       StoreNodes.erase(StoreNodes.begin(),
20494                        StoreNodes.begin() + NumStoresToMerge);
20495       NumConsecutiveStores -= NumStoresToMerge;
20496       continue;
20497     }
20498 
20499     MadeChange |= mergeStoresOfConstantsOrVecElts(
20500         StoreNodes, MemVT, NumStoresToMerge, /*IsConstantSrc*/ false,
20501         /*UseVector*/ true, /*UseTrunc*/ false);
20502 
20503     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge);
20504     NumConsecutiveStores -= NumStoresToMerge;
20505   }
20506   return MadeChange;
20507 }
20508 
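// Try to merge consecutive stores whose stored values are loads from
// consecutive addresses. For example (a sketch, assuming i32 elements):
//   store (load p),   q
//   store (load p+4), q+4
// can become a single wide load followed by a single wide store when the
// joint type is legal and both accesses are fast.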
20509 bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
20510                                        unsigned NumConsecutiveStores, EVT MemVT,
20511                                        SDNode *RootNode, bool AllowVectors,
20512                                        bool IsNonTemporalStore,
20513                                        bool IsNonTemporalLoad) {
20514   LLVMContext &Context = *DAG.getContext();
20515   const DataLayout &DL = DAG.getDataLayout();
20516   int64_t ElementSizeBytes = MemVT.getStoreSize();
20517   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
20518   bool MadeChange = false;
20519 
20520   // Look for load nodes which are used by the stored values.
20521   SmallVector<MemOpLink, 8> LoadNodes;
20522 
  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, or indexed, and they must be consecutive.
20525   BaseIndexOffset LdBasePtr;
20526 
20527   for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
20528     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
20529     SDValue Val = peekThroughBitcasts(St->getValue());
20530     LoadSDNode *Ld = cast<LoadSDNode>(Val);
20531 
20532     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
20533     // If this is not the first ptr that we check.
20534     int64_t LdOffset = 0;
20535     if (LdBasePtr.getBase().getNode()) {
20536       // The base ptr must be the same.
20537       if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
20538         break;
20539     } else {
      // Remember this base pointer; all other base pointers must match it.
20541       LdBasePtr = LdPtr;
20542     }
20543 
20544     // We found a potential memory operand to merge.
20545     LoadNodes.push_back(MemOpLink(Ld, LdOffset));
20546   }
20547 
20548   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
20549     Align RequiredAlignment;
20550     bool NeedRotate = false;
20551     if (LoadNodes.size() == 2) {
20552       // If we have load/store pair instructions and we only have two values,
20553       // don't bother merging.
20554       if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
20555           StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
20556         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
20557         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
20558         break;
20559       }
20560       // If the loads are reversed, see if we can rotate the halves into place.
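      // For example (a sketch, assuming i32 elements, little endian, and a
      // legal i64 rotate):
      //   store (load p+4), q
      //   store (load p),   q+4
      // can become an i64 load from p, an i64 rotate by 32 bits, and an i64
      // store to q.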
20561       int64_t Offset0 = LoadNodes[0].OffsetFromBase;
20562       int64_t Offset1 = LoadNodes[1].OffsetFromBase;
20563       EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
20564       if (Offset0 - Offset1 == ElementSizeBytes &&
20565           (hasOperation(ISD::ROTL, PairVT) ||
20566            hasOperation(ISD::ROTR, PairVT))) {
20567         std::swap(LoadNodes[0], LoadNodes[1]);
20568         NeedRotate = true;
20569       }
20570     }
20571     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
20572     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
20573     Align FirstStoreAlign = FirstInChain->getAlign();
20574     LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
20575 
    // Scan the memory operations on the chain and find the first
    // non-consecutive load memory address. This variable holds the index in
    // the load node array.
20579 
20580     unsigned LastConsecutiveLoad = 1;
20581 
    // These variables refer to sizes, not indices in the array.
20583     unsigned LastLegalVectorType = 1;
20584     unsigned LastLegalIntegerType = 1;
20585     bool isDereferenceable = true;
20586     bool DoIntegerTruncate = false;
20587     int64_t StartAddress = LoadNodes[0].OffsetFromBase;
20588     SDValue LoadChain = FirstLoad->getChain();
20589     for (unsigned i = 1; i < LoadNodes.size(); ++i) {
20590       // All loads must share the same chain.
20591       if (LoadNodes[i].MemNode->getChain() != LoadChain)
20592         break;
20593 
20594       int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
20595       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
20596         break;
20597       LastConsecutiveLoad = i;
20598 
20599       if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
20600         isDereferenceable = false;
20601 
20602       // Find a legal type for the vector store.
20603       unsigned Elts = (i + 1) * NumMemElts;
20604       EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20605 
20606       // Break early when size is too large to be legal.
20607       if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
20608         break;
20609 
20610       unsigned IsFastSt = 0;
20611       unsigned IsFastLd = 0;
20612       // Don't try vector types if we need a rotate. We may still fail the
20613       // legality checks for the integer type, but we can't handle the rotate
20614       // case with vectors.
20615       // FIXME: We could use a shuffle in place of the rotate.
20616       if (!NeedRotate && TLI.isTypeLegal(StoreTy) &&
20617           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20618                                DAG.getMachineFunction()) &&
20619           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20620                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
20621           IsFastSt &&
20622           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20623                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
20624           IsFastLd) {
20625         LastLegalVectorType = i + 1;
20626       }
20627 
20628       // Find a legal type for the integer store.
20629       unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
20630       StoreTy = EVT::getIntegerVT(Context, SizeInBits);
20631       if (TLI.isTypeLegal(StoreTy) &&
20632           TLI.canMergeStoresTo(FirstStoreAS, StoreTy,
20633                                DAG.getMachineFunction()) &&
20634           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20635                                  *FirstInChain->getMemOperand(), &IsFastSt) &&
20636           IsFastSt &&
20637           TLI.allowsMemoryAccess(Context, DL, StoreTy,
20638                                  *FirstLoad->getMemOperand(), &IsFastLd) &&
20639           IsFastLd) {
20640         LastLegalIntegerType = i + 1;
20641         DoIntegerTruncate = false;
        // Or check whether a truncstore and extload are legal.
20643       } else if (TLI.getTypeAction(Context, StoreTy) ==
20644                  TargetLowering::TypePromoteInteger) {
20645         EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
20646         if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
20647             TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy,
20648                                  DAG.getMachineFunction()) &&
20649             TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20650             TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) &&
20651             TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
20652             TLI.allowsMemoryAccess(Context, DL, StoreTy,
20653                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
20654             IsFastSt &&
20655             TLI.allowsMemoryAccess(Context, DL, StoreTy,
20656                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
20657             IsFastLd) {
20658           LastLegalIntegerType = i + 1;
20659           DoIntegerTruncate = true;
20660         }
20661       }
20662     }
20663 
20664     // Only use vector types if the vector type is larger than the integer
20665     // type. If they are the same, use integers.
20666     bool UseVectorTy =
20667         LastLegalVectorType > LastLegalIntegerType && AllowVectors;
20668     unsigned LastLegalType =
20669         std::max(LastLegalVectorType, LastLegalIntegerType);
20670 
    // We add +1 here because the LastXXX variables refer to a location
    // (index) while NumElem refers to a count of elements.
20673     unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
20674     NumElem = std::min(LastLegalType, NumElem);
20675     Align FirstLoadAlign = FirstLoad->getAlign();
20676 
20677     if (NumElem < 2) {
      // We know that candidate stores are in order and of correct
      // shape. While there is no mergeable sequence from the
      // beginning, one may start later in the sequence. The only
      // reason a merge of size N could have failed where another of
      // the same size would not have is if the alignment of either
      // the load or store has improved. Drop as many candidates as we
      // can here.
20685       unsigned NumSkip = 1;
20686       while ((NumSkip < LoadNodes.size()) &&
20687              (LoadNodes[NumSkip].MemNode->getAlign() <= FirstLoadAlign) &&
20688              (StoreNodes[NumSkip].MemNode->getAlign() <= FirstStoreAlign))
20689         NumSkip++;
20690       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
20691       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
20692       NumConsecutiveStores -= NumSkip;
20693       continue;
20694     }
20695 
20696     // Check that we can merge these candidates without causing a cycle.
20697     if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
20698                                                   RootNode)) {
20699       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20700       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20701       NumConsecutiveStores -= NumElem;
20702       continue;
20703     }
20704 
    // Decide whether it is better to use vectors or integers to load and
    // store to memory.
20707     EVT JointMemOpVT;
20708     if (UseVectorTy) {
20709       // Find a legal type for the vector store.
20710       unsigned Elts = NumElem * NumMemElts;
20711       JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
20712     } else {
20713       unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
20714       JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
20715     }
20716 
20717     SDLoc LoadDL(LoadNodes[0].MemNode);
20718     SDLoc StoreDL(StoreNodes[0].MemNode);
20719 
20720     // The merged loads are required to have the same incoming chain, so
20721     // using the first's chain is acceptable.
20722 
20723     SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
20724     bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
20725     AddToWorklist(NewStoreChain.getNode());
20726 
20727     MachineMemOperand::Flags LdMMOFlags =
20728         isDereferenceable ? MachineMemOperand::MODereferenceable
20729                           : MachineMemOperand::MONone;
20730     if (IsNonTemporalLoad)
20731       LdMMOFlags |= MachineMemOperand::MONonTemporal;
20732 
20733     LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
20734 
20735     MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
20736                                               ? MachineMemOperand::MONonTemporal
20737                                               : MachineMemOperand::MONone;
20738 
20739     StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
20740 
20741     SDValue NewLoad, NewStore;
20742     if (UseVectorTy || !DoIntegerTruncate) {
20743       NewLoad = DAG.getLoad(
20744           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
20745           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
20746       SDValue StoreOp = NewLoad;
20747       if (NeedRotate) {
20748         unsigned LoadWidth = ElementSizeBytes * 8 * 2;
20749         assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
20750                "Unexpected type for rotate-able load pair");
20751         SDValue RotAmt =
20752             DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
20753         // Target can convert to the identical ROTR if it does not have ROTL.
20754         StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
20755       }
20756       NewStore = DAG.getStore(
20757           NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
20758           CanReusePtrInfo ? FirstInChain->getPointerInfo()
20759                           : MachinePointerInfo(FirstStoreAS),
20760           FirstStoreAlign, StMMOFlags);
20761     } else { // This must be the truncstore/extload case
20762       EVT ExtendedTy =
20763           TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
20764       NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
20765                                FirstLoad->getChain(), FirstLoad->getBasePtr(),
20766                                FirstLoad->getPointerInfo(), JointMemOpVT,
20767                                FirstLoadAlign, LdMMOFlags);
20768       NewStore = DAG.getTruncStore(
20769           NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
20770           CanReusePtrInfo ? FirstInChain->getPointerInfo()
20771                           : MachinePointerInfo(FirstStoreAS),
20772           JointMemOpVT, FirstInChain->getAlign(),
20773           FirstInChain->getMemOperand()->getFlags());
20774     }
20775 
20776     // Transfer chain users from old loads to the new load.
20777     for (unsigned i = 0; i < NumElem; ++i) {
20778       LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
20779       DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
20780                                     SDValue(NewLoad.getNode(), 1));
20781     }
20782 
20783     // Replace all stores with the new store. Recursively remove corresponding
20784     // values if they are no longer used.
20785     for (unsigned i = 0; i < NumElem; ++i) {
20786       SDValue Val = StoreNodes[i].MemNode->getOperand(1);
20787       CombineTo(StoreNodes[i].MemNode, NewStore);
20788       if (Val->use_empty())
20789         recursivelyDeleteUnusedNodes(Val.getNode());
20790     }
20791 
20792     MadeChange = true;
20793     StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
20794     LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
20795     NumConsecutiveStores -= NumElem;
20796   }
20797   return MadeChange;
20798 }
20799 
20800 bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
20801   if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging)
20802     return false;
20803 
20804   // TODO: Extend this function to merge stores of scalable vectors.
20805   // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8>
20806   // store since we know <vscale x 16 x i8> is exactly twice as large as
20807   // <vscale x 8 x i8>). Until then, bail out for scalable vectors.
20808   EVT MemVT = St->getMemoryVT();
20809   if (MemVT.isScalableVT())
20810     return false;
20811   if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
20812     return false;
20813 
20814   // This function cannot currently deal with non-byte-sized memory sizes.
20815   int64_t ElementSizeBytes = MemVT.getStoreSize();
20816   if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits())
20817     return false;
20818 
20819   // Do not bother looking at stored values that are not constants, loads, or
20820   // extracted vector elements.
20821   SDValue StoredVal = peekThroughBitcasts(St->getValue());
20822   const StoreSource StoreSrc = getStoreSource(StoredVal);
20823   if (StoreSrc == StoreSource::Unknown)
20824     return false;
20825 
20826   SmallVector<MemOpLink, 8> StoreNodes;
20827   SDNode *RootNode;
  // Find potential store merge candidates by searching through the chain
  // sub-DAG.
20829   getStoreMergeCandidates(St, StoreNodes, RootNode);
20830 
20831   // Check if there is anything to merge.
20832   if (StoreNodes.size() < 2)
20833     return false;
20834 
20835   // Sort the memory operands according to their distance from the
20836   // base pointer.
20837   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
20838     return LHS.OffsetFromBase < RHS.OffsetFromBase;
20839   });
20840 
20841   bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute(
20842       Attribute::NoImplicitFloat);
20843   bool IsNonTemporalStore = St->isNonTemporal();
20844   bool IsNonTemporalLoad = StoreSrc == StoreSource::Load &&
20845                            cast<LoadSDNode>(StoredVal)->isNonTemporal();
20846 
  // Store merging attempts to merge the lowest-addressed stores first. This
  // generally works out well when a merge succeeds, as the remaining stores
  // are checked after the first collection of stores is merged. However, in
  // the case that a non-mergeable store is found first, e.g., {p[-2], p[0],
  // p[1], p[2], p[3]}, we would fail and miss the subsequent mergeable
  // cases. To prevent this, we prune such stores from the front of
  // StoreNodes here.
20854   bool MadeChange = false;
20855   while (StoreNodes.size() > 1) {
20856     unsigned NumConsecutiveStores =
20857         getConsecutiveStores(StoreNodes, ElementSizeBytes);
20858     // There are no more stores in the list to examine.
20859     if (NumConsecutiveStores == 0)
20860       return MadeChange;
20861 
20862     // We have at least 2 consecutive stores. Try to merge them.
20863     assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores");
20864     switch (StoreSrc) {
20865     case StoreSource::Constant:
20866       MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores,
20867                                              MemVT, RootNode, AllowVectors);
20868       break;
20869 
20870     case StoreSource::Extract:
20871       MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores,
20872                                             MemVT, RootNode);
20873       break;
20874 
20875     case StoreSource::Load:
20876       MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores,
20877                                          MemVT, RootNode, AllowVectors,
20878                                          IsNonTemporalStore, IsNonTemporalLoad);
20879       break;
20880 
20881     default:
20882       llvm_unreachable("Unhandled store source type");
20883     }
20884   }
20885   return MadeChange;
20886 }
20887 
20888 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
20889   SDLoc SL(ST);
20890   SDValue ReplStore;
20891 
20892   // Replace the chain to avoid dependency.
20893   if (ST->isTruncatingStore()) {
20894     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
20895                                   ST->getBasePtr(), ST->getMemoryVT(),
20896                                   ST->getMemOperand());
20897   } else {
20898     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
20899                              ST->getMemOperand());
20900   }
20901 
20902   // Create token to keep both nodes around.
20903   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
20904                               MVT::Other, ST->getChain(), ReplStore);
20905 
20906   // Make sure the new and old chains are cleaned up.
20907   AddToWorklist(Token.getNode());
20908 
  // Don't add users to the worklist.
20910   return CombineTo(ST, Token, false);
20911 }
20912 
20913 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
20914   SDValue Value = ST->getValue();
20915   if (Value.getOpcode() == ISD::TargetConstantFP)
20916     return SDValue();
20917 
20918   if (!ISD::isNormalStore(ST))
20919     return SDValue();
20920 
20921   SDLoc DL(ST);
20922 
20923   SDValue Chain = ST->getChain();
20924   SDValue Ptr = ST->getBasePtr();
20925 
20926   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
20927 
20928   // NOTE: If the original store is volatile, this transform must not increase
20929   // the number of stores.  For example, on x86-32 an f64 can be stored in one
20930   // processor operation but an i64 (which is not legal) requires two.  So the
20931   // transform should not be done in this case.
20932 
20933   SDValue Tmp;
20934   switch (CFP->getSimpleValueType(0).SimpleTy) {
20935   default:
20936     llvm_unreachable("Unknown FP type");
20937   case MVT::f16:    // We don't do this for these yet.
20938   case MVT::bf16:
20939   case MVT::f80:
20940   case MVT::f128:
20941   case MVT::ppcf128:
20942     return SDValue();
20943   case MVT::f32:
20944     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
20945         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
      Tmp = DAG.getConstant(
          (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
          SDLoc(CFP), MVT::i32);
20949       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
20950     }
20951 
20952     return SDValue();
20953   case MVT::f64:
    if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && ST->isSimple()) ||
        TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
      Tmp = DAG.getConstant(
          CFP->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(CFP),
          MVT::i64);
      return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
20961     }
20962 
20963     if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) &&
20964         !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) {
20965       // Many FP stores are not made apparent until after legalize, e.g. for
20966       // argument passing.  Since this is so common, custom legalize the
20967       // 64-bit integer store into two 32-bit stores.
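      // For example (a sketch, little endian): storing double 1.0, whose bit
      // pattern is 0x3FF0000000000000, becomes
      //   store i32 0x00000000, Ptr
      //   store i32 0x3FF00000, Ptr+4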
20968       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
20969       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
20970       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
20971       if (DAG.getDataLayout().isBigEndian())
20972         std::swap(Lo, Hi);
20973 
20974       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
20975       AAMDNodes AAInfo = ST->getAAInfo();
20976 
20977       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
20978                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
20979       Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL);
20980       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
20981                                  ST->getPointerInfo().getWithOffset(4),
20982                                  ST->getOriginalAlign(), MMOFlags, AAInfo);
20983       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
20984                          St0, St1);
20985     }
20986 
20987     return SDValue();
20988   }
20989 }
20990 
20991 // (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
20992 //
20993 // If a store of a load with an element inserted into it has no other
20994 // uses in between the chain, then we can consider the vector store
20995 // dead and replace it with just the single scalar element store.
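//
// For example (a sketch, with 4-byte elements):
//   (store (insert_vector_elt (load p), x, 2), p)
// becomes
//   (store x, p+8)
// provided nothing else on the chain can observe the original vector store.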
20996 SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
20997   SDLoc DL(ST);
20998   SDValue Value = ST->getValue();
20999   SDValue Ptr = ST->getBasePtr();
21000   SDValue Chain = ST->getChain();
21001   if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
21002     return SDValue();
21003 
21004   SDValue Elt = Value.getOperand(1);
21005   SDValue Idx = Value.getOperand(2);
21006 
  // If the element isn't byte-sized or is implicitly truncated, then we
  // can't compute an offset.
21009   EVT EltVT = Elt.getValueType();
21010   if (!EltVT.isByteSized() ||
21011       EltVT != Value.getOperand(0).getValueType().getVectorElementType())
21012     return SDValue();
21013 
21014   auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
21015   if (!Ld || Ld->getBasePtr() != Ptr ||
21016       ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
21017       !ISD::isNormalStore(ST) ||
21018       Ld->getAddressSpace() != ST->getAddressSpace() ||
21019       !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
21020     return SDValue();
21021 
  unsigned IsFast = 0;
21023   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21024                               Elt.getValueType(), ST->getAddressSpace(),
21025                               ST->getAlign(), ST->getMemOperand()->getFlags(),
21026                               &IsFast) ||
21027       !IsFast)
21028     return SDValue();
21029 
21030   MachinePointerInfo PointerInfo(ST->getAddressSpace());
21031 
  // If the offset is a known constant, then try to recover the pointer
  // info.
21034   SDValue NewPtr;
21035   if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
21036     unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
21037     NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL);
21038     PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
21039   } else {
21040     NewPtr = TLI.getVectorElementPointer(DAG, Ptr, Value.getValueType(), Idx);
21041   }
21042 
21043   return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
21044                       ST->getMemOperand()->getFlags());
21045 }
21046 
21047 SDValue DAGCombiner::visitSTORE(SDNode *N) {
21048   StoreSDNode *ST  = cast<StoreSDNode>(N);
21049   SDValue Chain = ST->getChain();
21050   SDValue Value = ST->getValue();
21051   SDValue Ptr   = ST->getBasePtr();
21052 
21053   // If this is a store of a bit convert, store the input value if the
21054   // resultant store does not need a higher alignment than the original.
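  // For example: (store (i64 (bitcast (v2f32 X))), p) can become
  // (store (v2f32 X), p) when the target considers the bitcast-free store
  // beneficial.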
21055   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
21056       ST->isUnindexed()) {
21057     EVT SVT = Value.getOperand(0).getValueType();
21058     // If the store is volatile, we only want to change the store type if the
21059     // resulting store is legal. Otherwise we might increase the number of
21060     // memory accesses. We don't care if the original type was legal or not
21061     // as we assume software couldn't rely on the number of accesses of an
21062     // illegal type.
21063     // TODO: May be able to relax for unordered atomics (see D66309)
21064     if (((!LegalOperations && ST->isSimple()) ||
21065          TLI.isOperationLegal(ISD::STORE, SVT)) &&
21066         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
21067                                      DAG, *ST->getMemOperand())) {
21068       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21069                           ST->getMemOperand());
21070     }
21071   }
21072 
21073   // Turn 'store undef, Ptr' -> nothing.
21074   if (Value.isUndef() && ST->isUnindexed())
21075     return Chain;
21076 
21077   // Try to infer better alignment information than the store already has.
21078   if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() &&
21079       !ST->isAtomic()) {
21080     if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
21081       if (*Alignment > ST->getAlign() &&
21082           isAligned(*Alignment, ST->getSrcValueOffset())) {
21083         SDValue NewStore =
21084             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
21085                               ST->getMemoryVT(), *Alignment,
21086                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N, as we are only refining the alignment.
21088         assert(NewStore.getNode() == N);
21089         (void)NewStore;
21090       }
21091     }
21092   }
21093 
21094   // Try transforming a pair floating point load / store ops to integer
21095   // load / store ops.
21096   if (SDValue NewST = TransformFPLoadStorePair(N))
21097     return NewST;
21098 
21099   // Try transforming several stores into STORE (BSWAP).
21100   if (SDValue Store = mergeTruncStores(ST))
21101     return Store;
21102 
21103   if (ST->isUnindexed()) {
21104     // Walk up chain skipping non-aliasing memory nodes, on this store and any
21105     // adjacent stores.
21106     if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handles all of the worklist
      // manipulation. Return the original node to not do anything else.
21109       return SDValue(ST, 0);
21110     }
21111     Chain = ST->getChain();
21112   }
21113 
21114   // FIXME: is there such a thing as a truncating indexed store?
21115   if (ST->isTruncatingStore() && ST->isUnindexed() &&
21116       Value.getValueType().isInteger() &&
21117       (!isa<ConstantSDNode>(Value) ||
21118        !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Convert a truncating store of an extension into a standard store.
21120     if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
21121          Value.getOpcode() == ISD::SIGN_EXTEND ||
21122          Value.getOpcode() == ISD::ANY_EXTEND) &&
21123         Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
21124         TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
21125       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
21126                           ST->getMemOperand());
21127 
21128     APInt TruncDemandedBits =
21129         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
21130                              ST->getMemoryVT().getScalarSizeInBits());
21131 
21132     // See if we can simplify the operation with SimplifyDemandedBits, which
21133     // only works if the value has a single use.
21134     AddToWorklist(Value.getNode());
21135     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been
      // merged with another node (in which case N is deleted).
      // SimplifyDemandedBits will add Value's node back to the worklist if
      // necessary, but we also need to re-visit the Store node itself.
21140       if (N->getOpcode() != ISD::DELETED_NODE)
21141         AddToWorklist(N);
21142       return SDValue(N, 0);
21143     }
21144 
21145     // Otherwise, see if we can simplify the input to this truncstore with
21146     // knowledge that only the low bits are being used.  For example:
21147     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
21148     if (SDValue Shorter =
21149             TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG))
21150       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
21151                                ST->getMemOperand());
21152 
21153     // If we're storing a truncated constant, see if we can simplify it.
21154     // TODO: Move this to targetShrinkDemandedConstant?
21155     if (auto *Cst = dyn_cast<ConstantSDNode>(Value))
21156       if (!Cst->isOpaque()) {
21157         const APInt &CValue = Cst->getAPIntValue();
21158         APInt NewVal = CValue & TruncDemandedBits;
21159         if (NewVal != CValue) {
21160           SDValue Shorter =
21161               DAG.getConstant(NewVal, SDLoc(N), Value.getValueType());
21162           return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr,
21163                                    ST->getMemoryVT(), ST->getMemOperand());
21164         }
21165       }
21166   }
21167 
21168   // If this is a load followed by a store to the same location, then the store
21169   // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
21170   // TODO: Add big-endian truncate support with test coverage.
21171   // TODO: Can relax for unordered atomics (see D66309)
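  // For example: "x = load p; ...; store x, p" writes back the value that is
  // already in memory, so the store can be removed when nothing on the chain
  // in between can modify or observe that location.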
21172   SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
21173                          ? peekThroughTruncates(Value)
21174                          : Value;
21175   if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
21176     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
21177         ST->isUnindexed() && ST->isSimple() &&
21178         Ld->getAddressSpace() == ST->getAddressSpace() &&
21179         // There can't be any side effects between the load and store, such as
21180         // a call or store.
21181         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
21182       // The store is dead, remove it.
21183       return Chain;
21184     }
21185   }
21186 
  // Try scalarizing vector stores of loads where we only change one element.
21188   if (SDValue NewST = replaceStoreOfInsertLoad(ST))
21189     return NewST;
21190 
21191   // TODO: Can relax for unordered atomics (see D66309)
21192   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
21193     if (ST->isUnindexed() && ST->isSimple() &&
21194         ST1->isUnindexed() && ST1->isSimple()) {
21195       if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr &&
21196           ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() &&
21197           ST->getAddressSpace() == ST1->getAddressSpace()) {
21198         // If this is a store followed by a store with the same value to the
21199         // same location, then the store is dead/noop.
21200         return Chain;
21201       }
21202 
21203       if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() &&
21204           !ST1->getBasePtr().isUndef() &&
21205           ST->getAddressSpace() == ST1->getAddressSpace()) {
        // If one of the two stores has a scalable vector type and the other
        // a fixed-size type, we cannot prove at compile time which one is
        // smaller, so we only remove the earlier store when both start at
        // the same address and the earlier store's size is known to be less
        // than or equal to the later one's.
21210         if (ST->getMemoryVT().isScalableVector() ||
21211             ST1->getMemoryVT().isScalableVector()) {
21212           if (ST1->getBasePtr() == Ptr &&
21213               TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
21214                                   ST->getMemoryVT().getStoreSize())) {
21215             CombineTo(ST1, ST1->getChain());
21216             return SDValue(N, 0);
21217           }
21218         } else {
21219           const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
21220           const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
          // If the preceding store writes to a subset of the current store's
          // location and no other node is chained to that store, we can
          // effectively drop the preceding store. Do not remove stores to
          // undef as they may be used as data sinks.
21225           if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
21226                               ChainBase,
21227                               ST1->getMemoryVT().getFixedSizeInBits())) {
21228             CombineTo(ST1, ST1->getChain());
21229             return SDValue(N, 0);
21230           }
21231         }
21232       }
21233     }
21234   }
21235 
21236   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
21237   // truncating store.  We can do this even if this is already a truncstore.
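  // For example: (store (fp_round (f64 X) to f32), p)
  //   --> (truncstore (f64 X), p, f32)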
21238   if ((Value.getOpcode() == ISD::FP_ROUND ||
21239        Value.getOpcode() == ISD::TRUNCATE) &&
21240       Value->hasOneUse() && ST->isUnindexed() &&
21241       TLI.canCombineTruncStore(Value.getOperand(0).getValueType(),
21242                                ST->getMemoryVT(), LegalOperations)) {
21243     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
21244                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
21245   }
21246 
21247   // Always perform this optimization before types are legal. If the target
21248   // prefers, also try this after legalization to catch stores that were created
21249   // by intrinsics or other nodes.
21250   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
21251     while (true) {
21252       // There can be multiple store sequences on the same chain.
21253       // Keep trying to merge store sequences until we are unable to do so
21254       // or until we merge the last store on the chain.
21255       bool Changed = mergeConsecutiveStores(ST);
21256       if (!Changed) break;
      // Return N, as the merge only uses CombineTo, so no worklist cleanup
      // is necessary.
21259       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
21260         return SDValue(N, 0);
21261     }
21262   }
21263 
21264   // Try transforming N to an indexed store.
21265   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
21266     return SDValue(N, 0);
21267 
  // Turn 'store float 1.0, Ptr' -> 'store int 0x3F800000, Ptr'
21269   //
21270   // Make sure to do this only after attempting to merge stores in order to
  // avoid changing the types of some subset of stores due to visit order,
  // preventing their merging.
21273   if (isa<ConstantFPSDNode>(ST->getValue())) {
21274     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
21275       return NewSt;
21276   }
21277 
21278   if (SDValue NewSt = splitMergedValStore(ST))
21279     return NewSt;
21280 
21281   return ReduceLoadOpStoreWidth(N);
21282 }
21283 
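// Remove stores that become dead at a LIFETIME_END. For example (a sketch):
//   store X, p
//   lifetime_end p
// can drop the store when it lies entirely within the object whose lifetime
// is ending and nothing can observe the stored value in between.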
21284 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
21285   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
21286   if (!LifetimeEnd->hasOffset())
21287     return SDValue();
21288 
21289   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
21290                                         LifetimeEnd->getOffset(), false);
21291 
21292   // We walk up the chains to find stores.
21293   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
21294   while (!Chains.empty()) {
21295     SDValue Chain = Chains.pop_back_val();
21296     if (!Chain.hasOneUse())
21297       continue;
21298     switch (Chain.getOpcode()) {
21299     case ISD::TokenFactor:
21300       for (unsigned Nops = Chain.getNumOperands(); Nops;)
21301         Chains.push_back(Chain.getOperand(--Nops));
21302       break;
21303     case ISD::LIFETIME_START:
21304     case ISD::LIFETIME_END:
21305       // We can forward past any lifetime start/end that can be proven not to
21306       // alias the node.
21307       if (!mayAlias(Chain.getNode(), N))
21308         Chains.push_back(Chain.getOperand(0));
21309       break;
21310     case ISD::STORE: {
      StoreSDNode *ST = cast<StoreSDNode>(Chain);
21312       // TODO: Can relax for unordered atomics (see D66309)
21313       if (!ST->isSimple() || ST->isIndexed())
21314         continue;
21315       const TypeSize StoreSize = ST->getMemoryVT().getStoreSize();
21316       // The bounds of a scalable store are not known until runtime, so this
21317       // store cannot be elided.
21318       if (StoreSize.isScalable())
21319         continue;
21320       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
21321       // If we store purely within object bounds just before its lifetime ends,
21322       // we can remove the store.
21323       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
21324                                    StoreSize.getFixedValue() * 8)) {
21325         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
21326                    dbgs() << "\nwithin LIFETIME_END of : ";
21327                    LifetimeEndBase.dump(); dbgs() << "\n");
21328         CombineTo(ST, ST->getChain());
21329         return SDValue(N, 0);
21330       }
21331     }
21332     }
21333   }
21334   return SDValue();
21335 }
21336 
21337 /// For the instruction sequence of store below, F and I values
21338 /// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
21340 /// which can remove the bitwise instructions or sink them to colder places.
21341 ///
21342 ///   (store (or (zext (bitcast F to i32) to i64),
21343 ///              (shl (zext I to i64), 32)), addr)  -->
21344 ///   (store F, addr) and (store I, addr+4)
21345 ///
/// Similarly, splitting for other merged stores can also be beneficial, like:
21347 /// For pair of {i32, i32}, i64 store --> two i32 stores.
21348 /// For pair of {i32, i16}, i64 store --> two i32 stores.
21349 /// For pair of {i16, i16}, i32 store --> two i16 stores.
21350 /// For pair of {i16, i8},  i32 store --> two i16 stores.
21351 /// For pair of {i8, i8},   i16 store --> two i8 stores.
21352 ///
21353 /// We allow each target to determine specifically which kind of splitting is
21354 /// supported.
21355 ///
/// The store patterns are commonly seen from the simple code snippet below
/// if only std::make_pair(...) is SROA-transformed before being inlined into
/// hoo.
21358 ///   void goo(const std::pair<int, float> &);
21359 ///   hoo() {
21360 ///     ...
21361 ///     goo(std::make_pair(tmp, ftmp));
21362 ///     ...
21363 ///   }
21364 ///
21365 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
21366   if (OptLevel == CodeGenOptLevel::None)
21367     return SDValue();
21368 
21369   // Can't change the number of memory accesses for a volatile store or break
21370   // atomicity for an atomic one.
21371   if (!ST->isSimple())
21372     return SDValue();
21373 
21374   SDValue Val = ST->getValue();
21375   SDLoc DL(ST);
21376 
21377   // Match OR operand.
21378   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
21379     return SDValue();
21380 
21381   // Match SHL operand and get Lower and Higher parts of Val.
21382   SDValue Op1 = Val.getOperand(0);
21383   SDValue Op2 = Val.getOperand(1);
21384   SDValue Lo, Hi;
21385   if (Op1.getOpcode() != ISD::SHL) {
21386     std::swap(Op1, Op2);
21387     if (Op1.getOpcode() != ISD::SHL)
21388       return SDValue();
21389   }
21390   Lo = Op2;
21391   Hi = Op1.getOperand(0);
21392   if (!Op1.hasOneUse())
21393     return SDValue();
21394 
21395   // Match shift amount to HalfValBitSize.
21396   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
21397   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
21398   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
21399     return SDValue();
21400 
  // Lo and Hi are zero-extended from integers whose size is less than or
  // equal to HalfValBitSize.
21403   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
21404       !Lo.getOperand(0).getValueType().isScalarInteger() ||
21405       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
21406       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
21407       !Hi.getOperand(0).getValueType().isScalarInteger() ||
21408       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
21409     return SDValue();
21410 
21411   // Use the EVT of low and high parts before bitcast as the input
21412   // of target query.
21413   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
21414                   ? Lo.getOperand(0).getValueType()
21415                   : Lo.getValueType();
21416   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
21417                    ? Hi.getOperand(0).getValueType()
21418                    : Hi.getValueType();
21419   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
21420     return SDValue();
21421 
21422   // Start to split store.
21423   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
21424   AAMDNodes AAInfo = ST->getAAInfo();
21425 
21426   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
21427   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
21428   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
21429   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
21430 
21431   SDValue Chain = ST->getChain();
21432   SDValue Ptr = ST->getBasePtr();
21433   // Lower value store.
21434   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
21435                              ST->getOriginalAlign(), MMOFlags, AAInfo);
21436   Ptr =
21437       DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
21438   // Higher value store.
21439   SDValue St1 = DAG.getStore(
21440       St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
21441       ST->getOriginalAlign(), MMOFlags, AAInfo);
21442   return St1;
21443 }
21444 
21445 // Merge an insertion into an existing shuffle:
21446 // (insert_vector_elt (vector_shuffle X, Y, Mask),
21447 //                   .(extract_vector_elt X, N), InsIndex)
21448 //   --> (vector_shuffle X, Y, NewMask)
21449 //  and variations where shuffle operands may be CONCAT_VECTORS.
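// For example (a sketch, with 4-element vectors): inserting
// (extract_vector_elt X, 0) at index 2 of (vector_shuffle<0,1,4,5> X, Y)
// yields (vector_shuffle<0,1,0,5> X, Y).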
21450 static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask,
21451                                 SmallVectorImpl<int> &NewMask, SDValue Elt,
21452                                 unsigned InsIndex) {
21453   if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
21454       !isa<ConstantSDNode>(Elt.getOperand(1)))
21455     return false;
21456 
21457   // Vec's operand 0 is using indices from 0 to N-1 and
21458   // operand 1 from N to 2N - 1, where N is the number of
21459   // elements in the vectors.
21460   SDValue InsertVal0 = Elt.getOperand(0);
21461   int ElementOffset = -1;
21462 
21463   // We explore the inputs of the shuffle in order to see if we find the
21464   // source of the extract_vector_elt. If so, we can use it to modify the
21465   // shuffle rather than perform an insert_vector_elt.
21466   SmallVector<std::pair<int, SDValue>, 8> ArgWorkList;
21467   ArgWorkList.emplace_back(Mask.size(), Y);
21468   ArgWorkList.emplace_back(0, X);
21469 
21470   while (!ArgWorkList.empty()) {
21471     int ArgOffset;
21472     SDValue ArgVal;
21473     std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val();
21474 
21475     if (ArgVal == InsertVal0) {
21476       ElementOffset = ArgOffset;
21477       break;
21478     }
21479 
21480     // Peek through concat_vector.
21481     if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) {
21482       int CurrentArgOffset =
21483           ArgOffset + ArgVal.getValueType().getVectorNumElements();
21484       int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements();
21485       for (SDValue Op : reverse(ArgVal->ops())) {
21486         CurrentArgOffset -= Step;
21487         ArgWorkList.emplace_back(CurrentArgOffset, Op);
21488       }
21489 
21490       // Make sure we went through all the elements and did not screw up index
21491       // computation.
21492       assert(CurrentArgOffset == ArgOffset);
21493     }
21494   }
21495 
21496   // If we failed to find a match, see if we can replace an UNDEF shuffle
21497   // operand.
21498   if (ElementOffset == -1) {
21499     if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType())
21500       return false;
21501     ElementOffset = Mask.size();
21502     Y = InsertVal0;
21503   }
21504 
21505   NewMask.assign(Mask.begin(), Mask.end());
21506   NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1);
21507   assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 &&
21508          "NewMask[InsIndex] is out of bound");
21509   return true;
21510 }
21511 
21512 // Merge an insertion into an existing shuffle:
21513 // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N),
21514 // InsIndex)
21515 //   --> (vector_shuffle X, Y) and variations where shuffle operands may be
21516 //   CONCAT_VECTORS.
21517 SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
21520   SDValue InsertVal = N->getOperand(1);
21521   SDValue Vec = N->getOperand(0);
21522 
21523   auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec);
21524   if (!SVN || !Vec.hasOneUse())
21525     return SDValue();
21526 
21527   ArrayRef<int> Mask = SVN->getMask();
21528   SDValue X = Vec.getOperand(0);
21529   SDValue Y = Vec.getOperand(1);
21530 
21531   SmallVector<int, 16> NewMask(Mask);
21532   if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) {
21533     SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
21534         Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG);
21535     if (LegalShuffle)
21536       return LegalShuffle;
21537   }
21538 
21539   return SDValue();
21540 }
21541 
21542 // Convert a disguised subvector insertion into a shuffle:
21543 // insert_vector_elt V, (bitcast X from vector type), IdxC -->
21544 // bitcast(shuffle (bitcast V), (extended X), Mask)
21545 // Note: We do not use an insert_subvector node because that requires a
21546 // legal subvector type.
21547 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
  assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Expected insert_vector_elt");
21550   SDValue InsertVal = N->getOperand(1);
21551 
21552   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
21553       !InsertVal.getOperand(0).getValueType().isVector())
21554     return SDValue();
21555 
21556   SDValue SubVec = InsertVal.getOperand(0);
21557   SDValue DestVec = N->getOperand(0);
21558   EVT SubVecVT = SubVec.getValueType();
21559   EVT VT = DestVec.getValueType();
21560   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
  // If the source has only a single vector element, the cost of creating a
  // vector from it is likely to exceed the cost of an insert_vector_elt.
21563   if (NumSrcElts == 1)
21564     return SDValue();
21565   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
21566   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
21567 
21568   // Step 1: Create a shuffle mask that implements this insert operation. The
21569   // vector that we are inserting into will be operand 0 of the shuffle, so
21570   // those elements are just 'i'. The inserted subvector is in the first
21571   // positions of operand 1 of the shuffle. Example:
21572   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
21573   SmallVector<int, 16> Mask(NumMaskVals);
21574   for (unsigned i = 0; i != NumMaskVals; ++i) {
21575     if (i / NumSrcElts == InsIndex)
21576       Mask[i] = (i % NumSrcElts) + NumMaskVals;
21577     else
21578       Mask[i] = i;
21579   }
21580 
  // Bail out if the target cannot handle the shuffle we want to create.
21582   EVT SubVecEltVT = SubVecVT.getVectorElementType();
21583   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
21584   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
21585     return SDValue();
21586 
21587   // Step 2: Create a wide vector from the inserted source vector by appending
21588   // undefined elements. This is the same size as our destination vector.
21589   SDLoc DL(N);
21590   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
21591   ConcatOps[0] = SubVec;
21592   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
21593 
21594   // Step 3: Shuffle in the padded subvector.
21595   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
21596   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
21597   AddToWorklist(PaddedSubV.getNode());
21598   AddToWorklist(DestVecBC.getNode());
21599   AddToWorklist(Shuf.getNode());
21600   return DAG.getBitcast(VT, Shuf);
21601 }
21602 
// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load if
// possible, provided the new load will be fast. We use more loads but fewer
// shuffles and inserts.
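// For example (a sketch, with <4 x i32> and 4-byte elements):
//   V = load <4 x i32>, p
//   S = shuffle V, undef, <u,0,1,2>
//   X = load i32, p-4
//   insert_vector_elt S, X, 0
// can become a single <4 x i32> load from p-4.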
21606 SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
21607   EVT VT = N->getValueType(0);
21608 
  // InsIndex is expected to be the first or last lane.
21610   if (!VT.isFixedLengthVector() ||
21611       (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
21612     return SDValue();
21613 
21614   // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
21615   // depending on the InsIndex.
21616   auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
21617   SDValue Scalar = N->getOperand(1);
21618   if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
21619         return InsIndex == P.index() || P.value() < 0 ||
21620                (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
21621                (InsIndex == VT.getVectorNumElements() - 1 &&
21622                 P.value() == (int)P.index() + 1);
21623       }))
21624     return SDValue();
21625 
21626   // We optionally skip over an extend so long as both loads are extended in the
21627   // same way from the same type.
21628   unsigned Extend = 0;
21629   if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
21630       Scalar.getOpcode() == ISD::SIGN_EXTEND ||
21631       Scalar.getOpcode() == ISD::ANY_EXTEND) {
21632     Extend = Scalar.getOpcode();
21633     Scalar = Scalar.getOperand(0);
21634   }
21635 
21636   auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
21637   if (!ScalarLoad)
21638     return SDValue();
21639 
21640   SDValue Vec = Shuffle->getOperand(0);
21641   if (Extend) {
21642     if (Vec.getOpcode() != Extend)
21643       return SDValue();
21644     Vec = Vec.getOperand(0);
21645   }
21646   auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
21647   if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
21648     return SDValue();
21649 
21650   int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
21651   if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
21652       !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21653       ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
21654       ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
21655     return SDValue();
21656 
  // Check the offset between the pointers: the two loads together must cover a
  // single contiguous region, with the scalar element immediately before the
  // vector (InsIndex == 0) or immediately after it (last lane).
21659   if (InsIndex == 0) {
21660     if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
21661                                             -1))
21662       return SDValue();
21663   } else {
21664     if (!DAG.areNonVolatileConsecutiveLoads(
21665             VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
21666       return SDValue();
21667   }
21668 
21669   // And that the new unaligned load will be fast.
21670   unsigned IsFast = 0;
21671   Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
21672   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
21673                               Vec.getValueType(), VecLoad->getAddressSpace(),
21674                               NewAlign, VecLoad->getMemOperand()->getFlags(),
21675                               &IsFast) ||
21676       !IsFast)
21677     return SDValue();
21678 
21679   // Calculate the new Ptr and create the new load.
21680   SDLoc DL(N);
21681   SDValue Ptr = ScalarLoad->getBasePtr();
21682   if (InsIndex != 0)
21683     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
21684                       DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
21685   MachinePointerInfo PtrInfo =
21686       InsIndex == 0 ? ScalarLoad->getPointerInfo()
21687                     : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
21688 
21689   SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
21690                              ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
21691   DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
21692   DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
21693   return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
21694 }
21695 
21696 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
21697   SDValue InVec = N->getOperand(0);
21698   SDValue InVal = N->getOperand(1);
21699   SDValue EltNo = N->getOperand(2);
21700   SDLoc DL(N);
21701 
21702   EVT VT = InVec.getValueType();
21703   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
21704 
  // Inserting into an out-of-bounds element is undefined.
21706   if (IndexC && VT.isFixedLengthVector() &&
21707       IndexC->getZExtValue() >= VT.getVectorNumElements())
21708     return DAG.getUNDEF(VT);
21709 
21710   // Remove redundant insertions:
21711   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
21712   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21713       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
21714     return InVec;
21715 
21716   if (!IndexC) {
    // A variable insert into an undef vector might be better off as a splat:
21718     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
21719     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT))
21720       return DAG.getSplat(VT, DL, InVal);
21721     return SDValue();
21722   }
21723 
21724   if (VT.isScalableVector())
21725     return SDValue();
21726 
21727   unsigned NumElts = VT.getVectorNumElements();
21728 
21729   // We must know which element is being inserted for folds below here.
21730   unsigned Elt = IndexC->getZExtValue();
21731 
21732   // Handle <1 x ???> vector insertion special cases.
21733   if (NumElts == 1) {
21734     // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y
21735     if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
21736         InVal.getOperand(0).getValueType() == VT &&
21737         isNullConstant(InVal.getOperand(1)))
21738       return InVal.getOperand(0);
21739   }
21740 
21741   // Canonicalize insert_vector_elt dag nodes.
21742   // Example:
21743   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
21744   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
21745   //
21746   // Do this only if the child insert_vector node has one use; also
21747   // do this only if indices are both constants and Idx1 < Idx0.
21748   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
21749       && isa<ConstantSDNode>(InVec.getOperand(2))) {
21750     unsigned OtherElt = InVec.getConstantOperandVal(2);
21751     if (Elt < OtherElt) {
21752       // Swap nodes.
21753       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
21754                                   InVec.getOperand(0), InVal, EltNo);
21755       AddToWorklist(NewOp.getNode());
21756       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
21757                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
21758     }
21759   }
21760 
21761   if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt))
21762     return Shuf;
21763 
21764   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
21765     return Shuf;
21766 
21767   if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
21768     return Shuf;
21769 
21770   // Attempt to convert an insert_vector_elt chain into a legal build_vector.
21771   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
    // Single-element vector - we don't need to recurse.
21773     if (NumElts == 1)
21774       return DAG.getBuildVector(VT, DL, {InVal});
21775 
21776     // If we haven't already collected the element, insert into the op list.
21777     EVT MaxEltVT = InVal.getValueType();
21778     auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt,
21779                                 unsigned Idx) {
21780       if (!Ops[Idx]) {
21781         Ops[Idx] = Elt;
21782         if (VT.isInteger()) {
21783           EVT EltVT = Elt.getValueType();
21784           MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT;
21785         }
21786       }
21787     };
21788 
21789     // Ensure all the operands are the same value type, fill any missing
21790     // operands with UNDEF and create the BUILD_VECTOR.
21791     auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) {
21792       assert(Ops.size() == NumElts && "Unexpected vector size");
21793       for (SDValue &Op : Ops) {
21794         if (Op)
21795           Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op;
21796         else
21797           Op = DAG.getUNDEF(MaxEltVT);
21798       }
21799       return DAG.getBuildVector(VT, DL, Ops);
21800     };
21801 
21802     SmallVector<SDValue, 8> Ops(NumElts, SDValue());
21803     Ops[Elt] = InVal;
21804 
    // Recurse up an INSERT_VECTOR_ELT chain to build a BUILD_VECTOR.
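    // E.g. (insert (insert (insert undef, a, 0), b, 1), c, 2) for v4i32
    //   -> (build_vector a, b, c, undef)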
21806     for (SDValue CurVec = InVec; CurVec;) {
21807       // UNDEF - build new BUILD_VECTOR from already inserted operands.
21808       if (CurVec.isUndef())
21809         return CanonicalizeBuildVector(Ops);
21810 
21811       // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR.
21812       if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) {
21813         for (unsigned I = 0; I != NumElts; ++I)
21814           AddBuildVectorOp(Ops, CurVec.getOperand(I), I);
21815         return CanonicalizeBuildVector(Ops);
21816       }
21817 
21818       // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR.
21819       if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) {
21820         AddBuildVectorOp(Ops, CurVec.getOperand(0), 0);
21821         return CanonicalizeBuildVector(Ops);
21822       }
21823 
21824       // INSERT_VECTOR_ELT - insert operand and continue up the chain.
21825       if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse())
21826         if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)))
21827           if (CurIdx->getAPIntValue().ult(NumElts)) {
21828             unsigned Idx = CurIdx->getZExtValue();
21829             AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx);
21830 
21831             // Found entire BUILD_VECTOR.
21832             if (all_of(Ops, [](SDValue Op) { return !!Op; }))
21833               return CanonicalizeBuildVector(Ops);
21834 
21835             CurVec = CurVec->getOperand(0);
21836             continue;
21837           }
21838 
21839       // VECTOR_SHUFFLE - if all the operands match the shuffle's sources,
21840       // update the shuffle mask (and second operand if we started with unary
21841       // shuffle) and create a new legal shuffle.
21842       if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) {
21843         auto *SVN = cast<ShuffleVectorSDNode>(CurVec);
21844         SDValue LHS = SVN->getOperand(0);
21845         SDValue RHS = SVN->getOperand(1);
21846         SmallVector<int, 16> Mask(SVN->getMask());
21847         bool Merged = true;
21848         for (auto I : enumerate(Ops)) {
21849           SDValue &Op = I.value();
21850           if (Op) {
21851             SmallVector<int, 16> NewMask;
21852             if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) {
21853               Merged = false;
21854               break;
21855             }
21856             Mask = std::move(NewMask);
21857           }
21858         }
21859         if (Merged)
21860           if (SDValue NewShuffle =
21861                   TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG))
21862             return NewShuffle;
21863       }
21864 
      // If all the inserted values are zero, try to convert to an AND mask.
21866       // TODO: Do this for -1 with OR mask?
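      // E.g. if zeros were inserted into lanes 1 and 3 of v4i32 X, this
      // becomes (and X, {-1, 0, -1, 0}).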
21867       if (!LegalOperations && llvm::isNullConstant(InVal) &&
21868           all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
21869           count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
21870         SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
21871         SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
21872         SmallVector<SDValue, 8> Mask(NumElts);
21873         for (unsigned I = 0; I != NumElts; ++I)
21874           Mask[I] = Ops[I] ? Zero : AllOnes;
21875         return DAG.getNode(ISD::AND, DL, VT, CurVec,
21876                            DAG.getBuildVector(VT, DL, Mask));
21877       }
21878 
21879       // Failed to find a match in the chain - bail.
21880       break;
21881     }
21882 
21883     // See if we can fill in the missing constant elements as zeros.
21884     // TODO: Should we do this for any constant?
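    // E.g. inserting X into lane 1 of a known-zero v4i32 vector gives
    // (build_vector 0, X, 0, 0).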
21885     APInt DemandedZeroElts = APInt::getZero(NumElts);
21886     for (unsigned I = 0; I != NumElts; ++I)
21887       if (!Ops[I])
21888         DemandedZeroElts.setBit(I);
21889 
21890     if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) {
21891       SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT)
21892                                     : DAG.getConstantFP(0, DL, MaxEltVT);
21893       for (unsigned I = 0; I != NumElts; ++I)
21894         if (!Ops[I])
21895           Ops[I] = Zero;
21896 
21897       return CanonicalizeBuildVector(Ops);
21898     }
21899   }
21900 
21901   return SDValue();
21902 }
21903 
21904 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
21905                                                   SDValue EltNo,
21906                                                   LoadSDNode *OriginalLoad) {
21907   assert(OriginalLoad->isSimple());
21908 
21909   EVT ResultVT = EVE->getValueType(0);
21910   EVT VecEltVT = InVecVT.getVectorElementType();
21911 
21912   // If the vector element type is not a multiple of a byte then we are unable
21913   // to correctly compute an address to load only the extracted element as a
21914   // scalar.
21915   if (!VecEltVT.isByteSized())
21916     return SDValue();
21917 
21918   ISD::LoadExtType ExtTy =
      ResultVT.bitsGT(VecEltVT) ? ISD::EXTLOAD : ISD::NON_EXTLOAD;
21920   if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) ||
21921       !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
21922     return SDValue();
21923 
21924   Align Alignment = OriginalLoad->getAlign();
21925   MachinePointerInfo MPI;
21926   SDLoc DL(EVE);
21927   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
21928     int Elt = ConstEltNo->getZExtValue();
21929     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
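    // E.g. extracting lane 2 of a v4i32 load gives PtrOff = 32 * 2 / 8 = 8
    // bytes.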
21930     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
21931     Alignment = commonAlignment(Alignment, PtrOff);
21932   } else {
21933     // Discard the pointer info except the address space because the memory
21934     // operand can't represent this new access since the offset is variable.
21935     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
21936     Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8);
21937   }
21938 
21939   unsigned IsFast = 0;
21940   if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT,
21941                               OriginalLoad->getAddressSpace(), Alignment,
21942                               OriginalLoad->getMemOperand()->getFlags(),
21943                               &IsFast) ||
21944       !IsFast)
21945     return SDValue();
21946 
21947   SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(),
21948                                                InVecVT, EltNo);
21949 
21950   // We are replacing a vector load with a scalar load. The new load must have
21951   // identical memory op ordering to the original.
21952   SDValue Load;
21953   if (ResultVT.bitsGT(VecEltVT)) {
21954     // If the result type of vextract is wider than the load, then issue an
21955     // extending load instead.
21956     ISD::LoadExtType ExtType =
21957         TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, VecEltVT) ? ISD::ZEXTLOAD
21958                                                               : ISD::EXTLOAD;
21959     Load = DAG.getExtLoad(ExtType, DL, ResultVT, OriginalLoad->getChain(),
21960                           NewPtr, MPI, VecEltVT, Alignment,
21961                           OriginalLoad->getMemOperand()->getFlags(),
21962                           OriginalLoad->getAAInfo());
21963     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
21964   } else {
21965     // The result type is narrower or the same width as the vector element
21966     Load = DAG.getLoad(VecEltVT, DL, OriginalLoad->getChain(), NewPtr, MPI,
21967                        Alignment, OriginalLoad->getMemOperand()->getFlags(),
21968                        OriginalLoad->getAAInfo());
21969     DAG.makeEquivalentMemoryOrdering(OriginalLoad, Load);
21970     if (ResultVT.bitsLT(VecEltVT))
21971       Load = DAG.getNode(ISD::TRUNCATE, DL, ResultVT, Load);
21972     else
21973       Load = DAG.getBitcast(ResultVT, Load);
21974   }
21975   ++OpsNarrowed;
21976   return Load;
21977 }
21978 
21979 /// Transform a vector binary operation into a scalar binary operation by moving
21980 /// the math/logic after an extract element of a vector.
21981 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
21982                                        bool LegalOperations) {
21983   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
21984   SDValue Vec = ExtElt->getOperand(0);
21985   SDValue Index = ExtElt->getOperand(1);
21986   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
21987   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
21988       Vec->getNumValues() != 1)
21989     return SDValue();
21990 
21991   // Targets may want to avoid this to prevent an expensive register transfer.
21992   if (!TLI.shouldScalarizeBinop(Vec))
21993     return SDValue();
21994 
21995   // Extracting an element of a vector constant is constant-folded, so this
21996   // transform is just replacing a vector op with a scalar op while moving the
21997   // extract.
21998   SDValue Op0 = Vec.getOperand(0);
21999   SDValue Op1 = Vec.getOperand(1);
22000   APInt SplatVal;
22001   if (isAnyConstantBuildVector(Op0, true) ||
22002       ISD::isConstantSplatVector(Op0.getNode(), SplatVal) ||
22003       isAnyConstantBuildVector(Op1, true) ||
22004       ISD::isConstantSplatVector(Op1.getNode(), SplatVal)) {
22005     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
22006     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
22007     SDLoc DL(ExtElt);
22008     EVT VT = ExtElt->getValueType(0);
22009     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
22010     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
22011     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
22012   }
22013 
22014   return SDValue();
22015 }
22016 
// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract,
// recursively analyse all of its users and try to model them as bit sequence
// extractions. If all of them agree on the new, narrower element type, and all
// of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that new element
// type, do so now.
22022 // This is mainly useful to recover from legalization that scalarized
22023 // the vector as wide elements, but tries to rebuild it with narrower elements.
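// E.g. if the only uses of (i64 extract_vector_elt v2i64:X, 0) are
// (i32 trunc) and (i32 trunc (srl by 32)), both feeding BUILD_VECTOR nodes,
// they can be rebuilt as extractions of elements 0 and 1 of (v4i32 bitcast X)
// on a little-endian target.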
22024 //
22025 // Some more nodes could be modelled if that helps cover interesting patterns.
22026 bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(
22027     SDNode *N) {
22028   // We perform this optimization post type-legalization because
22029   // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization earlier may cause legalization cycles.
22031   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22032     return false;
22033 
22034   // TODO: Add support for big-endian.
22035   if (DAG.getDataLayout().isBigEndian())
22036     return false;
22037 
22038   SDValue VecOp = N->getOperand(0);
22039   EVT VecVT = VecOp.getValueType();
22040   assert(!VecVT.isScalableVector() && "Only for fixed vectors.");
22041 
22042   // We must start with a constant extraction index.
22043   auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1));
22044   if (!IndexC)
22045     return false;
22046 
22047   assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() &&
22048          "Original ISD::EXTRACT_VECTOR_ELT is undefinend?");
22049 
22050   // TODO: deal with the case of implicit anyext of the extraction.
22051   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22052   EVT ScalarVT = N->getValueType(0);
22053   if (VecVT.getScalarType() != ScalarVT)
22054     return false;
22055 
22056   // TODO: deal with the cases other than everything being integer-typed.
22057   if (!ScalarVT.isScalarInteger())
22058     return false;
22059 
22060   struct Entry {
22061     SDNode *Producer;
22062 
22063     // Which bits of VecOp does it contain?
22064     unsigned BitPos;
22065     int NumBits;
22066     // NOTE: the actual width of \p Producer may be wider than NumBits!
22067 
22068     Entry(Entry &&) = default;
22069     Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_)
22070         : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {}
22071 
22072     Entry() = delete;
22073     Entry(const Entry &) = delete;
22074     Entry &operator=(const Entry &) = delete;
22075     Entry &operator=(Entry &&) = delete;
22076   };
22077   SmallVector<Entry, 32> Worklist;
22078   SmallVector<Entry, 32> Leafs;
22079 
22080   // We start at the "root" ISD::EXTRACT_VECTOR_ELT.
22081   Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(),
22082                         /*NumBits=*/VecEltBitWidth);
22083 
22084   while (!Worklist.empty()) {
22085     Entry E = Worklist.pop_back_val();
22086     // Does the node not even use any of the VecOp bits?
22087     if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() &&
22088           E.BitPos + E.NumBits <= VecVT.getSizeInBits()))
      return false; // Let's allow the other combines to clean this up first.
22090     // Did we fail to model any of the users of the Producer?
22091     bool ProducerIsLeaf = false;
22092     // Look at each user of this Producer.
22093     for (SDNode *User : E.Producer->uses()) {
22094       switch (User->getOpcode()) {
22095       // TODO: support ISD::BITCAST
22096       // TODO: support ISD::ANY_EXTEND
22097       // TODO: support ISD::ZERO_EXTEND
22098       // TODO: support ISD::SIGN_EXTEND
22099       case ISD::TRUNCATE:
        // Truncation simply means we keep position, but extract fewer bits.
22101         Worklist.emplace_back(User, E.BitPos,
22102                               /*NumBits=*/User->getValueSizeInBits(0));
22103         break;
22104       // TODO: support ISD::SRA
22105       // TODO: support ISD::SHL
22106       case ISD::SRL:
22107         // We should be shifting the Producer by a constant amount.
22108         if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1));
22109             User->getOperand(0).getNode() == E.Producer && ShAmtC) {
22110           // Logical right-shift means that we start extraction later,
22111           // but stop it at the same position we did previously.
22112           unsigned ShAmt = ShAmtC->getZExtValue();
22113           Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt);
22114           break;
22115         }
22116         [[fallthrough]];
22117       default:
22118         // We can not model this user of the Producer.
22119         // Which means the current Producer will be a ISD::EXTRACT_VECTOR_ELT.
22120         ProducerIsLeaf = true;
22121         // Profitability check: all users that we can not model
22122         //                      must be ISD::BUILD_VECTOR's.
22123         if (User->getOpcode() != ISD::BUILD_VECTOR)
22124           return false;
22125         break;
22126       }
22127     }
22128     if (ProducerIsLeaf)
22129       Leafs.emplace_back(std::move(E));
22130   }
22131 
22132   unsigned NewVecEltBitWidth = Leafs.front().NumBits;
22133 
  // If we are still at the same element granularity, give up.
22135   if (NewVecEltBitWidth == VecEltBitWidth)
22136     return false;
22137 
22138   // The vector width must be a multiple of the new element width.
22139   if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0)
22140     return false;
22141 
22142   // All leafs must agree on the new element width.
  // All leafs must not expect any "padding" bits on top of that width.
22144   // All leafs must start extraction from multiple of that width.
22145   if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) {
22146         return (unsigned)E.NumBits == NewVecEltBitWidth &&
22147                E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth &&
22148                E.BitPos % NewVecEltBitWidth == 0;
22149       }))
22150     return false;
22151 
22152   EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth);
22153   EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT,
22154                                   VecVT.getSizeInBits() / NewVecEltBitWidth);
22155 
22156   if (LegalTypes &&
22157       !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT)))
22158     return false;
22159 
22160   if (LegalOperations &&
22161       !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) &&
22162         TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT)))
22163     return false;
22164 
22165   SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp);
22166   for (const Entry &E : Leafs) {
22167     SDLoc DL(E.Producer);
22168     unsigned NewIndex = E.BitPos / NewVecEltBitWidth;
22169     assert(NewIndex < NewVecVT.getVectorNumElements() &&
22170            "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?");
22171     SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp,
22172                             DAG.getVectorIdxConstant(NewIndex, DL));
22173     CombineTo(E.Producer, V);
22174   }
22175 
22176   return true;
22177 }
22178 
22179 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
22180   SDValue VecOp = N->getOperand(0);
22181   SDValue Index = N->getOperand(1);
22182   EVT ScalarVT = N->getValueType(0);
22183   EVT VecVT = VecOp.getValueType();
22184   if (VecOp.isUndef())
22185     return DAG.getUNDEF(ScalarVT);
22186 
22187   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
22188   //
22189   // This only really matters if the index is non-constant since other combines
22190   // on the constant elements already work.
22191   SDLoc DL(N);
22192   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
22193       Index == VecOp.getOperand(2)) {
22194     SDValue Elt = VecOp.getOperand(1);
22195     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
22196   }
22197 
22198   // (vextract (scalar_to_vector val, 0) -> val
22199   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22200     // Only 0'th element of SCALAR_TO_VECTOR is defined.
22201     if (DAG.isKnownNeverZero(Index))
22202       return DAG.getUNDEF(ScalarVT);
22203 
    // Check if the result type doesn't match the inserted element type.
    // The inserted element and the extracted element may have mismatched
    // bitwidths. As a result, EXTRACT_VECTOR_ELT may extend or truncate the
    // extracted value.
22207     SDValue InOp = VecOp.getOperand(0);
22208     if (InOp.getValueType() != ScalarVT) {
22209       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22210       if (InOp.getValueType().bitsGT(ScalarVT))
22211         return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
22212       return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
22213     }
22214     return InOp;
22215   }
22216 
22217   // extract_vector_elt of out-of-bounds element -> UNDEF
22218   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
22219   if (IndexC && VecVT.isFixedLengthVector() &&
22220       IndexC->getAPIntValue().uge(VecVT.getVectorNumElements()))
22221     return DAG.getUNDEF(ScalarVT);
22222 
22223   // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx
22224   if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) {
22225     return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
22226                                      VecOp.getOperand(0), Index));
22227   }
22228 
22229   // extract_vector_elt (build_vector x, y), 1 -> y
22230   if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
22231        VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
22232       TLI.isTypeLegal(VecVT)) {
22233     assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
22234             VecVT.isFixedLengthVector()) &&
22235            "BUILD_VECTOR used for scalable vectors");
22236     unsigned IndexVal =
22237         VecOp.getOpcode() == ISD::BUILD_VECTOR ? IndexC->getZExtValue() : 0;
22238     SDValue Elt = VecOp.getOperand(IndexVal);
22239     EVT InEltVT = Elt.getValueType();
22240 
22241     if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
22242         isNullConstant(Elt)) {
22243       // Sometimes build_vector's scalar input types do not match result type.
22244       if (ScalarVT == InEltVT)
22245         return Elt;
22246 
22247       // TODO: It may be useful to truncate if free if the build_vector
22248       // implicitly converts.
22249     }
22250   }
22251 
22252   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
22253     return BO;
22254 
22255   if (VecVT.isScalableVector())
22256     return SDValue();
22257 
22258   // All the code from this point onwards assumes fixed width vectors, but it's
22259   // possible that some of the combinations could be made to work for scalable
22260   // vectors too.
22261   unsigned NumElts = VecVT.getVectorNumElements();
22262   unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
22263 
  // See if the extracted element is constant, in which case fold it if it's
22265   // a legal fp immediate.
22266   if (IndexC && ScalarVT.isFloatingPoint()) {
22267     APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
22268     KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
22269     if (KnownElt.isConstant()) {
22270       APFloat CstFP =
22271           APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
22272       if (TLI.isFPImmLegal(CstFP, ScalarVT))
22273         return DAG.getConstantFP(CstFP, DL, ScalarVT);
22274     }
22275   }
22276 
22277   // TODO: These transforms should not require the 'hasOneUse' restriction, but
22278   // there are regressions on multiple targets without it. We can end up with a
22279   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
22280   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
22281       VecOp.hasOneUse()) {
    // The vector index of the LSBs of the source depends on the endianness.
22283     bool IsLE = DAG.getDataLayout().isLittleEndian();
22284     unsigned ExtractIndex = IndexC->getZExtValue();
22285     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
22286     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
22287     SDValue BCSrc = VecOp.getOperand(0);
22288     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
22289       return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT);
22290 
22291     if (LegalTypes && BCSrc.getValueType().isInteger() &&
22292         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
22293       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
22294       // trunc i64 X to i32
22295       SDValue X = BCSrc.getOperand(0);
22296       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
22297              "Extract element and scalar to vector can't change element type "
22298              "from FP to integer.");
22299       unsigned XBitWidth = X.getValueSizeInBits();
22300       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
22301 
22302       // An extract element return value type can be wider than its vector
22303       // operand element type. In that case, the high bits are undefined, so
22304       // it's possible that we may need to extend rather than truncate.
22305       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
22306         assert(XBitWidth % VecEltBitWidth == 0 &&
22307                "Scalar bitwidth must be a multiple of vector element bitwidth");
22308         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
22309       }
22310     }
22311   }
22312 
22313   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
22314   // We only perform this optimization before the op legalization phase because
22315   // we may introduce new vector instructions which are not backed by TD
  // patterns (for example, on AVX, extracting elements from a wide vector
  // without using extract_subvector). However, if we can find an underlying
22318   // scalar value, then we can always use that.
22319   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
22320     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
22321     // Find the new index to extract from.
22322     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
22323 
22324     // Extracting an undef index is undef.
22325     if (OrigElt == -1)
22326       return DAG.getUNDEF(ScalarVT);
22327 
22328     // Select the right vector half to extract from.
22329     SDValue SVInVec;
22330     if (OrigElt < (int)NumElts) {
22331       SVInVec = VecOp.getOperand(0);
22332     } else {
22333       SVInVec = VecOp.getOperand(1);
22334       OrigElt -= NumElts;
22335     }
22336 
22337     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
22338       SDValue InOp = SVInVec.getOperand(OrigElt);
22339       if (InOp.getValueType() != ScalarVT) {
22340         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
22341         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
22342       }
22343 
22344       return InOp;
22345     }
22346 
22347     // FIXME: We should handle recursing on other vector shuffles and
22348     // scalar_to_vector here as well.
22349 
22350     if (!LegalOperations ||
22351         // FIXME: Should really be just isOperationLegalOrCustom.
22352         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
22353         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
22354       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
22355                          DAG.getVectorIdxConstant(OrigElt, DL));
22356     }
22357   }
22358 
22359   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
22360   // simplify it based on the (valid) extraction indices.
22361   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
22362         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
22363                Use->getOperand(0) == VecOp &&
22364                isa<ConstantSDNode>(Use->getOperand(1));
22365       })) {
22366     APInt DemandedElts = APInt::getZero(NumElts);
22367     for (SDNode *Use : VecOp->uses()) {
22368       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
22369       if (CstElt->getAPIntValue().ult(NumElts))
22370         DemandedElts.setBit(CstElt->getZExtValue());
22371     }
22372     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
22373       // We simplified the vector operand of this extract element. If this
22374       // extract is not dead, visit it again so it is folded properly.
22375       if (N->getOpcode() != ISD::DELETED_NODE)
22376         AddToWorklist(N);
22377       return SDValue(N, 0);
22378     }
22379     APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth);
22380     if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) {
22381       // We simplified the vector operand of this extract element. If this
22382       // extract is not dead, visit it again so it is folded properly.
22383       if (N->getOpcode() != ISD::DELETED_NODE)
22384         AddToWorklist(N);
22385       return SDValue(N, 0);
22386     }
22387   }
22388 
22389   if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N))
22390     return SDValue(N, 0);
22391 
22392   // Everything under here is trying to match an extract of a loaded value.
  // If the result of the load has to be truncated, then it's not necessarily
22394   // profitable.
22395   bool BCNumEltsChanged = false;
22396   EVT ExtVT = VecVT.getVectorElementType();
22397   EVT LVT = ExtVT;
22398   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
22399     return SDValue();
22400 
22401   if (VecOp.getOpcode() == ISD::BITCAST) {
22402     // Don't duplicate a load with other uses.
22403     if (!VecOp.hasOneUse())
22404       return SDValue();
22405 
22406     EVT BCVT = VecOp.getOperand(0).getValueType();
22407     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
22408       return SDValue();
22409     if (NumElts != BCVT.getVectorNumElements())
22410       BCNumEltsChanged = true;
22411     VecOp = VecOp.getOperand(0);
22412     ExtVT = BCVT.getVectorElementType();
22413   }
22414 
22415   // extract (vector load $addr), i --> load $addr + i * size
22416   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
22417       ISD::isNormalLoad(VecOp.getNode()) &&
22418       !Index->hasPredecessor(VecOp.getNode())) {
22419     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
22420     if (VecLoad && VecLoad->isSimple())
22421       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
22422   }
22423 
22424   // Perform only after legalization to ensure build_vector / vector_shuffle
22425   // optimizations have already been done.
22426   if (!LegalOperations || !IndexC)
22427     return SDValue();
22428 
22429   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
22430   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
22431   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
22432   int Elt = IndexC->getZExtValue();
22433   LoadSDNode *LN0 = nullptr;
22434   if (ISD::isNormalLoad(VecOp.getNode())) {
22435     LN0 = cast<LoadSDNode>(VecOp);
22436   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
22437              VecOp.getOperand(0).getValueType() == ExtVT &&
22438              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
22439     // Don't duplicate a load with other uses.
22440     if (!VecOp.hasOneUse())
22441       return SDValue();
22442 
22443     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
22444   }
22445   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
22446     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
22447     // =>
22448     // (load $addr+1*size)
22449 
22450     // Don't duplicate a load with other uses.
22451     if (!VecOp.hasOneUse())
22452       return SDValue();
22453 
22454     // If the bit convert changed the number of elements, it is unsafe
22455     // to examine the mask.
22456     if (BCNumEltsChanged)
22457       return SDValue();
22458 
    // Select the input vector, guarding against an out-of-range extract index.
22460     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
22461     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
22462 
22463     if (VecOp.getOpcode() == ISD::BITCAST) {
22464       // Don't duplicate a load with other uses.
22465       if (!VecOp.hasOneUse())
22466         return SDValue();
22467 
22468       VecOp = VecOp.getOperand(0);
22469     }
22470     if (ISD::isNormalLoad(VecOp.getNode())) {
22471       LN0 = cast<LoadSDNode>(VecOp);
22472       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
22473       Index = DAG.getConstant(Elt, DL, Index.getValueType());
22474     }
22475   } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged &&
22476              VecVT.getVectorElementType() == ScalarVT &&
22477              (!LegalTypes ||
22478               TLI.isTypeLegal(
22479                   VecOp.getOperand(0).getValueType().getVectorElementType()))) {
22480     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0
22481     //      -> extract_vector_elt a, 0
22482     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1
22483     //      -> extract_vector_elt a, 1
22484     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2
22485     //      -> extract_vector_elt b, 0
22486     // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3
22487     //      -> extract_vector_elt b, 1
22488     SDLoc SL(N);
22489     EVT ConcatVT = VecOp.getOperand(0).getValueType();
22490     unsigned ConcatNumElts = ConcatVT.getVectorNumElements();
22491     SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL,
22492                                      Index.getValueType());
22493 
22494     SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts);
22495     SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL,
22496                               ConcatVT.getVectorElementType(),
22497                               ConcatOp, NewIdx);
22498     return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt);
22499   }
22500 
22501   // Make sure we found a non-volatile load and the extractelement is
22502   // the only use.
22503   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
22504     return SDValue();
22505 
22506   // If Idx was -1 above, Elt is going to be -1, so just return undef.
22507   if (Elt == -1)
22508     return DAG.getUNDEF(LVT);
22509 
22510   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
22511 }
22512 
22513 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
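// e.g. (v4i32 build_vector (i32 zext i16:a), (i32 zext i16:b),
//                          (i32 zext i16:c), (i32 zext i16:d))
//   -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))
// on little-endian targets.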
22514 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
22515   // We perform this optimization post type-legalization because
22516   // the type-legalizer often scalarizes integer-promoted vectors.
22517   // Performing this optimization before may create bit-casts which
22518   // will be type-legalized to complex code sequences.
22519   // We perform this optimization only before the operation legalizer because we
22520   // may introduce illegal operations.
22521   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
22522     return SDValue();
22523 
22524   unsigned NumInScalars = N->getNumOperands();
22525   SDLoc DL(N);
22526   EVT VT = N->getValueType(0);
22527 
22528   // Check to see if this is a BUILD_VECTOR of a bunch of values
22529   // which come from any_extend or zero_extend nodes. If so, we can create
22530   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
22531   // optimizations. We do not handle sign-extend because we can't fill the sign
22532   // using shuffles.
22533   EVT SourceType = MVT::Other;
22534   bool AllAnyExt = true;
22535 
22536   for (unsigned i = 0; i != NumInScalars; ++i) {
22537     SDValue In = N->getOperand(i);
22538     // Ignore undef inputs.
22539     if (In.isUndef()) continue;
22540 
22541     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
22542     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
22543 
22544     // Abort if the element is not an extension.
22545     if (!ZeroExt && !AnyExt) {
22546       SourceType = MVT::Other;
22547       break;
22548     }
22549 
22550     // The input is a ZeroExt or AnyExt. Check the original type.
22551     EVT InTy = In.getOperand(0).getValueType();
22552 
22553     // Check that all of the widened source types are the same.
22554     if (SourceType == MVT::Other)
22555       // First time.
22556       SourceType = InTy;
22557     else if (InTy != SourceType) {
      // Multiple incoming types. Abort.
22559       SourceType = MVT::Other;
22560       break;
22561     }
22562 
22563     // Check if all of the extends are ANY_EXTENDs.
22564     AllAnyExt &= AnyExt;
22565   }
22566 
22567   // In order to have valid types, all of the inputs must be extended from the
22568   // same source type and all of the inputs must be any or zero extend.
22569   // Scalar sizes must be a power of two.
22570   EVT OutScalarTy = VT.getScalarType();
22571   bool ValidTypes =
22572       SourceType != MVT::Other &&
22573       llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
22574       llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
22575 
22576   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
22577   // turn into a single shuffle instruction.
22578   if (!ValidTypes)
22579     return SDValue();
22580 
22581   // If we already have a splat buildvector, then don't fold it if it means
22582   // introducing zeros.
22583   if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true))
22584     return SDValue();
22585 
22586   bool isLE = DAG.getDataLayout().isLittleEndian();
22587   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
22588   assert(ElemRatio > 1 && "Invalid element size ratio");
22589   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
22590                                DAG.getConstant(0, DL, SourceType);
22591 
22592   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
22593   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
22594 
22595   // Populate the new build_vector
22596   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
22597     SDValue Cast = N->getOperand(i);
22598     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
22599             Cast.getOpcode() == ISD::ZERO_EXTEND ||
22600             Cast.isUndef()) && "Invalid cast opcode");
22601     SDValue In;
22602     if (Cast.isUndef())
22603       In = DAG.getUNDEF(SourceType);
22604     else
22605       In = Cast->getOperand(0);
22606     unsigned Index = isLE ? (i * ElemRatio) :
22607                             (i * ElemRatio + (ElemRatio - 1));
22608 
22609     assert(Index < Ops.size() && "Invalid index");
22610     Ops[Index] = In;
22611   }
22612 
22613   // The type of the new BUILD_VECTOR node.
22614   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
22615   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
22616          "Invalid vector size");
22617   // Check if the new vector type is legal.
22618   if (!isTypeLegal(VecVT) ||
22619       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
22620        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
22621     return SDValue();
22622 
22623   // Make the new BUILD_VECTOR.
22624   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
22625 
22626   // The new BUILD_VECTOR node has the potential to be further optimized.
22627   AddToWorklist(BV.getNode());
22628   // Bitcast to the desired type.
22629   return DAG.getBitcast(VT, BV);
22630 }
22631 
22632 // Simplify (build_vec (trunc $1)
22633 //                     (trunc (srl $1 half-width))
22634 //                     (trunc (srl $1 (2 * half-width))))
22635 // to (bitcast $1)
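// e.g. (v2i32 build_vector (i32 trunc i64:$1),
//                          (i32 trunc (srl i64:$1, 32)))
//   -> (v2i32 bitcast i64:$1)   (little-endian only)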
22636 SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
22637   assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22638 
22639   EVT VT = N->getValueType(0);
22640 
22641   // Don't run this before LegalizeTypes if VT is legal.
22642   // Targets may have other preferences.
22643   if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
22644     return SDValue();
22645 
22646   // Only for little endian
22647   if (!DAG.getDataLayout().isLittleEndian())
22648     return SDValue();
22649 
22650   SDLoc DL(N);
22651   EVT OutScalarTy = VT.getScalarType();
22652   uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
22653 
  // Only for power-of-two types, so that the bitcast is well-defined.
22655   if (!isPowerOf2_64(ScalarTypeBitsize))
22656     return SDValue();
22657 
22658   unsigned NumInScalars = N->getNumOperands();
22659 
22660   // Look through bitcasts
22661   auto PeekThroughBitcast = [](SDValue Op) {
22662     if (Op.getOpcode() == ISD::BITCAST)
22663       return Op.getOperand(0);
22664     return Op;
22665   };
22666 
  // The source value from which all the parts are extracted.
22668   SDValue Src;
22669   for (unsigned i = 0; i != NumInScalars; ++i) {
22670     SDValue In = PeekThroughBitcast(N->getOperand(i));
22671     // Ignore undef inputs.
22672     if (In.isUndef()) continue;
22673 
22674     if (In.getOpcode() != ISD::TRUNCATE)
22675       return SDValue();
22676 
22677     In = PeekThroughBitcast(In.getOperand(0));
22678 
22679     if (In.getOpcode() != ISD::SRL) {
      // For now, only handle build_vec without shuffling; handle shifts here
      // in the future.
22682       if (i != 0)
22683         return SDValue();
22684 
22685       Src = In;
    } else {
      // In is SRL
      SDValue Part = PeekThroughBitcast(In.getOperand(0));

      if (!Src) {
        Src = Part;
      } else if (Src != Part) {
        // Vector parts do not stem from the same variable
        return SDValue();
      }
22696 
22697       SDValue ShiftAmtVal = In.getOperand(1);
22698       if (!isa<ConstantSDNode>(ShiftAmtVal))
22699         return SDValue();
22700 
22701       uint64_t ShiftAmt = In.getConstantOperandVal(1);
22702 
22703       // The extracted value is not extracted at the right position
22704       if (ShiftAmt != i * ScalarTypeBitsize)
22705         return SDValue();
22706     }
22707   }
22708 
22709   // Only cast if the size is the same
22710   if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
22711     return SDValue();
22712 
22713   return DAG.getBitcast(VT, Src);
22714 }
22715 
22716 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
22717                                            ArrayRef<int> VectorMask,
22718                                            SDValue VecIn1, SDValue VecIn2,
22719                                            unsigned LeftIdx, bool DidSplitVec) {
22720   SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL);
22721 
22722   EVT VT = N->getValueType(0);
22723   EVT InVT1 = VecIn1.getValueType();
22724   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
22725 
22726   unsigned NumElems = VT.getVectorNumElements();
22727   unsigned ShuffleNumElems = NumElems;
22728 
22729   // If we artificially split a vector in two already, then the offsets in the
22730   // operands will all be based off of VecIn1, even those in VecIn2.
22731   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
22732 
22733   uint64_t VTSize = VT.getFixedSizeInBits();
22734   uint64_t InVT1Size = InVT1.getFixedSizeInBits();
22735   uint64_t InVT2Size = InVT2.getFixedSizeInBits();
22736 
22737   assert(InVT2Size <= InVT1Size &&
22738          "Inputs must be sorted to be in non-increasing vector size order.");
22739 
22740   // We can't generate a shuffle node with mismatched input and output types.
22741   // Try to make the types match the type of the output.
22742   if (InVT1 != VT || InVT2 != VT) {
22743     if ((VTSize % InVT1Size == 0) && InVT1 == InVT2) {
22744       // If the output vector length is a multiple of both input lengths,
22745       // we can concatenate them and pad the rest with undefs.
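      // E.g. for a v16i32 output with two v4i32 inputs, NumConcats is 4 and
      // the result is (concat VecIn1, VecIn2, undef, undef).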
22746       unsigned NumConcats = VTSize / InVT1Size;
22747       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
22748       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
22749       ConcatOps[0] = VecIn1;
22750       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
22751       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22752       VecIn2 = SDValue();
22753     } else if (InVT1Size == VTSize * 2) {
22754       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
22755         return SDValue();
22756 
22757       if (!VecIn2.getNode()) {
22758         // If we only have one input vector, and it's twice the size of the
22759         // output, split it in two.
22760         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
22761                              DAG.getVectorIdxConstant(NumElems, DL));
22762         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
22763         // Since we now have shorter input vectors, adjust the offset of the
22764         // second vector's start.
22765         Vec2Offset = NumElems;
22766       } else {
22767         assert(InVT2Size <= InVT1Size &&
22768                "Second input is not going to be larger than the first one.");
22769 
22770         // VecIn1 is wider than the output, and we have another, possibly
22771         // smaller input. Pad the smaller input with undefs, shuffle at the
22772         // input vector width, and extract the output.
22773         // The shuffle type is different than VT, so check legality again.
22774         if (LegalOperations &&
22775             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22776           return SDValue();
22777 
22778         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
22779         // lower it back into a BUILD_VECTOR. So if the inserted type is
22780         // illegal, don't even try.
22781         if (InVT1 != InVT2) {
22782           if (!TLI.isTypeLegal(InVT2))
22783             return SDValue();
22784           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22785                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22786         }
22787         ShuffleNumElems = NumElems * 2;
22788       }
22789     } else if (InVT2Size * 2 == VTSize && InVT1Size == VTSize) {
22790       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
22791       ConcatOps[0] = VecIn2;
22792       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
22793     } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) {
22794       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) ||
22795           !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2))
22796         return SDValue();
      // If the dest vector has fewer than two elements, then using a shuffle
      // and extracting from larger registers will cost even more.
22799       if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode())
22800         return SDValue();
22801       assert(InVT2Size <= InVT1Size &&
22802              "Second input is not going to be larger than the first one.");
22803 
22804       // VecIn1 is wider than the output, and we have another, possibly
22805       // smaller input. Pad the smaller input with undefs, shuffle at the
22806       // input vector width, and extract the output.
22807       // The shuffle type is different than VT, so check legality again.
22808       if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
22809         return SDValue();
22810 
22811       if (InVT1 != InVT2) {
22812         VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
22813                              DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
22814       }
22815       ShuffleNumElems = InVT1Size / VTSize * NumElems;
22816     } else {
22817       // TODO: Support cases where the length mismatch isn't exactly by a
22818       // factor of 2.
22819       // TODO: Move this check upwards, so that if we have bad type
22820       // mismatches, we don't create any DAG nodes.
22821       return SDValue();
22822     }
22823   }
22824 
22825   // Initialize mask to undef.
22826   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
22827 
22828   // Only need to run up to the number of elements actually used, not the
22829   // total number of elements in the shuffle - if we are shuffling a wider
22830   // vector, the high lanes should be set to undef.
22831   for (unsigned i = 0; i != NumElems; ++i) {
22832     if (VectorMask[i] <= 0)
22833       continue;
22834 
22835     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
22836     if (VectorMask[i] == (int)LeftIdx) {
22837       Mask[i] = ExtIndex;
22838     } else if (VectorMask[i] == (int)LeftIdx + 1) {
22839       Mask[i] = Vec2Offset + ExtIndex;
22840     }
22841   }
22842 
  // The types of the input vectors may have changed above.
22844   InVT1 = VecIn1.getValueType();
22845 
22846   // If we already have a VecIn2, it should have the same type as VecIn1.
22847   // If we don't, get an undef/zero vector of the appropriate type.
22848   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
22849   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
22850 
22851   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
22852   if (ShuffleNumElems > NumElems)
22853     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
22854 
22855   return Shuffle;
22856 }
22857 
22858 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
22859   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
22860 
22861   // First, determine where the build vector is not undef.
22862   // TODO: We could extend this to handle zero elements as well as undefs.
22863   int NumBVOps = BV->getNumOperands();
22864   int ZextElt = -1;
22865   for (int i = 0; i != NumBVOps; ++i) {
22866     SDValue Op = BV->getOperand(i);
22867     if (Op.isUndef())
22868       continue;
22869     if (ZextElt == -1)
22870       ZextElt = i;
22871     else
22872       return SDValue();
22873   }
22874   // Bail out if there's no non-undef element.
22875   if (ZextElt == -1)
22876     return SDValue();
22877 
22878   // The build vector contains some number of undef elements and exactly
22879   // one other element. That other element must be a zero-extended scalar
22880   // extracted from a vector at a constant index to turn this into a shuffle.
22881   // Also, require that the build vector does not implicitly truncate/extend
22882   // its elements.
22883   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
22884   EVT VT = BV->getValueType(0);
22885   SDValue Zext = BV->getOperand(ZextElt);
22886   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
22887       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22888       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
22889       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
22890     return SDValue();
22891 
  // The zero-extend width must be a multiple of the source size, and we must
  // be building a vector of the same size as the source of the extract element.
22894   SDValue Extract = Zext.getOperand(0);
22895   unsigned DestSize = Zext.getValueSizeInBits();
22896   unsigned SrcSize = Extract.getValueSizeInBits();
22897   if (DestSize % SrcSize != 0 ||
22898       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
22899     return SDValue();
22900 
22901   // Create a shuffle mask that will combine the extracted element with zeros
22902   // and undefs.
22903   int ZextRatio = DestSize / SrcSize;
22904   int NumMaskElts = NumBVOps * ZextRatio;
22905   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
22906   for (int i = 0; i != NumMaskElts; ++i) {
22907     if (i / ZextRatio == ZextElt) {
22908       // The low bits of the (potentially translated) extracted element map to
22909       // the source vector. The high bits map to zero. We will use a zero vector
22910       // as the 2nd source operand of the shuffle, so use the 1st element of
22911       // that vector (mask value is number-of-elements) for the high bits.
22912       int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
22913       ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
22914                                            : NumMaskElts;
22915     }
22916 
22917     // Undef elements of the build vector remain undef because we initialize
22918     // the shuffle mask with -1.
22919   }
22920 
22921   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
22922   // bitcast (shuffle V, ZeroVec, VectorMask)
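  // For example (illustrative, little-endian), with V: v4i32:
  //   build_vector undef, (i64 zext (i32 extractelt V, 1))
  //     --> bitcast (v4i32 vector_shuffle<u,u,1,4> V, zerovec) to v2i64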
22923   SDLoc DL(BV);
22924   EVT VecVT = Extract.getOperand(0).getValueType();
22925   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
22926   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
22927   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
22928                                              ZeroVec, ShufMask, DAG);
22929   if (!Shuf)
22930     return SDValue();
22931   return DAG.getBitcast(VT, Shuf);
22932 }
22933 
22934 // FIXME: promote to STLExtras.
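// Returns the index of the first occurrence of Val in Range, or -1 if Val is
// not present.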
22935 template <typename R, typename T>
22936 static auto getFirstIndexOf(R &&Range, const T &Val) {
22937   auto I = find(Range, Val);
22938   if (I == Range.end())
22939     return static_cast<decltype(std::distance(Range.begin(), I))>(-1);
22940   return std::distance(Range.begin(), I);
22941 }
22942 
22943 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
22944 // operations. If the types of the vectors we're extracting from allow it,
22945 // turn this into a vector_shuffle node.
22946 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
22947   SDLoc DL(N);
22948   EVT VT = N->getValueType(0);
22949 
22950   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
22951   if (!isTypeLegal(VT))
22952     return SDValue();
22953 
22954   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
22955     return V;
22956 
22957   // May only combine to shuffle after legalize if shuffle is legal.
22958   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
22959     return SDValue();
22960 
22961   bool UsesZeroVector = false;
22962   unsigned NumElems = N->getNumOperands();
22963 
22964   // Record, for each element of the newly built vector, which input vector
22965   // that element comes from. -1 stands for undef, 0 for the zero vector,
22966   // and positive values for the input vectors.
22967   // VectorMask maps each element to its vector number, and VecIn maps vector
22968   // numbers to their initial SDValues.
22969 
22970   SmallVector<int, 8> VectorMask(NumElems, -1);
22971   SmallVector<SDValue, 8> VecIn;
22972   VecIn.push_back(SDValue());
22973 
22974   for (unsigned i = 0; i != NumElems; ++i) {
22975     SDValue Op = N->getOperand(i);
22976 
22977     if (Op.isUndef())
22978       continue;
22979 
22980     // See if we can use a blend with a zero vector.
22981     // TODO: Should we generalize this to a blend with an arbitrary constant
22982     // vector?
22983     if (isNullConstant(Op) || isNullFPConstant(Op)) {
22984       UsesZeroVector = true;
22985       VectorMask[i] = 0;
22986       continue;
22987     }
22988 
22989     // Not an undef or zero. If the input is something other than an
22990     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
22991     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
22992         !isa<ConstantSDNode>(Op.getOperand(1)))
22993       return SDValue();
22994     SDValue ExtractedFromVec = Op.getOperand(0);
22995 
22996     if (ExtractedFromVec.getValueType().isScalableVector())
22997       return SDValue();
22998 
22999     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
23000     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
23001       return SDValue();
23002 
23003     // All inputs must have the same element type as the output.
23004     if (VT.getVectorElementType() !=
23005         ExtractedFromVec.getValueType().getVectorElementType())
23006       return SDValue();
23007 
23008     // Have we seen this input vector before?
23009     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
23010     // a map back from SDValues to numbers isn't worth it.
23011     int Idx = getFirstIndexOf(VecIn, ExtractedFromVec);
23012     if (Idx == -1) { // A new source vector?
23013       Idx = VecIn.size();
23014       VecIn.push_back(ExtractedFromVec);
23015     }
23016 
23017     VectorMask[i] = Idx;
23018   }
23019 
23020   // If we didn't find at least one input vector, bail out.
23021   if (VecIn.size() < 2)
23022     return SDValue();
23023 
  // If all the operands of the BUILD_VECTOR extract from the same vector,
  // split that vector based on the maximum vector access index, and adjust
  // VectorMask and VecIn accordingly.
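  // For example (illustrative, assuming v4i32 is a legal type): a v2i32
  // BUILD_VECTOR of elements 0 and 7 of a v8i32 vector X has MaxIndex = 7,
  // so X is split into two v4i32 halves; element 0 then maps to the low
  // half (vector 1) and element 7 to the high half (vector 2).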
23028   bool DidSplitVec = false;
23029   if (VecIn.size() == 2) {
23030     unsigned MaxIndex = 0;
23031     unsigned NearestPow2 = 0;
23032     SDValue Vec = VecIn.back();
23033     EVT InVT = Vec.getValueType();
23034     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
23035 
23036     for (unsigned i = 0; i < NumElems; i++) {
23037       if (VectorMask[i] <= 0)
23038         continue;
23039       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
23040       IndexVec[i] = Index;
23041       MaxIndex = std::max(MaxIndex, Index);
23042     }
23043 
23044     NearestPow2 = PowerOf2Ceil(MaxIndex);
23045     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
23046         NumElems * 2 < NearestPow2) {
23047       unsigned SplitSize = NearestPow2 / 2;
23048       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
23049                                      InVT.getVectorElementType(), SplitSize);
23050       if (TLI.isTypeLegal(SplitVT) &&
23051           SplitSize + SplitVT.getVectorNumElements() <=
23052               InVT.getVectorNumElements()) {
23053         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23054                                      DAG.getVectorIdxConstant(SplitSize, DL));
23055         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
23056                                      DAG.getVectorIdxConstant(0, DL));
23057         VecIn.pop_back();
23058         VecIn.push_back(VecIn1);
23059         VecIn.push_back(VecIn2);
23060         DidSplitVec = true;
23061 
23062         for (unsigned i = 0; i < NumElems; i++) {
23063           if (VectorMask[i] <= 0)
23064             continue;
23065           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
23066         }
23067       }
23068     }
23069   }
23070 
23071   // Sort input vectors by decreasing vector element count,
23072   // while preserving the relative order of equally-sized vectors.
  // Note that we keep the first "implicit" zero vector as-is.
23074   SmallVector<SDValue, 8> SortedVecIn(VecIn);
23075   llvm::stable_sort(MutableArrayRef<SDValue>(SortedVecIn).drop_front(),
23076                     [](const SDValue &a, const SDValue &b) {
23077                       return a.getValueType().getVectorNumElements() >
23078                              b.getValueType().getVectorNumElements();
23079                     });
23080 
23081   // We now also need to rebuild the VectorMask, because it referenced element
23082   // order in VecIn, and we just sorted them.
23083   for (int &SourceVectorIndex : VectorMask) {
23084     if (SourceVectorIndex <= 0)
23085       continue;
23086     unsigned Idx = getFirstIndexOf(SortedVecIn, VecIn[SourceVectorIndex]);
23087     assert(Idx > 0 && Idx < SortedVecIn.size() &&
23088            VecIn[SourceVectorIndex] == SortedVecIn[Idx] && "Remapping failure");
23089     SourceVectorIndex = Idx;
23090   }
23091 
23092   VecIn = std::move(SortedVecIn);
23093 
  // TODO: Should this fire if some of the input vectors have an illegal type
  // (like it does now), or should we let legalization run its course first?
23096 
23097   // Shuffle phase:
23098   // Take pairs of vectors, and shuffle them so that the result has elements
23099   // from these vectors in the correct places.
23100   // For example, given:
23101   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
23102   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
23103   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
23104   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
23105   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
23106   // We will generate:
23107   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
23108   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
23109   SmallVector<SDValue, 4> Shuffles;
23110   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
23111     unsigned LeftIdx = 2 * In + 1;
23112     SDValue VecLeft = VecIn[LeftIdx];
23113     SDValue VecRight =
23114         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
23115 
23116     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
23117                                                 VecRight, LeftIdx, DidSplitVec))
23118       Shuffles.push_back(Shuffle);
23119     else
23120       return SDValue();
23121   }
23122 
23123   // If we need the zero vector as an "ingredient" in the blend tree, add it
23124   // to the list of shuffles.
23125   if (UsesZeroVector)
23126     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
23127                                       : DAG.getConstantFP(0.0, DL, VT));
23128 
23129   // If we only have one shuffle, we're done.
23130   if (Shuffles.size() == 1)
23131     return Shuffles[0];
23132 
23133   // Update the vector mask to point to the post-shuffle vectors.
23134   for (int &Vec : VectorMask)
23135     if (Vec == 0)
23136       Vec = Shuffles.size() - 1;
23137     else
23138       Vec = (Vec - 1) / 2;
23139 
23140   // More than one shuffle. Generate a binary tree of blends, e.g. if from
23141   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
23142   // generate:
23143   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
23144   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
23145   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
23146   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
23147   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
23148   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
23149   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
23150 
23151   // Make sure the initial size of the shuffle list is even.
23152   if (Shuffles.size() % 2)
23153     Shuffles.push_back(DAG.getUNDEF(VT));
23154 
23155   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
23156     if (CurSize % 2) {
23157       Shuffles[CurSize] = DAG.getUNDEF(VT);
23158       CurSize++;
23159     }
23160     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
23161       int Left = 2 * In;
23162       int Right = 2 * In + 1;
23163       SmallVector<int, 8> Mask(NumElems, -1);
23164       SDValue L = Shuffles[Left];
23165       ArrayRef<int> LMask;
23166       bool IsLeftShuffle = L.getOpcode() == ISD::VECTOR_SHUFFLE &&
23167                            L.use_empty() && L.getOperand(1).isUndef() &&
23168                            L.getOperand(0).getValueType() == L.getValueType();
23169       if (IsLeftShuffle) {
23170         LMask = cast<ShuffleVectorSDNode>(L.getNode())->getMask();
23171         L = L.getOperand(0);
23172       }
23173       SDValue R = Shuffles[Right];
23174       ArrayRef<int> RMask;
23175       bool IsRightShuffle = R.getOpcode() == ISD::VECTOR_SHUFFLE &&
23176                             R.use_empty() && R.getOperand(1).isUndef() &&
23177                             R.getOperand(0).getValueType() == R.getValueType();
23178       if (IsRightShuffle) {
23179         RMask = cast<ShuffleVectorSDNode>(R.getNode())->getMask();
23180         R = R.getOperand(0);
23181       }
23182       for (unsigned I = 0; I != NumElems; ++I) {
23183         if (VectorMask[I] == Left) {
23184           Mask[I] = I;
23185           if (IsLeftShuffle)
23186             Mask[I] = LMask[I];
23187           VectorMask[I] = In;
23188         } else if (VectorMask[I] == Right) {
23189           Mask[I] = I + NumElems;
23190           if (IsRightShuffle)
23191             Mask[I] = RMask[I] + NumElems;
23192           VectorMask[I] = In;
23193         }
23194       }
23195 
23196       Shuffles[In] = DAG.getVectorShuffle(VT, DL, L, R, Mask);
23197     }
23198   }
23199   return Shuffles[0];
23200 }
23201 
// Try to turn a build vector of zero extends of extract vector elts into a
// vector zero extend and possibly an extract subvector.
23204 // TODO: Support sign extend?
23205 // TODO: Allow undef elements?
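//
// For example (illustrative):
//   (v4i32 build_vector (zext (extractelt v8i16:X, 4)),
//                       (zext (extractelt X, 5)),
//                       (zext (extractelt X, 6)),
//                       (zext (extractelt X, 7)))
//     --> (v4i32 zero_extend (v4i16 extract_subvector X, 4))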
23206 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
23207   if (LegalOperations)
23208     return SDValue();
23209 
23210   EVT VT = N->getValueType(0);
23211 
23212   bool FoundZeroExtend = false;
23213   SDValue Op0 = N->getOperand(0);
23214   auto checkElem = [&](SDValue Op) -> int64_t {
23215     unsigned Opc = Op.getOpcode();
23216     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
23217     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
23218         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
23219         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
23220       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
23221         return C->getZExtValue();
23222     return -1;
23223   };
23224 
23225   // Make sure the first element matches
23226   // (zext (extract_vector_elt X, C))
23227   // Offset must be a constant multiple of the
23228   // known-minimum vector length of the result type.
23229   int64_t Offset = checkElem(Op0);
23230   if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0)
23231     return SDValue();
23232 
23233   unsigned NumElems = N->getNumOperands();
23234   SDValue In = Op0.getOperand(0).getOperand(0);
23235   EVT InSVT = In.getValueType().getScalarType();
23236   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
23237 
23238   // Don't create an illegal input type after type legalization.
23239   if (LegalTypes && !TLI.isTypeLegal(InVT))
23240     return SDValue();
23241 
23242   // Ensure all the elements come from the same vector and are adjacent.
23243   for (unsigned i = 1; i != NumElems; ++i) {
23244     if ((Offset + i) != checkElem(N->getOperand(i)))
23245       return SDValue();
23246   }
23247 
23248   SDLoc DL(N);
23249   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
23250                    Op0.getOperand(0).getOperand(1));
23251   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
23252                      VT, In);
23253 }
23254 
// If this is a very simple BUILD_VECTOR whose first element is a ZERO_EXTEND,
// and all other elements are constant zeros, granularize the BUILD_VECTOR's
// element width, absorbing the ZERO_EXTEND and turning it into a constant
// zero op. This pattern can appear during legalization.
//
// NOTE: This can be generalized to allow more than a single
//       non-constant-zero op, UNDEF's, and to be KnownBits-based.
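//
// For example (illustrative, little-endian, assuming i32 and v4i32 are legal):
//   (v2i64 build_vector (i64 zero_extend i32:X), (i64 0))
//     --> (v2i64 bitcast (v4i32 build_vector X, 0, 0, 0))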
23262 SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) {
23263   // Don't run this after legalization. Targets may have other preferences.
23264   if (Level >= AfterLegalizeDAG)
23265     return SDValue();
23266 
23267   // FIXME: support big-endian.
23268   if (DAG.getDataLayout().isBigEndian())
23269     return SDValue();
23270 
23271   EVT VT = N->getValueType(0);
23272   EVT OpVT = N->getOperand(0).getValueType();
23273   assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?");
23274 
23275   EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23276 
23277   if (!TLI.isTypeLegal(OpIntVT) ||
23278       (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT)))
23279     return SDValue();
23280 
23281   unsigned EltBitwidth = VT.getScalarSizeInBits();
23282   // NOTE: the actual width of operands may be wider than that!
23283 
23284   // Analyze all operands of this BUILD_VECTOR. What is the largest number of
23285   // active bits they all have? We'll want to truncate them all to that width.
23286   unsigned ActiveBits = 0;
23287   APInt KnownZeroOps(VT.getVectorNumElements(), 0);
23288   for (auto I : enumerate(N->ops())) {
23289     SDValue Op = I.value();
23290     // FIXME: support UNDEF elements?
23291     if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
23292       unsigned OpActiveBits =
23293           Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits();
23294       if (OpActiveBits == 0) {
23295         KnownZeroOps.setBit(I.index());
23296         continue;
23297       }
23298       // Profitability check: don't allow non-zero constant operands.
23299       return SDValue();
23300     }
23301     // Profitability check: there must only be a single non-zero operand,
23302     // and it must be the first operand of the BUILD_VECTOR.
23303     if (I.index() != 0)
23304       return SDValue();
23305     // The operand must be a zero-extension itself.
23306     // FIXME: this could be generalized to known leading zeros check.
23307     if (Op.getOpcode() != ISD::ZERO_EXTEND)
23308       return SDValue();
23309     unsigned CurrActiveBits =
23310         Op.getOperand(0).getValueSizeInBits().getFixedValue();
23311     assert(!ActiveBits && "Already encountered non-constant-zero operand?");
23312     ActiveBits = CurrActiveBits;
23313     // We want to at least halve the element size.
23314     if (2 * ActiveBits > EltBitwidth)
23315       return SDValue();
23316   }
23317 
23318   // This BUILD_VECTOR must have at least one non-constant-zero operand.
23319   if (ActiveBits == 0)
23320     return SDValue();
23321 
  // We have EltBitwidth bits and the *minimal* chunk size is ActiveBits;
  // into how many chunks can we split our element width?
23324   EVT NewScalarIntVT, NewIntVT;
23325   std::optional<unsigned> Factor;
  // We can split the element into at least two chunks, but not into more
  // than |_ EltBitwidth / ActiveBits _| chunks. Find the largest split
  // factor that divides the element width evenly and for which the
  // resulting types/operations on that chunk width are legal.
23330   assert(2 * ActiveBits <= EltBitwidth &&
23331          "We know that half or less bits of the element are active.");
23332   for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) {
23333     if (EltBitwidth % Scale != 0)
23334       continue;
23335     unsigned ChunkBitwidth = EltBitwidth / Scale;
23336     assert(ChunkBitwidth >= ActiveBits && "As per starting point.");
23337     NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth);
23338     NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT,
23339                                 Scale * N->getNumOperands());
23340     if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) ||
23341         (LegalOperations &&
23342          !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) &&
23343            TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT))))
23344       continue;
23345     Factor = Scale;
23346     break;
23347   }
23348   if (!Factor)
23349     return SDValue();
23350 
23351   SDLoc DL(N);
23352   SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT);
23353 
23354   // Recreate the BUILD_VECTOR, with elements now being Factor times smaller.
23355   SmallVector<SDValue, 16> NewOps;
23356   NewOps.reserve(NewIntVT.getVectorNumElements());
23357   for (auto I : enumerate(N->ops())) {
23358     SDValue Op = I.value();
23359     assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here.");
23360     unsigned SrcOpIdx = I.index();
23361     if (KnownZeroOps[SrcOpIdx]) {
23362       NewOps.append(*Factor, ZeroOp);
23363       continue;
23364     }
23365     Op = DAG.getBitcast(OpIntVT, Op);
23366     Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op);
23367     NewOps.emplace_back(Op);
23368     NewOps.append(*Factor - 1, ZeroOp);
23369   }
23370   assert(NewOps.size() == NewIntVT.getVectorNumElements());
23371   SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps);
23372   NewBV = DAG.getBitcast(VT, NewBV);
23373   return NewBV;
23374 }
23375 
23376 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
23377   EVT VT = N->getValueType(0);
23378 
23379   // A vector built entirely of undefs is undef.
23380   if (ISD::allOperandsUndef(N))
23381     return DAG.getUNDEF(VT);
23382 
23383   // If this is a splat of a bitcast from another vector, change to a
23384   // concat_vector.
23385   // For example:
23386   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
23387   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
23388   //
23389   // If X is a build_vector itself, the concat can become a larger build_vector.
23390   // TODO: Maybe this is useful for non-splat too?
23391   if (!LegalOperations) {
23392     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23393       Splat = peekThroughBitcasts(Splat);
23394       EVT SrcVT = Splat.getValueType();
23395       if (SrcVT.isVector()) {
23396         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
23397         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
23398                                      SrcVT.getVectorElementType(), NumElts);
23399         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
23400           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
23401           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
23402                                        NewVT, Ops);
23403           return DAG.getBitcast(VT, Concat);
23404         }
23405       }
23406     }
23407   }
23408 
  // Check if we can express the BUILD_VECTOR via a subvector extract.
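  // For example (illustrative):
  //   (v2i32 build_vector (extract_vector_elt v4i32:X, 2),
  //                       (extract_vector_elt X, 3))
  //     --> (v2i32 extract_subvector X, 2)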
23410   if (!LegalTypes && (N->getNumOperands() > 1)) {
23411     SDValue Op0 = N->getOperand(0);
23412     auto checkElem = [&](SDValue Op) -> uint64_t {
23413       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
23414           (Op0.getOperand(0) == Op.getOperand(0)))
23415         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
23416           return CNode->getZExtValue();
23417       return -1;
23418     };
23419 
23420     int Offset = checkElem(Op0);
23421     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
23422       if (Offset + i != checkElem(N->getOperand(i))) {
23423         Offset = -1;
23424         break;
23425       }
23426     }
23427 
23428     if ((Offset == 0) &&
23429         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
23430       return Op0.getOperand(0);
23431     if ((Offset != -1) &&
23432         ((Offset % N->getValueType(0).getVectorNumElements()) ==
23433          0)) // IDX must be multiple of output size.
23434       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
23435                          Op0.getOperand(0), Op0.getOperand(1));
23436   }
23437 
23438   if (SDValue V = convertBuildVecZextToZext(N))
23439     return V;
23440 
23441   if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N))
23442     return V;
23443 
23444   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
23445     return V;
23446 
23447   if (SDValue V = reduceBuildVecTruncToBitCast(N))
23448     return V;
23449 
23450   if (SDValue V = reduceBuildVecToShuffle(N))
23451     return V;
23452 
23453   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
23454   // Do this late as some of the above may replace the splat.
23455   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
23456     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
23457       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
23458       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
23459     }
23460 
23461   return SDValue();
23462 }
23463 
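// Fold a CONCAT_VECTORS of bitcast scalars (or undef) into one wider
// BUILD_VECTOR, e.g. (illustrative, assuming v2i32 is not a legal type):
//   concat_vectors (v2i32 bitcast i64:X), (v2i32 bitcast i64:Y)
//     --> v4i32 bitcast (v2i64 build_vector X, Y)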
23464 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
23465   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23466   EVT OpVT = N->getOperand(0).getValueType();
23467 
23468   // If the operands are legal vectors, leave them alone.
23469   if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
23470     return SDValue();
23471 
23472   SDLoc DL(N);
23473   EVT VT = N->getValueType(0);
23474   SmallVector<SDValue, 8> Ops;
23475 
23476   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
23477   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
23478 
23479   // Keep track of what we encounter.
23480   bool AnyInteger = false;
23481   bool AnyFP = false;
23482   for (const SDValue &Op : N->ops()) {
23483     if (ISD::BITCAST == Op.getOpcode() &&
23484         !Op.getOperand(0).getValueType().isVector())
23485       Ops.push_back(Op.getOperand(0));
23486     else if (ISD::UNDEF == Op.getOpcode())
23487       Ops.push_back(ScalarUndef);
23488     else
23489       return SDValue();
23490 
23491     // Note whether we encounter an integer or floating point scalar.
23492     // If it's neither, bail out, it could be something weird like x86mmx.
23493     EVT LastOpVT = Ops.back().getValueType();
23494     if (LastOpVT.isFloatingPoint())
23495       AnyFP = true;
23496     else if (LastOpVT.isInteger())
23497       AnyInteger = true;
23498     else
23499       return SDValue();
23500   }
23501 
23502   // If any of the operands is a floating point scalar bitcast to a vector,
23503   // use floating point types throughout, and bitcast everything.
23504   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
23505   if (AnyFP) {
23506     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
23507     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
23508     if (AnyInteger) {
23509       for (SDValue &Op : Ops) {
23510         if (Op.getValueType() == SVT)
23511           continue;
23512         if (Op.isUndef())
23513           Op = ScalarUndef;
23514         else
23515           Op = DAG.getBitcast(SVT, Op);
23516       }
23517     }
23518   }
23519 
23520   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
23521                                VT.getSizeInBits() / SVT.getSizeInBits());
23522   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
23523 }
23524 
23525 // Attempt to merge nested concat_vectors/undefs.
23526 // Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d))
23527 //  --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d)
23528 static SDValue combineConcatVectorOfConcatVectors(SDNode *N,
23529                                                   SelectionDAG &DAG) {
23530   EVT VT = N->getValueType(0);
23531 
23532   // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types.
23533   EVT SubVT;
23534   SDValue FirstConcat;
23535   for (const SDValue &Op : N->ops()) {
23536     if (Op.isUndef())
23537       continue;
23538     if (Op.getOpcode() != ISD::CONCAT_VECTORS)
23539       return SDValue();
23540     if (!FirstConcat) {
23541       SubVT = Op.getOperand(0).getValueType();
23542       if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
23543         return SDValue();
23544       FirstConcat = Op;
23545       continue;
23546     }
23547     if (SubVT != Op.getOperand(0).getValueType())
23548       return SDValue();
23549   }
23550   assert(FirstConcat && "Concat of all-undefs found");
23551 
23552   SmallVector<SDValue> ConcatOps;
23553   for (const SDValue &Op : N->ops()) {
23554     if (Op.isUndef()) {
23555       ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT));
23556       continue;
23557     }
23558     ConcatOps.append(Op->op_begin(), Op->op_end());
23559   }
23560   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps);
23561 }
23562 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors of the same size as the result, attempt to turn
// this into a legal shuffle.
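//
// For example (illustrative):
//   concat_vectors (v2i32 extract_subvector v4i32:A, 2),
//                  (v2i32 extract_subvector v4i32:B, 0)
//     --> v4i32 vector_shuffle<2,3,4,5> A, B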
23567 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
23568   EVT VT = N->getValueType(0);
23569   EVT OpVT = N->getOperand(0).getValueType();
23570 
23571   // We currently can't generate an appropriate shuffle for a scalable vector.
23572   if (VT.isScalableVector())
23573     return SDValue();
23574 
23575   int NumElts = VT.getVectorNumElements();
23576   int NumOpElts = OpVT.getVectorNumElements();
23577 
23578   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
23579   SmallVector<int, 8> Mask;
23580 
23581   for (SDValue Op : N->ops()) {
23582     Op = peekThroughBitcasts(Op);
23583 
23584     // UNDEF nodes convert to UNDEF shuffle mask values.
23585     if (Op.isUndef()) {
23586       Mask.append((unsigned)NumOpElts, -1);
23587       continue;
23588     }
23589 
23590     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23591       return SDValue();
23592 
23593     // What vector are we extracting the subvector from and at what index?
23594     SDValue ExtVec = Op.getOperand(0);
23595     int ExtIdx = Op.getConstantOperandVal(1);
23596 
23597     // We want the EVT of the original extraction to correctly scale the
23598     // extraction index.
23599     EVT ExtVT = ExtVec.getValueType();
23600     ExtVec = peekThroughBitcasts(ExtVec);
23601 
23602     // UNDEF nodes convert to UNDEF shuffle mask values.
23603     if (ExtVec.isUndef()) {
23604       Mask.append((unsigned)NumOpElts, -1);
23605       continue;
23606     }
23607 
23608     // Ensure that we are extracting a subvector from a vector the same
23609     // size as the result.
23610     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
23611       return SDValue();
23612 
23613     // Scale the subvector index to account for any bitcast.
23614     int NumExtElts = ExtVT.getVectorNumElements();
23615     if (0 == (NumExtElts % NumElts))
23616       ExtIdx /= (NumExtElts / NumElts);
23617     else if (0 == (NumElts % NumExtElts))
23618       ExtIdx *= (NumElts / NumExtElts);
23619     else
23620       return SDValue();
23621 
23622     // At most we can reference 2 inputs in the final shuffle.
23623     if (SV0.isUndef() || SV0 == ExtVec) {
23624       SV0 = ExtVec;
23625       for (int i = 0; i != NumOpElts; ++i)
23626         Mask.push_back(i + ExtIdx);
23627     } else if (SV1.isUndef() || SV1 == ExtVec) {
23628       SV1 = ExtVec;
23629       for (int i = 0; i != NumOpElts; ++i)
23630         Mask.push_back(i + ExtIdx + NumElts);
23631     } else {
23632       return SDValue();
23633     }
23634   }
23635 
23636   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23637   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
23638                                      DAG.getBitcast(VT, SV1), Mask, DAG);
23639 }
23640 
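// Fold concatenations of identical casts into a single wider cast, e.g.
// (illustrative):
//   concat_vectors (v4f32 sint_to_fp v4i32:X), (v4f32 sint_to_fp v4i32:Y)
//     --> v8f32 sint_to_fp (v8i32 concat_vectors X, Y)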
23641 static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
23642   unsigned CastOpcode = N->getOperand(0).getOpcode();
23643   switch (CastOpcode) {
23644   case ISD::SINT_TO_FP:
23645   case ISD::UINT_TO_FP:
23646   case ISD::FP_TO_SINT:
23647   case ISD::FP_TO_UINT:
23648     // TODO: Allow more opcodes?
23649     //  case ISD::BITCAST:
23650     //  case ISD::TRUNCATE:
23651     //  case ISD::ZERO_EXTEND:
23652     //  case ISD::SIGN_EXTEND:
23653     //  case ISD::FP_EXTEND:
23654     break;
23655   default:
23656     return SDValue();
23657   }
23658 
23659   EVT SrcVT = N->getOperand(0).getOperand(0).getValueType();
23660   if (!SrcVT.isVector())
23661     return SDValue();
23662 
23663   // All operands of the concat must be the same kind of cast from the same
23664   // source type.
23665   SmallVector<SDValue, 4> SrcOps;
23666   for (SDValue Op : N->ops()) {
23667     if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() ||
23668         Op.getOperand(0).getValueType() != SrcVT)
23669       return SDValue();
23670     SrcOps.push_back(Op.getOperand(0));
23671   }
23672 
23673   // The wider cast must be supported by the target. This is unusual because
23674   // the operation support type parameter depends on the opcode. In addition,
23675   // check the other type in the cast to make sure this is really legal.
23676   EVT VT = N->getValueType(0);
23677   EVT SrcEltVT = SrcVT.getVectorElementType();
23678   ElementCount NumElts = SrcVT.getVectorElementCount() * N->getNumOperands();
23679   EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts);
23680   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
23681   switch (CastOpcode) {
23682   case ISD::SINT_TO_FP:
23683   case ISD::UINT_TO_FP:
23684     if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) ||
23685         !TLI.isTypeLegal(VT))
23686       return SDValue();
23687     break;
23688   case ISD::FP_TO_SINT:
23689   case ISD::FP_TO_UINT:
23690     if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) ||
23691         !TLI.isTypeLegal(ConcatSrcVT))
23692       return SDValue();
23693     break;
23694   default:
23695     llvm_unreachable("Unexpected cast opcode");
23696   }
23697 
23698   // concat (cast X), (cast Y)... -> cast (concat X, Y...)
23699   SDLoc DL(N);
23700   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps);
23701   return DAG.getNode(CastOpcode, DL, VT, NewConcat);
23702 }
23703 
// See if this is a simple CONCAT_VECTORS with no UNDEF operands, where one of
// the operands is a SHUFFLE_VECTOR and all other operands are also operands
// of that SHUFFLE_VECTOR. If so, create a wider SHUFFLE_VECTOR.
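//
// For example (illustrative):
//   concat_vectors (v4i32 vector_shuffle<0,3,1,2> X, undef), X
//     --> v8i32 vector_shuffle<0,3,1,2,0,1,2,3>
//                 (v8i32 concat_vectors X, undef), undef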
23707 static SDValue combineConcatVectorOfShuffleAndItsOperands(
23708     SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
23709     bool LegalOperations) {
23710   EVT VT = N->getValueType(0);
23711   EVT OpVT = N->getOperand(0).getValueType();
23712   if (VT.isScalableVector())
23713     return SDValue();
23714 
23715   // For now, only allow simple 2-operand concatenations.
23716   if (N->getNumOperands() != 2)
23717     return SDValue();
23718 
23719   // Don't create illegal types/shuffles when not allowed to.
23720   if ((LegalTypes && !TLI.isTypeLegal(VT)) ||
23721       (LegalOperations &&
23722        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)))
23723     return SDValue();
23724 
  // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them,
  // we want to find one that is: (1) a SHUFFLE_VECTOR, (2) only used by us,
  // and (3) such that all operands of the CONCAT_VECTORS are either that
  // SHUFFLE_VECTOR or one of its operands (but not UNDEF!).
  // (4) For now, the SHUFFLE_VECTOR must also be unary.
23730   ShuffleVectorSDNode *SVN = nullptr;
23731   for (SDValue Op : N->ops()) {
23732     if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op);
23733         CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) &&
23734         all_of(N->ops(), [CurSVN](SDValue Op) {
23735           // FIXME: can we allow UNDEF operands?
23736           return !Op.isUndef() &&
23737                  (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op));
23738         })) {
23739       SVN = CurSVN;
23740       break;
23741     }
23742   }
23743   if (!SVN)
23744     return SDValue();
23745 
  // We are going to pad the shuffle operands, so any index that was picking
  // from the second operand must be adjusted.
23748   SmallVector<int, 16> AdjustedMask;
23749   AdjustedMask.reserve(SVN->getMask().size());
23750   assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!");
23751   append_range(AdjustedMask, SVN->getMask());
23752 
23753   // Identity masks for the operands of the (padded) shuffle.
23754   SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements());
23755   MutableArrayRef<int> FirstShufOpIdentityMask =
23756       MutableArrayRef<int>(IdentityMask)
23757           .take_front(OpVT.getVectorNumElements());
23758   MutableArrayRef<int> SecondShufOpIdentityMask =
23759       MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements());
23760   std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0);
23761   std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(),
23762             VT.getVectorNumElements());
23763 
23764   // New combined shuffle mask.
23765   SmallVector<int, 32> Mask;
23766   Mask.reserve(VT.getVectorNumElements());
23767   for (SDValue Op : N->ops()) {
23768     assert(!Op.isUndef() && "Not expecting to concatenate UNDEF.");
23769     if (Op.getNode() == SVN) {
23770       append_range(Mask, AdjustedMask);
23771       continue;
23772     }
23773     if (Op == SVN->getOperand(0)) {
23774       append_range(Mask, FirstShufOpIdentityMask);
23775       continue;
23776     }
23777     if (Op == SVN->getOperand(1)) {
23778       append_range(Mask, SecondShufOpIdentityMask);
23779       continue;
23780     }
23781     llvm_unreachable("Unexpected operand!");
23782   }
23783 
23784   // Don't create illegal shuffle masks.
23785   if (!TLI.isShuffleMaskLegal(Mask, VT))
23786     return SDValue();
23787 
23788   // Pad the shuffle operands with UNDEF.
23789   SDLoc dl(N);
23790   std::array<SDValue, 2> ShufOps;
23791   for (auto I : zip(SVN->ops(), ShufOps)) {
23792     SDValue ShufOp = std::get<0>(I);
23793     SDValue &NewShufOp = std::get<1>(I);
23794     if (ShufOp.isUndef())
23795       NewShufOp = DAG.getUNDEF(VT);
23796     else {
23797       SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(),
23798                                           DAG.getUNDEF(OpVT));
23799       ShufOpParts[0] = ShufOp;
23800       NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts);
23801     }
23802   }
23803   // Finally, create the new wide shuffle.
23804   return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask);
23805 }
23806 
23807 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
23808   // If we only have one input vector, we don't need to do any concatenation.
23809   if (N->getNumOperands() == 1)
23810     return N->getOperand(0);
23811 
23812   // Check if all of the operands are undefs.
23813   EVT VT = N->getValueType(0);
23814   if (ISD::allOperandsUndef(N))
23815     return DAG.getUNDEF(VT);
23816 
23817   // Optimize concat_vectors where all but the first of the vectors are undef.
23818   if (all_of(drop_begin(N->ops()),
23819              [](const SDValue &Op) { return Op.isUndef(); })) {
23820     SDValue In = N->getOperand(0);
23821     assert(In.getValueType().isVector() && "Must concat vectors");
23822 
23823     // If the input is a concat_vectors, just make a larger concat by padding
23824     // with smaller undefs.
23825     //
    // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
    // here could cause an infinite loop. That legalization happens when
    // LegalDAG is true and the input to
    // AArch64TargetLowering::LowerCONCAT_VECTORS() is scalable.
23830     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
23831         !(LegalDAG && In.getValueType().isScalableVector())) {
23832       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
23833       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
23834       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
23835       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
23836     }
23837 
23838     SDValue Scalar = peekThroughOneUseBitcasts(In);
23839 
23840     // concat_vectors(scalar_to_vector(scalar), undef) ->
23841     //     scalar_to_vector(scalar)
23842     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
23843          Scalar.hasOneUse()) {
23844       EVT SVT = Scalar.getValueType().getVectorElementType();
23845       if (SVT == Scalar.getOperand(0).getValueType())
23846         Scalar = Scalar.getOperand(0);
23847     }
23848 
23849     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
23850     if (!Scalar.getValueType().isVector()) {
23851       // If the bitcast type isn't legal, it might be a trunc of a legal type;
23852       // look through the trunc so we can still do the transform:
23853       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
23854       if (Scalar->getOpcode() == ISD::TRUNCATE &&
23855           !TLI.isTypeLegal(Scalar.getValueType()) &&
23856           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
23857         Scalar = Scalar->getOperand(0);
23858 
23859       EVT SclTy = Scalar.getValueType();
23860 
23861       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
23862         return SDValue();
23863 
23864       // Bail out if the vector size is not a multiple of the scalar size.
23865       if (VT.getSizeInBits() % SclTy.getSizeInBits())
23866         return SDValue();
23867 
23868       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
23869       if (VNTNumElms < 2)
23870         return SDValue();
23871 
23872       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
23873       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
23874         return SDValue();
23875 
23876       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
23877       return DAG.getBitcast(VT, Res);
23878     }
23879   }
23880 
23881   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF-only concatenation.
23883   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
23884   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
23885   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
23886     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
23887   };
23888   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
23889     SmallVector<SDValue, 8> Opnds;
23890     EVT SVT = VT.getScalarType();
23891 
23892     EVT MinVT = SVT;
23893     if (!SVT.isFloatingPoint()) {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
23896       bool FoundMinVT = false;
23897       for (const SDValue &Op : N->ops())
23898         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23899           EVT OpSVT = Op.getOperand(0).getValueType();
23900           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
23901           FoundMinVT = true;
23902         }
23903       assert(FoundMinVT && "Concat vector type mismatch");
23904     }
23905 
23906     for (const SDValue &Op : N->ops()) {
23907       EVT OpVT = Op.getValueType();
23908       unsigned NumElts = OpVT.getVectorNumElements();
23909 
23910       if (ISD::UNDEF == Op.getOpcode())
23911         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
23912 
23913       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
23914         if (SVT.isFloatingPoint()) {
23915           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
23916           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
23917         } else {
23918           for (unsigned i = 0; i != NumElts; ++i)
23919             Opnds.push_back(
23920                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
23921         }
23922       }
23923     }
23924 
23925     assert(VT.getVectorNumElements() == Opnds.size() &&
23926            "Concat vector type mismatch");
23927     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
23928   }
23929 
23930   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
23931   // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
23932   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
23933     return V;
23934 
23935   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
23936     // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE.
23937     if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG))
23938       return V;
23939 
23940     // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
23941     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
23942       return V;
23943   }
23944 
23945   if (SDValue V = combineConcatVectorOfCasts(N, DAG))
23946     return V;
23947 
23948   if (SDValue V = combineConcatVectorOfShuffleAndItsOperands(
23949           N, DAG, TLI, LegalTypes, LegalOperations))
23950     return V;
23951 
23952   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
23953   // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR
  // operands and look for CONCAT operations that place the incoming vectors
23955   // at the exact same location.
23956   //
23957   // For scalable vectors, EXTRACT_SUBVECTOR indexes are implicitly scaled.
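  //
  // For example (illustrative):
  //   concat_vectors (v4i32 extract_subvector v8i32:X, 0),
  //                  (v4i32 extract_subvector X, 4)
  //     --> X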
23958   SDValue SingleSource = SDValue();
23959   unsigned PartNumElem =
23960       N->getOperand(0).getValueType().getVectorMinNumElements();
23961 
23962   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
23963     SDValue Op = N->getOperand(i);
23964 
23965     if (Op.isUndef())
23966       continue;
23967 
23968     // Check if this is the identity extract:
23969     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
23970       return SDValue();
23971 
23972     // Find the single incoming vector for the extract_subvector.
23973     if (SingleSource.getNode()) {
23974       if (Op.getOperand(0) != SingleSource)
23975         return SDValue();
23976     } else {
23977       SingleSource = Op.getOperand(0);
23978 
      // Check that the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we cannot
      // optimize it away.
23982       if (SingleSource.getValueType() != N->getValueType(0))
23983         return SDValue();
23984     }
23985 
23986     // Check that we are reading from the identity index.
23987     unsigned IdentityIndex = i * PartNumElem;
23988     if (Op.getConstantOperandAPInt(1) != IdentityIndex)
23989       return SDValue();
23990   }
23991 
23992   if (SingleSource.getNode())
23993     return SingleSource;
23994 
23995   return SDValue();
23996 }
23997 
23998 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
23999 // if the subvector can be sourced for free.
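// For example (illustrative, with SubVT = v4i32 and Index = 4), this returns
// X for (insert_subvector ?, X, 4) and B for (concat_vectors v4i32:A, B).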
24000 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
24001   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
24002       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
24003     return V.getOperand(1);
24004   }
24005   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
24006   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
24007       V.getOperand(0).getValueType() == SubVT &&
24008       (IndexC->getZExtValue() % SubVT.getVectorMinNumElements()) == 0) {
24009     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorMinNumElements();
24010     return V.getOperand(SubIdx);
24011   }
24012   return SDValue();
24013 }
24014 
24015 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
24016                                               SelectionDAG &DAG,
24017                                               bool LegalOperations) {
24018   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24019   SDValue BinOp = Extract->getOperand(0);
24020   unsigned BinOpcode = BinOp.getOpcode();
24021   if (!TLI.isBinOp(BinOpcode) || BinOp->getNumValues() != 1)
24022     return SDValue();
24023 
24024   EVT VecVT = BinOp.getValueType();
24025   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
24026   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
24027     return SDValue();
24028 
24029   SDValue Index = Extract->getOperand(1);
24030   EVT SubVT = Extract->getValueType(0);
24031   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT, LegalOperations))
24032     return SDValue();
24033 
24034   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
24035   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
24036 
24037   // TODO: We could handle the case where only 1 operand is being inserted by
24038   //       creating an extract of the other operand, but that requires checking
24039   //       number of uses and/or costs.
24040   if (!Sub0 || !Sub1)
24041     return SDValue();
24042 
24043   // We are inserting both operands of the wide binop only to extract back
24044   // to the narrow vector size. Eliminate all of the insert/extract:
24045   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
24046   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
24047                      BinOp->getFlags());
24048 }
24049 
/// If we are extracting a subvector produced by a wide binary operator, try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
24052 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
24053                                           bool LegalOperations) {
24054   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
24055   // some of these bailouts with other transforms.
24056 
24057   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG, LegalOperations))
24058     return V;
24059 
24060   // The extract index must be a constant, so we can map it to a concat operand.
24061   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
24062   if (!ExtractIndexC)
24063     return SDValue();
24064 
24065   // We are looking for an optionally bitcasted wide vector binary operator
24066   // feeding an extract subvector.
24067   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24068   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
24069   unsigned BOpcode = BinOp.getOpcode();
24070   if (!TLI.isBinOp(BOpcode) || BinOp->getNumValues() != 1)
24071     return SDValue();
24072 
24073   // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be
24074   // reduced to the unary fneg when it is visited, and we probably want to deal
24075   // with fneg in a target-specific way.
24076   if (BOpcode == ISD::FSUB) {
24077     auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true);
24078     if (C && C->getValueAPF().isNegZero())
24079       return SDValue();
24080   }
24081 
24082   // The binop must be a vector type, so we can extract some fraction of it.
24083   EVT WideBVT = BinOp.getValueType();
24084   // The optimisations below currently assume we are dealing with fixed length
24085   // vectors. It is possible to add support for scalable vectors, but at the
24086   // moment we've done no analysis to prove whether they are profitable or not.
24087   if (!WideBVT.isFixedLengthVector())
24088     return SDValue();
24089 
24090   EVT VT = Extract->getValueType(0);
24091   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
24092   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
24093          "Extract index is not a multiple of the vector length.");
24094 
24095   // Bail out if this is not a proper multiple width extraction.
24096   unsigned WideWidth = WideBVT.getSizeInBits();
24097   unsigned NarrowWidth = VT.getSizeInBits();
24098   if (WideWidth % NarrowWidth != 0)
24099     return SDValue();
24100 
24101   // Bail out if we are extracting a fraction of a single operation. This can
24102   // occur because we potentially looked through a bitcast of the binop.
24103   unsigned NarrowingRatio = WideWidth / NarrowWidth;
24104   unsigned WideNumElts = WideBVT.getVectorNumElements();
24105   if (WideNumElts % NarrowingRatio != 0)
24106     return SDValue();
24107 
24108   // Bail out if the target does not support a narrower version of the binop.
24109   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
24110                                    WideNumElts / NarrowingRatio);
24111   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT,
24112                                              LegalOperations))
24113     return SDValue();
24114 
24115   // If extraction is cheap, we don't need to look at the binop operands
24116   // for concat ops. The narrow binop alone makes this transform profitable.
24117   // We can't just reuse the original extract index operand because we may have
24118   // bitcasted.
24119   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
24120   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
24121   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
24122       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
24123     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
24124     SDLoc DL(Extract);
24125     SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24126     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24127                             BinOp.getOperand(0), NewExtIndex);
24128     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24129                             BinOp.getOperand(1), NewExtIndex);
24130     SDValue NarrowBinOp =
24131         DAG.getNode(BOpcode, DL, NarrowBVT, X, Y, BinOp->getFlags());
24132     return DAG.getBitcast(VT, NarrowBinOp);
24133   }
24134 
24135   // Only handle the case where we are doubling and then halving. A larger ratio
24136   // may require more than two narrow binops to replace the wide binop.
24137   if (NarrowingRatio != 2)
24138     return SDValue();
24139 
24140   // TODO: The motivating case for this transform is an x86 AVX1 target. That
24141   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
24142   // flavors, but no other 256-bit integer support. This could be extended to
24143   // handle any binop, but that may require fixing/adding other folds to avoid
24144   // codegen regressions.
24145   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
24146     return SDValue();
24147 
24148   // We need at least one concatenation operation of a binop operand to make
24149   // this transform worthwhile. The concat must double the input vector sizes.
24150   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
24151     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
24152       return V.getOperand(ConcatOpNum);
24153     return SDValue();
24154   };
24155   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
24156   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
24157 
24158   if (SubVecL || SubVecR) {
24159     // If a binop operand was not the result of a concat, we must extract a
24160     // half-sized operand for our new narrow binop:
24161     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
24162     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
24163     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
24164     SDLoc DL(Extract);
24165     SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL);
24166     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
24167                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24168                                       BinOp.getOperand(0), IndexC);
24169 
24170     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
24171                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
24172                                       BinOp.getOperand(1), IndexC);
24173 
24174     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
24175     return DAG.getBitcast(VT, NarrowBinOp);
24176   }
24177 
24178   return SDValue();
24179 }
24180 
24181 /// If we are extracting a subvector from a wide vector load, convert to a
24182 /// narrow load to eliminate the extraction:
24183 /// (extract_subvector (load wide vector)) --> (load narrow vector)
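/// For example (little-endian, illustrative):
///   (v4i32 (extract_subvector (v8i32 (load addr)), 4))
///     --> (v4i32 (load addr + 16 bytes))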
24184 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
24185   // TODO: Add support for big-endian. The offset calculation must be adjusted.
24186   if (DAG.getDataLayout().isBigEndian())
24187     return SDValue();
24188 
24189   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
24190   if (!Ld || Ld->getExtensionType() || !Ld->isSimple())
24191     return SDValue();
24192 
  EVT VT = Extract->getValueType(0);
24195 
24196   // We can only create byte sized loads.
24197   if (!VT.isByteSized())
24198     return SDValue();
24199 
24200   unsigned Index = Extract->getConstantOperandVal(1);
24201   unsigned NumElts = VT.getVectorMinNumElements();
24202   // A fixed length vector being extracted from a scalable vector
24203   // may not be any *smaller* than the scalable one.
24204   if (Index == 0 && NumElts >= Ld->getValueType(0).getVectorMinNumElements())
24205     return SDValue();
24206 
24207   // The definition of EXTRACT_SUBVECTOR states that the index must be a
24208   // multiple of the minimum number of elements in the result type.
24209   assert(Index % NumElts == 0 && "The extract subvector index is not a "
24210                                  "multiple of the result's element count");
24211 
24212   // It's fine to use TypeSize here as we know the offset will not be negative.
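  // E.g. extracting nxv2i32 at element index 2 from an nxv4i32 load yields a
  // scalable offset of (vscale x 8) bytes, while the fixed-length case is a
  // plain byte offset.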
24213   TypeSize Offset = VT.getStoreSize() * (Index / NumElts);
24214 
  // Allow targets to opt-out.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
    return SDValue();
24218 
24219   // The narrow load will be offset from the base address of the old load if
24220   // we are extracting from something besides index 0 (little-endian).
24221   SDLoc DL(Extract);
24222 
24223   // TODO: Use "BaseIndexOffset" to make this more effective.
24224   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
24225 
24226   uint64_t StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
24227   MachineFunction &MF = DAG.getMachineFunction();
24228   MachineMemOperand *MMO;
24229   if (Offset.isScalable()) {
24230     MachinePointerInfo MPI =
24231         MachinePointerInfo(Ld->getPointerInfo().getAddrSpace());
24232     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize);
24233   } else
24234     MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(),
24235                                   StoreSize);
24236 
24237   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
24238   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
24239   return NewLd;
24240 }
24241 
/// Given EXTRACT_SUBVECTOR(VECTOR_SHUFFLE(Op0, Op1, Mask)),
/// try to produce VECTOR_SHUFFLE(EXTRACT_SUBVECTOR(Op?, ?),
///                               EXTRACT_SUBVECTOR(Op?, ?),
///                               Mask')
24246 /// iff it is legal and profitable to do so. Notably, the trimmed mask
24247 /// (containing only the elements that are extracted)
24248 /// must reference at most two subvectors.
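/// For example (illustrative):
///   v2f64 extract_subvector (v4f64 shuffle<0,4,1,5> A, B), 2
///     --> v2f64 shuffle<1,3> (extract_subvector A, 0),
///                            (extract_subvector B, 0)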
24249 static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
24250                                                      SelectionDAG &DAG,
24251                                                      const TargetLowering &TLI,
24252                                                      bool LegalOperations) {
24253   assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
24254          "Must only be called on EXTRACT_SUBVECTOR's");
24255 
24256   SDValue N0 = N->getOperand(0);
24257 
24258   // Only deal with non-scalable vectors.
24259   EVT NarrowVT = N->getValueType(0);
24260   EVT WideVT = N0.getValueType();
24261   if (!NarrowVT.isFixedLengthVector() || !WideVT.isFixedLengthVector())
24262     return SDValue();
24263 
24264   // The operand must be a shufflevector.
24265   auto *WideShuffleVector = dyn_cast<ShuffleVectorSDNode>(N0);
24266   if (!WideShuffleVector)
24267     return SDValue();
24268 
  // The old shuffle needs to go away.
24270   if (!WideShuffleVector->hasOneUse())
24271     return SDValue();
24272 
24273   // And the narrow shufflevector that we'll form must be legal.
24274   if (LegalOperations &&
24275       !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, NarrowVT))
24276     return SDValue();
24277 
24278   uint64_t FirstExtractedEltIdx = N->getConstantOperandVal(1);
24279   int NumEltsExtracted = NarrowVT.getVectorNumElements();
24280   assert((FirstExtractedEltIdx % NumEltsExtracted) == 0 &&
24281          "Extract index is not a multiple of the output vector length.");
24282 
24283   int WideNumElts = WideVT.getVectorNumElements();
24284 
24285   SmallVector<int, 16> NewMask;
24286   NewMask.reserve(NumEltsExtracted);
24287   SmallSetVector<std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>, 2>
24288       DemandedSubvectors;
24289 
  // Try to decode the wide mask into a narrow mask that draws from at most
  // two subvectors.
24291   for (int M : WideShuffleVector->getMask().slice(FirstExtractedEltIdx,
24292                                                   NumEltsExtracted)) {
24293     assert((M >= -1) && (M < (2 * WideNumElts)) &&
24294            "Out-of-bounds shuffle mask?");
24295 
24296     if (M < 0) {
24297       // Does not depend on operands, does not require adjustment.
24298       NewMask.emplace_back(M);
24299       continue;
24300     }
24301 
24302     // From which operand of the shuffle does this shuffle mask element pick?
24303     int WideShufOpIdx = M / WideNumElts;
24304     // Which element of that operand is picked?
24305     int OpEltIdx = M % WideNumElts;
24306 
24307     assert((OpEltIdx + WideShufOpIdx * WideNumElts) == M &&
24308            "Shuffle mask vector decomposition failure.");
24309 
24310     // And which NumEltsExtracted-sized subvector of that operand is that?
24311     int OpSubvecIdx = OpEltIdx / NumEltsExtracted;
24312     // And which element within that subvector of that operand is that?
24313     int OpEltIdxInSubvec = OpEltIdx % NumEltsExtracted;
24314 
24315     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted) == OpEltIdx &&
24316            "Shuffle mask subvector decomposition failure.");
24317 
24318     assert((OpEltIdxInSubvec + OpSubvecIdx * NumEltsExtracted +
24319             WideShufOpIdx * WideNumElts) == M &&
24320            "Shuffle mask full decomposition failure.");
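    // For example, with WideNumElts == 8 and NumEltsExtracted == 2, mask
    // element M == 13 decomposes as WideShufOpIdx == 1, OpEltIdx == 5,
    // OpSubvecIdx == 2, and OpEltIdxInSubvec == 1.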
24321 
24322     SDValue Op = WideShuffleVector->getOperand(WideShufOpIdx);
24323 
24324     if (Op.isUndef()) {
      // Picking from an undef operand. Let's adjust the mask instead.
24326       NewMask.emplace_back(-1);
24327       continue;
24328     }
24329 
24330     const std::pair<SDValue, int> DemandedSubvector =
24331         std::make_pair(Op, OpSubvecIdx);
24332 
24333     if (DemandedSubvectors.insert(DemandedSubvector)) {
24334       if (DemandedSubvectors.size() > 2)
24335         return SDValue(); // We can't handle more than two subvectors.
24336       // How many elements into the WideVT does this subvector start?
24337       int Index = NumEltsExtracted * OpSubvecIdx;
24338       // Bail out if the extraction isn't going to be cheap.
24339       if (!TLI.isExtractSubvectorCheap(NarrowVT, WideVT, Index))
24340         return SDValue();
24341     }
24342 
24343     // Ok, but from which operand of the new shuffle will this element pick?
24344     int NewOpIdx =
24345         getFirstIndexOf(DemandedSubvectors.getArrayRef(), DemandedSubvector);
24346     assert((NewOpIdx == 0 || NewOpIdx == 1) && "Unexpected operand index.");
24347 
24348     int AdjM = OpEltIdxInSubvec + NewOpIdx * NumEltsExtracted;
24349     NewMask.emplace_back(AdjM);
24350   }
24351   assert(NewMask.size() == (unsigned)NumEltsExtracted && "Produced bad mask.");
24352   assert(DemandedSubvectors.size() <= 2 &&
24353          "Should have ended up demanding at most two subvectors.");
24354 
24355   // Did we discover that the shuffle does not actually depend on operands?
24356   if (DemandedSubvectors.empty())
24357     return DAG.getUNDEF(NarrowVT);
24358 
24359   // Profitability check: only deal with extractions from the first subvector
24360   // unless the mask becomes an identity mask.
24361   if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) ||
24362       any_of(NewMask, [](int M) { return M < 0; }))
24363     for (auto &DemandedSubvector : DemandedSubvectors)
24364       if (DemandedSubvector.second != 0)
24365         return SDValue();
24366 
  // We still perform the exact same EXTRACT_SUBVECTOR, just on different
  // operand[s]/index[es], so there is no point in checking for its legality.
24369 
24370   // Do not turn a legal shuffle into an illegal one.
24371   if (TLI.isShuffleMaskLegal(WideShuffleVector->getMask(), WideVT) &&
24372       !TLI.isShuffleMaskLegal(NewMask, NarrowVT))
24373     return SDValue();
24374 
24375   SDLoc DL(N);
24376 
24377   SmallVector<SDValue, 2> NewOps;
24378   for (const std::pair<SDValue /*Op*/, int /*SubvectorIndex*/>
24379            &DemandedSubvector : DemandedSubvectors) {
24380     // How many elements into the WideVT does this subvector start?
24381     int Index = NumEltsExtracted * DemandedSubvector.second;
24382     SDValue IndexC = DAG.getVectorIdxConstant(Index, DL);
24383     NewOps.emplace_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowVT,
24384                                     DemandedSubvector.first, IndexC));
24385   }
24386   assert((NewOps.size() == 1 || NewOps.size() == 2) &&
24387          "Should end up with either one or two ops");
24388 
24389   // If we ended up with only one operand, pad with an undef.
24390   if (NewOps.size() == 1)
24391     NewOps.emplace_back(DAG.getUNDEF(NarrowVT));
24392 
24393   return DAG.getVectorShuffle(NarrowVT, DL, NewOps[0], NewOps[1], NewMask);
24394 }
24395 
24396 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
24397   EVT NVT = N->getValueType(0);
24398   SDValue V = N->getOperand(0);
24399   uint64_t ExtIdx = N->getConstantOperandVal(1);
24400 
24401   // Extract from UNDEF is UNDEF.
24402   if (V.isUndef())
24403     return DAG.getUNDEF(NVT);
24404 
24405   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
24406     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
24407       return NarrowLoad;
24408 
24409   // Combine an extract of an extract into a single extract_subvector.
24410   // ext (ext X, C), 0 --> ext X, C
24411   if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) {
24412     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
24413                                     V.getConstantOperandVal(1)) &&
24414         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
24415       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
24416                          V.getOperand(1));
24417     }
24418   }
24419 
  // ty1 extract_vector(ty2 splat(V)) -> ty1 splat(V)
24421   if (V.getOpcode() == ISD::SPLAT_VECTOR)
24422     if (DAG.isConstantValueOfAnyType(V.getOperand(0)) || V.hasOneUse())
24423       if (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, NVT))
24424         return DAG.getSplatVector(NVT, SDLoc(N), V.getOperand(0));
24425 
24426   // Try to move vector bitcast after extract_subv by scaling extraction index:
24427   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
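  // E.g. (illustrative):
  //   v4i32 extract_subv (v8i32 bitcast (v4i64 X)), 4
  //     --> v4i32 bitcast (v2i64 extract_subv X, 2)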
24428   if (V.getOpcode() == ISD::BITCAST &&
24429       V.getOperand(0).getValueType().isVector() &&
24430       (!LegalOperations || TLI.isOperationLegal(ISD::BITCAST, NVT))) {
24431     SDValue SrcOp = V.getOperand(0);
24432     EVT SrcVT = SrcOp.getValueType();
24433     unsigned SrcNumElts = SrcVT.getVectorMinNumElements();
24434     unsigned DestNumElts = V.getValueType().getVectorMinNumElements();
24435     if ((SrcNumElts % DestNumElts) == 0) {
24436       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
24437       ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio;
24438       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
24439                                       NewExtEC);
24440       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24441         SDLoc DL(N);
24442         SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL);
24443         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24444                                          V.getOperand(0), NewIndex);
24445         return DAG.getBitcast(NVT, NewExtract);
24446       }
24447     }
24448     if ((DestNumElts % SrcNumElts) == 0) {
24449       unsigned DestSrcRatio = DestNumElts / SrcNumElts;
24450       if (NVT.getVectorElementCount().isKnownMultipleOf(DestSrcRatio)) {
24451         ElementCount NewExtEC =
24452             NVT.getVectorElementCount().divideCoefficientBy(DestSrcRatio);
24453         EVT ScalarVT = SrcVT.getScalarType();
24454         if ((ExtIdx % DestSrcRatio) == 0) {
24455           SDLoc DL(N);
24456           unsigned IndexValScaled = ExtIdx / DestSrcRatio;
24457           EVT NewExtVT =
24458               EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC);
24459           if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
24460             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24461             SDValue NewExtract =
24462                 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
24463                             V.getOperand(0), NewIndex);
24464             return DAG.getBitcast(NVT, NewExtract);
24465           }
24466           if (NewExtEC.isScalar() &&
24467               TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) {
24468             SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL);
24469             SDValue NewExtract =
24470                 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT,
24471                             V.getOperand(0), NewIndex);
24472             return DAG.getBitcast(NVT, NewExtract);
24473           }
24474         }
24475       }
24476     }
24477   }
24478 
24479   if (V.getOpcode() == ISD::CONCAT_VECTORS) {
24480     unsigned ExtNumElts = NVT.getVectorMinNumElements();
24481     EVT ConcatSrcVT = V.getOperand(0).getValueType();
24482     assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() &&
24483            "Concat and extract subvector do not change element type");
24484     assert((ExtIdx % ExtNumElts) == 0 &&
24485            "Extract index is not a multiple of the input vector length.");
24486 
24487     unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements();
24488     unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts;
24489 
24490     // If the concatenated source types match this extract, it's a direct
24491     // simplification:
24492     // extract_subvec (concat V1, V2, ...), i --> Vi
24493     if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount())
24494       return V.getOperand(ConcatOpIdx);
24495 
24496     // If the concatenated source vectors are a multiple length of this extract,
24497     // then extract a fraction of one of those source vectors directly from a
24498     // concat operand. Example:
    //   v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y)), 14 -->
    //   v2i8 extract_subvec v8i8 Y, 6
24501     if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() &&
24502         ConcatSrcNumElts % ExtNumElts == 0) {
24503       SDLoc DL(N);
24504       unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts;
24505       assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts &&
24506              "Trying to extract from >1 concat operand?");
24507       assert(NewExtIdx % ExtNumElts == 0 &&
24508              "Extract index is not a multiple of the input vector length.");
24509       SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL);
24510       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT,
24511                          V.getOperand(ConcatOpIdx), NewIndexC);
24512     }
24513   }
24514 
24515   if (SDValue V =
24516           foldExtractSubvectorFromShuffleVector(N, DAG, TLI, LegalOperations))
24517     return V;
24518 
24519   V = peekThroughBitcasts(V);
24520 
  // If the input is a build vector, try to make a smaller build vector.
24522   if (V.getOpcode() == ISD::BUILD_VECTOR) {
24523     EVT InVT = V.getValueType();
24524     unsigned ExtractSize = NVT.getSizeInBits();
24525     unsigned EltSize = InVT.getScalarSizeInBits();
24526     // Only do this if we won't split any elements.
24527     if (ExtractSize % EltSize == 0) {
24528       unsigned NumElems = ExtractSize / EltSize;
24529       EVT EltVT = InVT.getVectorElementType();
24530       EVT ExtractVT =
24531           NumElems == 1 ? EltVT
24532                         : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
24533       if ((Level < AfterLegalizeDAG ||
24534            (NumElems == 1 ||
24535             TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
24536           (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
24537         unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize;
24538 
24539         if (NumElems == 1) {
24540           SDValue Src = V->getOperand(IdxVal);
24541           if (EltVT != Src.getValueType())
24542             Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
24543           return DAG.getBitcast(NVT, Src);
24544         }
24545 
24546         // Extract the pieces from the original build_vector.
24547         SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
24548                                               V->ops().slice(IdxVal, NumElems));
24549         return DAG.getBitcast(NVT, BuildVec);
24550       }
24551     }
24552   }
24553 
24554   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only the simple case where the vector being inserted and the
    // vector being extracted are of the same size.
24557     EVT SmallVT = V.getOperand(1).getValueType();
24558     if (!NVT.bitsEq(SmallVT))
24559       return SDValue();
24560 
24561     // Combine:
24562     //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
24563     // Into:
24564     //    indices are equal or bit offsets are equal => V1
24565     //    otherwise => (extract_subvec V1, ExtIdx)
24566     uint64_t InsIdx = V.getConstantOperandVal(2);
24567     if (InsIdx * SmallVT.getScalarSizeInBits() ==
24568         ExtIdx * NVT.getScalarSizeInBits()) {
24569       if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT))
24570         return SDValue();
24571 
24572       return DAG.getBitcast(NVT, V.getOperand(1));
24573     }
24574     return DAG.getNode(
24575         ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
24576         DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
24577         N->getOperand(1));
24578   }
24579 
24580   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG, LegalOperations))
24581     return NarrowBOp;
24582 
24583   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
24584     return SDValue(N, 0);
24585 
24586   return SDValue();
24587 }
24588 
24589 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
24590 /// followed by concatenation. Narrow vector ops may have better performance
24591 /// than wide ops, and this can unlock further narrowing of other vector ops.
24592 /// Targets can invert this transform later if it is not profitable.
24593 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
24594                                          SelectionDAG &DAG) {
24595   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
24596   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
24597       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
24598       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
24599     return SDValue();
24600 
24601   // Split the wide shuffle mask into halves. Any mask element that is accessing
24602   // operand 1 is offset down to account for narrowing of the vectors.
24603   ArrayRef<int> Mask = Shuf->getMask();
24604   EVT VT = Shuf->getValueType(0);
24605   unsigned NumElts = VT.getVectorNumElements();
24606   unsigned HalfNumElts = NumElts / 2;
24607   SmallVector<int, 16> Mask0(HalfNumElts, -1);
24608   SmallVector<int, 16> Mask1(HalfNumElts, -1);
24609   for (unsigned i = 0; i != NumElts; ++i) {
24610     if (Mask[i] == -1)
24611       continue;
24612     // If we reference the upper (undef) subvector then the element is undef.
24613     if ((Mask[i] % NumElts) >= HalfNumElts)
24614       continue;
24615     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
24616     if (i < HalfNumElts)
24617       Mask0[i] = M;
24618     else
24619       Mask1[i - HalfNumElts] = M;
24620   }
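  // For example, a v8i32 mask <0,8,1,9,2,10,3,11> (X is elements 0-3, Y is
  // elements 8-11) splits into Mask0 = <0,4,1,5> and Mask1 = <2,6,3,7>.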
24621 
24622   // Ask the target if this is a valid transform.
24623   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
24624   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
24625                                 HalfNumElts);
24626   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
24627       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
24628     return SDValue();
24629 
24630   // shuffle (concat X, undef), (concat Y, undef), Mask -->
24631   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
24632   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
24633   SDLoc DL(Shuf);
24634   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
24635   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
24636   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
24637 }
24638 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
// turn a shuffle of a single concat into a simpler shuffle followed by a
// concat.
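// E.g. with two elements per concat operand (illustrative):
//   shuffle (concat A, B), (concat C, D), <4,5,0,1> --> concat C, A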
24641 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
24642   EVT VT = N->getValueType(0);
24643   unsigned NumElts = VT.getVectorNumElements();
24644 
24645   SDValue N0 = N->getOperand(0);
24646   SDValue N1 = N->getOperand(1);
24647   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
24648   ArrayRef<int> Mask = SVN->getMask();
24649 
24650   SmallVector<SDValue, 4> Ops;
24651   EVT ConcatVT = N0.getOperand(0).getValueType();
24652   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
24653   unsigned NumConcats = NumElts / NumElemsPerConcat;
24654 
24655   auto IsUndefMaskElt = [](int i) { return i == -1; };
24656 
24657   // Special case: shuffle(concat(A,B)) can be more efficiently represented
24658   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
24659   // half vector elements.
24660   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
24661       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
24662                    IsUndefMaskElt)) {
24663     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
24664                               N0.getOperand(1),
24665                               Mask.slice(0, NumElemsPerConcat));
24666     N1 = DAG.getUNDEF(ConcatVT);
24667     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
24668   }
24669 
  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector.
24672   for (unsigned I = 0; I != NumConcats; ++I) {
24673     unsigned Begin = I * NumElemsPerConcat;
24674     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
24675 
24676     // Make sure we're dealing with a copy.
24677     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
24678       Ops.push_back(DAG.getUNDEF(ConcatVT));
24679       continue;
24680     }
24681 
24682     int OpIdx = -1;
24683     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
24684       if (IsUndefMaskElt(SubMask[i]))
24685         continue;
24686       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
24687         return SDValue();
24688       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
24689       if (0 <= OpIdx && EltOpIdx != OpIdx)
24690         return SDValue();
24691       OpIdx = EltOpIdx;
24692     }
24693     assert(0 <= OpIdx && "Unknown concat_vectors op");
24694 
24695     if (OpIdx < (int)N0.getNumOperands())
24696       Ops.push_back(N0.getOperand(OpIdx));
24697     else
24698       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
24699   }
24700 
24701   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
24702 }
24703 
24704 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
24705 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
24706 //
24707 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
24708 // a simplification in some sense, but it isn't appropriate in general: some
24709 // BUILD_VECTORs are substantially cheaper than others. The general case
24710 // of a BUILD_VECTOR requires inserting each element individually (or
24711 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
24712 // all constants is a single constant pool load.  A BUILD_VECTOR where each
24713 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
24714 // are undef lowers to a small number of element insertions.
24715 //
24716 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
24717 // We don't fold shuffles where one side is a non-zero constant, and we don't
24718 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
24719 // non-constant operands. This seems to work out reasonably well in practice.
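//
// E.g. (illustrative, subject to the heuristics above):
//   shuffle (build_vector a, b), (build_vector c, d), <0,3>
//     --> build_vector a, d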
24720 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
24721                                        SelectionDAG &DAG,
24722                                        const TargetLowering &TLI) {
24723   EVT VT = SVN->getValueType(0);
24724   unsigned NumElts = VT.getVectorNumElements();
24725   SDValue N0 = SVN->getOperand(0);
24726   SDValue N1 = SVN->getOperand(1);
24727 
24728   if (!N0->hasOneUse())
24729     return SDValue();
24730 
  // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
  // discussed above.
24733   if (!N1.isUndef()) {
24734     if (!N1->hasOneUse())
24735       return SDValue();
24736 
24737     bool N0AnyConst = isAnyConstantBuildVector(N0);
24738     bool N1AnyConst = isAnyConstantBuildVector(N1);
24739     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
24740       return SDValue();
24741     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
24742       return SDValue();
24743   }
24744 
24745   // If both inputs are splats of the same value then we can safely merge this
24746   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
24747   bool IsSplat = false;
24748   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
24749   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
24750   if (BV0 && BV1)
24751     if (SDValue Splat0 = BV0->getSplatValue())
24752       IsSplat = (Splat0 == BV1->getSplatValue());
24753 
24754   SmallVector<SDValue, 8> Ops;
24755   SmallSet<SDValue, 16> DuplicateOps;
24756   for (int M : SVN->getMask()) {
24757     SDValue Op = DAG.getUNDEF(VT.getScalarType());
24758     if (M >= 0) {
24759       int Idx = M < (int)NumElts ? M : M - NumElts;
24760       SDValue &S = (M < (int)NumElts ? N0 : N1);
24761       if (S.getOpcode() == ISD::BUILD_VECTOR) {
24762         Op = S.getOperand(Idx);
24763       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
24764         SDValue Op0 = S.getOperand(0);
24765         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
24766       } else {
24767         // Operand can't be combined - bail out.
24768         return SDValue();
24769       }
24770     }
24771 
24772     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
24773     // generating a splat; semantically, this is fine, but it's likely to
24774     // generate low-quality code if the target can't reconstruct an appropriate
24775     // shuffle.
24776     if (!Op.isUndef() && !isIntOrFPConstant(Op))
24777       if (!IsSplat && !DuplicateOps.insert(Op).second)
24778         return SDValue();
24779 
24780     Ops.push_back(Op);
24781   }
24782 
  // BUILD_VECTOR requires all inputs to be of the same type; find the
  // maximum type and extend them all.
24785   EVT SVT = VT.getScalarType();
24786   if (SVT.isInteger())
24787     for (SDValue &Op : Ops)
24788       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
24789   if (SVT != VT.getScalarType())
24790     for (SDValue &Op : Ops)
24791       Op = Op.isUndef() ? DAG.getUNDEF(SVT)
24792                         : (TLI.isZExtFree(Op.getValueType(), SVT)
24793                                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
24794                                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT));
24795   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
24796 }
24797 
24798 // Match shuffles that can be converted to *_vector_extend_in_reg.
24799 // This is often generated during legalization.
24800 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)),
24801 // and returns the EVT to which the extension should be performed.
24802 // NOTE: this assumes that the src is the first operand of the shuffle.
24803 static std::optional<EVT> canCombineShuffleToExtendVectorInreg(
24804     unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match,
24805     SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes,
24806     bool LegalOperations) {
24807   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24808 
24809   // TODO Add support for big-endian when we have a test case.
24810   if (!VT.isInteger() || IsBigEndian)
24811     return std::nullopt;
24812 
24813   unsigned NumElts = VT.getVectorNumElements();
24814   unsigned EltSizeInBits = VT.getScalarSizeInBits();
24815 
24816   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
24817   // power-of-2 extensions as they are the most likely.
  // FIXME: should try the Scale == NumElts case too.
24819   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
24820     // The vector width must be a multiple of Scale.
24821     if (NumElts % Scale != 0)
24822       continue;
24823 
24824     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
24825     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
24826 
24827     if ((LegalTypes && !TLI.isTypeLegal(OutVT)) ||
24828         (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT)))
24829       continue;
24830 
24831     if (Match(Scale))
24832       return OutVT;
24833   }
24834 
24835   return std::nullopt;
24836 }
24837 
24838 // Match shuffles that can be converted to any_vector_extend_in_reg.
24839 // This is often generated during legalization.
24840 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
24841 static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN,
24842                                                     SelectionDAG &DAG,
24843                                                     const TargetLowering &TLI,
24844                                                     bool LegalOperations) {
24845   EVT VT = SVN->getValueType(0);
24846   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24847 
24848   // TODO Add support for big-endian when we have a test case.
24849   if (!VT.isInteger() || IsBigEndian)
24850     return SDValue();
24851 
24852   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
24853   auto isAnyExtend = [NumElts = VT.getVectorNumElements(),
24854                       Mask = SVN->getMask()](unsigned Scale) {
24855     for (unsigned i = 0; i != NumElts; ++i) {
24856       if (Mask[i] < 0)
24857         continue;
24858       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
24859         continue;
24860       return false;
24861     }
24862     return true;
24863   };
24864 
24865   unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG;
24866   SDValue N0 = SVN->getOperand(0);
24867   // Never create an illegal type. Only create unsupported operations if we
24868   // are pre-legalization.
24869   std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24870       Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations);
24871   if (!OutVT)
24872     return SDValue();
24873   return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0));
24874 }
24875 
24876 // Match shuffles that can be converted to zero_extend_vector_inreg.
24877 // This is often generated during legalization.
24878 // e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src))
24879 static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN,
24880                                                      SelectionDAG &DAG,
24881                                                      const TargetLowering &TLI,
24882                                                      bool LegalOperations) {
24883   bool LegalTypes = true;
24884   EVT VT = SVN->getValueType(0);
24885   assert(!VT.isScalableVector() && "Encountered scalable shuffle?");
24886   unsigned NumElts = VT.getVectorNumElements();
24887   unsigned EltSizeInBits = VT.getScalarSizeInBits();
24888 
24889   // TODO: add support for big-endian when we have a test case.
24890   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
24891   if (!VT.isInteger() || IsBigEndian)
24892     return SDValue();
24893 
24894   SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end());
24895   auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) {
24896     for (int &Indice : Mask) {
24897       if (Indice < 0)
24898         continue;
24899       int OpIdx = (unsigned)Indice < NumElts ? 0 : 1;
24900       int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts;
24901       Fn(Indice, OpIdx, OpEltIdx);
24902     }
24903   };
24904 
24905   // Which elements of which operand does this shuffle demand?
24906   std::array<APInt, 2> OpsDemandedElts;
24907   for (APInt &OpDemandedElts : OpsDemandedElts)
24908     OpDemandedElts = APInt::getZero(NumElts);
24909   ForEachDecomposedIndice(
24910       [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) {
24911         OpsDemandedElts[OpIdx].setBit(OpEltIdx);
24912       });
24913 
  // Element-wise(!), which of these demanded elements are known to be zero?
24915   std::array<APInt, 2> OpsKnownZeroElts;
24916   for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts))
24917     std::get<2>(I) =
24918         DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I));
24919 
24920   // Manifest zeroable element knowledge in the shuffle mask.
  // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG;
  //       this is a local invention, but it won't leak into the DAG.
24923   // FIXME: should we not manifest them, but just check when matching?
24924   bool HadZeroableElts = false;
24925   ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts](
24926                               int &Indice, int OpIdx, int OpEltIdx) {
24927     if (OpsKnownZeroElts[OpIdx][OpEltIdx]) {
24928       Indice = -2; // Zeroable element.
24929       HadZeroableElts = true;
24930     }
24931   });
24932 
  // Don't proceed unless we've refined at least one zeroable mask index.
24934   // If we didn't, then we are still trying to match the same shuffle mask
24935   // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG,
24936   // and evidently failed. Proceeding will lead to endless combine loops.
24937   if (!HadZeroableElts)
24938     return SDValue();
24939 
24940   // The shuffle may be more fine-grained than we want. Widen elements first.
24941   // FIXME: should we do this before manifesting zeroable shuffle mask indices?
24942   SmallVector<int, 16> ScaledMask;
24943   getShuffleMaskWithWidestElts(Mask, ScaledMask);
24944   assert(Mask.size() >= ScaledMask.size() &&
24945          Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening.");
24946   int Prescale = Mask.size() / ScaledMask.size();
24947 
24948   NumElts = ScaledMask.size();
24949   EltSizeInBits *= Prescale;
24950 
24951   EVT PrescaledVT = EVT::getVectorVT(
24952       *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits),
24953       NumElts);
24954 
24955   if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT))
24956     return SDValue();
24957 
24958   // For example,
24959   // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32))
24960   // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types)
24961   auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) {
24962     assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 &&
24963            "Unexpected mask scaling factor.");
24964     ArrayRef<int> Mask = ScaledMask;
24965     for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale;
24966          SrcElt != NumSrcElts; ++SrcElt) {
24967       // Analyze the shuffle mask in Scale-sized chunks.
24968       ArrayRef<int> MaskChunk = Mask.take_front(Scale);
24969       assert(MaskChunk.size() == Scale && "Unexpected mask size.");
24970       Mask = Mask.drop_front(MaskChunk.size());
      // The first index in this chunk must be SrcElt, but not zero!
24972       // FIXME: undef should be fine, but that results in more-defined result.
24973       if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt)
24974         return false;
24975       // The rest of the indices in this chunk must be zeros.
24976       // FIXME: undef should be fine, but that results in more-defined result.
24977       if (!all_of(MaskChunk.drop_front(1),
24978                   [](int Indice) { return Indice == -2; }))
24979         return false;
24980     }
24981     assert(Mask.empty() && "Did not process the whole mask?");
24982     return true;
24983   };
24984 
24985   unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG;
24986   for (bool Commuted : {false, true}) {
24987     SDValue Op = SVN->getOperand(!Commuted ? 0 : 1);
24988     if (Commuted)
24989       ShuffleVectorSDNode::commuteMask(ScaledMask);
24990     std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg(
24991         Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes,
24992         LegalOperations);
24993     if (OutVT)
24994       return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT,
24995                                             DAG.getBitcast(PrescaledVT, Op)));
24996   }
24997   return SDValue();
24998 }
24999 
25000 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
25001 // each source element of a large type into the lowest elements of a smaller
25002 // destination type. This is often generated during legalization.
// If the source node itself was a '*_extend_vector_inreg' node then we
// should be able to remove it.
25005 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
25006                                         SelectionDAG &DAG) {
25007   EVT VT = SVN->getValueType(0);
25008   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
25009 
25010   // TODO Add support for big-endian when we have a test case.
25011   if (!VT.isInteger() || IsBigEndian)
25012     return SDValue();
25013 
25014   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
25015 
25016   unsigned Opcode = N0.getOpcode();
25017   if (!ISD::isExtVecInRegOpcode(Opcode))
25018     return SDValue();
25019 
25020   SDValue N00 = N0.getOperand(0);
25021   ArrayRef<int> Mask = SVN->getMask();
25022   unsigned NumElts = VT.getVectorNumElements();
25023   unsigned EltSizeInBits = VT.getScalarSizeInBits();
25024   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
25025   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
25026 
25027   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
25028     return SDValue();
25029   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
25030 
  // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
25032   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
25033   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
25034   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
25035     for (unsigned i = 0; i != NumElts; ++i) {
25036       if (Mask[i] < 0)
25037         continue;
25038       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
25039         continue;
25040       return false;
25041     }
25042     return true;
25043   };
25044 
25045   // At the moment we just handle the case where we've truncated back to the
25046   // same size as before the extension.
25047   // TODO: handle more extension/truncation cases as cases arise.
25048   if (EltSizeInBits != ExtSrcSizeInBits)
25049     return SDValue();
25050 
25051   // We can remove *extend_vector_inreg only if the truncation happens at
25052   // the same scale as the extension.
25053   if (isTruncate(ExtScale))
25054     return DAG.getBitcast(VT, N00);
25055 
25056   return SDValue();
25057 }
25058 
25059 // Combine shuffles of splat-shuffles of the form:
25060 // shuffle (shuffle V, undef, splat-mask), undef, M
25061 // If splat-mask contains undef elements, we need to be careful about
25062 // introducing undef's in the folded mask which are not the result of composing
25063 // the masks of the shuffles.
25064 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
25065                                         SelectionDAG &DAG) {
25066   EVT VT = Shuf->getValueType(0);
25067   unsigned NumElts = VT.getVectorNumElements();
25068 
25069   if (!Shuf->getOperand(1).isUndef())
25070     return SDValue();
25071 
25072   // See if this unary non-splat shuffle actually *is* a splat shuffle,
25073   // in disguise, with all demanded elements being identical.
25074   // FIXME: this can be done per-operand.
25075   if (!Shuf->isSplat()) {
25076     APInt DemandedElts(NumElts, 0);
25077     for (int Idx : Shuf->getMask()) {
25078       if (Idx < 0)
25079         continue; // Ignore sentinel indices.
      assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?");
25081       DemandedElts.setBit(Idx);
25082     }
25083     assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
25084     APInt UndefElts;
25085     if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
25086       // Even if all demanded elements are splat, some of them could be undef.
25087       // Which lowest demanded element is *not* known-undef?
25088       std::optional<unsigned> MinNonUndefIdx;
25089       for (int Idx : Shuf->getMask()) {
25090         if (Idx < 0 || UndefElts[Idx])
25091           continue; // Ignore sentinel indices, and undef elements.
25092         MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U));
25093       }
25094       if (!MinNonUndefIdx)
25095         return DAG.getUNDEF(VT); // All undef - result is undef.
25096       assert(*MinNonUndefIdx < NumElts && "Expected valid element index.");
25097       SmallVector<int, 8> SplatMask(Shuf->getMask().begin(),
25098                                     Shuf->getMask().end());
25099       for (int &Idx : SplatMask) {
25100         if (Idx < 0)
25101           continue; // Passthrough sentinel indices.
25102         // Otherwise, just pick the lowest demanded non-undef element.
25103         // Or sentinel undef, if we know we'd pick a known-undef element.
25104         Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx;
25105       }
25106       assert(SplatMask != Shuf->getMask() && "Expected mask to change!");
25107       return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0),
25108                                   Shuf->getOperand(1), SplatMask);
25109     }
25110   }
25111 
  // If the inner operand is a known splat with no undefs, just return that
  // directly.
25113   // TODO: Create DemandedElts mask from Shuf's mask.
25114   // TODO: Allow undef elements and merge with the shuffle code below.
25115   if (DAG.isSplatValue(Shuf->getOperand(0), /*AllowUndefs*/ false))
25116     return Shuf->getOperand(0);
25117 
25118   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25119   if (!Splat || !Splat->isSplat())
25120     return SDValue();
25121 
25122   ArrayRef<int> ShufMask = Shuf->getMask();
25123   ArrayRef<int> SplatMask = Splat->getMask();
25124   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
25125 
25126   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
25127   // every undef mask element in the splat-shuffle has a corresponding undef
25128   // element in the user-shuffle's mask or if the composition of mask elements
25129   // would result in undef.
25130   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
25131   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
25132   //   In this case it is not legal to simplify to the splat-shuffle because we
25133   //   may be exposing the users of the shuffle an undef element at index 1
25134   //   which was not there before the combine.
25135   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
25136   //   In this case the composition of masks yields SplatMask, so it's ok to
25137   //   simplify to the splat-shuffle.
25138   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
25139   //   In this case the composed mask includes all undef elements of SplatMask
25140   //   and in addition sets element zero to undef. It is safe to simplify to
25141   //   the splat-shuffle.
25142   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
25143                                        ArrayRef<int> SplatMask) {
25144     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
25145       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
25146           SplatMask[UserMask[i]] != -1)
25147         return false;
25148     return true;
25149   };
25150   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
25151     return Shuf->getOperand(0);
25152 
25153   // Create a new shuffle with a mask that is composed of the two shuffles'
25154   // masks.
25155   SmallVector<int, 32> NewMask;
25156   for (int Idx : ShufMask)
25157     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
25158 
25159   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
25160                               Splat->getOperand(0), Splat->getOperand(1),
25161                               NewMask);
25162 }
25163 
25164 // Combine shuffles of bitcasts into a shuffle of the bitcast type, providing
25165 // the mask can be treated as a larger type.
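// E.g. (illustrative):
//   shuffle (v4i32 bitcast (v2i64 X)), (v4i32 bitcast (v2i64 Y)), <0,1,6,7>
//     --> v4i32 bitcast (v2i64 shuffle X, Y, <0,3>)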
25166 static SDValue combineShuffleOfBitcast(ShuffleVectorSDNode *SVN,
25167                                        SelectionDAG &DAG,
25168                                        const TargetLowering &TLI,
25169                                        bool LegalOperations) {
25170   SDValue Op0 = SVN->getOperand(0);
25171   SDValue Op1 = SVN->getOperand(1);
25172   EVT VT = SVN->getValueType(0);
25173   if (Op0.getOpcode() != ISD::BITCAST)
25174     return SDValue();
25175   EVT InVT = Op0.getOperand(0).getValueType();
25176   if (!InVT.isVector() ||
25177       (!Op1.isUndef() && (Op1.getOpcode() != ISD::BITCAST ||
25178                           Op1.getOperand(0).getValueType() != InVT)))
25179     return SDValue();
25180   if (isAnyConstantBuildVector(Op0.getOperand(0)) &&
25181       (Op1.isUndef() || isAnyConstantBuildVector(Op1.getOperand(0))))
25182     return SDValue();
25183 
25184   int VTLanes = VT.getVectorNumElements();
25185   int InLanes = InVT.getVectorNumElements();
25186   if (VTLanes <= InLanes || VTLanes % InLanes != 0 ||
25187       (LegalOperations &&
25188        !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, InVT)))
25189     return SDValue();
25190   int Factor = VTLanes / InLanes;
25191 
  // Check that each group of lanes in the mask is either undef or makes a
  // valid mask for the wider lane type.
25194   ArrayRef<int> Mask = SVN->getMask();
25195   SmallVector<int> NewMask;
25196   if (!widenShuffleMaskElts(Factor, Mask, NewMask))
25197     return SDValue();
25198 
25199   if (!TLI.isShuffleMaskLegal(NewMask, InVT))
25200     return SDValue();
25201 
25202   // Create the new shuffle with the new mask and bitcast it back to the
25203   // original type.
25204   SDLoc DL(SVN);
25205   Op0 = Op0.getOperand(0);
25206   Op1 = Op1.isUndef() ? DAG.getUNDEF(InVT) : Op1.getOperand(0);
25207   SDValue NewShuf = DAG.getVectorShuffle(InVT, DL, Op0, Op1, NewMask);
25208   return DAG.getBitcast(VT, NewShuf);
25209 }
25210 
25211 /// Combine shuffle of shuffle of the form:
25212 /// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X
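/// E.g. with InnerMask <1,u,1,u> and OuterMask <2,2,0,0>, every lane traces
/// back to element 1 of X, so the combined mask is the splat mask <1,1,1,1>.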
25213 static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf,
25214                                      SelectionDAG &DAG) {
25215   if (!OuterShuf->getOperand(1).isUndef())
25216     return SDValue();
25217   auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0));
25218   if (!InnerShuf || !InnerShuf->getOperand(1).isUndef())
25219     return SDValue();
25220 
25221   ArrayRef<int> OuterMask = OuterShuf->getMask();
25222   ArrayRef<int> InnerMask = InnerShuf->getMask();
25223   unsigned NumElts = OuterMask.size();
25224   assert(NumElts == InnerMask.size() && "Mask length mismatch");
25225   SmallVector<int, 32> CombinedMask(NumElts, -1);
25226   int SplatIndex = -1;
25227   for (unsigned i = 0; i != NumElts; ++i) {
25228     // Undef lanes remain undef.
25229     int OuterMaskElt = OuterMask[i];
25230     if (OuterMaskElt == -1)
25231       continue;
25232 
25233     // Peek through the shuffle masks to get the underlying source element.
25234     int InnerMaskElt = InnerMask[OuterMaskElt];
25235     if (InnerMaskElt == -1)
25236       continue;
25237 
25238     // Initialize the splatted element.
25239     if (SplatIndex == -1)
25240       SplatIndex = InnerMaskElt;
25241 
25242     // Non-matching index - this is not a splat.
25243     if (SplatIndex != InnerMaskElt)
25244       return SDValue();
25245 
25246     CombinedMask[i] = InnerMaskElt;
25247   }
25248   assert((all_of(CombinedMask, [](int M) { return M == -1; }) ||
25249           getSplatIndex(CombinedMask) != -1) &&
25250          "Expected a splat mask");
25251 
25252   // TODO: The transform may be a win even if the mask is not legal.
25253   EVT VT = OuterShuf->getValueType(0);
25254   assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types");
25255   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT))
25256     return SDValue();
25257 
25258   return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0),
25259                               InnerShuf->getOperand(1), CombinedMask);
25260 }
25261 
25262 /// If the shuffle mask is taking exactly one element from the first vector
25263 /// operand and passing through all other elements from the second vector
25264 /// operand, return the index of the mask element that is choosing an element
25265 /// from the first operand. Otherwise, return -1.
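/// E.g. for mask <4,5,2,7> (size 4), only mask element 2 selects from
/// operand 0, while all other lanes pass operand 1 through unmoved, so this
/// returns 2.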
25266 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
25267   int MaskSize = Mask.size();
25268   int EltFromOp0 = -1;
25269   // TODO: This does not match if there are undef elements in the shuffle mask.
25270   // Should we ignore undefs in the shuffle mask instead? The trade-off is
25271   // removing an instruction (a shuffle), but losing the knowledge that some
25272   // vector lanes are not needed.
25273   for (int i = 0; i != MaskSize; ++i) {
25274     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
25275       // We're looking for a shuffle of exactly one element from operand 0.
25276       if (EltFromOp0 != -1)
25277         return -1;
25278       EltFromOp0 = i;
25279     } else if (Mask[i] != i + MaskSize) {
25280       // Nothing from operand 1 can change lanes.
25281       return -1;
25282     }
25283   }
25284   return EltFromOp0;
25285 }
25286 
25287 /// If a shuffle inserts exactly one element from a source vector operand into
25288 /// another vector operand and we can access the specified element as a scalar,
25289 /// then we can eliminate the shuffle.
25290 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
25291                                       SelectionDAG &DAG) {
25292   // First, check if we are taking one element of a vector and shuffling that
25293   // element into another vector.
25294   ArrayRef<int> Mask = Shuf->getMask();
25295   SmallVector<int, 16> CommutedMask(Mask);
25296   SDValue Op0 = Shuf->getOperand(0);
25297   SDValue Op1 = Shuf->getOperand(1);
25298   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
25299   if (ShufOp0Index == -1) {
25300     // Commute mask and check again.
25301     ShuffleVectorSDNode::commuteMask(CommutedMask);
25302     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
25303     if (ShufOp0Index == -1)
25304       return SDValue();
25305     // Commute operands to match the commuted shuffle mask.
25306     std::swap(Op0, Op1);
25307     Mask = CommutedMask;
25308   }
25309 
25310   // The shuffle inserts exactly one element from operand 0 into operand 1.
25311   // Now see if we can access that element as a scalar via a real insert element
25312   // instruction.
25313   // TODO: We can try harder to locate the element as a scalar. Examples: it
25314   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
25315   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
25316          "Shuffle mask value must be from operand 0");
25317   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
25318     return SDValue();
25319 
25320   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
25321   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
25322     return SDValue();
25323 
25324   // There's an existing insertelement with constant insertion index, so we
25325   // don't need to check the legality/profitability of a replacement operation
25326   // that differs at most in the constant value. The target should be able to
25327   // lower any of those in a similar way. If not, legalization will expand this
25328   // to a scalar-to-vector plus shuffle.
25329   //
25330   // Note that the shuffle may move the scalar from the position that the insert
25331   // element used. Therefore, our new insert element occurs at the shuffle's
25332   // mask index value, not the insert's index value.
25333   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
25334   SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
25335   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
25336                      Op1, Op0.getOperand(1), NewInsIndex);
25337 }
25338 
25339 /// If we have a unary shuffle of a shuffle, see if it can be folded away
25340 /// completely. This has the potential to lose undef knowledge because the first
25341 /// shuffle may not have an undef mask element where the second one does. So
25342 /// only call this after doing simplifications based on demanded elements.
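/// E.g. shuf (shuf0 X, Y, <0,0,2,2>), undef, <1,0,3,2> selects, per lane, the
/// same source element that shuf0 itself produces, so it folds to the inner
/// shuffle.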
25343 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
25344   // shuf (shuf0 X, Y, Mask0), undef, Mask
25345   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
25346   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
25347     return SDValue();
25348 
25349   ArrayRef<int> Mask = Shuf->getMask();
25350   ArrayRef<int> Mask0 = Shuf0->getMask();
25351   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
25352     // Ignore undef elements.
25353     if (Mask[i] == -1)
25354       continue;
25355     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
25356 
25357     // Is the element of the shuffle operand chosen by this shuffle the same as
25358     // the element chosen by the shuffle operand itself?
25359     if (Mask0[Mask[i]] != Mask0[i])
25360       return SDValue();
25361   }
25362   // Every element of this shuffle is identical to the result of the previous
25363   // shuffle, so we can replace this value.
25364   return Shuf->getOperand(0);
25365 }
25366 
25367 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
25368   EVT VT = N->getValueType(0);
25369   unsigned NumElts = VT.getVectorNumElements();
25370 
25371   SDValue N0 = N->getOperand(0);
25372   SDValue N1 = N->getOperand(1);
25373 
25374   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
25375 
25376   // Canonicalize shuffle undef, undef -> undef
25377   if (N0.isUndef() && N1.isUndef())
25378     return DAG.getUNDEF(VT);
25379 
25380   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
25381 
25382   // Canonicalize shuffle v, v -> v, undef
25383   if (N0 == N1)
25384     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
25385                                 createUnaryMask(SVN->getMask(), NumElts));
25386 
25387   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
25388   if (N0.isUndef())
25389     return DAG.getCommutedVectorShuffle(*SVN);
25390 
  // Remove references to the RHS if it is undef.
25392   if (N1.isUndef()) {
25393     bool Changed = false;
25394     SmallVector<int, 8> NewMask;
25395     for (unsigned i = 0; i != NumElts; ++i) {
25396       int Idx = SVN->getMaskElt(i);
25397       if (Idx >= (int)NumElts) {
25398         Idx = -1;
25399         Changed = true;
25400       }
25401       NewMask.push_back(Idx);
25402     }
25403     if (Changed)
25404       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
25405   }
25406 
25407   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
25408     return InsElt;
25409 
25410   // A shuffle of a single vector that is a splatted value can always be folded.
25411   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
25412     return V;
25413 
25414   if (SDValue V = formSplatFromShuffles(SVN, DAG))
25415     return V;
25416 
25417   // If it is a splat, check if the argument vector is another splat or a
25418   // build_vector.
25419   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
25420     int SplatIndex = SVN->getSplatIndex();
25421     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
25422         TLI.isBinOp(N0.getOpcode()) && N0->getNumValues() == 1) {
25423       // splat (vector_bo L, R), Index -->
25424       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
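      // For example (illustrative), with Index == 1:
      //   splat (add L, R), 1
      //     --> splat (add (extelt L, 1), (extelt R, 1))
      // where the splat of the scalar result is built below as a
      // scalar_to_vector followed by a zero-index shuffle.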
25425       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
25426       SDLoc DL(N);
25427       EVT EltVT = VT.getScalarType();
25428       SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL);
25429       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
25430       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
25431       SDValue NewBO =
25432           DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, N0->getFlags());
25433       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
25434       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
25435       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
25436     }
25437 
25438     // splat(scalar_to_vector(x), 0) -> build_vector(x,...,x)
25439     // splat(insert_vector_elt(v, x, c), c) -> build_vector(x,...,x)
25440     if ((!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) &&
25441         N0.hasOneUse()) {
25442       if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && SplatIndex == 0)
25443         return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(0));
25444 
25445       if (N0.getOpcode() == ISD::INSERT_VECTOR_ELT)
25446         if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2)))
25447           if (Idx->getAPIntValue() == SplatIndex)
25448             return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1));
25449 
      // Look through a bitcast if little-endian and splatting lane 0, through
      // to a scalar_to_vector or a build_vector.
25452       if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() &&
25453           SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() &&
25454           (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR ||
25455            N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) {
25456         EVT N00VT = N0.getOperand(0).getValueType();
25457         if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() &&
25458             VT.isInteger() && N00VT.isInteger()) {
25459           EVT InVT =
25460               TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType());
25461           SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0),
25462                                           SDLoc(N), InVT);
25463           return DAG.getSplatBuildVector(VT, SDLoc(N), Op);
25464         }
25465       }
25466     }
25467 
    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look through conversions that change things like v4f32 to v2f64.
25471     SDNode *V = N0.getNode();
25472     if (V->getOpcode() == ISD::BITCAST) {
25473       SDValue ConvInput = V->getOperand(0);
25474       if (ConvInput.getValueType().isVector() &&
25475           ConvInput.getValueType().getVectorNumElements() == NumElts)
25476         V = ConvInput.getNode();
25477     }
25478 
25479     if (V->getOpcode() == ISD::BUILD_VECTOR) {
25480       assert(V->getNumOperands() == NumElts &&
25481              "BUILD_VECTOR has wrong number of operands");
25482       SDValue Base;
25483       bool AllSame = true;
25484       for (unsigned i = 0; i != NumElts; ++i) {
25485         if (!V->getOperand(i).isUndef()) {
25486           Base = V->getOperand(i);
25487           break;
25488         }
25489       }
25490       // Splat of <u, u, u, u>, return <u, u, u, u>
25491       if (!Base.getNode())
25492         return N0;
25493       for (unsigned i = 0; i != NumElts; ++i) {
25494         if (V->getOperand(i) != Base) {
25495           AllSame = false;
25496           break;
25497         }
25498       }
25499       // Splat of <x, x, x, x>, return <x, x, x, x>
25500       if (AllSame)
25501         return N0;
25502 
25503       // Canonicalize any other splat as a build_vector.
25504       SDValue Splatted = V->getOperand(SplatIndex);
25505       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
25506       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
25507 
25508       // We may have jumped through bitcasts, so the type of the
25509       // BUILD_VECTOR may not match the type of the shuffle.
25510       if (V->getValueType(0) != VT)
25511         NewBV = DAG.getBitcast(VT, NewBV);
25512       return NewBV;
25513     }
25514   }
25515 
25516   // Simplify source operands based on shuffle mask.
25517   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
25518     return SDValue(N, 0);
25519 
25520   // This is intentionally placed after demanded elements simplification because
25521   // it could eliminate knowledge of undef elements created by this shuffle.
25522   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
25523     return ShufOp;
25524 
25525   // Match shuffles that can be converted to any_vector_extend_in_reg.
25526   if (SDValue V =
25527           combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations))
25528     return V;
25529 
25530   // Combine "truncate_vector_in_reg" style shuffles.
25531   if (SDValue V = combineTruncationShuffle(SVN, DAG))
25532     return V;
25533 
25534   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
25535       Level < AfterLegalizeVectorOps &&
25536       (N1.isUndef() ||
25537       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
25538        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
25539     if (SDValue V = partitionShuffleOfConcats(N, DAG))
25540       return V;
25541   }
25542 
25543   // A shuffle of a concat of the same narrow vector can be reduced to use
25544   // only low-half elements of a concat with undef:
25545   // shuf (concat X, X), undef, Mask --> shuf (concat X, undef), undef, Mask'
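  // For example (illustrative), with 4 elements and Mask = <0,3,2,1>, the
  // second (duplicate) half of the concat becomes undef and the new mask is
  // <0,1,0,1>.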
25546   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N1.isUndef() &&
25547       N0.getNumOperands() == 2 &&
25548       N0.getOperand(0) == N0.getOperand(1)) {
25549     int HalfNumElts = (int)NumElts / 2;
25550     SmallVector<int, 8> NewMask;
25551     for (unsigned i = 0; i != NumElts; ++i) {
25552       int Idx = SVN->getMaskElt(i);
25553       if (Idx >= HalfNumElts) {
25554         assert(Idx < (int)NumElts && "Shuffle mask chooses undef op");
25555         Idx -= HalfNumElts;
25556       }
25557       NewMask.push_back(Idx);
25558     }
25559     if (TLI.isShuffleMaskLegal(NewMask, VT)) {
25560       SDValue UndefVec = DAG.getUNDEF(N0.getOperand(0).getValueType());
25561       SDValue NewCat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
25562                                    N0.getOperand(0), UndefVec);
25563       return DAG.getVectorShuffle(VT, SDLoc(N), NewCat, N1, NewMask);
25564     }
25565   }
25566 
25567   // See if we can replace a shuffle with an insert_subvector.
25568   // e.g. v2i32 into v8i32:
25569   // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7).
25570   // --> insert_subvector(lhs,rhs1,4).
25571   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) &&
25572       TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) {
25573     auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) {
25574       // Ensure RHS subvectors are legal.
25575       assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors");
25576       EVT SubVT = RHS.getOperand(0).getValueType();
25577       int NumSubVecs = RHS.getNumOperands();
25578       int NumSubElts = SubVT.getVectorNumElements();
25579       assert((NumElts % NumSubElts) == 0 && "Subvector mismatch");
25580       if (!TLI.isTypeLegal(SubVT))
25581         return SDValue();
25582 
      // Don't bother if we have a unary shuffle (matches undef + LHS elts).
25584       if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; }))
25585         return SDValue();
25586 
      // Search all [NumSubElts]-wide spans for an inserted RHS sequence.
      // TODO: Can we avoid nested loops to increase performance?
25589       SmallVector<int> InsertionMask(NumElts);
25590       for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) {
25591         for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) {
25592           // Reset mask to identity.
25593           std::iota(InsertionMask.begin(), InsertionMask.end(), 0);
25594 
25595           // Add subvector insertion.
25596           std::iota(InsertionMask.begin() + SubIdx,
25597                     InsertionMask.begin() + SubIdx + NumSubElts,
25598                     NumElts + (SubVec * NumSubElts));
25599 
25600           // See if the shuffle mask matches the reference insertion mask.
25601           bool MatchingShuffle = true;
25602           for (int i = 0; i != (int)NumElts; ++i) {
25603             int ExpectIdx = InsertionMask[i];
25604             int ActualIdx = Mask[i];
25605             if (0 <= ActualIdx && ExpectIdx != ActualIdx) {
25606               MatchingShuffle = false;
25607               break;
25608             }
25609           }
25610 
25611           if (MatchingShuffle)
25612             return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS,
25613                                RHS.getOperand(SubVec),
25614                                DAG.getVectorIdxConstant(SubIdx, SDLoc(N)));
25615         }
25616       }
25617       return SDValue();
25618     };
25619     ArrayRef<int> Mask = SVN->getMask();
25620     if (N1.getOpcode() == ISD::CONCAT_VECTORS)
25621       if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask))
25622         return InsertN1;
25623     if (N0.getOpcode() == ISD::CONCAT_VECTORS) {
25624       SmallVector<int> CommuteMask(Mask);
25625       ShuffleVectorSDNode::commuteMask(CommuteMask);
25626       if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask))
25627         return InsertN0;
25628     }
25629   }
25630 
  // If we're not performing a select/blend shuffle, see if we can convert the
  // shuffle into an AND node, where all the out-of-lane elements are known to
  // be zero.
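  // For example (illustrative), shuffle X, zero, <0,5,2,7> keeps only lanes 0
  // and 2 of X, so if the demanded lanes of the second operand are known zero
  // it can become: and X, <-1,0,-1,0>.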
25633   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25634     bool IsInLaneMask = true;
25635     ArrayRef<int> Mask = SVN->getMask();
25636     SmallVector<int, 16> ClearMask(NumElts, -1);
25637     APInt DemandedLHS = APInt::getZero(NumElts);
25638     APInt DemandedRHS = APInt::getZero(NumElts);
25639     for (int I = 0; I != (int)NumElts; ++I) {
25640       int M = Mask[I];
25641       if (M < 0)
25642         continue;
25643       ClearMask[I] = M == I ? I : (I + NumElts);
25644       IsInLaneMask &= (M == I) || (M == (int)(I + NumElts));
25645       if (M != I) {
25646         APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS;
25647         Demanded.setBit(M % NumElts);
25648       }
25649     }
25650     // TODO: Should we try to mask with N1 as well?
25651     if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
25652         (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
25653         (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
25654       SDLoc DL(N);
25655       EVT IntVT = VT.changeVectorElementTypeToInteger();
25656       EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
      // Transform the type to a legal type so that the buildvector constant
      // elements are not illegal. Make sure that the result is no smaller than
      // the original type, in case the value is split into two (e.g. i64->i32).
25660       if (!TLI.isTypeLegal(IntSVT) && LegalTypes)
25661         IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT);
25662       if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) {
25663         SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT);
25664         SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT);
25665         SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT));
25666         for (int I = 0; I != (int)NumElts; ++I)
25667           if (0 <= Mask[I])
25668             AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt;
25669 
25670         // See if a clear mask is legal instead of going via
25671         // XformToShuffleWithZero which loses UNDEF mask elements.
25672         if (TLI.isVectorClearMaskLegal(ClearMask, IntVT))
25673           return DAG.getBitcast(
25674               VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0),
25675                                       DAG.getConstant(0, DL, IntVT), ClearMask));
25676 
25677         if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
25678           return DAG.getBitcast(
25679               VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0),
25680                               DAG.getBuildVector(IntVT, DL, AndMask)));
25681       }
25682     }
25683   }
25684 
25685   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
25686   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
25687   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
25688     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
25689       return Res;
25690 
25691   // If this shuffle only has a single input that is a bitcasted shuffle,
25692   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
25693   // back to their original types.
25694   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
25695       N1.isUndef() && Level < AfterLegalizeVectorOps &&
25696       TLI.isTypeLegal(VT)) {
25697 
25698     SDValue BC0 = peekThroughOneUseBitcasts(N0);
25699     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
25700       EVT SVT = VT.getScalarType();
25701       EVT InnerVT = BC0->getValueType(0);
25702       EVT InnerSVT = InnerVT.getScalarType();
25703 
25704       // Determine which shuffle works with the smaller scalar type.
25705       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
25706       EVT ScaleSVT = ScaleVT.getScalarType();
25707 
25708       if (TLI.isTypeLegal(ScaleVT) &&
25709           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
25710           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
25711         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25712         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
25713 
25714         // Scale the shuffle masks to the smaller scalar type.
25715         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
25716         SmallVector<int, 8> InnerMask;
25717         SmallVector<int, 8> OuterMask;
25718         narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask);
25719         narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask);
25720 
25721         // Merge the shuffle masks.
25722         SmallVector<int, 8> NewMask;
25723         for (int M : OuterMask)
25724           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
25725 
25726         // Test for shuffle mask legality over both commutations.
25727         SDValue SV0 = BC0->getOperand(0);
25728         SDValue SV1 = BC0->getOperand(1);
25729         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25730         if (!LegalMask) {
25731           std::swap(SV0, SV1);
25732           ShuffleVectorSDNode::commuteMask(NewMask);
25733           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
25734         }
25735 
25736         if (LegalMask) {
25737           SV0 = DAG.getBitcast(ScaleVT, SV0);
25738           SV1 = DAG.getBitcast(ScaleVT, SV1);
25739           return DAG.getBitcast(
25740               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
25741         }
25742       }
25743     }
25744   }
25745 
25746   // Match shuffles of bitcasts, so long as the mask can be treated as the
25747   // larger type.
25748   if (SDValue V = combineShuffleOfBitcast(SVN, DAG, TLI, LegalOperations))
25749     return V;
25750 
25751   // Compute the combined shuffle mask for a shuffle with SV0 as the first
25752   // operand, and SV1 as the second operand.
25753   // i.e. Merge SVN(OtherSVN, N1) -> shuffle(SV0, SV1, Mask) iff Commute = false
25754   //      Merge SVN(N1, OtherSVN) -> shuffle(SV0, SV1, Mask') iff Commute = true
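  // For example (illustrative), with NumElts == 4 and Commute == false:
  //   shuffle (shuffle A, undef, <1,0,3,2>), C, <0,1,4,5>
  // resolves lanes 0/1 through the inner mask and lanes 2/3 directly to C,
  // merging to: shuffle A, C, <1,0,4,5>.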
25755   auto MergeInnerShuffle =
25756       [NumElts, &VT](bool Commute, ShuffleVectorSDNode *SVN,
25757                      ShuffleVectorSDNode *OtherSVN, SDValue N1,
25758                      const TargetLowering &TLI, SDValue &SV0, SDValue &SV1,
25759                      SmallVectorImpl<int> &Mask) -> bool {
25760     // Don't try to fold splats; they're likely to simplify somehow, or they
25761     // might be free.
25762     if (OtherSVN->isSplat())
25763       return false;
25764 
25765     SV0 = SV1 = SDValue();
25766     Mask.clear();
25767 
25768     for (unsigned i = 0; i != NumElts; ++i) {
25769       int Idx = SVN->getMaskElt(i);
25770       if (Idx < 0) {
25771         // Propagate Undef.
25772         Mask.push_back(Idx);
25773         continue;
25774       }
25775 
25776       if (Commute)
25777         Idx = (Idx < (int)NumElts) ? (Idx + NumElts) : (Idx - NumElts);
25778 
25779       SDValue CurrentVec;
25780       if (Idx < (int)NumElts) {
25781         // This shuffle index refers to the inner shuffle N0. Lookup the inner
25782         // shuffle mask to identify which vector is actually referenced.
25783         Idx = OtherSVN->getMaskElt(Idx);
25784         if (Idx < 0) {
25785           // Propagate Undef.
25786           Mask.push_back(Idx);
25787           continue;
25788         }
25789         CurrentVec = (Idx < (int)NumElts) ? OtherSVN->getOperand(0)
25790                                           : OtherSVN->getOperand(1);
25791       } else {
25792         // This shuffle index references an element within N1.
25793         CurrentVec = N1;
25794       }
25795 
25796       // Simple case where 'CurrentVec' is UNDEF.
25797       if (CurrentVec.isUndef()) {
25798         Mask.push_back(-1);
25799         continue;
25800       }
25801 
25802       // Canonicalize the shuffle index. We don't know yet if CurrentVec
25803       // will be the first or second operand of the combined shuffle.
25804       Idx = Idx % NumElts;
25805       if (!SV0.getNode() || SV0 == CurrentVec) {
25806         // Ok. CurrentVec is the left hand side.
25807         // Update the mask accordingly.
25808         SV0 = CurrentVec;
25809         Mask.push_back(Idx);
25810         continue;
25811       }
25812       if (!SV1.getNode() || SV1 == CurrentVec) {
25813         // Ok. CurrentVec is the right hand side.
25814         // Update the mask accordingly.
25815         SV1 = CurrentVec;
25816         Mask.push_back(Idx + NumElts);
25817         continue;
25818       }
25819 
25820       // Last chance - see if the vector is another shuffle and if it
25821       // uses one of the existing candidate shuffle ops.
25822       if (auto *CurrentSVN = dyn_cast<ShuffleVectorSDNode>(CurrentVec)) {
25823         int InnerIdx = CurrentSVN->getMaskElt(Idx);
25824         if (InnerIdx < 0) {
25825           Mask.push_back(-1);
25826           continue;
25827         }
25828         SDValue InnerVec = (InnerIdx < (int)NumElts)
25829                                ? CurrentSVN->getOperand(0)
25830                                : CurrentSVN->getOperand(1);
25831         if (InnerVec.isUndef()) {
25832           Mask.push_back(-1);
25833           continue;
25834         }
25835         InnerIdx %= NumElts;
25836         if (InnerVec == SV0) {
25837           Mask.push_back(InnerIdx);
25838           continue;
25839         }
25840         if (InnerVec == SV1) {
25841           Mask.push_back(InnerIdx + NumElts);
25842           continue;
25843         }
25844       }
25845 
25846       // Bail out if we cannot convert the shuffle pair into a single shuffle.
25847       return false;
25848     }
25849 
25850     if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25851       return true;
25852 
25853     // Avoid introducing shuffles with illegal mask.
25854     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25855     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25856     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25857     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
25858     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
25859     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
25860     if (TLI.isShuffleMaskLegal(Mask, VT))
25861       return true;
25862 
25863     std::swap(SV0, SV1);
25864     ShuffleVectorSDNode::commuteMask(Mask);
25865     return TLI.isShuffleMaskLegal(Mask, VT);
25866   };
25867 
25868   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
25869     // Canonicalize shuffles according to rules:
25870     //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
25871     //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
25872     //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
25873     if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25874         N0.getOpcode() != ISD::VECTOR_SHUFFLE) {
25875       // The incoming shuffle must be of the same type as the result of the
25876       // current shuffle.
25877       assert(N1->getOperand(0).getValueType() == VT &&
25878              "Shuffle types don't match");
25879 
25880       SDValue SV0 = N1->getOperand(0);
25881       SDValue SV1 = N1->getOperand(1);
25882       bool HasSameOp0 = N0 == SV0;
25883       bool IsSV1Undef = SV1.isUndef();
25884       if (HasSameOp0 || IsSV1Undef || N0 == SV1)
25885         // Commute the operands of this shuffle so merging below will trigger.
25886         return DAG.getCommutedVectorShuffle(*SVN);
25887     }
25888 
25889     // Canonicalize splat shuffles to the RHS to improve merging below.
25890     //  shuffle(splat(A,u), shuffle(C,D)) -> shuffle'(shuffle(C,D), splat(A,u))
25891     if (N0.getOpcode() == ISD::VECTOR_SHUFFLE &&
25892         N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
25893         cast<ShuffleVectorSDNode>(N0)->isSplat() &&
25894         !cast<ShuffleVectorSDNode>(N1)->isSplat()) {
25895       return DAG.getCommutedVectorShuffle(*SVN);
25896     }
25897 
25898     // Try to fold according to rules:
25899     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
25900     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
25901     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
25902     // Don't try to fold shuffles with illegal type.
25903     // Only fold if this shuffle is the only user of the other shuffle.
    // Try matching shuffle(C,shuffle(A,B)) commuted patterns as well.
25905     for (int i = 0; i != 2; ++i) {
25906       if (N->getOperand(i).getOpcode() == ISD::VECTOR_SHUFFLE &&
25907           N->isOnlyUserOf(N->getOperand(i).getNode())) {
25908         // The incoming shuffle must be of the same type as the result of the
25909         // current shuffle.
25910         auto *OtherSV = cast<ShuffleVectorSDNode>(N->getOperand(i));
25911         assert(OtherSV->getOperand(0).getValueType() == VT &&
25912                "Shuffle types don't match");
25913 
25914         SDValue SV0, SV1;
25915         SmallVector<int, 4> Mask;
25916         if (MergeInnerShuffle(i != 0, SVN, OtherSV, N->getOperand(1 - i), TLI,
25917                               SV0, SV1, Mask)) {
          // Check if all indices in Mask are Undef. If so, propagate Undef.
25919           if (llvm::all_of(Mask, [](int M) { return M < 0; }))
25920             return DAG.getUNDEF(VT);
25921 
25922           return DAG.getVectorShuffle(VT, SDLoc(N),
25923                                       SV0 ? SV0 : DAG.getUNDEF(VT),
25924                                       SV1 ? SV1 : DAG.getUNDEF(VT), Mask);
25925         }
25926       }
25927     }
25928 
    // Merge shuffles through binops if we are able to merge the shuffle with
    // at least one other shuffle.
25931     // shuffle(bop(shuffle(x,y),shuffle(z,w)),undef)
25932     // shuffle(bop(shuffle(x,y),shuffle(z,w)),bop(shuffle(a,b),shuffle(c,d)))
25933     unsigned SrcOpcode = N0.getOpcode();
25934     if (TLI.isBinOp(SrcOpcode) && N->isOnlyUserOf(N0.getNode()) &&
25935         (N1.isUndef() ||
25936          (SrcOpcode == N1.getOpcode() && N->isOnlyUserOf(N1.getNode())))) {
25937       // Get binop source ops, or just pass on the undef.
25938       SDValue Op00 = N0.getOperand(0);
25939       SDValue Op01 = N0.getOperand(1);
25940       SDValue Op10 = N1.isUndef() ? N1 : N1.getOperand(0);
25941       SDValue Op11 = N1.isUndef() ? N1 : N1.getOperand(1);
25942       // TODO: We might be able to relax the VT check but we don't currently
25943       // have any isBinOp() that has different result/ops VTs so play safe until
25944       // we have test coverage.
25945       if (Op00.getValueType() == VT && Op10.getValueType() == VT &&
25946           Op01.getValueType() == VT && Op11.getValueType() == VT &&
25947           (Op00.getOpcode() == ISD::VECTOR_SHUFFLE ||
25948            Op10.getOpcode() == ISD::VECTOR_SHUFFLE ||
25949            Op01.getOpcode() == ISD::VECTOR_SHUFFLE ||
25950            Op11.getOpcode() == ISD::VECTOR_SHUFFLE)) {
25951         auto CanMergeInnerShuffle = [&](SDValue &SV0, SDValue &SV1,
25952                                         SmallVectorImpl<int> &Mask, bool LeftOp,
25953                                         bool Commute) {
25954           SDValue InnerN = Commute ? N1 : N0;
25955           SDValue Op0 = LeftOp ? Op00 : Op01;
25956           SDValue Op1 = LeftOp ? Op10 : Op11;
25957           if (Commute)
25958             std::swap(Op0, Op1);
25959           // Only accept the merged shuffle if we don't introduce undef elements,
25960           // or the inner shuffle already contained undef elements.
25961           auto *SVN0 = dyn_cast<ShuffleVectorSDNode>(Op0);
25962           return SVN0 && InnerN->isOnlyUserOf(SVN0) &&
25963                  MergeInnerShuffle(Commute, SVN, SVN0, Op1, TLI, SV0, SV1,
25964                                    Mask) &&
25965                  (llvm::any_of(SVN0->getMask(), [](int M) { return M < 0; }) ||
25966                   llvm::none_of(Mask, [](int M) { return M < 0; }));
25967         };
25968 
25969         // Ensure we don't increase the number of shuffles - we must merge a
25970         // shuffle from at least one of the LHS and RHS ops.
25971         bool MergedLeft = false;
25972         SDValue LeftSV0, LeftSV1;
25973         SmallVector<int, 4> LeftMask;
25974         if (CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, false) ||
25975             CanMergeInnerShuffle(LeftSV0, LeftSV1, LeftMask, true, true)) {
25976           MergedLeft = true;
25977         } else {
25978           LeftMask.assign(SVN->getMask().begin(), SVN->getMask().end());
25979           LeftSV0 = Op00, LeftSV1 = Op10;
25980         }
25981 
25982         bool MergedRight = false;
25983         SDValue RightSV0, RightSV1;
25984         SmallVector<int, 4> RightMask;
25985         if (CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, false) ||
25986             CanMergeInnerShuffle(RightSV0, RightSV1, RightMask, false, true)) {
25987           MergedRight = true;
25988         } else {
25989           RightMask.assign(SVN->getMask().begin(), SVN->getMask().end());
25990           RightSV0 = Op01, RightSV1 = Op11;
25991         }
25992 
25993         if (MergedLeft || MergedRight) {
25994           SDLoc DL(N);
25995           SDValue LHS = DAG.getVectorShuffle(
25996               VT, DL, LeftSV0 ? LeftSV0 : DAG.getUNDEF(VT),
25997               LeftSV1 ? LeftSV1 : DAG.getUNDEF(VT), LeftMask);
25998           SDValue RHS = DAG.getVectorShuffle(
25999               VT, DL, RightSV0 ? RightSV0 : DAG.getUNDEF(VT),
26000               RightSV1 ? RightSV1 : DAG.getUNDEF(VT), RightMask);
26001           return DAG.getNode(SrcOpcode, DL, VT, LHS, RHS);
26002         }
26003       }
26004     }
26005   }
26006 
26007   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
26008     return V;
26009 
26010   // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG.
26011   // Perform this really late, because it could eliminate knowledge
26012   // of undef elements created by this shuffle.
26013   if (Level < AfterLegalizeTypes)
26014     if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI,
26015                                                           LegalOperations))
26016       return V;
26017 
26018   return SDValue();
26019 }
26020 
26021 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
26022   EVT VT = N->getValueType(0);
26023   if (!VT.isFixedLengthVector())
26024     return SDValue();
26025 
26026   // Try to convert a scalar binop with an extracted vector element to a vector
26027   // binop. This is intended to reduce potentially expensive register moves.
26028   // TODO: Check if both operands are extracted.
  // TODO: How to prefer scalar/vector ops with multiple uses of the extract?
26030   // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
26031   SDValue Scalar = N->getOperand(0);
26032   unsigned Opcode = Scalar.getOpcode();
26033   EVT VecEltVT = VT.getScalarType();
26034   if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 &&
26035       TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT &&
26036       Scalar.getOperand(0).getValueType() == VecEltVT &&
26037       Scalar.getOperand(1).getValueType() == VecEltVT &&
26038       Scalar->isOnlyUserOf(Scalar.getOperand(0).getNode()) &&
26039       Scalar->isOnlyUserOf(Scalar.getOperand(1).getNode()) &&
26040       DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) {
26041     // Match an extract element and get a shuffle mask equivalent.
26042     SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1);
26043 
26044     for (int i : {0, 1}) {
26045       // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...}
26046       // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...}
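      // For example (illustrative, v4i32):
      //   s2v (add (extelt V, 1), 42)
      //     --> shuffle (add V, <42,42,42,42>), undef, {1,-1,-1,-1}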
26047       SDValue EE = Scalar.getOperand(i);
26048       auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1));
26049       if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
26050           EE.getOperand(0).getValueType() == VT &&
26051           isa<ConstantSDNode>(EE.getOperand(1))) {
26052         // Mask = {ExtractIndex, undef, undef....}
26053         ShufMask[0] = EE.getConstantOperandVal(1);
26054         // Make sure the shuffle is legal if we are crossing lanes.
26055         if (TLI.isShuffleMaskLegal(ShufMask, VT)) {
26056           SDLoc DL(N);
26057           SDValue V[] = {EE.getOperand(0),
26058                          DAG.getConstant(C->getAPIntValue(), DL, VT)};
26059           SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]);
26060           return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT),
26061                                       ShufMask);
26062         }
26063       }
26064     }
26065   }
26066 
26067   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
26068   // with a VECTOR_SHUFFLE and possible truncate.
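  // For example (illustrative): s2v (extelt v4i32:V, 2) becomes
  // shuffle V, undef, {2,-1,-1,-1}, possibly followed by an extract_subvector
  // when the result type is narrower than the source vector.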
26069   if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
26070       !Scalar.getOperand(0).getValueType().isFixedLengthVector())
26071     return SDValue();
26072 
26073   // If we have an implicit truncate, truncate here if it is legal.
26074   if (VecEltVT != Scalar.getValueType() &&
26075       Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) {
26076     SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar);
26077     return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
26078   }
26079 
26080   auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1));
26081   if (!ExtIndexC)
26082     return SDValue();
26083 
26084   SDValue SrcVec = Scalar.getOperand(0);
26085   EVT SrcVT = SrcVec.getValueType();
26086   unsigned SrcNumElts = SrcVT.getVectorNumElements();
26087   unsigned VTNumElts = VT.getVectorNumElements();
26088   if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) {
26089     // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...}
26090     SmallVector<int, 8> Mask(SrcNumElts, -1);
26091     Mask[0] = ExtIndexC->getZExtValue();
26092     SDValue LegalShuffle = TLI.buildLegalVectorShuffle(
26093         SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG);
26094     if (!LegalShuffle)
26095       return SDValue();
26096 
26097     // If the initial vector is the same size, the shuffle is the result.
26098     if (VT == SrcVT)
26099       return LegalShuffle;
26100 
26101     // If not, shorten the shuffled vector.
26102     if (VTNumElts != SrcNumElts) {
26103       SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
26104       EVT SubVT = EVT::getVectorVT(*DAG.getContext(),
26105                                    SrcVT.getVectorElementType(), VTNumElts);
26106       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle,
26107                          ZeroIdx);
26108     }
26109   }
26110 
26111   return SDValue();
26112 }
26113 
26114 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
26115   EVT VT = N->getValueType(0);
26116   SDValue N0 = N->getOperand(0);
26117   SDValue N1 = N->getOperand(1);
26118   SDValue N2 = N->getOperand(2);
26119   uint64_t InsIdx = N->getConstantOperandVal(2);
26120 
26121   // If inserting an UNDEF, just return the original vector.
26122   if (N1.isUndef())
26123     return N0;
26124 
26125   // If this is an insert of an extracted vector into an undef vector, we can
26126   // just use the input to the extract if the types match, and can simplify
26127   // in some cases even if they don't.
26128   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26129       N1.getOperand(1) == N2) {
26130     EVT SrcVT = N1.getOperand(0).getValueType();
26131     if (SrcVT == VT)
26132       return N1.getOperand(0);
26133     // TODO: To remove the zero check, need to adjust the offset to
26134     // a multiple of the new src type.
26135     if (isNullConstant(N2) &&
26136         VT.isScalableVector() == SrcVT.isScalableVector()) {
26137       if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements())
26138         return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26139                            VT, N0, N1.getOperand(0), N2);
26140       else
26141         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
26142                            VT, N1.getOperand(0), N2);
26143     }
26144   }
26145 
26146   // Simplify scalar inserts into an undef vector:
26147   // insert_subvector undef, (splat X), N2 -> splat X
26148   if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR)
26149     if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse())
26150       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0));
26151 
26152   // If we are inserting a bitcast value into an undef, with the same
26153   // number of elements, just use the bitcast input of the extract.
26154   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
26155   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
26156   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
26157       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
26158       N1.getOperand(0).getOperand(1) == N2 &&
26159       N1.getOperand(0).getOperand(0).getValueType().getVectorElementCount() ==
26160           VT.getVectorElementCount() &&
26161       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
26162           VT.getSizeInBits()) {
26163     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
26164   }
26165 
  // If both N0 and N1 are bitcast values on which insert_subvector
  // would make sense, pull the bitcast through.
26168   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
26169   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
26170   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
26171     SDValue CN0 = N0.getOperand(0);
26172     SDValue CN1 = N1.getOperand(0);
26173     EVT CN0VT = CN0.getValueType();
26174     EVT CN1VT = CN1.getValueType();
26175     if (CN0VT.isVector() && CN1VT.isVector() &&
26176         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
26177         CN0VT.getVectorElementCount() == VT.getVectorElementCount()) {
26178       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
26179                                       CN0.getValueType(), CN0, CN1, N2);
26180       return DAG.getBitcast(VT, NewINSERT);
26181     }
26182   }
26183 
26184   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
26185   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
26186   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
26187   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26188       N0.getOperand(1).getValueType() == N1.getValueType() &&
26189       N0.getOperand(2) == N2)
26190     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
26191                        N1, N2);
26192 
26193   // Eliminate an intermediate insert into an undef vector:
26194   // insert_subvector undef, (insert_subvector undef, X, 0), 0 -->
26195   // insert_subvector undef, X, 0
26196   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
26197       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) &&
26198       isNullConstant(N2))
26199     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
26200                        N1.getOperand(1), N2);
26201 
26202   // Push subvector bitcasts to the output, adjusting the index as we go.
26203   // insert_subvector(bitcast(v), bitcast(s), c1)
26204   // -> bitcast(insert_subvector(v, s, c2))
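  // For example (illustrative):
  //   insert_subvector (bitcast v2i64:V to v4i32),
  //                    (bitcast v1i64:S to v2i32), 2
  //     --> bitcast (insert_subvector v2i64:V, v1i64:S, 1)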
26205   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
26206       N1.getOpcode() == ISD::BITCAST) {
26207     SDValue N0Src = peekThroughBitcasts(N0);
26208     SDValue N1Src = peekThroughBitcasts(N1);
26209     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
26210     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
26211     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
26212         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
26213       EVT NewVT;
26214       SDLoc DL(N);
26215       SDValue NewIdx;
26216       LLVMContext &Ctx = *DAG.getContext();
26217       ElementCount NumElts = VT.getVectorElementCount();
26218       unsigned EltSizeInBits = VT.getScalarSizeInBits();
26219       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
26220         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
26221         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
26222         NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL);
26223       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
26224         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
26225         if (NumElts.isKnownMultipleOf(Scale) && (InsIdx % Scale) == 0) {
26226           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT,
26227                                    NumElts.divideCoefficientBy(Scale));
26228           NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL);
26229         }
26230       }
26231       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
26232         SDValue Res = DAG.getBitcast(NewVT, N0Src);
26233         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
26234         return DAG.getBitcast(VT, Res);
26235       }
26236     }
26237   }
26238 
26239   // Canonicalize insert_subvector dag nodes.
26240   // Example:
26241   // (insert_subvector (insert_subvector A, Idx0), Idx1)
26242   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
26243   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
26244       N1.getValueType() == N0.getOperand(1).getValueType()) {
26245     unsigned OtherIdx = N0.getConstantOperandVal(2);
26246     if (InsIdx < OtherIdx) {
26247       // Swap nodes.
26248       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
26249                                   N0.getOperand(0), N1, N2);
26250       AddToWorklist(NewOp.getNode());
26251       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
26252                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
26253     }
26254   }
26255 
26256   // If the input vector is a concatenation, and the insert replaces
26257   // one of the pieces, we can optimize into a single concat_vectors.
26258   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
26259       N0.getOperand(0).getValueType() == N1.getValueType() &&
26260       N0.getOperand(0).getValueType().isScalableVector() ==
26261           N1.getValueType().isScalableVector()) {
26262     unsigned Factor = N1.getValueType().getVectorMinNumElements();
26263     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
26264     Ops[InsIdx / Factor] = N1;
26265     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
26266   }
26267 
26268   // Simplify source operands based on insertion.
26269   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
26270     return SDValue(N, 0);
26271 
26272   return SDValue();
26273 }
26274 
26275 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
26276   SDValue N0 = N->getOperand(0);
26277 
26278   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
26279   if (N0->getOpcode() == ISD::FP16_TO_FP)
26280     return N0->getOperand(0);
26281 
26282   return SDValue();
26283 }
26284 
26285 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
26286   auto Op = N->getOpcode();
26287   assert((Op == ISD::FP16_TO_FP || Op == ISD::BF16_TO_FP) &&
26288          "opcode should be FP16_TO_FP or BF16_TO_FP.");
26289   SDValue N0 = N->getOperand(0);
26290 
26291   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) or
26292   // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26293   if (!TLI.shouldKeepZExtForFP16Conv() && N0->getOpcode() == ISD::AND) {
26294     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
26295     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
26296       return DAG.getNode(Op, SDLoc(N), N->getValueType(0), N0.getOperand(0));
26297     }
26298   }
26299 
26300   return SDValue();
26301 }
26302 
26303 SDValue DAGCombiner::visitFP_TO_BF16(SDNode *N) {
26304   SDValue N0 = N->getOperand(0);
26305 
26306   // fold (fp_to_bf16 (bf16_to_fp op)) -> op
26307   if (N0->getOpcode() == ISD::BF16_TO_FP)
26308     return N0->getOperand(0);
26309 
26310   return SDValue();
26311 }
26312 
26313 SDValue DAGCombiner::visitBF16_TO_FP(SDNode *N) {
26314   // fold bf16_to_fp(op & 0xffff) -> bf16_to_fp(op)
26315   return visitFP16_TO_FP(N);
26316 }
26317 
26318 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
26319   SDValue N0 = N->getOperand(0);
26320   EVT VT = N0.getValueType();
26321   unsigned Opcode = N->getOpcode();
26322 
26323   // VECREDUCE over 1-element vector is just an extract.
26324   if (VT.getVectorElementCount().isScalar()) {
26325     SDLoc dl(N);
26326     SDValue Res =
26327         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
26328                     DAG.getVectorIdxConstant(0, dl));
26329     if (Res.getValueType() != N->getValueType(0))
26330       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
26331     return Res;
26332   }
26333 
  // On a boolean vector an and/or reduction is the same as a umin/umax
  // reduction. Convert them if the latter is legal while the former isn't.
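  // For example (illustrative): over boolean lanes that are all zeros or all
  // ones, AND == UMIN (any zero lane forces the result to zero) and
  // OR == UMAX (any all-ones lane forces the result to all-ones).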
26336   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
26337     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
26338         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
26339     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
26340         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
26341         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
26342       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
26343   }
26344 
26345   // vecreduce_or(insert_subvector(zero or undef, val)) -> vecreduce_or(val)
26346   // vecreduce_and(insert_subvector(ones or undef, val)) -> vecreduce_and(val)
26347   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
26348       TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
26349     SDValue Vec = N0.getOperand(0);
26350     SDValue Subvec = N0.getOperand(1);
26351     if ((Opcode == ISD::VECREDUCE_OR &&
26352          (N0.getOperand(0).isUndef() || isNullOrNullSplat(Vec))) ||
26353         (Opcode == ISD::VECREDUCE_AND &&
26354          (N0.getOperand(0).isUndef() || isAllOnesOrAllOnesSplat(Vec))))
26355       return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), Subvec);
26356   }
26357 
26358   return SDValue();
26359 }
26360 
26361 SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
26362   SelectionDAG::FlagInserter FlagsInserter(DAG, N);
26363 
26364   // FSUB -> FMA combines:
26365   if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
26366     AddToWorklist(Fused.getNode());
26367     return Fused;
26368   }
26369   return SDValue();
26370 }
26371 
26372 SDValue DAGCombiner::visitVPOp(SDNode *N) {
26373 
26374   if (N->getOpcode() == ISD::VP_GATHER)
26375     if (SDValue SD = visitVPGATHER(N))
26376       return SD;
26377 
26378   if (N->getOpcode() == ISD::VP_SCATTER)
26379     if (SDValue SD = visitVPSCATTER(N))
26380       return SD;
26381 
26382   if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD)
26383     if (SDValue SD = visitVP_STRIDED_LOAD(N))
26384       return SD;
26385 
26386   if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE)
26387     if (SDValue SD = visitVP_STRIDED_STORE(N))
26388       return SD;
26389 
26390   // VP operations in which all vector elements are disabled - either by
26391   // determining that the mask is all false or that the EVL is 0 - can be
26392   // eliminated.
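  // For example (illustrative), a VP_ADD whose mask operand is an all-zeros
  // splat, or whose EVL operand is the constant 0, computes no active lanes.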
26393   bool AreAllEltsDisabled = false;
26394   if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode()))
26395     AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx));
26396   if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode()))
26397     AreAllEltsDisabled |=
26398         ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
26399 
26400   // This is the only generic VP combine we support for now.
26401   if (!AreAllEltsDisabled) {
26402     switch (N->getOpcode()) {
26403     case ISD::VP_FADD:
26404       return visitVP_FADD(N);
26405     case ISD::VP_FSUB:
26406       return visitVP_FSUB(N);
26407     case ISD::VP_FMA:
26408       return visitFMA<VPMatchContext>(N);
26409     }
26410     return SDValue();
26411   }
26412 
26413   // Binary operations can be replaced by UNDEF.
26414   if (ISD::isVPBinaryOp(N->getOpcode()))
26415     return DAG.getUNDEF(N->getValueType(0));
26416 
26417   // VP Memory operations can be replaced by either the chain (stores) or the
26418   // chain + undef (loads).
26419   if (const auto *MemSD = dyn_cast<MemSDNode>(N)) {
26420     if (MemSD->writeMem())
26421       return MemSD->getChain();
26422     return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain());
26423   }
26424 
26425   // Reduction operations return the start operand when no elements are active.
26426   if (ISD::isVPReduction(N->getOpcode()))
26427     return N->getOperand(0);
26428 
26429   return SDValue();
26430 }
26431 
26432 SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
26433   SDValue Chain = N->getOperand(0);
26434   SDValue Ptr = N->getOperand(1);
26435   EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26436 
  // Check that the memory the FP state is written to is used only by a single
  // load operation.
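  // In DAG terms, the pattern being matched is roughly (illustrative):
  //   (store (load (get_fpenv_mem ptr)), dst) --> (get_fpenv_mem dst)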
26439   LoadSDNode *LdNode = nullptr;
26440   for (auto *U : Ptr->uses()) {
26441     if (U == N)
26442       continue;
26443     if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
26444       if (LdNode && LdNode != Ld)
26445         return SDValue();
26446       LdNode = Ld;
26447       continue;
26448     }
26449     return SDValue();
26450   }
26451   if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26452       !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26453       !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
26454     return SDValue();
26455 
26456   // Check if the loaded value is used only in a store operation.
26457   StoreSDNode *StNode = nullptr;
26458   for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
26459     SDUse &U = I.getUse();
26460     if (U.getResNo() == 0) {
26461       if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
26462         if (StNode)
26463           return SDValue();
26464         StNode = St;
26465       } else {
26466         return SDValue();
26467       }
26468     }
26469   }
26470   if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26471       !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26472       !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26473     return SDValue();
26474 
26475   // Create new node GET_FPENV_MEM, which uses the store address to write FP
26476   // environment.
26477   SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
26478                                 StNode->getMemOperand());
26479   CombineTo(StNode, Res, false);
26480   return Res;
26481 }
26482 
26483 SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
26484   SDValue Chain = N->getOperand(0);
26485   SDValue Ptr = N->getOperand(1);
26486   EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
26487 
  // Check that the FP state address is otherwise used only by a single store
  // operation.
26489   StoreSDNode *StNode = nullptr;
26490   for (auto *U : Ptr->uses()) {
26491     if (U == N)
26492       continue;
26493     if (auto *St = dyn_cast<StoreSDNode>(U)) {
26494       if (StNode && StNode != St)
26495         return SDValue();
26496       StNode = St;
26497       continue;
26498     }
26499     return SDValue();
26500   }
26501   if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
26502       !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
26503       !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
26504     return SDValue();
26505 
26506   // Check if the stored value is loaded from some location and the loaded
26507   // value is used only in the store operation.
26508   SDValue StValue = StNode->getValue();
26509   auto *LdNode = dyn_cast<LoadSDNode>(StValue);
26510   if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
26511       !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
26512       !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
26513     return SDValue();
26514 
26515   // Create new node SET_FPENV_MEM, which uses the load address to read FP
26516   // environment.
26517   SDValue Res =
26518       DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
26519                       LdNode->getMemOperand());
26520   return Res;
26521 }
26522 
/// Returns a vector_shuffle if it is able to transform an AND to a
/// vector_shuffle with the destination vector and a zero vector.
26525 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
26526 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
26527 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
26528   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
26529 
26530   EVT VT = N->getValueType(0);
26531   SDValue LHS = N->getOperand(0);
26532   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
26533   SDLoc DL(N);
26534 
26535   // Make sure we're not running after operation legalization where it
26536   // may have custom lowered the vector shuffles.
26537   if (LegalOperations)
26538     return SDValue();
26539 
26540   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
26541     return SDValue();
26542 
26543   EVT RVT = RHS.getValueType();
26544   unsigned NumElts = RHS.getNumOperands();
26545 
  // Attempt to create a valid clear mask by splitting the mask into
  // sub-elements and checking that each is all zeros or all ones -
  // suitable for shuffle masking.
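  // For example (illustrative, little-endian): for a v2i64 AND with
  // <0x00000000ffffffff, -1>, Split == 2 gives i32 sub-elements
  // <-1, 0, -1, -1> and the clear mask {0, 5, 2, 3}, where 5 selects the
  // zero vector.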
26549   auto BuildClearMask = [&](int Split) {
26550     int NumSubElts = NumElts * Split;
26551     int NumSubBits = RVT.getScalarSizeInBits() / Split;
26552 
26553     SmallVector<int, 8> Indices;
26554     for (int i = 0; i != NumSubElts; ++i) {
26555       int EltIdx = i / Split;
26556       int SubIdx = i % Split;
26557       SDValue Elt = RHS.getOperand(EltIdx);
26558       // X & undef --> 0 (not undef). So this lane must be converted to choose
26559       // from the zero constant vector (same as if the element had all 0-bits).
26560       if (Elt.isUndef()) {
26561         Indices.push_back(i + NumSubElts);
26562         continue;
26563       }
26564 
26565       APInt Bits;
26566       if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
26567         Bits = Cst->getAPIntValue();
26568       else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
26569         Bits = CstFP->getValueAPF().bitcastToAPInt();
26570       else
26571         return SDValue();
26572 
26573       // Extract the sub element from the constant bit mask.
26574       if (DAG.getDataLayout().isBigEndian())
26575         Bits = Bits.extractBits(NumSubBits, (Split - SubIdx - 1) * NumSubBits);
26576       else
26577         Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits);
26578 
26579       if (Bits.isAllOnes())
26580         Indices.push_back(i);
26581       else if (Bits == 0)
26582         Indices.push_back(i + NumSubElts);
26583       else
26584         return SDValue();
26585     }
26586 
26587     // Let's see if the target supports this vector_shuffle.
26588     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
26589     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
26590     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
26591       return SDValue();
26592 
26593     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
26594     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
26595                                                    DAG.getBitcast(ClearVT, LHS),
26596                                                    Zero, Indices));
26597   };
26598 
26599   // Determine maximum split level (byte level masking).
26600   int MaxSplit = 1;
26601   if (RVT.getScalarSizeInBits() % 8 == 0)
26602     MaxSplit = RVT.getScalarSizeInBits() / 8;
26603 
26604   for (int Split = 1; Split <= MaxSplit; ++Split)
26605     if (RVT.getScalarSizeInBits() % Split == 0)
26606       if (SDValue S = BuildClearMask(Split))
26607         return S;
26608 
26609   return SDValue();
26610 }
26611 
26612 /// If a vector binop is performed on splat values, it may be profitable to
26613 /// extract, scalarize, and insert/splat.
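/// For example (illustrative only): mul (splat_vector X), (splat_vector Y)
/// can become splat_vector (mul X, Y), trading a vector multiply for one
/// scalar multiply plus a splat when the extracts are cheap and the scalar
/// op is legal or custom.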
26614 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG,
26615                                       const SDLoc &DL) {
26616   SDValue N0 = N->getOperand(0);
26617   SDValue N1 = N->getOperand(1);
26618   unsigned Opcode = N->getOpcode();
26619   EVT VT = N->getValueType(0);
26620   EVT EltVT = VT.getVectorElementType();
26621   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26622 
26623   // TODO: Remove/replace the extract cost check? If the elements are available
26624   //       as scalars, then there may be no extract cost. Should we ask if
26625   //       inserting a scalar back into a vector is cheap instead?
26626   int Index0, Index1;
26627   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26628   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
  // Extracting an element from a splat_vector should be free.
26630   // TODO: use DAG.isSplatValue instead?
26631   bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR &&
26632                            N1.getOpcode() == ISD::SPLAT_VECTOR;
26633   if (!Src0 || !Src1 || Index0 != Index1 ||
26634       Src0.getValueType().getVectorElementType() != EltVT ||
26635       Src1.getValueType().getVectorElementType() != EltVT ||
26636       !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) ||
26637       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
26638     return SDValue();
26639 
26640   SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26641   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC);
26642   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC);
26643   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
26644 
26645   // If all lanes but 1 are undefined, no need to splat the scalar result.
26646   // TODO: Keep track of undefs and use that info in the general case.
26647   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
26648       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
26649       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
26650     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
26651     // build_vec ..undef, (bo X, Y), undef...
26652     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
26653     Ops[Index0] = ScalarBO;
26654     return DAG.getBuildVector(VT, DL, Ops);
26655   }
26656 
26657   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
26658   return DAG.getSplat(VT, DL, ScalarBO);
26659 }
26660 
26661 /// Visit a vector cast operation, like FP_EXTEND.
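/// For example (illustrative only): fp_extend (splat_vector X:f32) can be
/// rewritten as splat_vector (fp_extend X):f64 when extracting the splat
/// scalar is cheap and the scalar cast is legal or custom.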
26662 SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
26663   EVT VT = N->getValueType(0);
26664   assert(VT.isVector() && "SimplifyVCastOp only works on vectors!");
26665   EVT EltVT = VT.getVectorElementType();
26666   unsigned Opcode = N->getOpcode();
26667 
26668   SDValue N0 = N->getOperand(0);
26669   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
26670 
  // TODO: promoting the operation might also be good here?
26672   int Index0;
26673   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
26674   if (Src0 &&
26675       (N0.getOpcode() == ISD::SPLAT_VECTOR ||
26676        TLI.isExtractVecEltCheap(VT, Index0)) &&
26677       TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
26678       TLI.preferScalarizeSplat(N)) {
26679     EVT SrcVT = N0.getValueType();
26680     EVT SrcEltVT = SrcVT.getVectorElementType();
26681     SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
26682     SDValue Elt =
26683         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
26684     SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags());
26685     if (VT.isScalableVector())
26686       return DAG.getSplatVector(VT, DL, ScalarBO);
26687     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
26688     return DAG.getBuildVector(VT, DL, Ops);
26689   }
26690 
26691   return SDValue();
26692 }
26693 
26694 /// Visit a binary vector operation, like ADD.
26695 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) {
26696   EVT VT = N->getValueType(0);
26697   assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
26698 
26699   SDValue LHS = N->getOperand(0);
26700   SDValue RHS = N->getOperand(1);
26701   unsigned Opcode = N->getOpcode();
26702   SDNodeFlags Flags = N->getFlags();
26703 
  // Move unary shuffles with identical masks after a vector binop:
  // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask)
  //   --> shuffle (VBinOp A, B), Undef, Mask
  // This does not require type legality checks because we are creating the
  // same types of operations that are in the original sequence. We do have to
  // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
  // though. This code is adapted from the identical transform in instcombine.
26711   if (DAG.isSafeToSpeculativelyExecute(Opcode)) {
26712     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
26713     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
26714     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
26715         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
26716         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
26717       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
26718                                      RHS.getOperand(0), Flags);
26719       SDValue UndefV = LHS.getOperand(1);
26720       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
26721     }
26722 
26723     // Try to sink a splat shuffle after a binop with a uniform constant.
26724     // This is limited to cases where neither the shuffle nor the constant have
26725     // undefined elements because that could be poison-unsafe or inhibit
26726     // demanded elements analysis. It is further limited to not change a splat
26727     // of an inserted scalar because that may be optimized better by
26728     // load-folding or other target-specific behaviors.
26729     if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) &&
26730         Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() &&
26731         Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26732       // binop (splat X), (splat C) --> splat (binop X, C)
26733       SDValue X = Shuf0->getOperand(0);
26734       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags);
26735       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26736                                   Shuf0->getMask());
26737     }
26738     if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) &&
26739         Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() &&
26740         Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) {
26741       // binop (splat C), (splat X) --> splat (binop C, X)
26742       SDValue X = Shuf1->getOperand(0);
26743       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags);
26744       return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT),
26745                                   Shuf1->getMask());
26746     }
26747   }
26748 
26749   // The following pattern is likely to emerge with vector reduction ops. Moving
26750   // the binary operation ahead of insertion may allow using a narrower vector
26751   // instruction that has better performance than the wide version of the op:
26752   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
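  // For example (illustrative types): with v8i32 operands where X and Y are
  // v4i32 subvectors inserted at index Z = 0, the wide add becomes a v4i32
  // add whose result is re-inserted into the (constant-folded) wide value.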
26753   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
26754       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
26755       LHS.getOperand(2) == RHS.getOperand(2) &&
26756       (LHS.hasOneUse() || RHS.hasOneUse())) {
26757     SDValue X = LHS.getOperand(1);
26758     SDValue Y = RHS.getOperand(1);
26759     SDValue Z = LHS.getOperand(2);
26760     EVT NarrowVT = X.getValueType();
26761     if (NarrowVT == Y.getValueType() &&
26762         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT,
26763                                               LegalOperations)) {
26764       // (binop undef, undef) may not return undef, so compute that result.
26765       SDValue VecC =
26766           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
26767       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
26768       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
26769     }
26770   }
26771 
26772   // Make sure all but the first op are undef or constant.
26773   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
26774     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
26775            all_of(drop_begin(Concat->ops()), [](const SDValue &Op) {
26776              return Op.isUndef() ||
26777                     ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
26778            });
26779   };
26780 
26781   // The following pattern is likely to emerge with vector reduction ops. Moving
26782   // the binary operation ahead of the concat may allow using a narrower vector
26783   // instruction that has better performance than the wide version of the op:
26784   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
26785   //   concat (VBinOp X, Y), VecC
26786   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
26787       (LHS.hasOneUse() || RHS.hasOneUse())) {
26788     EVT NarrowVT = LHS.getOperand(0).getValueType();
26789     if (NarrowVT == RHS.getOperand(0).getValueType() &&
26790         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
26791       unsigned NumOperands = LHS.getNumOperands();
26792       SmallVector<SDValue, 4> ConcatOps;
26793       for (unsigned i = 0; i != NumOperands; ++i) {
        // This constant-folds for operands 1 and up.
26795         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
26796                                         RHS.getOperand(i)));
26797       }
26798 
26799       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
26800     }
26801   }
26802 
26803   if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL))
26804     return V;
26805 
26806   return SDValue();
26807 }
26808 
26809 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
26810                                     SDValue N2) {
26811   assert(N0.getOpcode() == ISD::SETCC &&
26812          "First argument must be a SetCC node!");
26813 
26814   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
26815                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
26816 
26817   // If we got a simplified select_cc node back from SimplifySelectCC, then
26818   // break it down into a new SETCC node, and a new SELECT node, and then return
26819   // the SELECT node, since we were called with a SELECT node.
26820   if (SCC.getNode()) {
26821     // Check to see if we got a select_cc back (to turn into setcc/select).
26822     // Otherwise, just return whatever node we got back, like fabs.
26823     if (SCC.getOpcode() == ISD::SELECT_CC) {
26824       const SDNodeFlags Flags = N0->getFlags();
26825       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
26826                                   N0.getValueType(),
26827                                   SCC.getOperand(0), SCC.getOperand(1),
26828                                   SCC.getOperand(4), Flags);
26829       AddToWorklist(SETCC.getNode());
26830       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
26831                                          SCC.getOperand(2), SCC.getOperand(3));
26832       SelectNode->setFlags(Flags);
26833       return SelectNode;
26834     }
26835 
26836     return SCC;
26837   }
26838   return SDValue();
26839 }
26840 
26841 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
26842 /// being selected between, see if we can simplify the select.  Callers of this
26843 /// should assume that TheSelect is deleted if this returns true.  As such, they
26844 /// should return the appropriate thing (e.g. the node) back to the top-level of
26845 /// the DAG combiner loop to avoid it being looked at.
26846 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
26847                                     SDValue RHS) {
26848   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26849   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
26850   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
26851     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
26852       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
26853       SDValue Sqrt = RHS;
26854       ISD::CondCode CC;
26855       SDValue CmpLHS;
26856       const ConstantFPSDNode *Zero = nullptr;
26857 
26858       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
26859         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
26860         CmpLHS = TheSelect->getOperand(0);
26861         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
26862       } else {
26863         // SELECT or VSELECT
26864         SDValue Cmp = TheSelect->getOperand(0);
26865         if (Cmp.getOpcode() == ISD::SETCC) {
26866           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
26867           CmpLHS = Cmp.getOperand(0);
26868           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
26869         }
26870       }
26871       if (Zero && Zero->isZero() &&
26872           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
26873           CC == ISD::SETULT || CC == ISD::SETLT)) {
26874         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
26875         CombineTo(TheSelect, Sqrt);
26876         return true;
26877       }
26878     }
26879   }
  // Cannot simplify a select with a vector condition.
26881   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
26882 
26883   // If this is a select from two identical things, try to pull the operation
26884   // through the select.
26885   if (LHS.getOpcode() != RHS.getOpcode() ||
26886       !LHS.hasOneUse() || !RHS.hasOneUse())
26887     return false;
26888 
26889   // If this is a load and the token chain is identical, replace the select
26890   // of two loads with a load through a select of the address to load from.
26891   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
26892   // constants have been dropped into the constant pool.
26893   if (LHS.getOpcode() == ISD::LOAD) {
26894     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
26895     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
26896 
26897     // Token chains must be identical.
26898     if (LHS.getOperand(0) != RHS.getOperand(0) ||
26899         // Do not let this transformation reduce the number of volatile loads.
        // Be conservative for atomics for the moment.
        // TODO: This does appear to be legal for unordered atomics (see D66309)
26902         !LLD->isSimple() || !RLD->isSimple() ||
26903         // FIXME: If either is a pre/post inc/dec load,
26904         // we'd need to split out the address adjustment.
26905         LLD->isIndexed() || RLD->isIndexed() ||
        // If this is an EXTLOAD, the VTs must match.
26907         LLD->getMemoryVT() != RLD->getMemoryVT() ||
26908         // If this is an EXTLOAD, the kind of extension must match.
26909         (LLD->getExtensionType() != RLD->getExtensionType() &&
26910          // The only exception is if one of the extensions is anyext.
26911          LLD->getExtensionType() != ISD::EXTLOAD &&
26912          RLD->getExtensionType() != ISD::EXTLOAD) ||
26913         // FIXME: this discards src value information.  This is
26914         // over-conservative. It would be beneficial to be able to remember
26915         // both potential memory locations.  Since we are discarding
26916         // src value info, don't do the transformation if the memory
26917         // locations are not in the default address space.
26918         LLD->getPointerInfo().getAddrSpace() != 0 ||
26919         RLD->getPointerInfo().getAddrSpace() != 0 ||
26920         // We can't produce a CMOV of a TargetFrameIndex since we won't
26921         // generate the address generation required.
26922         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26923         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
26924         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
26925                                       LLD->getBasePtr().getValueType()))
26926       return false;
26927 
26928     // The loads must not depend on one another.
26929     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
26930       return false;
26931 
26932     // Check that the select condition doesn't reach either load.  If so,
26933     // folding this will induce a cycle into the DAG.  If not, this is safe to
26934     // xform, so create a select of the addresses.
26935 
26936     SmallPtrSet<const SDNode *, 32> Visited;
26937     SmallVector<const SDNode *, 16> Worklist;
26938 
26939     // Always fail if LLD and RLD are not independent. TheSelect is a
26940     // predecessor to all Nodes in question so we need not search past it.
26941 
26942     Visited.insert(TheSelect);
26943     Worklist.push_back(LLD);
26944     Worklist.push_back(RLD);
26945 
26946     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
26947         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
26948       return false;
26949 
26950     SDValue Addr;
26951     if (TheSelect->getOpcode() == ISD::SELECT) {
26952       // We cannot do this optimization if any pair of {RLD, LLD} is a
26953       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
26954       // Loads, we only need to check if CondNode is a successor to one of the
26955       // loads. We can further avoid this if there's no use of their chain
26956       // value.
26957       SDNode *CondNode = TheSelect->getOperand(0).getNode();
26958       Worklist.push_back(CondNode);
26959 
26960       if ((LLD->hasAnyUseOfValue(1) &&
26961            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
26962           (RLD->hasAnyUseOfValue(1) &&
26963            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
26964         return false;
26965 
26966       Addr = DAG.getSelect(SDLoc(TheSelect),
26967                            LLD->getBasePtr().getValueType(),
26968                            TheSelect->getOperand(0), LLD->getBasePtr(),
26969                            RLD->getBasePtr());
26970     } else {  // Otherwise SELECT_CC
26971       // We cannot do this optimization if any pair of {RLD, LLD} is a
26972       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
26973       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
26974       // one of the loads. We can further avoid this if there's no use of their
26975       // chain value.
26976 
26977       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
26978       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
26979       Worklist.push_back(CondLHS);
26980       Worklist.push_back(CondRHS);
26981 
26982       if ((LLD->hasAnyUseOfValue(1) &&
26983            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
26984           (RLD->hasAnyUseOfValue(1) &&
26985            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
26986         return false;
26987 
26988       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
26989                          LLD->getBasePtr().getValueType(),
26990                          TheSelect->getOperand(0),
26991                          TheSelect->getOperand(1),
26992                          LLD->getBasePtr(), RLD->getBasePtr(),
26993                          TheSelect->getOperand(4));
26994     }
26995 
26996     SDValue Load;
    // It is safe to replace the two loads if they have different alignments,
    // but the new load must use the minimum (most restrictive) alignment of
    // the inputs.
27000     Align Alignment = std::min(LLD->getAlign(), RLD->getAlign());
27001     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
27002     if (!RLD->isInvariant())
27003       MMOFlags &= ~MachineMemOperand::MOInvariant;
27004     if (!RLD->isDereferenceable())
27005       MMOFlags &= ~MachineMemOperand::MODereferenceable;
27006     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
27007       // FIXME: Discards pointer and AA info.
27008       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
27009                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
27010                          MMOFlags);
27011     } else {
27012       // FIXME: Discards pointer and AA info.
27013       Load = DAG.getExtLoad(
27014           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
27015                                                   : LLD->getExtensionType(),
27016           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
27017           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
27018     }
27019 
27020     // Users of the select now use the result of the load.
27021     CombineTo(TheSelect, Load);
27022 
27023     // Users of the old loads now use the new load's chain.  We know the
27024     // old-load value is dead now.
27025     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
27026     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
27027     return true;
27028   }
27029 
27030   return false;
27031 }
27032 
27033 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
27034 /// bitwise 'and'.
27035 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
27036                                             SDValue N1, SDValue N2, SDValue N3,
27037                                             ISD::CondCode CC) {
27038   // If this is a select where the false operand is zero and the compare is a
27039   // check of the sign bit, see if we can perform the "gzip trick":
27040   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
27041   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
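  // Worked example (illustrative, i32): select_cc setlt X, 0, A, 0 becomes
  // (X >>s 31) & A. The arithmetic shift produces all-ones when X is
  // negative and zero otherwise, so the AND yields A or 0 with no branch.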
27042   EVT XType = N0.getValueType();
27043   EVT AType = N2.getValueType();
27044   if (!isNullConstant(N3) || !XType.bitsGE(AType))
27045     return SDValue();
27046 
27047   // If the comparison is testing for a positive value, we have to invert
27048   // the sign bit mask, so only do that transform if the target has a bitwise
27049   // 'and not' instruction (the invert is free).
27050   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
27051     // (X > -1) ? A : 0
27052     // (X >  0) ? X : 0 <-- This is canonical signed max.
27053     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
27054       return SDValue();
27055   } else if (CC == ISD::SETLT) {
27056     // (X <  0) ? A : 0
27057     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
27058     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
27059       return SDValue();
27060   } else {
27061     return SDValue();
27062   }
27063 
27064   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
27065   // constant.
27066   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
27067   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27068   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
27069     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
27070     if (!TLI.shouldAvoidTransformToShift(XType, ShCt)) {
27071       SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27072       SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
27073       AddToWorklist(Shift.getNode());
27074 
27075       if (XType.bitsGT(AType)) {
27076         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27077         AddToWorklist(Shift.getNode());
27078       }
27079 
27080       if (CC == ISD::SETGT)
27081         Shift = DAG.getNOT(DL, Shift, AType);
27082 
27083       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27084     }
27085   }
27086 
27087   unsigned ShCt = XType.getSizeInBits() - 1;
27088   if (TLI.shouldAvoidTransformToShift(XType, ShCt))
27089     return SDValue();
27090 
27091   SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
27092   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
27093   AddToWorklist(Shift.getNode());
27094 
27095   if (XType.bitsGT(AType)) {
27096     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
27097     AddToWorklist(Shift.getNode());
27098   }
27099 
27100   if (CC == ISD::SETGT)
27101     Shift = DAG.getNOT(DL, Shift, AType);
27102 
27103   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
27104 }
27105 
27106 // Fold select(cc, binop(), binop()) -> binop(select(), select()) etc.
27107 SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) {
27108   SDValue N0 = N->getOperand(0);
27109   SDValue N1 = N->getOperand(1);
27110   SDValue N2 = N->getOperand(2);
27111   SDLoc DL(N);
27112 
27113   unsigned BinOpc = N1.getOpcode();
27114   if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) ||
27115       (N1.getResNo() != N2.getResNo()))
27116     return SDValue();
27117 
27118   // The use checks are intentionally on SDNode because we may be dealing
27119   // with opcodes that produce more than one SDValue.
27120   // TODO: Do we really need to check N0 (the condition operand of the select)?
27121   //       But removing that clause could cause an infinite loop...
27122   if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse())
27123     return SDValue();
27124 
27125   // Binops may include opcodes that return multiple values, so all values
27126   // must be created/propagated from the newly created binops below.
27127   SDVTList OpVTs = N1->getVTList();
27128 
27129   // Fold select(cond, binop(x, y), binop(z, y))
27130   //  --> binop(select(cond, x, z), y)
27131   if (N1.getOperand(1) == N2.getOperand(1)) {
27132     SDValue N10 = N1.getOperand(0);
27133     SDValue N20 = N2.getOperand(0);
27134     SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20);
27135     SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1));
27136     NewBinOp->setFlags(N1->getFlags());
27137     NewBinOp->intersectFlagsWith(N2->getFlags());
27138     return SDValue(NewBinOp.getNode(), N1.getResNo());
27139   }
27140 
27141   // Fold select(cond, binop(x, y), binop(x, z))
27142   //  --> binop(x, select(cond, y, z))
27143   if (N1.getOperand(0) == N2.getOperand(0)) {
27144     SDValue N11 = N1.getOperand(1);
27145     SDValue N21 = N2.getOperand(1);
27146     // Second op VT might be different (e.g. shift amount type)
27147     if (N11.getValueType() == N21.getValueType()) {
27148       SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21);
27149       SDValue NewBinOp =
27150           DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel);
27151       NewBinOp->setFlags(N1->getFlags());
27152       NewBinOp->intersectFlagsWith(N2->getFlags());
27153       return SDValue(NewBinOp.getNode(), N1.getResNo());
27154     }
27155   }
27156 
27157   // TODO: Handle isCommutativeBinOp patterns as well?
27158   return SDValue();
27159 }
27160 
27161 // Transform (fneg/fabs (bitconvert x)) to avoid loading constant pool values.
27162 SDValue DAGCombiner::foldSignChangeInBitcast(SDNode *N) {
27163   SDValue N0 = N->getOperand(0);
27164   EVT VT = N->getValueType(0);
27165   bool IsFabs = N->getOpcode() == ISD::FABS;
27166   bool IsFree = IsFabs ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
27167 
27168   if (IsFree || N0.getOpcode() != ISD::BITCAST || !N0.hasOneUse())
27169     return SDValue();
27170 
27171   SDValue Int = N0.getOperand(0);
27172   EVT IntVT = Int.getValueType();
27173 
  // The operand of the cast should be a scalar integer.
27175   if (!IntVT.isInteger() || IntVT.isVector())
27176     return SDValue();
27177 
27178   // (fneg (bitconvert x)) -> (bitconvert (xor x sign))
27179   // (fabs (bitconvert x)) -> (bitconvert (and x ~sign))
27180   APInt SignMask;
27181   if (N0.getValueType().isVector()) {
27182     // For vector, create a sign mask (0x80...) or its inverse (for fabs,
27183     // 0x7f...) per element and splat it.
27184     SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
27185     if (IsFabs)
27186       SignMask = ~SignMask;
27187     SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
27188   } else {
27189     // For scalar, just use the sign mask (0x80... or the inverse, 0x7f...)
27190     SignMask = APInt::getSignMask(IntVT.getSizeInBits());
27191     if (IsFabs)
27192       SignMask = ~SignMask;
27193   }
27194   SDLoc DL(N0);
27195   Int = DAG.getNode(IsFabs ? ISD::AND : ISD::XOR, DL, IntVT, Int,
27196                     DAG.getConstant(SignMask, DL, IntVT));
27197   AddToWorklist(Int.getNode());
27198   return DAG.getBitcast(VT, Int);
27199 }
27200 
27201 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
27202 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
27203 /// in it. This may be a win when the constant is not otherwise available
27204 /// because it replaces two constant pool loads with one.
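/// Illustrative sketch: the two FP constants are stored as a two-element
/// constant-pool array, a setcc selects between the byte offsets of element
/// 0 and element 1 (e.g. 0 and 4 for f32), and a single load fetches the
/// chosen value.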
27205 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
27206     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
27207     ISD::CondCode CC) {
27208   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
27209     return SDValue();
27210 
27211   // If we are before legalize types, we want the other legalization to happen
27212   // first (for example, to avoid messing with soft float).
27213   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
27214   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
27215   EVT VT = N2.getValueType();
27216   if (!TV || !FV || !TLI.isTypeLegal(VT))
27217     return SDValue();
27218 
27219   // If a constant can be materialized without loads, this does not make sense.
27220   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
27221       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
27222       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
27223     return SDValue();
27224 
27225   // If both constants have multiple uses, then we won't need to do an extra
27226   // load. The values are likely around in registers for other users.
27227   if (!TV->hasOneUse() && !FV->hasOneUse())
27228     return SDValue();
27229 
27230   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
27231                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
27232   Type *FPTy = Elts[0]->getType();
27233   const DataLayout &TD = DAG.getDataLayout();
27234 
27235   // Create a ConstantArray of the two constants.
27236   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
27237   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
27238                                       TD.getPrefTypeAlign(FPTy));
27239   Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign();
27240 
27241   // Get offsets to the 0 and 1 elements of the array, so we can select between
27242   // them.
27243   SDValue Zero = DAG.getIntPtrConstant(0, DL);
27244   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
27245   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
27246   SDValue Cond =
27247       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
27248   AddToWorklist(Cond.getNode());
27249   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
27250   AddToWorklist(CstOffset.getNode());
27251   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
27252   AddToWorklist(CPIdx.getNode());
27253   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
27254                      MachinePointerInfo::getConstantPool(
27255                          DAG.getMachineFunction()), Alignment);
27256 }
27257 
27258 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
27259 /// where 'cond' is the comparison specified by CC.
27260 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
27261                                       SDValue N2, SDValue N3, ISD::CondCode CC,
27262                                       bool NotExtCompare) {
27263   // (x ? y : y) -> y.
27264   if (N2 == N3) return N2;
27265 
27266   EVT CmpOpVT = N0.getValueType();
27267   EVT CmpResVT = getSetCCResultType(CmpOpVT);
27268   EVT VT = N2.getValueType();
27269   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
27270   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
27271   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
27272 
27273   // Determine if the condition we're dealing with is constant.
27274   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
27275     AddToWorklist(SCC.getNode());
27276     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
27277       // fold select_cc true, x, y -> x
27278       // fold select_cc false, x, y -> y
27279       return !(SCCC->isZero()) ? N2 : N3;
27280     }
27281   }
27282 
27283   if (SDValue V =
27284           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
27285     return V;
27286 
27287   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
27288     return V;
27289 
  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
  // where y has a single bit set.
  // In plain terms: we can turn the SELECT_CC into an AND when the condition
  // can be materialized as an all-ones register. Any single bit-test can be
  // materialized as an all-ones register with shift-left and
  // shift-right-arith.
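  // Worked example (illustrative, i32): for (x & 2) == 0 ? 0 : A, shifting
  // bit 1 left into the sign bit (shl x, 30) and then arithmetic-shifting
  // right by 31 yields all-ones exactly when the bit was set, so ANDing
  // with A produces the selected value.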
27296   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
27297       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
27298     SDValue AndLHS = N0->getOperand(0);
27299     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
27300     if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
27301       // Shift the tested bit over the sign bit.
27302       const APInt &AndMask = ConstAndRHS->getAPIntValue();
27303       if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
27304         unsigned ShCt = AndMask.getBitWidth() - 1;
27305         SDValue ShlAmt =
27306             DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
27307                             getShiftAmountTy(AndLHS.getValueType()));
27308         SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
27309 
27310         // Now arithmetic right shift it all the way over, so the result is
27311         // either all-ones, or zero.
27312         SDValue ShrAmt =
27313           DAG.getConstant(ShCt, SDLoc(Shl),
27314                           getShiftAmountTy(Shl.getValueType()));
27315         SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
27316 
27317         return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
27318       }
27319     }
27320   }
27321 
27322   // fold select C, 16, 0 -> shl C, 4
27323   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
27324   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
27325 
27326   if ((Fold || Swap) &&
27327       TLI.getBooleanContents(CmpOpVT) ==
27328           TargetLowering::ZeroOrOneBooleanContent &&
27329       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
27330 
27331     if (Swap) {
27332       CC = ISD::getSetCCInverse(CC, CmpOpVT);
27333       std::swap(N2C, N3C);
27334     }
27335 
27336     // If the caller doesn't want us to simplify this into a zext of a compare,
27337     // don't do it.
27338     if (NotExtCompare && N2C->isOne())
27339       return SDValue();
27340 
27341     SDValue Temp, SCC;
27342     // zext (setcc n0, n1)
27343     if (LegalTypes) {
27344       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
27345       Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
27346     } else {
27347       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
27348       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
27349     }
27350 
27351     AddToWorklist(SCC.getNode());
27352     AddToWorklist(Temp.getNode());
27353 
27354     if (N2C->isOne())
27355       return Temp;
27356 
27357     unsigned ShCt = N2C->getAPIntValue().logBase2();
27358     if (TLI.shouldAvoidTransformToShift(VT, ShCt))
27359       return SDValue();
27360 
27361     // shl setcc result by log2 n2c
27362     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
27363                        DAG.getConstant(ShCt, SDLoc(Temp),
27364                                        getShiftAmountTy(Temp.getValueType())));
27365   }
27366 
27367   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
27368   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
27369   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
27370   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
27371   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
27372   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
27373   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
27374   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
27375   if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
27376     SDValue ValueOnZero = N2;
27377     SDValue Count = N3;
    // If the condition is NE instead of EQ, swap the operands.
27379     if (CC == ISD::SETNE)
27380       std::swap(ValueOnZero, Count);
27381     // Check if the value on zero is a constant equal to the bits in the type.
27382     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
27383       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
27384         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
27385         // legal, combine to just cttz.
27386         if ((Count.getOpcode() == ISD::CTTZ ||
27387              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
27388             N0 == Count.getOperand(0) &&
27389             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
27390           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
27391         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
27392         // legal, combine to just ctlz.
27393         if ((Count.getOpcode() == ISD::CTLZ ||
27394              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
27395             N0 == Count.getOperand(0) &&
27396             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
27397           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
27398       }
27399     }
27400   }
27401 
27402   // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C
27403   // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C
27404   if (!NotExtCompare && N1C && N2C && N3C &&
27405       N2C->getAPIntValue() == ~N3C->getAPIntValue() &&
27406       ((N1C->isAllOnes() && CC == ISD::SETGT) ||
27407        (N1C->isZero() && CC == ISD::SETLT)) &&
27408       !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) {
27409     SDValue ASR = DAG.getNode(
27410         ISD::SRA, DL, CmpOpVT, N0,
27411         DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT));
27412     return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT),
27413                        DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
27414   }
27415 
27416   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27417     return S;
27418   if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
27419     return S;
27420 
27421   return SDValue();
27422 }
27423 
27424 /// This is a stub for TargetLowering::SimplifySetCC.
27425 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
27426                                    ISD::CondCode Cond, const SDLoc &DL,
27427                                    bool foldBooleans) {
27428   TargetLowering::DAGCombinerInfo
27429     DagCombineInfo(DAG, Level, false, this);
27430   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
27431 }
27432 
27433 /// Given an ISD::SDIV node expressing a divide by constant, return
27434 /// a DAG expression to select that will generate the same value by multiplying
27435 /// by a magic number.
27436 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27437 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div into a
  // mul and a shift.
27440   if (DAG.getMachineFunction().getFunction().hasMinSize())
27441     return SDValue();
27442 
27443   SmallVector<SDNode *, 8> Built;
27444   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
27445     for (SDNode *N : Built)
27446       AddToWorklist(N);
27447     return S;
27448   }
27449 
27450   return SDValue();
27451 }
27452 
27453 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
27454 /// DAG expression that will generate the same value by right shifting.
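/// For example (illustrative, i32): sdiv X, 4 can be emitted as
///   sra (add X, (srl (sra X, 31), 30)), 2
/// where the bias term rounds negative dividends toward zero.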
27455 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
27456   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27457   if (!C)
27458     return SDValue();
27459 
27460   // Avoid division by zero.
27461   if (C->isZero())
27462     return SDValue();
27463 
27464   SmallVector<SDNode *, 8> Built;
27465   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
27466     for (SDNode *N : Built)
27467       AddToWorklist(N);
27468     return S;
27469   }
27470 
27471   return SDValue();
27472 }
27473 
27474 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
27475 /// expression that will generate the same value by multiplying by a magic
27476 /// number.
27477 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
27478 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimizing for minimum size, we don't want to expand a div into a
  // mul and a shift.
27481   if (DAG.getMachineFunction().getFunction().hasMinSize())
27482     return SDValue();
27483 
27484   SmallVector<SDNode *, 8> Built;
27485   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
27486     for (SDNode *N : Built)
27487       AddToWorklist(N);
27488     return S;
27489   }
27490 
27491   return SDValue();
27492 }
27493 
27494 /// Given an ISD::SREM node expressing a remainder by constant power of 2,
27495 /// return a DAG expression that will generate the same value.
27496 SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
27497   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
27498   if (!C)
27499     return SDValue();
27500 
27501   // Avoid division by zero.
27502   if (C->isZero())
27503     return SDValue();
27504 
27505   SmallVector<SDNode *, 8> Built;
27506   if (SDValue S = TLI.BuildSREMPow2(N, C->getAPIntValue(), DAG, Built)) {
27507     for (SDNode *N : Built)
27508       AddToWorklist(N);
27509     return S;
27510   }
27511 
27512   return SDValue();
27513 }
27514 
27515 // This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
27516 //
// Returns the node that represents `Log2(Op)`. This may create a new node.
// If we are unable to compute `Log2(Op)`, it returns `SDValue()`.
27519 //
27520 // All nodes will be created at `DL` and the output will be of type `VT`.
27521 //
// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
// `AssumeNonZero` if this function should simply assume (rather than require
// proving) that `Op` is non-zero.
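//
// For example (illustrative): Log2(8 << Y) folds to 3 + Y via the SHL rule
// below, and Log2(select C, 16, 32) folds to (select C, 4, 5).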
27525 static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
27526                                    SDValue Op, unsigned Depth,
27527                                    bool AssumeNonZero) {
27528   assert(VT.isInteger() && "Only integer types are supported!");
27529 
27530   auto PeekThroughCastsAndTrunc = [](SDValue V) {
27531     while (true) {
27532       switch (V.getOpcode()) {
27533       case ISD::TRUNCATE:
27534       case ISD::ZERO_EXTEND:
27535         V = V.getOperand(0);
27536         break;
27537       default:
27538         return V;
27539       }
27540     }
27541   };
27542 
27543   if (VT.isScalableVector())
27544     return SDValue();
27545 
27546   Op = PeekThroughCastsAndTrunc(Op);
27547 
  // Helper for determining whether a value is a power-of-2 constant scalar
  // or a vector of such elements.
27550   SmallVector<APInt> Pow2Constants;
27551   auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
27552     if (C->isZero() || C->isOpaque())
27553       return false;
27554     // TODO: We may also be able to support negative powers of 2 here.
27555     if (C->getAPIntValue().isPowerOf2()) {
27556       Pow2Constants.emplace_back(C->getAPIntValue());
27557       return true;
27558     }
27559     return false;
27560   };
27561 
27562   if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
27563     if (!VT.isVector())
27564       return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
27565     // We need to create a build vector
27566     SmallVector<SDValue> Log2Ops;
27567     for (const APInt &Pow2 : Pow2Constants)
27568       Log2Ops.emplace_back(
27569           DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
27570     return DAG.getBuildVector(VT, DL, Log2Ops);
27571   }
27572 
27573   if (Depth >= DAG.MaxRecursionDepth)
27574     return SDValue();
27575 
27576   auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
27577     ToCast = PeekThroughCastsAndTrunc(ToCast);
27578     EVT CurVT = ToCast.getValueType();
27579     if (NewVT == CurVT)
27580       return ToCast;
27581 
27582     if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
27583       return DAG.getBitcast(NewVT, ToCast);
27584 
27585     return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
27586   };
27587 
27588   // log2(X << Y) -> log2(X) + Y
27589   if (Op.getOpcode() == ISD::SHL) {
    // 1 << Y and X nuw/nsw << Y are both non-zero.
27591     if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
27592         Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
27593       if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
27594                                              Depth + 1, AssumeNonZero))
27595         return DAG.getNode(ISD::ADD, DL, VT, LogX,
27596                            CastToVT(VT, Op.getOperand(1)));
27597   }
27598 
27599   // c ? X : Y -> c ? Log2(X) : Log2(Y)
27600   if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
27601       Op.hasOneUse()) {
27602     if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
27603                                            Depth + 1, AssumeNonZero))
27604       if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
27605                                              Depth + 1, AssumeNonZero))
27606         return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
27607   }
27608 
27609   // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
27610   // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
27611   if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
27612       Op.hasOneUse()) {
    // Use AssumeNonZero as false here. Otherwise we can hit a case where
    // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
27615     if (SDValue LogX =
27616             takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
27617                                 /*AssumeNonZero*/ false))
27618       if (SDValue LogY =
27619               takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
27620                                   /*AssumeNonZero*/ false))
27621         return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
27622   }
27623 
27624   return SDValue();
27625 }
27626 
27627 /// Determines the LogBase2 value for a non-null input value using the
27628 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
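/// For example, with a 32-bit V == 16: ctlz(16) == 27, so
/// LogBase2(16) == (32 - 1) - 27 == 4.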
27629 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
27630                                    bool KnownNonZero, bool InexpensiveOnly,
27631                                    std::optional<EVT> OutVT) {
27632   EVT VT = OutVT ? *OutVT : V.getValueType();
27633   SDValue InexpensiveLogBase2 =
27634       takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
27635   if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
27636     return InexpensiveLogBase2;
27637 
27638   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
27639   SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
27640   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
27641   return LogBase2;
27642 }
27643 
27644 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27645 /// For the reciprocal, we need to find the zero of the function:
27646 ///   F(X) = 1/X - A [which has a zero at X = 1/A]
27647 ///     =>
27648 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
27649 ///     does not require additional intermediate precision]
27650 /// For the last iteration, put numerator N into it to gain more precision:
27651 ///   Result = N X_i + X_i (N - N A X_i)
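/// Illustrative convergence for A = 3 with initial estimate X_0 = 0.3:
///   X_1 = 0.3  * (2 - 3 * 0.3)  = 0.33
///   X_2 = 0.33 * (2 - 3 * 0.33) = 0.3333
/// rapidly approaching 1/3.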
27652 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
27653                                       SDNodeFlags Flags) {
27654   if (LegalDAG)
27655     return SDValue();
27656 
27657   // TODO: Handle extended types?
27658   EVT VT = Op.getValueType();
27659   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27660       VT.getScalarType() != MVT::f64)
27661     return SDValue();
27662 
27663   // If estimates are explicitly disabled for this function, we're done.
27664   MachineFunction &MF = DAG.getMachineFunction();
27665   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
27666   if (Enabled == TLI.ReciprocalEstimate::Disabled)
27667     return SDValue();
27668 
27669   // Estimates may be explicitly enabled for this type with a custom number of
27670   // refinement steps.
27671   int Iterations = TLI.getDivRefinementSteps(VT, MF);
27672   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
27673     AddToWorklist(Est.getNode());
27674 
27675     SDLoc DL(Op);
27676     if (Iterations) {
27677       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
27678 
27679       // Newton iterations: Est = Est + Est (N - Arg * Est)
27680       // If this is the last iteration, also multiply by the numerator.
27681       for (int i = 0; i < Iterations; ++i) {
27682         SDValue MulEst = Est;
27683 
27684         if (i == Iterations - 1) {
27685           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
27686           AddToWorklist(MulEst.getNode());
27687         }
27688 
27689         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
27690         AddToWorklist(NewEst.getNode());
27691 
27692         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
27693                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
27694         AddToWorklist(NewEst.getNode());
27695 
27696         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27697         AddToWorklist(NewEst.getNode());
27698 
27699         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
27700         AddToWorklist(Est.getNode());
27701       }
27702     } else {
27703       // If no iterations are available, multiply with N.
27704       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
27705       AddToWorklist(Est.getNode());
27706     }
27707 
27708     return Est;
27709   }
27710 
27711   return SDValue();
27712 }
27713 
27714 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27715 /// For the reciprocal sqrt, we need to find the zero of the function:
27716 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27717 ///     =>
27718 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
27719 /// As a result, we precompute A/2 prior to the iteration loop.
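/// Illustrative convergence for A = 4 with initial estimate X_0 = 0.45:
///   X_1 = 0.45 * (1.5 - 4 * 0.45^2 / 2) = 0.45 * 1.095 = 0.49275
/// approaching 1/sqrt(4) = 0.5.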
27720 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
27721                                          unsigned Iterations,
27722                                          SDNodeFlags Flags, bool Reciprocal) {
27723   EVT VT = Arg.getValueType();
27724   SDLoc DL(Arg);
27725   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
27726 
27727   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
27728   // this entire sequence requires only one FP constant.
27729   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
27730   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
27731 
27732   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
27733   for (unsigned i = 0; i < Iterations; ++i) {
27734     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
27735     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
27736     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
27737     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
27738   }
27739 
27740   // If non-reciprocal square root is requested, multiply the result by Arg.
27741   if (!Reciprocal)
27742     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
27743 
27744   return Est;
27745 }
27746 
27747 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
27748 /// For the reciprocal sqrt, we need to find the zero of the function:
27749 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
27750 ///     =>
27751 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
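/// This form is algebraically equivalent to the one-constant variant:
///   (-0.5 * X_i) * (A * X_i^2 - 3.0) = X_i * (1.5 - A * X_i^2 / 2)
/// but uses two constants (-0.5 and -3.0), a shape that tends to map onto
/// fused multiply-adds.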
27752 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
27753                                          unsigned Iterations,
27754                                          SDNodeFlags Flags, bool Reciprocal) {
27755   EVT VT = Arg.getValueType();
27756   SDLoc DL(Arg);
27757   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
27758   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
27759 
27760   // This routine must enter the loop below to work correctly
27761   // when (Reciprocal == false).
27762   assert(Iterations > 0);
27763 
27764   // Newton iterations for reciprocal square root:
27765   // E = (E * -0.5) * ((A * E) * E + -3.0)
27766   for (unsigned i = 0; i < Iterations; ++i) {
27767     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
27768     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
27769     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
27770 
27771     // When calculating a square root at the last iteration build:
27772     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
27773     // (notice a common subexpression)
27774     SDValue LHS;
27775     if (Reciprocal || (i + 1) < Iterations) {
27776       // RSQRT: LHS = (E * -0.5)
27777       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
27778     } else {
27779       // SQRT: LHS = (A * E) * -0.5
27780       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
27781     }
27782 
27783     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
27784   }
27785 
27786   return Est;
27787 }
27788 
27789 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
27790 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
27791 /// Op can be zero.
27792 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
27793                                            bool Reciprocal) {
27794   if (LegalDAG)
27795     return SDValue();
27796 
27797   // TODO: Handle extended types?
27798   EVT VT = Op.getValueType();
27799   if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 &&
27800       VT.getScalarType() != MVT::f64)
27801     return SDValue();
27802 
27803   // If estimates are explicitly disabled for this function, we're done.
27804   MachineFunction &MF = DAG.getMachineFunction();
27805   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
27806   if (Enabled == TLI.ReciprocalEstimate::Disabled)
27807     return SDValue();
27808 
27809   // Estimates may be explicitly enabled for this type with a custom number of
27810   // refinement steps.
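  // The step count typically comes from the "reciprocal-estimates" function
  // attribute (e.g. as populated by -mrecip); an entry such as "sqrt:2" would
  // request two refinement steps here.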
27811   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
27812 
27813   bool UseOneConstNR = false;
27814   if (SDValue Est =
27815       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
27816                           Reciprocal)) {
27817     AddToWorklist(Est.getNode());
27818 
27819     if (Iterations > 0)
27820       Est = UseOneConstNR
27821             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
27822             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
27823     if (!Reciprocal) {
27824       SDLoc DL(Op);
27825       // Try the target specific test first.
27826       SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
27827 
      // The estimate is now completely wrong if the input was exactly 0.0 or
      // possibly a denormal. Force the answer to 0.0 or the value provided by
      // the target for those cases.
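      // For example, if Op == +0.0 the true sqrt is +0.0, but the reciprocal
      // estimate is +Inf and Est = 0.0 * Inf = NaN, so this select is needed
      // for correctness, not just precision.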
27831       Est = DAG.getNode(
27832           Test.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
27833           Test, TLI.getSqrtResultForDenormInput(Op, DAG), Est);
27834     }
27835     return Est;
27836   }
27837 
27838   return SDValue();
27839 }
27840 
27841 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27842   return buildSqrtEstimateImpl(Op, Flags, true);
27843 }
27844 
27845 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
27846   return buildSqrtEstimateImpl(Op, Flags, false);
27847 }
27848 
27849 /// Return true if there is any possibility that the two addresses overlap.
27850 bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
27851 
27852   struct MemUseCharacteristics {
27853     bool IsVolatile;
27854     bool IsAtomic;
27855     SDValue BasePtr;
27856     int64_t Offset;
27857     std::optional<int64_t> NumBytes;
27858     MachineMemOperand *MMO;
27859   };
27860 
27861   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
27862     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset())) {
        if (LSN->getAddressingMode() == ISD::PRE_INC)
          Offset = C->getSExtValue();
        else if (LSN->getAddressingMode() == ISD::PRE_DEC)
          Offset = -C->getSExtValue();
      }
27870       uint64_t Size =
27871           MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
27872       return {LSN->isVolatile(),
27873               LSN->isAtomic(),
27874               LSN->getBasePtr(),
27875               Offset /*base offset*/,
27876               std::optional<int64_t>(Size),
27877               LSN->getMemOperand()};
27878     }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
27880       return {false /*isVolatile*/,
27881               /*isAtomic*/ false,
27882               LN->getOperand(1),
27883               (LN->hasOffset()) ? LN->getOffset() : 0,
27884               (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize())
27885                                 : std::optional<int64_t>(),
27886               (MachineMemOperand *)nullptr};
27887     // Default.
    return {false /*isVolatile*/,
            /*isAtomic*/ false,
            SDValue(),
            (int64_t)0 /*offset*/,
            std::optional<int64_t>() /*size*/,
            (MachineMemOperand *)nullptr};
27892   };
27893 
27894   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
27895                         MUC1 = getCharacteristics(Op1);
27896 
  // If they access the same address, they must alias.
27898   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
27899       MUC0.Offset == MUC1.Offset)
27900     return true;
27901 
27902   // If they are both volatile then they cannot be reordered.
27903   if (MUC0.IsVolatile && MUC1.IsVolatile)
27904     return true;
27905 
  // Be conservative about atomics for the moment.
27907   // TODO: This is way overconservative for unordered atomics (see D66309)
27908   if (MUC0.IsAtomic && MUC1.IsAtomic)
27909     return true;
27910 
27911   if (MUC0.MMO && MUC1.MMO) {
27912     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27913         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27914       return false;
27915   }
27916 
27917   // Try to prove that there is aliasing, or that there is no aliasing. Either
27918   // way, we can return now. If nothing can be proved, proceed with more tests.
27919   bool IsAlias;
27920   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
27921                                        DAG, IsAlias))
27922     return IsAlias;
27923 
  // The following all rely on MUC0.MMO and MUC1.MMO being valid. Fail
  // conservatively if either is not known.
27926   if (!MUC0.MMO || !MUC1.MMO)
27927     return true;
27928 
  // If one operation reads from invariant memory and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load/store.
27932   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
27933       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
27934     return false;
27935 
  // If we know the underlying values have relatively large alignment compared
  // to the size and offset of the accesses, we may be able to prove they do
  // not alias. This check is conservative for now to catch cases created by
  // splitting vector types; it only works when the offsets are multiples of
  // the size of the data.
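  // For example, two 4-byte accesses off a common 8-byte-aligned base at
  // offsets 0 and 4: OffAlign0 = 0 and OffAlign1 = 4 below, so
  // (OffAlign0 + 4) <= OffAlign1 holds and the accesses cannot overlap.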
27941   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
27942   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
27943   Align OrigAlignment0 = MUC0.MMO->getBaseAlign();
27944   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
27945   auto &Size0 = MUC0.NumBytes;
27946   auto &Size1 = MUC1.NumBytes;
27947   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
27948       Size0.has_value() && Size1.has_value() && *Size0 == *Size1 &&
27949       OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 &&
27950       SrcValOffset1 % *Size1 == 0) {
27951     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
27952     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
27953 
    // There is no overlap between these relatively aligned accesses of
    // equal size. Return no alias.
27956     if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0)
27957       return false;
27958   }
27959 
27960   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
27961                    ? CombinerGlobalAA
27962                    : DAG.getSubtarget().useAA();
27963 #ifndef NDEBUG
27964   if (CombinerAAOnlyFunc.getNumOccurrences() &&
27965       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
27966     UseAA = false;
27967 #endif
27968 
27969   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && Size0 &&
27970       Size1) {
27971     // Use alias analysis information.
27972     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
27973     int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset;
27974     int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset;
27975     if (AA->isNoAlias(
27976             MemoryLocation(MUC0.MMO->getValue(), Overlap0,
27977                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
27978             MemoryLocation(MUC1.MMO->getValue(), Overlap1,
27979                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
27980       return false;
27981   }
27982 
27983   // Otherwise we have to assume they alias.
27984   return true;
27985 }
27986 
27987 /// Walk up chain skipping non-aliasing memory nodes,
27988 /// looking for aliasing nodes and adding them to the Aliases vector.
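/// For example, if N is a simple load and its chain passes through a store
/// that provably writes a disjoint address, the store is skipped and the
/// search continues through the store's chain operand; the first node that
/// may alias N (or a wide TokenFactor) terminates that path and is recorded
/// in Aliases.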
27989 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
27990                                    SmallVectorImpl<SDValue> &Aliases) {
27991   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
27992   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
27993 
27994   // Get alias information for node.
27995   // TODO: relax aliasing for unordered atomics (see D66309)
27996   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
27997 
27998   // Starting off.
27999   Chains.push_back(OriginalChain);
28000   unsigned Depth = 0;
28001 
  // Attempt to improve the chain by a single step.
28003   auto ImproveChain = [&](SDValue &C) -> bool {
28004     switch (C.getOpcode()) {
28005     case ISD::EntryToken:
28006       // No need to mark EntryToken.
28007       C = SDValue();
28008       return true;
28009     case ISD::LOAD:
28010     case ISD::STORE: {
28011       // Get alias information for C.
28012       // TODO: Relax aliasing for unordered atomics (see D66309)
28013       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
28014                       cast<LSBaseSDNode>(C.getNode())->isSimple();
28015       if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) {
28016         // Look further up the chain.
28017         C = C.getOperand(0);
28018         return true;
28019       }
28020       // Alias, so stop here.
28021       return false;
28022     }
28023 
28024     case ISD::CopyFromReg:
28025       // Always forward past CopyFromReg.
28026       C = C.getOperand(0);
28027       return true;
28028 
28029     case ISD::LIFETIME_START:
28030     case ISD::LIFETIME_END: {
28031       // We can forward past any lifetime start/end that can be proven not to
28032       // alias the memory access.
28033       if (!mayAlias(N, C.getNode())) {
28034         // Look further up the chain.
28035         C = C.getOperand(0);
28036         return true;
28037       }
28038       return false;
28039     }
28040     default:
28041       return false;
28042     }
28043   };
28044 
28045   // Look at each chain and determine if it is an alias.  If so, add it to the
28046   // aliases list.  If not, then continue up the chain looking for the next
28047   // candidate.
28048   while (!Chains.empty()) {
28049     SDValue Chain = Chains.pop_back_val();
28050 
28051     // Don't bother if we've seen Chain before.
28052     if (!Visited.insert(Chain.getNode()).second)
28053       continue;
28054 
28055     // For TokenFactor nodes, look at each operand and only continue up the
28056     // chain until we reach the depth limit.
28057     //
28058     // FIXME: The depth check could be made to return the last non-aliasing
28059     // chain we found before we hit a tokenfactor rather than the original
28060     // chain.
28061     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
28062       Aliases.clear();
28063       Aliases.push_back(OriginalChain);
28064       return;
28065     }
28066 
28067     if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE).
28072       if (Chain.getNumOperands() > 16) {
28073         Aliases.push_back(Chain);
28074         continue;
28075       }
28076       for (unsigned n = Chain.getNumOperands(); n;)
28077         Chains.push_back(Chain.getOperand(--n));
28078       ++Depth;
28079       continue;
28080     }
28081     // Everything else
28082     if (ImproveChain(Chain)) {
      // Improved chain found; continue with the new chain if one exists.
28084       if (Chain.getNode())
28085         Chains.push_back(Chain);
28086       ++Depth;
28087       continue;
28088     }
    // No improvement possible, so treat this chain as an alias.
28090     Aliases.push_back(Chain);
28091   }
28092 }
28093 
/// Walk up the chain, skipping non-aliasing memory nodes, looking for a better
/// chain (the nearest aliasing node, a TokenFactor of such nodes, or the
/// entry token).
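/// For example, a store whose incoming chain is a long sequence of stores to
/// provably disjoint addresses can be rewritten to depend directly on the
/// entry token, allowing those stores to be scheduled independently.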
28096 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
28097   if (OptLevel == CodeGenOptLevel::None)
28098     return OldChain;
28099 
28100   // Ops for replacing token factor.
28101   SmallVector<SDValue, 8> Aliases;
28102 
28103   // Accumulate all the aliases to this node.
28104   GatherAllAliases(N, OldChain, Aliases);
28105 
  // If no aliases were found, chain to the entry token.
28107   if (Aliases.empty())
28108     return DAG.getEntryNode();
28109 
  // If a single alias was found, chain to it.  We don't need to revisit it.
28111   if (Aliases.size() == 1)
28112     return Aliases[0];
28113 
28114   // Construct a custom tailored token factor.
28115   return DAG.getTokenFactor(SDLoc(N), Aliases);
28116 }
28117 
28118 // This function tries to collect a bunch of potentially interesting
28119 // nodes to improve the chains of, all at once. This might seem
28120 // redundant, as this function gets called when visiting every store
28121 // node, so why not let the work be done on each store as it's visited?
28122 //
28123 // I believe this is mainly important because mergeConsecutiveStores
28124 // is unable to deal with merging stores of different sizes, so unless
28125 // we improve the chains of all the potential candidates up-front
28126 // before running mergeConsecutiveStores, it might only see some of
28127 // the nodes that will eventually be candidates, and then not be able
28128 // to go from a partially-merged state to the desired final
28129 // fully-merged state.
28130 
28131 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
28132   SmallVector<StoreSDNode *, 8> ChainedStores;
28133   StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In
  // the common case, every store writes to the address range immediately
  // adjacent to the previous one and is thus merged with the previous
  // interval at insertion time.
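  // For example, if St stores 4 bytes at offset 0, the map starts as [0, 4);
  // a chained store of 4 bytes at offset -4 then inserts [-4, 0), which
  // coalesces with it into [-4, 4) under the half-open semantics below.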
28137 
28138   using IMap = llvm::IntervalMap<int64_t, std::monostate, 8,
28139                                  IntervalMapHalfOpenInfo<int64_t>>;
28140   IMap::Allocator A;
28141   IMap Intervals(A);
28142 
28143   // This holds the base pointer, index, and the offset in bytes from the base
28144   // pointer.
28145   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28146 
28147   // We must have a base and an offset.
28148   if (!BasePtr.getBase().getNode())
28149     return false;
28150 
28151   // Do not handle stores to undef base pointers.
28152   if (BasePtr.getBase().isUndef())
28153     return false;
28154 
  // Do not handle stores to opaque types.
28156   if (St->getMemoryVT().isZeroSized())
28157     return false;
28158 
28159   // BaseIndexOffset assumes that offsets are fixed-size, which
28160   // is not valid for scalable vectors where the offsets are
28161   // scaled by `vscale`, so bail out early.
28162   if (St->getMemoryVT().isScalableVT())
28163     return false;
28164 
28165   // Add ST's interval.
28166   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8,
28167                    std::monostate{});
28168 
28169   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
28170     if (Chain->getMemoryVT().isScalableVector())
28171       return false;
28172 
28173     // If the chain has more than one use, then we can't reorder the mem ops.
28174     if (!SDValue(Chain, 0)->hasOneUse())
28175       break;
28176     // TODO: Relax for unordered atomics (see D66309)
28177     if (!Chain->isSimple() || Chain->isIndexed())
28178       break;
28179 
28180     // Find the base pointer and offset for this memory node.
28181     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
28182     // Check that the base pointer is the same as the original one.
28183     int64_t Offset;
28184     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
28185       break;
28186     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
28187     // Make sure we don't overlap with other intervals by checking the ones to
28188     // the left or right before inserting.
28189     auto I = Intervals.find(Offset);
28190     // If there's a next interval, we should end before it.
28191     if (I != Intervals.end() && I.start() < (Offset + Length))
28192       break;
28193     // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() > Offset)
28195       break;
28196     Intervals.insert(Offset, Offset + Length, std::monostate{});
28197 
28198     ChainedStores.push_back(Chain);
28199     STChain = Chain;
28200   }
28201 
28202   // If we didn't find a chained store, exit.
28203   if (ChainedStores.empty())
28204     return false;
28205 
  // Improve all chained stores (St and the ChainedStores members) starting
  // from where the store chain ended, and return a single TokenFactor.
28208   SDValue NewChain = STChain->getChain();
28209   SmallVector<SDValue, 8> TFOps;
28210   for (unsigned I = ChainedStores.size(); I;) {
28211     StoreSDNode *S = ChainedStores[--I];
28212     SDValue BetterChain = FindBetterChain(S, NewChain);
28213     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
28214         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
28215     TFOps.push_back(SDValue(S, 0));
28216     ChainedStores[I] = S;
28217   }
28218 
28219   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
28220   SDValue BetterChain = FindBetterChain(St, NewChain);
28221   SDValue NewST;
28222   if (St->isTruncatingStore())
28223     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
28224                               St->getBasePtr(), St->getMemoryVT(),
28225                               St->getMemOperand());
28226   else
28227     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
28228                          St->getBasePtr(), St->getMemOperand());
28229 
28230   TFOps.push_back(NewST);
28231 
28232   // If we improved every element of TFOps, then we've lost the dependence on
28233   // NewChain to successors of St and we need to add it back to TFOps. Do so at
28234   // the beginning to keep relative order consistent with FindBetterChains.
28235   auto hasImprovedChain = [&](SDValue ST) -> bool {
28236     return ST->getOperand(0) != NewChain;
28237   };
28238   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
28239   if (AddNewChain)
28240     TFOps.insert(TFOps.begin(), NewChain);
28241 
28242   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
28243   CombineTo(St, TF);
28244 
28245   // Add TF and its operands to the worklist.
28246   AddToWorklist(TF.getNode());
28247   for (const SDValue &Op : TF->ops())
28248     AddToWorklist(Op.getNode());
28249   AddToWorklist(STChain);
28250   return true;
28251 }
28252 
28253 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
28254   if (OptLevel == CodeGenOptLevel::None)
28255     return false;
28256 
28257   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
28258 
28259   // We must have a base and an offset.
28260   if (!BasePtr.getBase().getNode())
28261     return false;
28262 
28263   // Do not handle stores to undef base pointers.
28264   if (BasePtr.getBase().isUndef())
28265     return false;
28266 
28267   // Directly improve a chain of disjoint stores starting at St.
28268   if (parallelizeChainedStores(St))
28269     return true;
28270 
  // Improve St's chain.
28272   SDValue BetterChain = FindBetterChain(St, St->getChain());
28273   if (St->getChain() != BetterChain) {
28274     replaceStoreChain(St, BetterChain);
28275     return true;
28276   }
28277   return false;
28278 }
28279 
28280 /// This is the entry point for the file.
28281 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
28282                            CodeGenOptLevel OptLevel) {
  // This is the main entry point to this class.
28284   DAGCombiner(*this, AA, OptLevel).Run(Level);
28285 }
28286