1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/APSInt.h"
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/DenseMap.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/MachineValueType.h"
51 #include "llvm/CodeGen/RuntimeLibcalls.h"
52 #include "llvm/CodeGen/SelectionDAG.h"
53 #include "llvm/CodeGen/SelectionDAGNodes.h"
54 #include "llvm/CodeGen/TargetInstrInfo.h"
55 #include "llvm/CodeGen/TargetLowering.h"
56 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
57 #include "llvm/CodeGen/TargetRegisterInfo.h"
58 #include "llvm/CodeGen/ValueTypes.h"
59 #include "llvm/IR/CallingConv.h"
60 #include "llvm/IR/Constant.h"
61 #include "llvm/IR/Constants.h"
62 #include "llvm/IR/DataLayout.h"
63 #include "llvm/IR/DebugLoc.h"
64 #include "llvm/IR/DerivedTypes.h"
65 #include "llvm/IR/Function.h"
66 #include "llvm/IR/GlobalValue.h"
67 #include "llvm/IR/IRBuilder.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/Intrinsics.h"
70 #include "llvm/IR/IntrinsicsPowerPC.h"
71 #include "llvm/IR/Module.h"
72 #include "llvm/IR/Type.h"
73 #include "llvm/IR/Use.h"
74 #include "llvm/IR/Value.h"
75 #include "llvm/MC/MCContext.h"
76 #include "llvm/MC/MCExpr.h"
77 #include "llvm/MC/MCRegisterInfo.h"
78 #include "llvm/MC/MCSectionXCOFF.h"
79 #include "llvm/MC/MCSymbolXCOFF.h"
80 #include "llvm/Support/AtomicOrdering.h"
81 #include "llvm/Support/BranchProbability.h"
82 #include "llvm/Support/Casting.h"
83 #include "llvm/Support/CodeGen.h"
84 #include "llvm/Support/CommandLine.h"
85 #include "llvm/Support/Compiler.h"
86 #include "llvm/Support/Debug.h"
87 #include "llvm/Support/ErrorHandling.h"
88 #include "llvm/Support/Format.h"
89 #include "llvm/Support/KnownBits.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <optional>
100 #include <utility>
101 #include <vector>
102 
103 using namespace llvm;
104 
105 #define DEBUG_TYPE "ppc-lowering"
106 
107 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
108 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
109 
110 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
111 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
112 
113 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
114 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
115 
116 static cl::opt<bool> DisableSCO("disable-ppc-sco",
117 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
118 
119 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
120 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
121 
122 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
123 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
124 
125 static cl::opt<bool>
126     DisablePerfectShuffle("ppc-disable-perfect-shuffle",
127                           cl::desc("disable vector permute decomposition"),
128                           cl::init(true), cl::Hidden);
129 
130 cl::opt<bool> DisableAutoPairedVecSt(
131     "disable-auto-paired-vec-st",
132     cl::desc("disable automatically generated 32byte paired vector stores"),
133     cl::init(true), cl::Hidden);
134 
135 STATISTIC(NumTailCalls, "Number of tail calls");
136 STATISTIC(NumSiblingCalls, "Number of sibling calls");
137 STATISTIC(ShufflesHandledWithVPERM,
138           "Number of shuffles lowered to a VPERM or XXPERM");
139 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
140 
141 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
142 
143 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
144 
145 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
146 
147 // FIXME: Remove this once the bug has been fixed!
148 extern cl::opt<bool> ANDIGlueBug;
149 
150 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
151                                      const PPCSubtarget &STI)
152     : TargetLowering(TM), Subtarget(STI) {
153   // Initialize map that relates the PPC addressing modes to the computed flags
154   // of a load/store instruction. The map is used to determine the optimal
155   // addressing mode when selecting load and stores.
156   initializeAddrModeMap();
157   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
158   // arguments are at least 4/8 bytes aligned.
159   bool isPPC64 = Subtarget.isPPC64();
160   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
161 
162   // Set up the register classes.
163   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
164   if (!useSoftFloat()) {
165     if (hasSPE()) {
166       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
167       // EFPU2 APU only supports f32
168       if (!Subtarget.hasEFPU2())
169         addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
170     } else {
171       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
172       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
173     }
174   }
175 
176   // Match BITREVERSE to customized fast code sequence in the td file.
177   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
178   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
179 
180   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
181   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
182 
183   // Custom lower inline assembly to check for special registers.
184   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
185   setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
186 
187   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
188   for (MVT VT : MVT::integer_valuetypes()) {
189     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
190     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
191   }
192 
193   if (Subtarget.isISA3_0()) {
194     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
195     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
196     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
197     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
198   } else {
199     // No extending loads from f16 or HW conversions back and forth.
200     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
201     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
202     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
203     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
204     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
205     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
206     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
207     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
208   }
209 
210   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
211 
212   // PowerPC has pre-inc load and store's.
213   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
214   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
215   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
216   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
217   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
218   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
219   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
220   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
221   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
222   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
223   if (!Subtarget.hasSPE()) {
224     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
225     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
226     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
227     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
228   }
229 
230   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
231   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
232   for (MVT VT : ScalarIntVTs) {
233     setOperationAction(ISD::ADDC, VT, Legal);
234     setOperationAction(ISD::ADDE, VT, Legal);
235     setOperationAction(ISD::SUBC, VT, Legal);
236     setOperationAction(ISD::SUBE, VT, Legal);
237   }
238 
239   if (Subtarget.useCRBits()) {
240     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
241 
242     if (isPPC64 || Subtarget.hasFPCVT()) {
243       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
244       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
245                         isPPC64 ? MVT::i64 : MVT::i32);
246       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
247       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
248                         isPPC64 ? MVT::i64 : MVT::i32);
249 
250       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
251       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
252                          isPPC64 ? MVT::i64 : MVT::i32);
253       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
254       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
255                         isPPC64 ? MVT::i64 : MVT::i32);
256 
257       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
258       AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
259                         isPPC64 ? MVT::i64 : MVT::i32);
260       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
261       AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
262                         isPPC64 ? MVT::i64 : MVT::i32);
263 
264       setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
265       AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
266                         isPPC64 ? MVT::i64 : MVT::i32);
267       setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
268       AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
269                         isPPC64 ? MVT::i64 : MVT::i32);
270     } else {
271       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
272       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
273       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
274       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
275     }
276 
277     // PowerPC does not support direct load/store of condition registers.
278     setOperationAction(ISD::LOAD, MVT::i1, Custom);
279     setOperationAction(ISD::STORE, MVT::i1, Custom);
280 
281     // FIXME: Remove this once the ANDI glue bug is fixed:
282     if (ANDIGlueBug)
283       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
284 
285     for (MVT VT : MVT::integer_valuetypes()) {
286       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
287       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
288       setTruncStoreAction(VT, MVT::i1, Expand);
289     }
290 
291     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
292   }
293 
294   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
295   // PPC (the libcall is not available).
296   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
297   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
298   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
299   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
300 
301   // We do not currently implement these libm ops for PowerPC.
302   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
303   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
304   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
305   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
306   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
307   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
308 
309   // PowerPC has no SREM/UREM instructions unless we are on P9
310   // On P9 we may use a hardware instruction to compute the remainder.
311   // When the result of both the remainder and the division is required it is
312   // more efficient to compute the remainder from the result of the division
313   // rather than use the remainder instruction. The instructions are legalized
314   // directly because the DivRemPairsPass performs the transformation at the IR
315   // level.
316   if (Subtarget.isISA3_0()) {
317     setOperationAction(ISD::SREM, MVT::i32, Legal);
318     setOperationAction(ISD::UREM, MVT::i32, Legal);
319     setOperationAction(ISD::SREM, MVT::i64, Legal);
320     setOperationAction(ISD::UREM, MVT::i64, Legal);
321   } else {
322     setOperationAction(ISD::SREM, MVT::i32, Expand);
323     setOperationAction(ISD::UREM, MVT::i32, Expand);
324     setOperationAction(ISD::SREM, MVT::i64, Expand);
325     setOperationAction(ISD::UREM, MVT::i64, Expand);
326   }
327 
328   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
329   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
330   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
331   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
332   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
333   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
334   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
335   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
336   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
337 
338   // Handle constrained floating-point operations of scalar.
339   // TODO: Handle SPE specific operation.
340   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
341   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
342   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
343   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
344   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
345 
346   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
347   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
348   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
349   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
350 
351   if (!Subtarget.hasSPE()) {
352     setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
353     setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
354   }
355 
356   if (Subtarget.hasVSX()) {
357     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
358     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
359   }
360 
361   if (Subtarget.hasFSQRT()) {
362     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
363     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
364   }
365 
366   if (Subtarget.hasFPRND()) {
367     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
368     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
369     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
370     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
371 
372     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
373     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
374     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
375     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
376   }
377 
378   // We don't support sin/cos/sqrt/fmod/pow
379   setOperationAction(ISD::FSIN , MVT::f64, Expand);
380   setOperationAction(ISD::FCOS , MVT::f64, Expand);
381   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
382   setOperationAction(ISD::FREM , MVT::f64, Expand);
383   setOperationAction(ISD::FPOW , MVT::f64, Expand);
384   setOperationAction(ISD::FSIN , MVT::f32, Expand);
385   setOperationAction(ISD::FCOS , MVT::f32, Expand);
386   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
387   setOperationAction(ISD::FREM , MVT::f32, Expand);
388   setOperationAction(ISD::FPOW , MVT::f32, Expand);
389 
390   // MASS transformation for LLVM intrinsics with replicating fast-math flag
391   // to be consistent to PPCGenScalarMASSEntries pass
392   if (TM.getOptLevel() == CodeGenOpt::Aggressive) {
393     setOperationAction(ISD::FSIN , MVT::f64, Custom);
394     setOperationAction(ISD::FCOS , MVT::f64, Custom);
395     setOperationAction(ISD::FPOW , MVT::f64, Custom);
396     setOperationAction(ISD::FLOG, MVT::f64, Custom);
397     setOperationAction(ISD::FLOG10, MVT::f64, Custom);
398     setOperationAction(ISD::FEXP, MVT::f64, Custom);
399     setOperationAction(ISD::FSIN , MVT::f32, Custom);
400     setOperationAction(ISD::FCOS , MVT::f32, Custom);
401     setOperationAction(ISD::FPOW , MVT::f32, Custom);
402     setOperationAction(ISD::FLOG, MVT::f32, Custom);
403     setOperationAction(ISD::FLOG10, MVT::f32, Custom);
404     setOperationAction(ISD::FEXP, MVT::f32, Custom);
405   }
406 
407   if (Subtarget.hasSPE()) {
408     setOperationAction(ISD::FMA  , MVT::f64, Expand);
409     setOperationAction(ISD::FMA  , MVT::f32, Expand);
410   } else {
411     setOperationAction(ISD::FMA  , MVT::f64, Legal);
412     setOperationAction(ISD::FMA  , MVT::f32, Legal);
413   }
414 
415   if (Subtarget.hasSPE())
416     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
417 
418   setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
419 
420   // If we're enabling GP optimizations, use hardware square root
421   if (!Subtarget.hasFSQRT() &&
422       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
423         Subtarget.hasFRE()))
424     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
425 
426   if (!Subtarget.hasFSQRT() &&
427       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
428         Subtarget.hasFRES()))
429     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
430 
431   if (Subtarget.hasFCPSGN()) {
432     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
433     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
434   } else {
435     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
436     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
437   }
438 
439   if (Subtarget.hasFPRND()) {
440     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
441     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
442     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
443     setOperationAction(ISD::FROUND, MVT::f64, Legal);
444 
445     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
446     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
447     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
448     setOperationAction(ISD::FROUND, MVT::f32, Legal);
449   }
450 
451   // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
452   // instruction xxbrd to speed up scalar BSWAP64.
453   if (Subtarget.isISA3_1()) {
454     setOperationAction(ISD::BSWAP, MVT::i32, Legal);
455     setOperationAction(ISD::BSWAP, MVT::i64, Legal);
456   } else {
457     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
458     setOperationAction(
459         ISD::BSWAP, MVT::i64,
460         (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
461   }
462 
463   // CTPOP or CTTZ were introduced in P8/P9 respectively
464   if (Subtarget.isISA3_0()) {
465     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
466     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
467   } else {
468     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
469     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
470   }
471 
472   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
473     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
474     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
475   } else {
476     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
477     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
478   }
479 
480   // PowerPC does not have ROTR
481   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
482   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
483 
484   if (!Subtarget.useCRBits()) {
485     // PowerPC does not have Select
486     setOperationAction(ISD::SELECT, MVT::i32, Expand);
487     setOperationAction(ISD::SELECT, MVT::i64, Expand);
488     setOperationAction(ISD::SELECT, MVT::f32, Expand);
489     setOperationAction(ISD::SELECT, MVT::f64, Expand);
490   }
491 
492   // PowerPC wants to turn select_cc of FP into fsel when possible.
493   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
494   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
495 
496   // PowerPC wants to optimize integer setcc a bit
497   if (!Subtarget.useCRBits())
498     setOperationAction(ISD::SETCC, MVT::i32, Custom);
499 
500   if (Subtarget.hasFPU()) {
501     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
502     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
503     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
504 
505     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
506     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
507     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
508   }
509 
510   // PowerPC does not have BRCOND which requires SetCC
511   if (!Subtarget.useCRBits())
512     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
513 
514   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
515 
516   if (Subtarget.hasSPE()) {
517     // SPE has built-in conversions
518     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
519     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
520     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
521     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
522     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
523     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
524 
525     // SPE supports signaling compare of f32/f64.
526     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
527     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
528   } else {
529     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
530     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
531     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
532 
533     // PowerPC does not have [U|S]INT_TO_FP
534     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
535     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
536     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
537     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
538   }
539 
540   if (Subtarget.hasDirectMove() && isPPC64) {
541     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
542     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
543     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
544     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
545     if (TM.Options.UnsafeFPMath) {
546       setOperationAction(ISD::LRINT, MVT::f64, Legal);
547       setOperationAction(ISD::LRINT, MVT::f32, Legal);
548       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
549       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
550       setOperationAction(ISD::LROUND, MVT::f64, Legal);
551       setOperationAction(ISD::LROUND, MVT::f32, Legal);
552       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
553       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
554     }
555   } else {
556     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
557     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
558     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
559     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
560   }
561 
562   // We cannot sextinreg(i1).  Expand to shifts.
563   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
564 
565   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
566   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
567   // support continuation, user-level threading, and etc.. As a result, no
568   // other SjLj exception interfaces are implemented and please don't build
569   // your own exception handling based on them.
570   // LLVM/Clang supports zero-cost DWARF exception handling.
571   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
572   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
573 
574   // We want to legalize GlobalAddress and ConstantPool nodes into the
575   // appropriate instructions to materialize the address.
576   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
577   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
578   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
579   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
580   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
581   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
582   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
583   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
584   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
585   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
586 
587   // TRAP is legal.
588   setOperationAction(ISD::TRAP, MVT::Other, Legal);
589 
590   // TRAMPOLINE is custom lowered.
591   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
592   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
593 
594   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
595   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
596 
597   if (Subtarget.is64BitELFABI()) {
598     // VAARG always uses double-word chunks, so promote anything smaller.
599     setOperationAction(ISD::VAARG, MVT::i1, Promote);
600     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
601     setOperationAction(ISD::VAARG, MVT::i8, Promote);
602     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
603     setOperationAction(ISD::VAARG, MVT::i16, Promote);
604     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
605     setOperationAction(ISD::VAARG, MVT::i32, Promote);
606     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
607     setOperationAction(ISD::VAARG, MVT::Other, Expand);
608   } else if (Subtarget.is32BitELFABI()) {
609     // VAARG is custom lowered with the 32-bit SVR4 ABI.
610     setOperationAction(ISD::VAARG, MVT::Other, Custom);
611     setOperationAction(ISD::VAARG, MVT::i64, Custom);
612   } else
613     setOperationAction(ISD::VAARG, MVT::Other, Expand);
614 
615   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
616   if (Subtarget.is32BitELFABI())
617     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
618   else
619     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
620 
621   // Use the default implementation.
622   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
623   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
624   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
625   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
626   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
627   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
628   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
629   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
630   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
631 
632   // We want to custom lower some of our intrinsics.
633   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
634   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom);
635   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom);
636   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
637   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom);
638 
639   // To handle counter-based loop conditions.
640   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
641 
642   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
643   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
644   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
645   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
646 
647   // Comparisons that require checking two conditions.
648   if (Subtarget.hasSPE()) {
649     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
650     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
651     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
652     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
653   }
654   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
655   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
656   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
657   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
658   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
659   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
660   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
661   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
662   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
663   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
664   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
665   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
666 
667   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
668   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
669 
670   if (Subtarget.has64BitSupport()) {
671     // They also have instructions for converting between i64 and fp.
672     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
673     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
674     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
675     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
676     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
677     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
678     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
679     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
680     // This is just the low 32 bits of a (signed) fp->i64 conversion.
681     // We cannot do this with Promote because i64 is not a legal type.
682     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
683     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
684 
685     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
686       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
687       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
688     }
689   } else {
690     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
691     if (Subtarget.hasSPE()) {
692       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
693       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
694     } else {
695       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
696       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
697     }
698   }
699 
700   // With the instructions enabled under FPCVT, we can do everything.
701   if (Subtarget.hasFPCVT()) {
702     if (Subtarget.has64BitSupport()) {
703       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
704       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
705       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
706       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
707       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
708       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
709       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
710       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
711     }
712 
713     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
714     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
715     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
716     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
717     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
718     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
719     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
720     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
721   }
722 
723   if (Subtarget.use64BitRegs()) {
724     // 64-bit PowerPC implementations can support i64 types directly
725     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
726     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
727     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
728     // 64-bit PowerPC wants to expand i128 shifts itself.
729     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
730     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
731     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
732   } else {
733     // 32-bit PowerPC wants to expand i64 shifts itself.
734     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
735     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
736     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
737   }
738 
739   // PowerPC has better expansions for funnel shifts than the generic
740   // TargetLowering::expandFunnelShift.
741   if (Subtarget.has64BitSupport()) {
742     setOperationAction(ISD::FSHL, MVT::i64, Custom);
743     setOperationAction(ISD::FSHR, MVT::i64, Custom);
744   }
745   setOperationAction(ISD::FSHL, MVT::i32, Custom);
746   setOperationAction(ISD::FSHR, MVT::i32, Custom);
747 
748   if (Subtarget.hasVSX()) {
749     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
750     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
751     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
752     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
753   }
754 
755   if (Subtarget.hasAltivec()) {
756     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
757       setOperationAction(ISD::SADDSAT, VT, Legal);
758       setOperationAction(ISD::SSUBSAT, VT, Legal);
759       setOperationAction(ISD::UADDSAT, VT, Legal);
760       setOperationAction(ISD::USUBSAT, VT, Legal);
761     }
762     // First set operation action for all vector types to expand. Then we
763     // will selectively turn on ones that can be effectively codegen'd.
764     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
765       // add/sub are legal for all supported vector VT's.
766       setOperationAction(ISD::ADD, VT, Legal);
767       setOperationAction(ISD::SUB, VT, Legal);
768 
769       // For v2i64, these are only valid with P8Vector. This is corrected after
770       // the loop.
771       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
772         setOperationAction(ISD::SMAX, VT, Legal);
773         setOperationAction(ISD::SMIN, VT, Legal);
774         setOperationAction(ISD::UMAX, VT, Legal);
775         setOperationAction(ISD::UMIN, VT, Legal);
776       }
777       else {
778         setOperationAction(ISD::SMAX, VT, Expand);
779         setOperationAction(ISD::SMIN, VT, Expand);
780         setOperationAction(ISD::UMAX, VT, Expand);
781         setOperationAction(ISD::UMIN, VT, Expand);
782       }
783 
784       if (Subtarget.hasVSX()) {
785         setOperationAction(ISD::FMAXNUM, VT, Legal);
786         setOperationAction(ISD::FMINNUM, VT, Legal);
787       }
788 
789       // Vector instructions introduced in P8
790       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
791         setOperationAction(ISD::CTPOP, VT, Legal);
792         setOperationAction(ISD::CTLZ, VT, Legal);
793       }
794       else {
795         setOperationAction(ISD::CTPOP, VT, Expand);
796         setOperationAction(ISD::CTLZ, VT, Expand);
797       }
798 
799       // Vector instructions introduced in P9
800       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
801         setOperationAction(ISD::CTTZ, VT, Legal);
802       else
803         setOperationAction(ISD::CTTZ, VT, Expand);
804 
805       // We promote all shuffles to v16i8.
806       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
807       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
808 
809       // We promote all non-typed operations to v4i32.
810       setOperationAction(ISD::AND   , VT, Promote);
811       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
812       setOperationAction(ISD::OR    , VT, Promote);
813       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
814       setOperationAction(ISD::XOR   , VT, Promote);
815       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
816       setOperationAction(ISD::LOAD  , VT, Promote);
817       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
818       setOperationAction(ISD::SELECT, VT, Promote);
819       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
820       setOperationAction(ISD::VSELECT, VT, Legal);
821       setOperationAction(ISD::SELECT_CC, VT, Promote);
822       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
823       setOperationAction(ISD::STORE, VT, Promote);
824       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
825 
826       // No other operations are legal.
827       setOperationAction(ISD::MUL , VT, Expand);
828       setOperationAction(ISD::SDIV, VT, Expand);
829       setOperationAction(ISD::SREM, VT, Expand);
830       setOperationAction(ISD::UDIV, VT, Expand);
831       setOperationAction(ISD::UREM, VT, Expand);
832       setOperationAction(ISD::FDIV, VT, Expand);
833       setOperationAction(ISD::FREM, VT, Expand);
834       setOperationAction(ISD::FNEG, VT, Expand);
835       setOperationAction(ISD::FSQRT, VT, Expand);
836       setOperationAction(ISD::FLOG, VT, Expand);
837       setOperationAction(ISD::FLOG10, VT, Expand);
838       setOperationAction(ISD::FLOG2, VT, Expand);
839       setOperationAction(ISD::FEXP, VT, Expand);
840       setOperationAction(ISD::FEXP2, VT, Expand);
841       setOperationAction(ISD::FSIN, VT, Expand);
842       setOperationAction(ISD::FCOS, VT, Expand);
843       setOperationAction(ISD::FABS, VT, Expand);
844       setOperationAction(ISD::FFLOOR, VT, Expand);
845       setOperationAction(ISD::FCEIL,  VT, Expand);
846       setOperationAction(ISD::FTRUNC, VT, Expand);
847       setOperationAction(ISD::FRINT,  VT, Expand);
848       setOperationAction(ISD::FLDEXP, VT, Expand);
849       setOperationAction(ISD::FNEARBYINT, VT, Expand);
850       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
851       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
852       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
853       setOperationAction(ISD::MULHU, VT, Expand);
854       setOperationAction(ISD::MULHS, VT, Expand);
855       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
856       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
857       setOperationAction(ISD::UDIVREM, VT, Expand);
858       setOperationAction(ISD::SDIVREM, VT, Expand);
859       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
860       setOperationAction(ISD::FPOW, VT, Expand);
861       setOperationAction(ISD::BSWAP, VT, Expand);
862       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
863       setOperationAction(ISD::ROTL, VT, Expand);
864       setOperationAction(ISD::ROTR, VT, Expand);
865 
866       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
867         setTruncStoreAction(VT, InnerVT, Expand);
868         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
869         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
870         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
871       }
872     }
873     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
874     if (!Subtarget.hasP8Vector()) {
875       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
876       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
877       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
878       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
879     }
880 
881     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
882     // with merges, splats, etc.
883     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
884 
885     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
886     // are cheap, so handle them before they get expanded to scalar.
887     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
888     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
889     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
890     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
891     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
892 
893     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
894     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
895     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
896     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
897     setOperationAction(ISD::SELECT, MVT::v4i32,
898                        Subtarget.useCRBits() ? Legal : Expand);
899     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
900     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
901     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
902     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
903     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
904     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
905     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
906     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
907     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
908     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
909     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
910     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
911     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
912 
913     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
914     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
915     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
916     if (Subtarget.hasAltivec())
917       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
918         setOperationAction(ISD::ROTL, VT, Legal);
919     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
920     if (Subtarget.hasP8Altivec())
921       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
922 
923     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
924     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
925     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
926     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
927 
928     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
929     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
930 
931     if (Subtarget.hasVSX()) {
932       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
933       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
934       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
935     }
936 
937     if (Subtarget.hasP8Altivec())
938       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
939     else
940       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
941 
942     if (Subtarget.isISA3_1()) {
943       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
944       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
945       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
946       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
947       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
948       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
949       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
950       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
951       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
952       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
953       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
954       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
955       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
956       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
957       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
958       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
959       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
960       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
961     }
962 
963     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
964     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
965 
966     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
967     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
968 
969     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
970     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
971     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
972     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
973 
974     // Altivec does not contain unordered floating-point compare instructions
975     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
976     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
977     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
978     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
979 
980     if (Subtarget.hasVSX()) {
981       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
982       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
983       if (Subtarget.hasP8Vector()) {
984         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
985         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
986       }
987       if (Subtarget.hasDirectMove() && isPPC64) {
988         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
989         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
990         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
991         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
992         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
993         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
994         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
995         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
996       }
997       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
998 
999       // The nearbyint variants are not allowed to raise the inexact exception
1000       // so we can only code-gen them with unsafe math.
1001       if (TM.Options.UnsafeFPMath) {
1002         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1003         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1004       }
1005 
1006       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1007       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1008       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1009       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1010       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1011       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1012       setOperationAction(ISD::FROUND, MVT::f64, Legal);
1013       setOperationAction(ISD::FRINT, MVT::f64, Legal);
1014 
1015       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1016       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1017       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1018       setOperationAction(ISD::FROUND, MVT::f32, Legal);
1019       setOperationAction(ISD::FRINT, MVT::f32, Legal);
1020 
1021       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1022       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1023 
1024       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1025       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1026 
1027       // Share the Altivec comparison restrictions.
1028       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1029       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1030       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
1031       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1032 
1033       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1034       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1035 
1036       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom);
1037 
1038       if (Subtarget.hasP8Vector())
1039         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1040 
1041       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1042 
1043       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1044       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1045       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1046 
1047       if (Subtarget.hasP8Altivec()) {
1048         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1049         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1050         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1051 
1052         // 128 bit shifts can be accomplished via 3 instructions for SHL and
1053         // SRL, but not for SRA because of the instructions available:
1054         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1055         // doing
1056         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1057         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1058         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1059 
1060         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1061       }
1062       else {
1063         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1064         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1065         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1066 
1067         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1068 
1069         // VSX v2i64 only supports non-arithmetic operations.
1070         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1071         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1072       }
1073 
1074       if (Subtarget.isISA3_1())
1075         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1076       else
1077         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1078 
1079       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1080       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1081       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1082       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1083 
1084       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom);
1085 
1086       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1087       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1088       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1089       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1090       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1091       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1092       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1093       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1094 
1095       // Custom handling for partial vectors of integers converted to
1096       // floating point. We already have optimal handling for v2i32 through
1097       // the DAG combine, so those aren't necessary.
1098       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1099       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1100       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1101       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1102       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1103       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1104       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1105       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1106       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1107       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1108       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1109       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1110       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1111       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1112       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1113       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1114 
1115       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1116       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1117       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1118       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1119       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1120       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1121 
1122       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1123       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1124 
1125       // Handle constrained floating-point operations of vector.
1126       // The predictor is `hasVSX` because altivec instruction has
1127       // no exception but VSX vector instruction has.
1128       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1129       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1130       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1131       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1132       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1133       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1134       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1135       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1136       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1137       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1138       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1139       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1140       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1141 
1142       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1143       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1144       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1145       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1146       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1147       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1148       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1149       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1150       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1151       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1152       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1153       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1154       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1155 
1156       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1157       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1158 
1159       for (MVT FPT : MVT::fp_valuetypes())
1160         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1161 
1162       // Expand the SELECT to SELECT_CC
1163       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1164 
1165       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1166       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1167 
1168       // No implementation for these ops for PowerPC.
1169       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1170       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1171       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1172       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1173       setOperationAction(ISD::FREM, MVT::f128, Expand);
1174     }
1175 
1176     if (Subtarget.hasP8Altivec()) {
1177       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1178       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1179     }
1180 
1181     if (Subtarget.hasP9Vector()) {
1182       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1183       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1184 
1185       // Test data class instructions store results in CR bits.
1186       if (Subtarget.useCRBits()) {
1187         setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
1188         setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
1189         setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
1190       }
1191 
1192       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1193       // SRL, but not for SRA because of the instructions available:
1194       // VS{RL} and VS{RL}O.
1195       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1196       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1197       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1198 
1199       setOperationAction(ISD::FADD, MVT::f128, Legal);
1200       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1201       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1202       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1203       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1204 
1205       setOperationAction(ISD::FMA, MVT::f128, Legal);
1206       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1207       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1208       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1209       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1210       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1211       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1212 
1213       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1214       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1215       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1216       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1217       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1218       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1219 
1220       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1221       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1222       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1223 
1224       // Handle constrained floating-point operations of fp128
1225       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1226       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1227       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1228       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1229       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1230       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1231       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1232       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1233       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1234       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1235       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1236       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1237       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1238       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1239       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1240       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1241       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1242       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1243       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1244       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1245     } else if (Subtarget.hasVSX()) {
1246       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1247       setOperationAction(ISD::STORE, MVT::f128, Promote);
1248 
1249       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1250       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1251 
1252       // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1253       // fp_to_uint and int_to_fp.
1254       setOperationAction(ISD::FADD, MVT::f128, LibCall);
1255       setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1256 
1257       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1258       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1259       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1260       setOperationAction(ISD::FABS, MVT::f128, Expand);
1261       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1262       setOperationAction(ISD::FMA, MVT::f128, Expand);
1263       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1264 
1265       // Expand the fp_extend if the target type is fp128.
1266       setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1267       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1268 
1269       // Expand the fp_round if the source type is fp128.
1270       for (MVT VT : {MVT::f32, MVT::f64}) {
1271         setOperationAction(ISD::FP_ROUND, VT, Custom);
1272         setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1273       }
1274 
1275       setOperationAction(ISD::SETCC, MVT::f128, Custom);
1276       setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
1277       setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
1278       setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1279 
1280       // Lower following f128 select_cc pattern:
1281       // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1282       setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1283 
1284       // We need to handle f128 SELECT_CC with integer result type.
1285       setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1286       setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1287     }
1288 
1289     if (Subtarget.hasP9Altivec()) {
1290       if (Subtarget.isISA3_1()) {
1291         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal);
1292         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Legal);
1293         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Legal);
1294         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Legal);
1295       } else {
1296         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1297         setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1298       }
1299       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1300       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1301       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1302       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1303       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1304       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1305       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1306 
1307       setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1308       setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1309       setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1310       setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1311     }
1312 
1313     if (Subtarget.hasP10Vector()) {
1314       setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
1315     }
1316   }
1317 
1318   if (Subtarget.pairedVectorMemops()) {
1319     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1320     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1321     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1322   }
1323   if (Subtarget.hasMMA()) {
1324     if (Subtarget.isISAFuture())
1325       addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1326     else
1327       addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1328     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1329     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1330     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1331   }
1332 
1333   if (Subtarget.has64BitSupport())
1334     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1335 
1336   if (Subtarget.isISA3_1())
1337     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1338 
1339   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1340 
1341   if (!isPPC64) {
1342     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1343     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1344   }
1345 
1346   if (shouldInlineQuadwordAtomics()) {
1347     setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1348     setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1349     setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
1350   }
1351 
1352   setBooleanContents(ZeroOrOneBooleanContent);
1353 
1354   if (Subtarget.hasAltivec()) {
1355     // Altivec instructions set fields to all zeros or all ones.
1356     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1357   }
1358 
1359   setLibcallName(RTLIB::MULO_I128, nullptr);
1360   if (!isPPC64) {
1361     // These libcalls are not available in 32-bit.
1362     setLibcallName(RTLIB::SHL_I128, nullptr);
1363     setLibcallName(RTLIB::SRL_I128, nullptr);
1364     setLibcallName(RTLIB::SRA_I128, nullptr);
1365     setLibcallName(RTLIB::MUL_I128, nullptr);
1366     setLibcallName(RTLIB::MULO_I64, nullptr);
1367   }
1368 
1369   if (shouldInlineQuadwordAtomics())
1370     setMaxAtomicSizeInBitsSupported(128);
1371   else if (isPPC64)
1372     setMaxAtomicSizeInBitsSupported(64);
1373   else
1374     setMaxAtomicSizeInBitsSupported(32);
1375 
1376   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1377 
1378   // We have target-specific dag combine patterns for the following nodes:
1379   setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
1380                        ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
1381   if (Subtarget.hasFPCVT())
1382     setTargetDAGCombine(ISD::UINT_TO_FP);
1383   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1384   if (Subtarget.useCRBits())
1385     setTargetDAGCombine(ISD::BRCOND);
1386   setTargetDAGCombine({ISD::BSWAP, ISD::INTRINSIC_WO_CHAIN,
1387                        ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID});
1388 
1389   setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND, ISD::ANY_EXTEND});
1390 
1391   setTargetDAGCombine({ISD::TRUNCATE, ISD::VECTOR_SHUFFLE});
1392 
1393   if (Subtarget.useCRBits()) {
1394     setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
1395   }
1396 
1397   setLibcallName(RTLIB::LOG_F128, "logf128");
1398   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1399   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1400   setLibcallName(RTLIB::EXP_F128, "expf128");
1401   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1402   setLibcallName(RTLIB::SIN_F128, "sinf128");
1403   setLibcallName(RTLIB::COS_F128, "cosf128");
1404   setLibcallName(RTLIB::POW_F128, "powf128");
1405   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1406   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1407   setLibcallName(RTLIB::REM_F128, "fmodf128");
1408   setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1409   setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1410   setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1411   setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1412   setLibcallName(RTLIB::ROUND_F128, "roundf128");
1413   setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1414   setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1415   setLibcallName(RTLIB::RINT_F128, "rintf128");
1416   setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1417   setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1418   setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1419   setLibcallName(RTLIB::FMA_F128, "fmaf128");
1420 
1421   if (Subtarget.isAIXABI()) {
1422     setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1423     setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1424     setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1425     setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1426   }
1427 
1428   // With 32 condition bits, we don't need to sink (and duplicate) compares
1429   // aggressively in CodeGenPrep.
1430   if (Subtarget.useCRBits()) {
1431     setHasMultipleConditionRegisters();
1432     setJumpIsExpensive();
1433   }
1434 
1435   setMinFunctionAlignment(Align(4));
1436 
1437   switch (Subtarget.getCPUDirective()) {
1438   default: break;
1439   case PPC::DIR_970:
1440   case PPC::DIR_A2:
1441   case PPC::DIR_E500:
1442   case PPC::DIR_E500mc:
1443   case PPC::DIR_E5500:
1444   case PPC::DIR_PWR4:
1445   case PPC::DIR_PWR5:
1446   case PPC::DIR_PWR5X:
1447   case PPC::DIR_PWR6:
1448   case PPC::DIR_PWR6X:
1449   case PPC::DIR_PWR7:
1450   case PPC::DIR_PWR8:
1451   case PPC::DIR_PWR9:
1452   case PPC::DIR_PWR10:
1453   case PPC::DIR_PWR_FUTURE:
1454     setPrefLoopAlignment(Align(16));
1455     setPrefFunctionAlignment(Align(16));
1456     break;
1457   }
1458 
1459   if (Subtarget.enableMachineScheduler())
1460     setSchedulingPreference(Sched::Source);
1461   else
1462     setSchedulingPreference(Sched::Hybrid);
1463 
1464   computeRegisterProperties(STI.getRegisterInfo());
1465 
1466   // The Freescale cores do better with aggressive inlining of memcpy and
1467   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1468   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1469       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1470     MaxStoresPerMemset = 32;
1471     MaxStoresPerMemsetOptSize = 16;
1472     MaxStoresPerMemcpy = 32;
1473     MaxStoresPerMemcpyOptSize = 8;
1474     MaxStoresPerMemmove = 32;
1475     MaxStoresPerMemmoveOptSize = 8;
1476   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1477     // The A2 also benefits from (very) aggressive inlining of memcpy and
1478     // friends. The overhead of a the function call, even when warm, can be
1479     // over one hundred cycles.
1480     MaxStoresPerMemset = 128;
1481     MaxStoresPerMemcpy = 128;
1482     MaxStoresPerMemmove = 128;
1483     MaxLoadsPerMemcmp = 128;
1484   } else {
1485     MaxLoadsPerMemcmp = 8;
1486     MaxLoadsPerMemcmpOptSize = 4;
1487   }
1488 
1489   IsStrictFPEnabled = true;
1490 
1491   // Let the subtarget (CPU) decide if a predictable select is more expensive
1492   // than the corresponding branch. This information is used in CGP to decide
1493   // when to convert selects into branches.
1494   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1495 }
1496 
1497 // *********************************** NOTE ************************************
1498 // For selecting load and store instructions, the addressing modes are defined
1499 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1500 // patterns to match the load the store instructions.
1501 //
1502 // The TD definitions for the addressing modes correspond to their respective
1503 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1504 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1505 // address mode flags of a particular node. Afterwards, the computed address
1506 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1507 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1508 // accordingly, based on the preferred addressing mode.
1509 //
1510 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1511 // MemOpFlags contains all the possible flags that can be used to compute the
1512 // optimal addressing mode for load and store instructions.
1513 // AddrMode contains all the possible load and store addressing modes available
1514 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1515 //
1516 // When adding new load and store instructions, it is possible that new address
1517 // flags may need to be added into MemOpFlags, and a new addressing mode will
1518 // need to be added to AddrMode. An entry of the new addressing mode (consisting
1519 // of the minimal and main distinguishing address flags for the new load/store
1520 // instructions) will need to be added into initializeAddrModeMap() below.
1521 // Finally, when adding new addressing modes, the getAddrModeForFlags() will
1522 // need to be updated to account for selecting the optimal addressing mode.
1523 // *****************************************************************************
1524 /// Initialize the map that relates the different addressing modes of the load
1525 /// and store instructions to a set of flags. This ensures the load/store
1526 /// instruction is correctly matched during instruction selection.
1527 void PPCTargetLowering::initializeAddrModeMap() {
1528   AddrModesMap[PPC::AM_DForm] = {
1529       // LWZ, STW
1530       PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_WordInt,
1531       PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_WordInt,
1532       PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1533       PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1534       // LBZ, LHZ, STB, STH
1535       PPC::MOF_ZExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1536       PPC::MOF_ZExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1537       PPC::MOF_ZExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1538       PPC::MOF_ZExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1539       // LHA
1540       PPC::MOF_SExt | PPC::MOF_RPlusSImm16 | PPC::MOF_SubWordInt,
1541       PPC::MOF_SExt | PPC::MOF_RPlusLo | PPC::MOF_SubWordInt,
1542       PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_SubWordInt,
1543       PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubWordInt,
1544       // LFS, LFD, STFS, STFD
1545       PPC::MOF_RPlusSImm16 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1546       PPC::MOF_RPlusLo | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1547       PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1548       PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetBeforeP9,
1549   };
1550   AddrModesMap[PPC::AM_DSForm] = {
1551       // LWA
1552       PPC::MOF_SExt | PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_WordInt,
1553       PPC::MOF_SExt | PPC::MOF_NotAddNorCst | PPC::MOF_WordInt,
1554       PPC::MOF_SExt | PPC::MOF_AddrIsSImm32 | PPC::MOF_WordInt,
1555       // LD, STD
1556       PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_DoubleWordInt,
1557       PPC::MOF_NotAddNorCst | PPC::MOF_DoubleWordInt,
1558       PPC::MOF_AddrIsSImm32 | PPC::MOF_DoubleWordInt,
1559       // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1560       PPC::MOF_RPlusSImm16Mult4 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1561       PPC::MOF_NotAddNorCst | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1562       PPC::MOF_AddrIsSImm32 | PPC::MOF_ScalarFloat | PPC::MOF_SubtargetP9,
1563   };
1564   AddrModesMap[PPC::AM_DQForm] = {
1565       // LXV, STXV
1566       PPC::MOF_RPlusSImm16Mult16 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1567       PPC::MOF_NotAddNorCst | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1568       PPC::MOF_AddrIsSImm32 | PPC::MOF_Vector | PPC::MOF_SubtargetP9,
1569   };
1570   AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1571                                        PPC::MOF_SubtargetP10};
1572   // TODO: Add mapping for quadword load/store.
1573 }
1574 
1575 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1576 /// the desired ByVal argument alignment.
1577 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1578   if (MaxAlign == MaxMaxAlign)
1579     return;
1580   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1581     if (MaxMaxAlign >= 32 &&
1582         VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1583       MaxAlign = Align(32);
1584     else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1585              MaxAlign < 16)
1586       MaxAlign = Align(16);
1587   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1588     Align EltAlign;
1589     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1590     if (EltAlign > MaxAlign)
1591       MaxAlign = EltAlign;
1592   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1593     for (auto *EltTy : STy->elements()) {
1594       Align EltAlign;
1595       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1596       if (EltAlign > MaxAlign)
1597         MaxAlign = EltAlign;
1598       if (MaxAlign == MaxMaxAlign)
1599         break;
1600     }
1601   }
1602 }
1603 
1604 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1605 /// function arguments in the caller parameter area.
1606 uint64_t PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1607                                                   const DataLayout &DL) const {
1608   // 16byte and wider vectors are passed on 16byte boundary.
1609   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1610   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1611   if (Subtarget.hasAltivec())
1612     getMaxByValAlign(Ty, Alignment, Align(16));
1613   return Alignment.value();
1614 }
1615 
1616 bool PPCTargetLowering::useSoftFloat() const {
1617   return Subtarget.useSoftFloat();
1618 }
1619 
1620 bool PPCTargetLowering::hasSPE() const {
1621   return Subtarget.hasSPE();
1622 }
1623 
1624 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1625   return VT.isScalarInteger();
1626 }
1627 
1628 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1629   switch ((PPCISD::NodeType)Opcode) {
1630   case PPCISD::FIRST_NUMBER:    break;
1631   case PPCISD::FSEL:            return "PPCISD::FSEL";
1632   case PPCISD::XSMAXC:          return "PPCISD::XSMAXC";
1633   case PPCISD::XSMINC:          return "PPCISD::XSMINC";
1634   case PPCISD::FCFID:           return "PPCISD::FCFID";
1635   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1636   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1637   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1638   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1639   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1640   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1641   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1642   case PPCISD::FRE:             return "PPCISD::FRE";
1643   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1644   case PPCISD::FTSQRT:
1645     return "PPCISD::FTSQRT";
1646   case PPCISD::FSQRT:
1647     return "PPCISD::FSQRT";
1648   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1649   case PPCISD::VPERM:           return "PPCISD::VPERM";
1650   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1651   case PPCISD::XXSPLTI_SP_TO_DP:
1652     return "PPCISD::XXSPLTI_SP_TO_DP";
1653   case PPCISD::XXSPLTI32DX:
1654     return "PPCISD::XXSPLTI32DX";
1655   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1656   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1657   case PPCISD::XXPERM:
1658     return "PPCISD::XXPERM";
1659   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1660   case PPCISD::CMPB:            return "PPCISD::CMPB";
1661   case PPCISD::Hi:              return "PPCISD::Hi";
1662   case PPCISD::Lo:              return "PPCISD::Lo";
1663   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1664   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1665   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1666   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1667   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1668   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1669   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1670   case PPCISD::SRL:             return "PPCISD::SRL";
1671   case PPCISD::SRA:             return "PPCISD::SRA";
1672   case PPCISD::SHL:             return "PPCISD::SHL";
1673   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1674   case PPCISD::CALL:            return "PPCISD::CALL";
1675   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1676   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1677   case PPCISD::CALL_RM:
1678     return "PPCISD::CALL_RM";
1679   case PPCISD::CALL_NOP_RM:
1680     return "PPCISD::CALL_NOP_RM";
1681   case PPCISD::CALL_NOTOC_RM:
1682     return "PPCISD::CALL_NOTOC_RM";
1683   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1684   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1685   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1686   case PPCISD::BCTRL_RM:
1687     return "PPCISD::BCTRL_RM";
1688   case PPCISD::BCTRL_LOAD_TOC_RM:
1689     return "PPCISD::BCTRL_LOAD_TOC_RM";
1690   case PPCISD::RET_GLUE:        return "PPCISD::RET_GLUE";
1691   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1692   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1693   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1694   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1695   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1696   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1697   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1698   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1699   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1700   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1701     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1702   case PPCISD::ANDI_rec_1_EQ_BIT:
1703     return "PPCISD::ANDI_rec_1_EQ_BIT";
1704   case PPCISD::ANDI_rec_1_GT_BIT:
1705     return "PPCISD::ANDI_rec_1_GT_BIT";
1706   case PPCISD::VCMP:            return "PPCISD::VCMP";
1707   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1708   case PPCISD::LBRX:            return "PPCISD::LBRX";
1709   case PPCISD::STBRX:           return "PPCISD::STBRX";
1710   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1711   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1712   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1713   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1714   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1715   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1716   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1717   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1718   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1719   case PPCISD::ST_VSR_SCAL_INT:
1720                                 return "PPCISD::ST_VSR_SCAL_INT";
1721   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1722   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1723   case PPCISD::BDZ:             return "PPCISD::BDZ";
1724   case PPCISD::MFFS:            return "PPCISD::MFFS";
1725   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1726   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1727   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1728   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1729   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1730   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1731   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1732   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1733   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1734   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1735   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1736   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1737   case PPCISD::GET_TPOINTER:    return "PPCISD::GET_TPOINTER";
1738   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1739   case PPCISD::TLSGD_AIX:       return "PPCISD::TLSGD_AIX";
1740   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1741   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1742   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1743   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1744   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1745   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1746   case PPCISD::PADDI_DTPREL:
1747     return "PPCISD::PADDI_DTPREL";
1748   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1749   case PPCISD::SC:              return "PPCISD::SC";
1750   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1751   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1752   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1753   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1754   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1755   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1756   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1757   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1758   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1759   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1760   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1761   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1762   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1763     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1764   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1765     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1766   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1767   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1768   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1769   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1770   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1771   case PPCISD::ZEXT_LD_SPLAT:   return "PPCISD::ZEXT_LD_SPLAT";
1772   case PPCISD::SEXT_LD_SPLAT:   return "PPCISD::SEXT_LD_SPLAT";
1773   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1774   case PPCISD::STRICT_FADDRTZ:
1775     return "PPCISD::STRICT_FADDRTZ";
1776   case PPCISD::STRICT_FCTIDZ:
1777     return "PPCISD::STRICT_FCTIDZ";
1778   case PPCISD::STRICT_FCTIWZ:
1779     return "PPCISD::STRICT_FCTIWZ";
1780   case PPCISD::STRICT_FCTIDUZ:
1781     return "PPCISD::STRICT_FCTIDUZ";
1782   case PPCISD::STRICT_FCTIWUZ:
1783     return "PPCISD::STRICT_FCTIWUZ";
1784   case PPCISD::STRICT_FCFID:
1785     return "PPCISD::STRICT_FCFID";
1786   case PPCISD::STRICT_FCFIDU:
1787     return "PPCISD::STRICT_FCFIDU";
1788   case PPCISD::STRICT_FCFIDS:
1789     return "PPCISD::STRICT_FCFIDS";
1790   case PPCISD::STRICT_FCFIDUS:
1791     return "PPCISD::STRICT_FCFIDUS";
1792   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1793   case PPCISD::STORE_COND:
1794     return "PPCISD::STORE_COND";
1795   }
1796   return nullptr;
1797 }
1798 
1799 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1800                                           EVT VT) const {
1801   if (!VT.isVector())
1802     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1803 
1804   return VT.changeVectorElementTypeToInteger();
1805 }
1806 
1807 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1808   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1809   return true;
1810 }
1811 
1812 //===----------------------------------------------------------------------===//
1813 // Node matching predicates, for use by the tblgen matching code.
1814 //===----------------------------------------------------------------------===//
1815 
1816 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1817 static bool isFloatingPointZero(SDValue Op) {
1818   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1819     return CFP->getValueAPF().isZero();
1820   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1821     // Maybe this has already been legalized into the constant pool?
1822     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1823       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1824         return CFP->getValueAPF().isZero();
1825   }
1826   return false;
1827 }
1828 
1829 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1830 /// true if Op is undef or if it matches the specified value.
1831 static bool isConstantOrUndef(int Op, int Val) {
1832   return Op < 0 || Op == Val;
1833 }
1834 
1835 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1836 /// VPKUHUM instruction.
1837 /// The ShuffleKind distinguishes between big-endian operations with
1838 /// two different inputs (0), either-endian operations with two identical
1839 /// inputs (1), and little-endian operations with two different inputs (2).
1840 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1841 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1842                                SelectionDAG &DAG) {
1843   bool IsLE = DAG.getDataLayout().isLittleEndian();
1844   if (ShuffleKind == 0) {
1845     if (IsLE)
1846       return false;
1847     for (unsigned i = 0; i != 16; ++i)
1848       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1849         return false;
1850   } else if (ShuffleKind == 2) {
1851     if (!IsLE)
1852       return false;
1853     for (unsigned i = 0; i != 16; ++i)
1854       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1855         return false;
1856   } else if (ShuffleKind == 1) {
1857     unsigned j = IsLE ? 0 : 1;
1858     for (unsigned i = 0; i != 8; ++i)
1859       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1860           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1861         return false;
1862   }
1863   return true;
1864 }
1865 
1866 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1867 /// VPKUWUM instruction.
1868 /// The ShuffleKind distinguishes between big-endian operations with
1869 /// two different inputs (0), either-endian operations with two identical
1870 /// inputs (1), and little-endian operations with two different inputs (2).
1871 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1872 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1873                                SelectionDAG &DAG) {
1874   bool IsLE = DAG.getDataLayout().isLittleEndian();
1875   if (ShuffleKind == 0) {
1876     if (IsLE)
1877       return false;
1878     for (unsigned i = 0; i != 16; i += 2)
1879       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1880           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1881         return false;
1882   } else if (ShuffleKind == 2) {
1883     if (!IsLE)
1884       return false;
1885     for (unsigned i = 0; i != 16; i += 2)
1886       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1887           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1888         return false;
1889   } else if (ShuffleKind == 1) {
1890     unsigned j = IsLE ? 0 : 2;
1891     for (unsigned i = 0; i != 8; i += 2)
1892       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1893           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1894           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1895           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1896         return false;
1897   }
1898   return true;
1899 }
1900 
1901 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1902 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1903 /// current subtarget.
1904 ///
1905 /// The ShuffleKind distinguishes between big-endian operations with
1906 /// two different inputs (0), either-endian operations with two identical
1907 /// inputs (1), and little-endian operations with two different inputs (2).
1908 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1909 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1910                                SelectionDAG &DAG) {
1911   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1912   if (!Subtarget.hasP8Vector())
1913     return false;
1914 
1915   bool IsLE = DAG.getDataLayout().isLittleEndian();
1916   if (ShuffleKind == 0) {
1917     if (IsLE)
1918       return false;
1919     for (unsigned i = 0; i != 16; i += 4)
1920       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1921           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1922           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1923           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1924         return false;
1925   } else if (ShuffleKind == 2) {
1926     if (!IsLE)
1927       return false;
1928     for (unsigned i = 0; i != 16; i += 4)
1929       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1930           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1931           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1932           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1933         return false;
1934   } else if (ShuffleKind == 1) {
1935     unsigned j = IsLE ? 0 : 4;
1936     for (unsigned i = 0; i != 8; i += 4)
1937       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1938           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1939           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1940           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1941           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1942           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1943           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1944           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1945         return false;
1946   }
1947   return true;
1948 }
1949 
1950 /// isVMerge - Common function, used to match vmrg* shuffles.
1951 ///
1952 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1953                      unsigned LHSStart, unsigned RHSStart) {
1954   if (N->getValueType(0) != MVT::v16i8)
1955     return false;
1956   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1957          "Unsupported merge size!");
1958 
1959   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1960     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1961       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1962                              LHSStart+j+i*UnitSize) ||
1963           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1964                              RHSStart+j+i*UnitSize))
1965         return false;
1966     }
1967   return true;
1968 }
1969 
1970 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1971 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1972 /// The ShuffleKind distinguishes between big-endian merges with two
1973 /// different inputs (0), either-endian merges with two identical inputs (1),
1974 /// and little-endian merges with two different inputs (2).  For the latter,
1975 /// the input operands are swapped (see PPCInstrAltivec.td).
1976 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1977                              unsigned ShuffleKind, SelectionDAG &DAG) {
1978   if (DAG.getDataLayout().isLittleEndian()) {
1979     if (ShuffleKind == 1) // unary
1980       return isVMerge(N, UnitSize, 0, 0);
1981     else if (ShuffleKind == 2) // swapped
1982       return isVMerge(N, UnitSize, 0, 16);
1983     else
1984       return false;
1985   } else {
1986     if (ShuffleKind == 1) // unary
1987       return isVMerge(N, UnitSize, 8, 8);
1988     else if (ShuffleKind == 0) // normal
1989       return isVMerge(N, UnitSize, 8, 24);
1990     else
1991       return false;
1992   }
1993 }
1994 
1995 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1996 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1997 /// The ShuffleKind distinguishes between big-endian merges with two
1998 /// different inputs (0), either-endian merges with two identical inputs (1),
1999 /// and little-endian merges with two different inputs (2).  For the latter,
2000 /// the input operands are swapped (see PPCInstrAltivec.td).
2001 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2002                              unsigned ShuffleKind, SelectionDAG &DAG) {
2003   if (DAG.getDataLayout().isLittleEndian()) {
2004     if (ShuffleKind == 1) // unary
2005       return isVMerge(N, UnitSize, 8, 8);
2006     else if (ShuffleKind == 2) // swapped
2007       return isVMerge(N, UnitSize, 8, 24);
2008     else
2009       return false;
2010   } else {
2011     if (ShuffleKind == 1) // unary
2012       return isVMerge(N, UnitSize, 0, 0);
2013     else if (ShuffleKind == 0) // normal
2014       return isVMerge(N, UnitSize, 0, 16);
2015     else
2016       return false;
2017   }
2018 }
2019 
2020 /**
2021  * Common function used to match vmrgew and vmrgow shuffles
2022  *
2023  * The indexOffset determines whether to look for even or odd words in
2024  * the shuffle mask. This is based on the of the endianness of the target
2025  * machine.
2026  *   - Little Endian:
2027  *     - Use offset of 0 to check for odd elements
2028  *     - Use offset of 4 to check for even elements
2029  *   - Big Endian:
2030  *     - Use offset of 0 to check for even elements
2031  *     - Use offset of 4 to check for odd elements
2032  * A detailed description of the vector element ordering for little endian and
2033  * big endian can be found at
2034  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2035  * Targeting your applications - what little endian and big endian IBM XL C/C++
2036  * compiler differences mean to you
2037  *
2038  * The mask to the shuffle vector instruction specifies the indices of the
2039  * elements from the two input vectors to place in the result. The elements are
2040  * numbered in array-access order, starting with the first vector. These vectors
2041  * are always of type v16i8, thus each vector will contain 16 elements of size
2042  * 8. More info on the shuffle vector can be found in the
2043  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2044  * Language Reference.
2045  *
2046  * The RHSStartValue indicates whether the same input vectors are used (unary)
2047  * or two different input vectors are used, based on the following:
2048  *   - If the instruction uses the same vector for both inputs, the range of the
2049  *     indices will be 0 to 15. In this case, the RHSStart value passed should
2050  *     be 0.
2051  *   - If the instruction has two different vectors then the range of the
2052  *     indices will be 0 to 31. In this case, the RHSStart value passed should
2053  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
2054  *     to 31 specify elements in the second vector).
2055  *
2056  * \param[in] N The shuffle vector SD Node to analyze
2057  * \param[in] IndexOffset Specifies whether to look for even or odd elements
2058  * \param[in] RHSStartValue Specifies the starting index for the righthand input
2059  * vector to the shuffle_vector instruction
2060  * \return true iff this shuffle vector represents an even or odd word merge
2061  */
2062 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2063                      unsigned RHSStartValue) {
2064   if (N->getValueType(0) != MVT::v16i8)
2065     return false;
2066 
2067   for (unsigned i = 0; i < 2; ++i)
2068     for (unsigned j = 0; j < 4; ++j)
2069       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2070                              i*RHSStartValue+j+IndexOffset) ||
2071           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2072                              i*RHSStartValue+j+IndexOffset+8))
2073         return false;
2074   return true;
2075 }
2076 
2077 /**
2078  * Determine if the specified shuffle mask is suitable for the vmrgew or
2079  * vmrgow instructions.
2080  *
2081  * \param[in] N The shuffle vector SD Node to analyze
2082  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2083  * \param[in] ShuffleKind Identify the type of merge:
2084  *   - 0 = big-endian merge with two different inputs;
2085  *   - 1 = either-endian merge with two identical inputs;
2086  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
2087  *     little-endian merges).
2088  * \param[in] DAG The current SelectionDAG
2089  * \return true iff this shuffle mask
2090  */
2091 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2092                               unsigned ShuffleKind, SelectionDAG &DAG) {
2093   if (DAG.getDataLayout().isLittleEndian()) {
2094     unsigned indexOffset = CheckEven ? 4 : 0;
2095     if (ShuffleKind == 1) // Unary
2096       return isVMerge(N, indexOffset, 0);
2097     else if (ShuffleKind == 2) // swapped
2098       return isVMerge(N, indexOffset, 16);
2099     else
2100       return false;
2101   }
2102   else {
2103     unsigned indexOffset = CheckEven ? 0 : 4;
2104     if (ShuffleKind == 1) // Unary
2105       return isVMerge(N, indexOffset, 0);
2106     else if (ShuffleKind == 0) // Normal
2107       return isVMerge(N, indexOffset, 16);
2108     else
2109       return false;
2110   }
2111   return false;
2112 }
2113 
2114 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2115 /// amount, otherwise return -1.
2116 /// The ShuffleKind distinguishes between big-endian operations with two
2117 /// different inputs (0), either-endian operations with two identical inputs
2118 /// (1), and little-endian operations with two different inputs (2).  For the
2119 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
2120 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2121                              SelectionDAG &DAG) {
2122   if (N->getValueType(0) != MVT::v16i8)
2123     return -1;
2124 
2125   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2126 
2127   // Find the first non-undef value in the shuffle mask.
2128   unsigned i;
2129   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2130     /*search*/;
2131 
2132   if (i == 16) return -1;  // all undef.
2133 
2134   // Otherwise, check to see if the rest of the elements are consecutively
2135   // numbered from this value.
2136   unsigned ShiftAmt = SVOp->getMaskElt(i);
2137   if (ShiftAmt < i) return -1;
2138 
2139   ShiftAmt -= i;
2140   bool isLE = DAG.getDataLayout().isLittleEndian();
2141 
2142   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2143     // Check the rest of the elements to see if they are consecutive.
2144     for (++i; i != 16; ++i)
2145       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2146         return -1;
2147   } else if (ShuffleKind == 1) {
2148     // Check the rest of the elements to see if they are consecutive.
2149     for (++i; i != 16; ++i)
2150       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2151         return -1;
2152   } else
2153     return -1;
2154 
2155   if (isLE)
2156     ShiftAmt = 16 - ShiftAmt;
2157 
2158   return ShiftAmt;
2159 }
2160 
2161 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2162 /// specifies a splat of a single element that is suitable for input to
2163 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2164 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2165   EVT VT = N->getValueType(0);
2166   if (VT == MVT::v2i64 || VT == MVT::v2f64)
2167     return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2168 
2169   assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2170          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2171 
2172   // The consecutive indices need to specify an element, not part of two
2173   // different elements.  So abandon ship early if this isn't the case.
2174   if (N->getMaskElt(0) % EltSize != 0)
2175     return false;
2176 
2177   // This is a splat operation if each element of the permute is the same, and
2178   // if the value doesn't reference the second vector.
2179   unsigned ElementBase = N->getMaskElt(0);
2180 
2181   // FIXME: Handle UNDEF elements too!
2182   if (ElementBase >= 16)
2183     return false;
2184 
2185   // Check that the indices are consecutive, in the case of a multi-byte element
2186   // splatted with a v16i8 mask.
2187   for (unsigned i = 1; i != EltSize; ++i)
2188     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2189       return false;
2190 
2191   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2192     if (N->getMaskElt(i) < 0) continue;
2193     for (unsigned j = 0; j != EltSize; ++j)
2194       if (N->getMaskElt(i+j) != N->getMaskElt(j))
2195         return false;
2196   }
2197   return true;
2198 }
2199 
2200 /// Check that the mask is shuffling N byte elements. Within each N byte
2201 /// element of the mask, the indices could be either in increasing or
2202 /// decreasing order as long as they are consecutive.
2203 /// \param[in] N the shuffle vector SD Node to analyze
2204 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2205 /// Word/DoubleWord/QuadWord).
2206 /// \param[in] StepLen the delta indices number among the N byte element, if
2207 /// the mask is in increasing/decreasing order then it is 1/-1.
2208 /// \return true iff the mask is shuffling N byte elements.
2209 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2210                                    int StepLen) {
2211   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2212          "Unexpected element width.");
2213   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2214 
2215   unsigned NumOfElem = 16 / Width;
2216   unsigned MaskVal[16]; //  Width is never greater than 16
2217   for (unsigned i = 0; i < NumOfElem; ++i) {
2218     MaskVal[0] = N->getMaskElt(i * Width);
2219     if ((StepLen == 1) && (MaskVal[0] % Width)) {
2220       return false;
2221     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2222       return false;
2223     }
2224 
2225     for (unsigned int j = 1; j < Width; ++j) {
2226       MaskVal[j] = N->getMaskElt(i * Width + j);
2227       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2228         return false;
2229       }
2230     }
2231   }
2232 
2233   return true;
2234 }
2235 
2236 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2237                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2238   if (!isNByteElemShuffleMask(N, 4, 1))
2239     return false;
2240 
2241   // Now we look at mask elements 0,4,8,12
2242   unsigned M0 = N->getMaskElt(0) / 4;
2243   unsigned M1 = N->getMaskElt(4) / 4;
2244   unsigned M2 = N->getMaskElt(8) / 4;
2245   unsigned M3 = N->getMaskElt(12) / 4;
2246   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2247   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2248 
2249   // Below, let H and L be arbitrary elements of the shuffle mask
2250   // where H is in the range [4,7] and L is in the range [0,3].
2251   // H, 1, 2, 3 or L, 5, 6, 7
2252   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2253       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2254     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2255     InsertAtByte = IsLE ? 12 : 0;
2256     Swap = M0 < 4;
2257     return true;
2258   }
2259   // 0, H, 2, 3 or 4, L, 6, 7
2260   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2261       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2262     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2263     InsertAtByte = IsLE ? 8 : 4;
2264     Swap = M1 < 4;
2265     return true;
2266   }
2267   // 0, 1, H, 3 or 4, 5, L, 7
2268   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2269       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2270     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2271     InsertAtByte = IsLE ? 4 : 8;
2272     Swap = M2 < 4;
2273     return true;
2274   }
2275   // 0, 1, 2, H or 4, 5, 6, L
2276   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2277       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2278     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2279     InsertAtByte = IsLE ? 0 : 12;
2280     Swap = M3 < 4;
2281     return true;
2282   }
2283 
2284   // If both vector operands for the shuffle are the same vector, the mask will
2285   // contain only elements from the first one and the second one will be undef.
2286   if (N->getOperand(1).isUndef()) {
2287     ShiftElts = 0;
2288     Swap = true;
2289     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2290     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2291       InsertAtByte = IsLE ? 12 : 0;
2292       return true;
2293     }
2294     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2295       InsertAtByte = IsLE ? 8 : 4;
2296       return true;
2297     }
2298     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2299       InsertAtByte = IsLE ? 4 : 8;
2300       return true;
2301     }
2302     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2303       InsertAtByte = IsLE ? 0 : 12;
2304       return true;
2305     }
2306   }
2307 
2308   return false;
2309 }
2310 
2311 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2312                                bool &Swap, bool IsLE) {
2313   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2314   // Ensure each byte index of the word is consecutive.
2315   if (!isNByteElemShuffleMask(N, 4, 1))
2316     return false;
2317 
2318   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2319   unsigned M0 = N->getMaskElt(0) / 4;
2320   unsigned M1 = N->getMaskElt(4) / 4;
2321   unsigned M2 = N->getMaskElt(8) / 4;
2322   unsigned M3 = N->getMaskElt(12) / 4;
2323 
2324   // If both vector operands for the shuffle are the same vector, the mask will
2325   // contain only elements from the first one and the second one will be undef.
2326   if (N->getOperand(1).isUndef()) {
2327     assert(M0 < 4 && "Indexing into an undef vector?");
2328     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2329       return false;
2330 
2331     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2332     Swap = false;
2333     return true;
2334   }
2335 
2336   // Ensure each word index of the ShuffleVector Mask is consecutive.
2337   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2338     return false;
2339 
2340   if (IsLE) {
2341     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2342       // Input vectors don't need to be swapped if the leading element
2343       // of the result is one of the 3 left elements of the second vector
2344       // (or if there is no shift to be done at all).
2345       Swap = false;
2346       ShiftElts = (8 - M0) % 8;
2347     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2348       // Input vectors need to be swapped if the leading element
2349       // of the result is one of the 3 left elements of the first vector
2350       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2351       Swap = true;
2352       ShiftElts = (4 - M0) % 4;
2353     }
2354 
2355     return true;
2356   } else {                                          // BE
2357     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2358       // Input vectors don't need to be swapped if the leading element
2359       // of the result is one of the 4 elements of the first vector.
2360       Swap = false;
2361       ShiftElts = M0;
2362     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2363       // Input vectors need to be swapped if the leading element
2364       // of the result is one of the 4 elements of the right vector.
2365       Swap = true;
2366       ShiftElts = M0 - 4;
2367     }
2368 
2369     return true;
2370   }
2371 }
2372 
2373 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2374   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2375 
2376   if (!isNByteElemShuffleMask(N, Width, -1))
2377     return false;
2378 
2379   for (int i = 0; i < 16; i += Width)
2380     if (N->getMaskElt(i) != i + Width - 1)
2381       return false;
2382 
2383   return true;
2384 }
2385 
2386 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2387   return isXXBRShuffleMaskHelper(N, 2);
2388 }
2389 
2390 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2391   return isXXBRShuffleMaskHelper(N, 4);
2392 }
2393 
2394 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2395   return isXXBRShuffleMaskHelper(N, 8);
2396 }
2397 
2398 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2399   return isXXBRShuffleMaskHelper(N, 16);
2400 }
2401 
2402 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2403 /// if the inputs to the instruction should be swapped and set \p DM to the
2404 /// value for the immediate.
2405 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2406 /// AND element 0 of the result comes from the first input (LE) or second input
2407 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2408 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2409 /// mask.
2410 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2411                                bool &Swap, bool IsLE) {
2412   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2413 
2414   // Ensure each byte index of the double word is consecutive.
2415   if (!isNByteElemShuffleMask(N, 8, 1))
2416     return false;
2417 
2418   unsigned M0 = N->getMaskElt(0) / 8;
2419   unsigned M1 = N->getMaskElt(8) / 8;
2420   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2421 
2422   // If both vector operands for the shuffle are the same vector, the mask will
2423   // contain only elements from the first one and the second one will be undef.
2424   if (N->getOperand(1).isUndef()) {
2425     if ((M0 | M1) < 2) {
2426       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2427       Swap = false;
2428       return true;
2429     } else
2430       return false;
2431   }
2432 
2433   if (IsLE) {
2434     if (M0 > 1 && M1 < 2) {
2435       Swap = false;
2436     } else if (M0 < 2 && M1 > 1) {
2437       M0 = (M0 + 2) % 4;
2438       M1 = (M1 + 2) % 4;
2439       Swap = true;
2440     } else
2441       return false;
2442 
2443     // Note: if control flow comes here that means Swap is already set above
2444     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2445     return true;
2446   } else { // BE
2447     if (M0 < 2 && M1 > 1) {
2448       Swap = false;
2449     } else if (M0 > 1 && M1 < 2) {
2450       M0 = (M0 + 2) % 4;
2451       M1 = (M1 + 2) % 4;
2452       Swap = true;
2453     } else
2454       return false;
2455 
2456     // Note: if control flow comes here that means Swap is already set above
2457     DM = (M0 << 1) + (M1 & 1);
2458     return true;
2459   }
2460 }
2461 
2462 
2463 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2464 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2465 /// elements are counted from the left of the vector register).
2466 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2467                                          SelectionDAG &DAG) {
2468   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2469   assert(isSplatShuffleMask(SVOp, EltSize));
2470   EVT VT = SVOp->getValueType(0);
2471 
2472   if (VT == MVT::v2i64 || VT == MVT::v2f64)
2473     return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2474                                                 : SVOp->getMaskElt(0);
2475 
2476   if (DAG.getDataLayout().isLittleEndian())
2477     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2478   else
2479     return SVOp->getMaskElt(0) / EltSize;
2480 }
2481 
2482 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2483 /// by using a vspltis[bhw] instruction of the specified element size, return
2484 /// the constant being splatted.  The ByteSize field indicates the number of
2485 /// bytes of each element [124] -> [bhw].
2486 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2487   SDValue OpVal;
2488 
2489   // If ByteSize of the splat is bigger than the element size of the
2490   // build_vector, then we have a case where we are checking for a splat where
2491   // multiple elements of the buildvector are folded together into a single
2492   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2493   unsigned EltSize = 16/N->getNumOperands();
2494   if (EltSize < ByteSize) {
2495     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2496     SDValue UniquedVals[4];
2497     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2498 
2499     // See if all of the elements in the buildvector agree across.
2500     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2501       if (N->getOperand(i).isUndef()) continue;
2502       // If the element isn't a constant, bail fully out.
2503       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2504 
2505       if (!UniquedVals[i&(Multiple-1)].getNode())
2506         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2507       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2508         return SDValue();  // no match.
2509     }
2510 
2511     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2512     // either constant or undef values that are identical for each chunk.  See
2513     // if these chunks can form into a larger vspltis*.
2514 
2515     // Check to see if all of the leading entries are either 0 or -1.  If
2516     // neither, then this won't fit into the immediate field.
2517     bool LeadingZero = true;
2518     bool LeadingOnes = true;
2519     for (unsigned i = 0; i != Multiple-1; ++i) {
2520       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2521 
2522       LeadingZero &= isNullConstant(UniquedVals[i]);
2523       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2524     }
2525     // Finally, check the least significant entry.
2526     if (LeadingZero) {
2527       if (!UniquedVals[Multiple-1].getNode())
2528         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2529       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2530       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2531         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2532     }
2533     if (LeadingOnes) {
2534       if (!UniquedVals[Multiple-1].getNode())
2535         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2536       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2537       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2538         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2539     }
2540 
2541     return SDValue();
2542   }
2543 
2544   // Check to see if this buildvec has a single non-undef value in its elements.
2545   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2546     if (N->getOperand(i).isUndef()) continue;
2547     if (!OpVal.getNode())
2548       OpVal = N->getOperand(i);
2549     else if (OpVal != N->getOperand(i))
2550       return SDValue();
2551   }
2552 
2553   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2554 
2555   unsigned ValSizeInBytes = EltSize;
2556   uint64_t Value = 0;
2557   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2558     Value = CN->getZExtValue();
2559   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2560     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2561     Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2562   }
2563 
2564   // If the splat value is larger than the element value, then we can never do
2565   // this splat.  The only case that we could fit the replicated bits into our
2566   // immediate field for would be zero, and we prefer to use vxor for it.
2567   if (ValSizeInBytes < ByteSize) return SDValue();
2568 
2569   // If the element value is larger than the splat value, check if it consists
2570   // of a repeated bit pattern of size ByteSize.
2571   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2572     return SDValue();
2573 
2574   // Properly sign extend the value.
2575   int MaskVal = SignExtend32(Value, ByteSize * 8);
2576 
2577   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2578   if (MaskVal == 0) return SDValue();
2579 
2580   // Finally, if this value fits in a 5 bit sext field, return it
2581   if (SignExtend32<5>(MaskVal) == MaskVal)
2582     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2583   return SDValue();
2584 }
2585 
2586 //===----------------------------------------------------------------------===//
2587 //  Addressing Mode Selection
2588 //===----------------------------------------------------------------------===//
2589 
2590 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2591 /// or 64-bit immediate, and if the value can be accurately represented as a
2592 /// sign extension from a 16-bit value.  If so, this returns true and the
2593 /// immediate.
2594 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2595   if (!isa<ConstantSDNode>(N))
2596     return false;
2597 
2598   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2599   if (N->getValueType(0) == MVT::i32)
2600     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2601   else
2602     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2603 }
2604 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2605   return isIntS16Immediate(Op.getNode(), Imm);
2606 }
2607 
2608 /// Used when computing address flags for selecting loads and stores.
2609 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2610 /// An OR of two provably disjoint values is equivalent to an ADD.
2611 /// Most PPC load/store instructions compute the effective address as a sum,
2612 /// so doing this conversion is useful.
2613 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2614   if (N.getOpcode() != ISD::OR)
2615     return false;
2616   KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2617   if (!LHSKnown.Zero.getBoolValue())
2618     return false;
2619   KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2620   return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2621 }
2622 
2623 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2624 /// be represented as an indexed [r+r] operation.
2625 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2626                                                SDValue &Index,
2627                                                SelectionDAG &DAG) const {
2628   for (SDNode *U : N->uses()) {
2629     if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2630       if (Memop->getMemoryVT() == MVT::f64) {
2631           Base = N.getOperand(0);
2632           Index = N.getOperand(1);
2633           return true;
2634       }
2635     }
2636   }
2637   return false;
2638 }
2639 
2640 /// isIntS34Immediate - This method tests if value of node given can be
2641 /// accurately represented as a sign extension from a 34-bit value.  If so,
2642 /// this returns true and the immediate.
2643 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2644   if (!isa<ConstantSDNode>(N))
2645     return false;
2646 
2647   Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2648   return isInt<34>(Imm);
2649 }
2650 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2651   return isIntS34Immediate(Op.getNode(), Imm);
2652 }
2653 
2654 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2655 /// can be represented as an indexed [r+r] operation.  Returns false if it
2656 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2657 /// non-zero and N can be represented by a base register plus a signed 16-bit
2658 /// displacement, make a more precise judgement by checking (displacement % \p
2659 /// EncodingAlignment).
2660 bool PPCTargetLowering::SelectAddressRegReg(
2661     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2662     MaybeAlign EncodingAlignment) const {
2663   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2664   // a [pc+imm].
2665   if (SelectAddressPCRel(N, Base))
2666     return false;
2667 
2668   int16_t Imm = 0;
2669   if (N.getOpcode() == ISD::ADD) {
2670     // Is there any SPE load/store (f64), which can't handle 16bit offset?
2671     // SPE load/store can only handle 8-bit offsets.
2672     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2673         return true;
2674     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2675         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2676       return false; // r+i
2677     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2678       return false;    // r+i
2679 
2680     Base = N.getOperand(0);
2681     Index = N.getOperand(1);
2682     return true;
2683   } else if (N.getOpcode() == ISD::OR) {
2684     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2685         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2686       return false; // r+i can fold it if we can.
2687 
2688     // If this is an or of disjoint bitfields, we can codegen this as an add
2689     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2690     // disjoint.
2691     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2692 
2693     if (LHSKnown.Zero.getBoolValue()) {
2694       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2695       // If all of the bits are known zero on the LHS or RHS, the add won't
2696       // carry.
2697       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2698         Base = N.getOperand(0);
2699         Index = N.getOperand(1);
2700         return true;
2701       }
2702     }
2703   }
2704 
2705   return false;
2706 }
2707 
2708 // If we happen to be doing an i64 load or store into a stack slot that has
2709 // less than a 4-byte alignment, then the frame-index elimination may need to
2710 // use an indexed load or store instruction (because the offset may not be a
2711 // multiple of 4). The extra register needed to hold the offset comes from the
2712 // register scavenger, and it is possible that the scavenger will need to use
2713 // an emergency spill slot. As a result, we need to make sure that a spill slot
2714 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2715 // stack slot.
2716 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2717   // FIXME: This does not handle the LWA case.
2718   if (VT != MVT::i64)
2719     return;
2720 
2721   // NOTE: We'll exclude negative FIs here, which come from argument
2722   // lowering, because there are no known test cases triggering this problem
2723   // using packed structures (or similar). We can remove this exclusion if
2724   // we find such a test case. The reason why this is so test-case driven is
2725   // because this entire 'fixup' is only to prevent crashes (from the
2726   // register scavenger) on not-really-valid inputs. For example, if we have:
2727   //   %a = alloca i1
2728   //   %b = bitcast i1* %a to i64*
2729   //   store i64* a, i64 b
2730   // then the store should really be marked as 'align 1', but is not. If it
2731   // were marked as 'align 1' then the indexed form would have been
2732   // instruction-selected initially, and the problem this 'fixup' is preventing
2733   // won't happen regardless.
2734   if (FrameIdx < 0)
2735     return;
2736 
2737   MachineFunction &MF = DAG.getMachineFunction();
2738   MachineFrameInfo &MFI = MF.getFrameInfo();
2739 
2740   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2741     return;
2742 
2743   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2744   FuncInfo->setHasNonRISpills();
2745 }
2746 
2747 /// Returns true if the address N can be represented by a base register plus
2748 /// a signed 16-bit displacement [r+imm], and if it is not better
2749 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2750 /// displacements that are multiples of that value.
2751 bool PPCTargetLowering::SelectAddressRegImm(
2752     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2753     MaybeAlign EncodingAlignment) const {
2754   // FIXME dl should come from parent load or store, not from address
2755   SDLoc dl(N);
2756 
2757   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2758   // a [pc+imm].
2759   if (SelectAddressPCRel(N, Base))
2760     return false;
2761 
2762   // If this can be more profitably realized as r+r, fail.
2763   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2764     return false;
2765 
2766   if (N.getOpcode() == ISD::ADD) {
2767     int16_t imm = 0;
2768     if (isIntS16Immediate(N.getOperand(1), imm) &&
2769         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2770       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2771       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2772         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2773         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2774       } else {
2775         Base = N.getOperand(0);
2776       }
2777       return true; // [r+i]
2778     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2779       // Match LOAD (ADD (X, Lo(G))).
2780       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2781              && "Cannot handle constant offsets yet!");
2782       Disp = N.getOperand(1).getOperand(0);  // The global address.
2783       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2784              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2785              Disp.getOpcode() == ISD::TargetConstantPool ||
2786              Disp.getOpcode() == ISD::TargetJumpTable);
2787       Base = N.getOperand(0);
2788       return true;  // [&g+r]
2789     }
2790   } else if (N.getOpcode() == ISD::OR) {
2791     int16_t imm = 0;
2792     if (isIntS16Immediate(N.getOperand(1), imm) &&
2793         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2794       // If this is an or of disjoint bitfields, we can codegen this as an add
2795       // (for better address arithmetic) if the LHS and RHS of the OR are
2796       // provably disjoint.
2797       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2798 
2799       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2800         // If all of the bits are known zero on the LHS or RHS, the add won't
2801         // carry.
2802         if (FrameIndexSDNode *FI =
2803               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2804           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2805           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2806         } else {
2807           Base = N.getOperand(0);
2808         }
2809         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2810         return true;
2811       }
2812     }
2813   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2814     // Loading from a constant address.
2815 
2816     // If this address fits entirely in a 16-bit sext immediate field, codegen
2817     // this as "d, 0"
2818     int16_t Imm;
2819     if (isIntS16Immediate(CN, Imm) &&
2820         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2821       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2822       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2823                              CN->getValueType(0));
2824       return true;
2825     }
2826 
2827     // Handle 32-bit sext immediates with LIS + addr mode.
2828     if ((CN->getValueType(0) == MVT::i32 ||
2829          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2830         (!EncodingAlignment ||
2831          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2832       int Addr = (int)CN->getZExtValue();
2833 
2834       // Otherwise, break this down into an LIS + disp.
2835       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2836 
2837       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2838                                    MVT::i32);
2839       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2840       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2841       return true;
2842     }
2843   }
2844 
2845   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2846   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2847     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2848     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2849   } else
2850     Base = N;
2851   return true;      // [r+0]
2852 }
2853 
2854 /// Similar to the 16-bit case but for instructions that take a 34-bit
2855 /// displacement field (prefixed loads/stores).
2856 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2857                                               SDValue &Base,
2858                                               SelectionDAG &DAG) const {
2859   // Only on 64-bit targets.
2860   if (N.getValueType() != MVT::i64)
2861     return false;
2862 
2863   SDLoc dl(N);
2864   int64_t Imm = 0;
2865 
2866   if (N.getOpcode() == ISD::ADD) {
2867     if (!isIntS34Immediate(N.getOperand(1), Imm))
2868       return false;
2869     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2870     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2871       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2872     else
2873       Base = N.getOperand(0);
2874     return true;
2875   }
2876 
2877   if (N.getOpcode() == ISD::OR) {
2878     if (!isIntS34Immediate(N.getOperand(1), Imm))
2879       return false;
2880     // If this is an or of disjoint bitfields, we can codegen this as an add
2881     // (for better address arithmetic) if the LHS and RHS of the OR are
2882     // provably disjoint.
2883     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2884     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2885       return false;
2886     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2887       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2888     else
2889       Base = N.getOperand(0);
2890     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2891     return true;
2892   }
2893 
2894   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2895     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2896     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2897     return true;
2898   }
2899 
2900   return false;
2901 }
2902 
2903 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2904 /// represented as an indexed [r+r] operation.
2905 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2906                                                 SDValue &Index,
2907                                                 SelectionDAG &DAG) const {
2908   // Check to see if we can easily represent this as an [r+r] address.  This
2909   // will fail if it thinks that the address is more profitably represented as
2910   // reg+imm, e.g. where imm = 0.
2911   if (SelectAddressRegReg(N, Base, Index, DAG))
2912     return true;
2913 
2914   // If the address is the result of an add, we will utilize the fact that the
2915   // address calculation includes an implicit add.  However, we can reduce
2916   // register pressure if we do not materialize a constant just for use as the
2917   // index register.  We only get rid of the add if it is not an add of a
2918   // value and a 16-bit signed constant and both have a single use.
2919   int16_t imm = 0;
2920   if (N.getOpcode() == ISD::ADD &&
2921       (!isIntS16Immediate(N.getOperand(1), imm) ||
2922        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2923     Base = N.getOperand(0);
2924     Index = N.getOperand(1);
2925     return true;
2926   }
2927 
2928   // Otherwise, do it the hard way, using R0 as the base register.
2929   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2930                          N.getValueType());
2931   Index = N;
2932   return true;
2933 }
2934 
2935 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2936   Ty *PCRelCand = dyn_cast<Ty>(N);
2937   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2938 }
2939 
2940 /// Returns true if this address is a PC Relative address.
2941 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2942 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2943 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2944   // This is a materialize PC Relative node. Always select this as PC Relative.
2945   Base = N;
2946   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2947     return true;
2948   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2949       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2950       isValidPCRelNode<JumpTableSDNode>(N) ||
2951       isValidPCRelNode<BlockAddressSDNode>(N))
2952     return true;
2953   return false;
2954 }
2955 
2956 /// Returns true if we should use a direct load into vector instruction
2957 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2958 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2959 
2960   // If there are any other uses other than scalar to vector, then we should
2961   // keep it as a scalar load -> direct move pattern to prevent multiple
2962   // loads.
2963   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2964   if (!LD)
2965     return false;
2966 
2967   EVT MemVT = LD->getMemoryVT();
2968   if (!MemVT.isSimple())
2969     return false;
2970   switch(MemVT.getSimpleVT().SimpleTy) {
2971   case MVT::i64:
2972     break;
2973   case MVT::i32:
2974     if (!ST.hasP8Vector())
2975       return false;
2976     break;
2977   case MVT::i16:
2978   case MVT::i8:
2979     if (!ST.hasP9Vector())
2980       return false;
2981     break;
2982   default:
2983     return false;
2984   }
2985 
2986   SDValue LoadedVal(N, 0);
2987   if (!LoadedVal.hasOneUse())
2988     return false;
2989 
2990   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2991        UI != UE; ++UI)
2992     if (UI.getUse().get().getResNo() == 0 &&
2993         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2994         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2995       return false;
2996 
2997   return true;
2998 }
2999 
3000 /// getPreIndexedAddressParts - returns true by value, base pointer and
3001 /// offset pointer and addressing mode by reference if the node's address
3002 /// can be legally represented as pre-indexed load / store address.
3003 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3004                                                   SDValue &Offset,
3005                                                   ISD::MemIndexedMode &AM,
3006                                                   SelectionDAG &DAG) const {
3007   if (DisablePPCPreinc) return false;
3008 
3009   bool isLoad = true;
3010   SDValue Ptr;
3011   EVT VT;
3012   Align Alignment;
3013   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3014     Ptr = LD->getBasePtr();
3015     VT = LD->getMemoryVT();
3016     Alignment = LD->getAlign();
3017   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3018     Ptr = ST->getBasePtr();
3019     VT  = ST->getMemoryVT();
3020     Alignment = ST->getAlign();
3021     isLoad = false;
3022   } else
3023     return false;
3024 
3025   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3026   // instructions because we can fold these into a more efficient instruction
3027   // instead, (such as LXSD).
3028   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3029     return false;
3030   }
3031 
3032   // PowerPC doesn't have preinc load/store instructions for vectors
3033   if (VT.isVector())
3034     return false;
3035 
3036   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3037     // Common code will reject creating a pre-inc form if the base pointer
3038     // is a frame index, or if N is a store and the base pointer is either
3039     // the same as or a predecessor of the value being stored.  Check for
3040     // those situations here, and try with swapped Base/Offset instead.
3041     bool Swap = false;
3042 
3043     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3044       Swap = true;
3045     else if (!isLoad) {
3046       SDValue Val = cast<StoreSDNode>(N)->getValue();
3047       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3048         Swap = true;
3049     }
3050 
3051     if (Swap)
3052       std::swap(Base, Offset);
3053 
3054     AM = ISD::PRE_INC;
3055     return true;
3056   }
3057 
3058   // LDU/STU can only handle immediates that are a multiple of 4.
3059   if (VT != MVT::i64) {
3060     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3061       return false;
3062   } else {
3063     // LDU/STU need an address with at least 4-byte alignment.
3064     if (Alignment < Align(4))
3065       return false;
3066 
3067     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3068       return false;
3069   }
3070 
3071   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3072     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
3073     // sext i32 to i64 when addr mode is r+i.
3074     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3075         LD->getExtensionType() == ISD::SEXTLOAD &&
3076         isa<ConstantSDNode>(Offset))
3077       return false;
3078   }
3079 
3080   AM = ISD::PRE_INC;
3081   return true;
3082 }
3083 
3084 //===----------------------------------------------------------------------===//
3085 //  LowerOperation implementation
3086 //===----------------------------------------------------------------------===//
3087 
3088 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
3089 /// and LoOpFlags to the target MO flags.
3090 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3091                                unsigned &HiOpFlags, unsigned &LoOpFlags,
3092                                const GlobalValue *GV = nullptr) {
3093   HiOpFlags = PPCII::MO_HA;
3094   LoOpFlags = PPCII::MO_LO;
3095 
3096   // Don't use the pic base if not in PIC relocation model.
3097   if (IsPIC) {
3098     HiOpFlags |= PPCII::MO_PIC_FLAG;
3099     LoOpFlags |= PPCII::MO_PIC_FLAG;
3100   }
3101 }
3102 
3103 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3104                              SelectionDAG &DAG) {
3105   SDLoc DL(HiPart);
3106   EVT PtrVT = HiPart.getValueType();
3107   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3108 
3109   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3110   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3111 
3112   // With PIC, the first instruction is actually "GR+hi(&G)".
3113   if (isPIC)
3114     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3115                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3116 
3117   // Generate non-pic code that has direct accesses to the constant pool.
3118   // The address of the global is just (hi(&g)+lo(&g)).
3119   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3120 }
3121 
3122 static void setUsesTOCBasePtr(MachineFunction &MF) {
3123   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3124   FuncInfo->setUsesTOCBasePtr();
3125 }
3126 
3127 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3128   setUsesTOCBasePtr(DAG.getMachineFunction());
3129 }
3130 
3131 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3132                                        SDValue GA) const {
3133   const bool Is64Bit = Subtarget.isPPC64();
3134   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3135   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3136                         : Subtarget.isAIXABI()
3137                               ? DAG.getRegister(PPC::R2, VT)
3138                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3139   SDValue Ops[] = { GA, Reg };
3140   return DAG.getMemIntrinsicNode(
3141       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3142       MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3143       MachineMemOperand::MOLoad);
3144 }
3145 
3146 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3147                                              SelectionDAG &DAG) const {
3148   EVT PtrVT = Op.getValueType();
3149   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3150   const Constant *C = CP->getConstVal();
3151 
3152   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3153   // The actual address of the GlobalValue is stored in the TOC.
3154   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3155     if (Subtarget.isUsingPCRelativeCalls()) {
3156       SDLoc DL(CP);
3157       EVT Ty = getPointerTy(DAG.getDataLayout());
3158       SDValue ConstPool = DAG.getTargetConstantPool(
3159           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3160       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3161     }
3162     setUsesTOCBasePtr(DAG);
3163     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3164     return getTOCEntry(DAG, SDLoc(CP), GA);
3165   }
3166 
3167   unsigned MOHiFlag, MOLoFlag;
3168   bool IsPIC = isPositionIndependent();
3169   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3170 
3171   if (IsPIC && Subtarget.isSVR4ABI()) {
3172     SDValue GA =
3173         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3174     return getTOCEntry(DAG, SDLoc(CP), GA);
3175   }
3176 
3177   SDValue CPIHi =
3178       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3179   SDValue CPILo =
3180       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3181   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3182 }
3183 
3184 // For 64-bit PowerPC, prefer the more compact relative encodings.
3185 // This trades 32 bits per jump table entry for one or two instructions
3186 // on the jump site.
3187 unsigned PPCTargetLowering::getJumpTableEncoding() const {
3188   if (isJumpTableRelative())
3189     return MachineJumpTableInfo::EK_LabelDifference32;
3190 
3191   return TargetLowering::getJumpTableEncoding();
3192 }
3193 
3194 bool PPCTargetLowering::isJumpTableRelative() const {
3195   if (UseAbsoluteJumpTables)
3196     return false;
3197   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3198     return true;
3199   return TargetLowering::isJumpTableRelative();
3200 }
3201 
3202 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3203                                                     SelectionDAG &DAG) const {
3204   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3205     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3206 
3207   switch (getTargetMachine().getCodeModel()) {
3208   case CodeModel::Small:
3209   case CodeModel::Medium:
3210     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3211   default:
3212     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3213                        getPointerTy(DAG.getDataLayout()));
3214   }
3215 }
3216 
3217 const MCExpr *
3218 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3219                                                 unsigned JTI,
3220                                                 MCContext &Ctx) const {
3221   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3222     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3223 
3224   switch (getTargetMachine().getCodeModel()) {
3225   case CodeModel::Small:
3226   case CodeModel::Medium:
3227     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3228   default:
3229     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3230   }
3231 }
3232 
3233 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3234   EVT PtrVT = Op.getValueType();
3235   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3236 
3237   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3238   if (Subtarget.isUsingPCRelativeCalls()) {
3239     SDLoc DL(JT);
3240     EVT Ty = getPointerTy(DAG.getDataLayout());
3241     SDValue GA =
3242         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3243     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3244     return MatAddr;
3245   }
3246 
3247   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3248   // The actual address of the GlobalValue is stored in the TOC.
3249   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3250     setUsesTOCBasePtr(DAG);
3251     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3252     return getTOCEntry(DAG, SDLoc(JT), GA);
3253   }
3254 
3255   unsigned MOHiFlag, MOLoFlag;
3256   bool IsPIC = isPositionIndependent();
3257   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3258 
3259   if (IsPIC && Subtarget.isSVR4ABI()) {
3260     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3261                                         PPCII::MO_PIC_FLAG);
3262     return getTOCEntry(DAG, SDLoc(GA), GA);
3263   }
3264 
3265   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3266   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3267   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3268 }
3269 
3270 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3271                                              SelectionDAG &DAG) const {
3272   EVT PtrVT = Op.getValueType();
3273   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3274   const BlockAddress *BA = BASDN->getBlockAddress();
3275 
3276   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3277   if (Subtarget.isUsingPCRelativeCalls()) {
3278     SDLoc DL(BASDN);
3279     EVT Ty = getPointerTy(DAG.getDataLayout());
3280     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3281                                            PPCII::MO_PCREL_FLAG);
3282     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3283     return MatAddr;
3284   }
3285 
3286   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3287   // The actual BlockAddress is stored in the TOC.
3288   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3289     setUsesTOCBasePtr(DAG);
3290     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3291     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3292   }
3293 
3294   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3295   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3296     return getTOCEntry(
3297         DAG, SDLoc(BASDN),
3298         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3299 
3300   unsigned MOHiFlag, MOLoFlag;
3301   bool IsPIC = isPositionIndependent();
3302   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3303   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3304   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3305   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3306 }
3307 
3308 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3309                                               SelectionDAG &DAG) const {
3310   if (Subtarget.isAIXABI())
3311     return LowerGlobalTLSAddressAIX(Op, DAG);
3312 
3313   return LowerGlobalTLSAddressLinux(Op, DAG);
3314 }
3315 
3316 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3317                                                     SelectionDAG &DAG) const {
3318   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3319 
3320   if (DAG.getTarget().useEmulatedTLS())
3321     report_fatal_error("Emulated TLS is not yet supported on AIX");
3322 
3323   SDLoc dl(GA);
3324   const GlobalValue *GV = GA->getGlobal();
3325   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3326   bool Is64Bit = Subtarget.isPPC64();
3327   TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3328 
3329   if (Model == TLSModel::LocalExec) {
3330     SDValue VariableOffsetTGA =
3331         DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3332     SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3333     SDValue TLSReg;
3334     if (Is64Bit)
3335       // For local-exec on AIX (64-bit), the sequence that is generated involves
3336       // a load of the variable offset (from the TOC), followed by an add of the
3337       // loaded variable offset to R13 (the thread pointer).
3338       // This code sequence looks like:
3339       //    ld reg1,var[TC](2)
3340       //    add reg2, reg1, r13     // r13 contains the thread pointer
3341       TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3342     else
3343       // For local-exec on AIX (32-bit), the sequence that is generated involves
3344       // loading the variable offset from the TOC, generating a call to
3345       // .__get_tpointer to get the thread pointer (which will be in R3), and
3346       // adding the two together:
3347       //    lwz reg1,var[TC](2)
3348       //    bla .__get_tpointer
3349       //    add reg2, reg1, r3
3350       TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3351     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3352   }
3353 
3354   // The Local-Exec and General-Dynamic TLS models are currently the only
3355   // supported access models. If Local-exec is not possible or specified, all
3356   // GlobalTLSAddress nodes are lowered using the general-dynamic model.
3357   // We need to generate two TOC entries, one for the variable offset, one for
3358   // the region handle. The global address for the TOC entry of the region
3359   // handle is created with the MO_TLSGDM_FLAG flag and the global address
3360   // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3361   SDValue VariableOffsetTGA =
3362       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3363   SDValue RegionHandleTGA =
3364       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3365   SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3366   SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3367   return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3368                      RegionHandle);
3369 }
3370 
3371 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3372                                                       SelectionDAG &DAG) const {
3373   // FIXME: TLS addresses currently use medium model code sequences,
3374   // which is the most useful form.  Eventually support for small and
3375   // large models could be added if users need it, at the cost of
3376   // additional complexity.
3377   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3378   if (DAG.getTarget().useEmulatedTLS())
3379     return LowerToTLSEmulatedModel(GA, DAG);
3380 
3381   SDLoc dl(GA);
3382   const GlobalValue *GV = GA->getGlobal();
3383   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3384   bool is64bit = Subtarget.isPPC64();
3385   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3386   PICLevel::Level picLevel = M->getPICLevel();
3387 
3388   const TargetMachine &TM = getTargetMachine();
3389   TLSModel::Model Model = TM.getTLSModel(GV);
3390 
3391   if (Model == TLSModel::LocalExec) {
3392     if (Subtarget.isUsingPCRelativeCalls()) {
3393       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3394       SDValue TGA = DAG.getTargetGlobalAddress(
3395           GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3396       SDValue MatAddr =
3397           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3398       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3399     }
3400 
3401     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3402                                                PPCII::MO_TPREL_HA);
3403     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3404                                                PPCII::MO_TPREL_LO);
3405     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3406                              : DAG.getRegister(PPC::R2, MVT::i32);
3407 
3408     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3409     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3410   }
3411 
3412   if (Model == TLSModel::InitialExec) {
3413     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3414     SDValue TGA = DAG.getTargetGlobalAddress(
3415         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3416     SDValue TGATLS = DAG.getTargetGlobalAddress(
3417         GV, dl, PtrVT, 0,
3418         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3419     SDValue TPOffset;
3420     if (IsPCRel) {
3421       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3422       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3423                              MachinePointerInfo());
3424     } else {
3425       SDValue GOTPtr;
3426       if (is64bit) {
3427         setUsesTOCBasePtr(DAG);
3428         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3429         GOTPtr =
3430             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3431       } else {
3432         if (!TM.isPositionIndependent())
3433           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3434         else if (picLevel == PICLevel::SmallPIC)
3435           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3436         else
3437           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3438       }
3439       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3440     }
3441     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3442   }
3443 
3444   if (Model == TLSModel::GeneralDynamic) {
3445     if (Subtarget.isUsingPCRelativeCalls()) {
3446       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3447                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3448       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3449     }
3450 
3451     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3452     SDValue GOTPtr;
3453     if (is64bit) {
3454       setUsesTOCBasePtr(DAG);
3455       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3456       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3457                                    GOTReg, TGA);
3458     } else {
3459       if (picLevel == PICLevel::SmallPIC)
3460         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3461       else
3462         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3463     }
3464     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3465                        GOTPtr, TGA, TGA);
3466   }
3467 
3468   if (Model == TLSModel::LocalDynamic) {
3469     if (Subtarget.isUsingPCRelativeCalls()) {
3470       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3471                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3472       SDValue MatPCRel =
3473           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3474       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3475     }
3476 
3477     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3478     SDValue GOTPtr;
3479     if (is64bit) {
3480       setUsesTOCBasePtr(DAG);
3481       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3482       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3483                            GOTReg, TGA);
3484     } else {
3485       if (picLevel == PICLevel::SmallPIC)
3486         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3487       else
3488         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3489     }
3490     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3491                                   PtrVT, GOTPtr, TGA, TGA);
3492     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3493                                       PtrVT, TLSAddr, TGA);
3494     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3495   }
3496 
3497   llvm_unreachable("Unknown TLS model!");
3498 }
3499 
3500 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3501                                               SelectionDAG &DAG) const {
3502   EVT PtrVT = Op.getValueType();
3503   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3504   SDLoc DL(GSDN);
3505   const GlobalValue *GV = GSDN->getGlobal();
3506 
3507   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3508   // The actual address of the GlobalValue is stored in the TOC.
3509   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3510     if (Subtarget.isUsingPCRelativeCalls()) {
3511       EVT Ty = getPointerTy(DAG.getDataLayout());
3512       if (isAccessedAsGotIndirect(Op)) {
3513         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3514                                                 PPCII::MO_PCREL_FLAG |
3515                                                     PPCII::MO_GOT_FLAG);
3516         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3517         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3518                                    MachinePointerInfo());
3519         return Load;
3520       } else {
3521         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3522                                                 PPCII::MO_PCREL_FLAG);
3523         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3524       }
3525     }
3526     setUsesTOCBasePtr(DAG);
3527     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3528     return getTOCEntry(DAG, DL, GA);
3529   }
3530 
3531   unsigned MOHiFlag, MOLoFlag;
3532   bool IsPIC = isPositionIndependent();
3533   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3534 
3535   if (IsPIC && Subtarget.isSVR4ABI()) {
3536     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3537                                             GSDN->getOffset(),
3538                                             PPCII::MO_PIC_FLAG);
3539     return getTOCEntry(DAG, DL, GA);
3540   }
3541 
3542   SDValue GAHi =
3543     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3544   SDValue GALo =
3545     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3546 
3547   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3548 }
3549 
3550 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3551   bool IsStrict = Op->isStrictFPOpcode();
3552   ISD::CondCode CC =
3553       cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3554   SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3555   SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3556   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3557   EVT LHSVT = LHS.getValueType();
3558   SDLoc dl(Op);
3559 
3560   // Soften the setcc with libcall if it is fp128.
3561   if (LHSVT == MVT::f128) {
3562     assert(!Subtarget.hasP9Vector() &&
3563            "SETCC for f128 is already legal under Power9!");
3564     softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3565                         Op->getOpcode() == ISD::STRICT_FSETCCS);
3566     if (RHS.getNode())
3567       LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3568                         DAG.getCondCode(CC));
3569     if (IsStrict)
3570       return DAG.getMergeValues({LHS, Chain}, dl);
3571     return LHS;
3572   }
3573 
3574   assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3575 
3576   if (Op.getValueType() == MVT::v2i64) {
3577     // When the operands themselves are v2i64 values, we need to do something
3578     // special because VSX has no underlying comparison operations for these.
3579     if (LHS.getValueType() == MVT::v2i64) {
3580       // Equality can be handled by casting to the legal type for Altivec
3581       // comparisons, everything else needs to be expanded.
3582       if (CC != ISD::SETEQ && CC != ISD::SETNE)
3583         return SDValue();
3584       SDValue SetCC32 = DAG.getSetCC(
3585           dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3586           DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3587       int ShuffV[] = {1, 0, 3, 2};
3588       SDValue Shuff =
3589           DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3590       return DAG.getBitcast(MVT::v2i64,
3591                             DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3592                                         dl, MVT::v4i32, Shuff, SetCC32));
3593     }
3594 
3595     // We handle most of these in the usual way.
3596     return Op;
3597   }
3598 
3599   // If we're comparing for equality to zero, expose the fact that this is
3600   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3601   // fold the new nodes.
3602   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3603     return V;
3604 
3605   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3606     // Leave comparisons against 0 and -1 alone for now, since they're usually
3607     // optimized.  FIXME: revisit this when we can custom lower all setcc
3608     // optimizations.
3609     if (C->isAllOnes() || C->isZero())
3610       return SDValue();
3611   }
3612 
3613   // If we have an integer seteq/setne, turn it into a compare against zero
3614   // by xor'ing the rhs with the lhs, which is faster than setting a
3615   // condition register, reading it back out, and masking the correct bit.  The
3616   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3617   // the result to other bit-twiddling opportunities.
3618   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3619     EVT VT = Op.getValueType();
3620     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3621     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3622   }
3623   return SDValue();
3624 }
3625 
3626 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3627   SDNode *Node = Op.getNode();
3628   EVT VT = Node->getValueType(0);
3629   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3630   SDValue InChain = Node->getOperand(0);
3631   SDValue VAListPtr = Node->getOperand(1);
3632   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3633   SDLoc dl(Node);
3634 
3635   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3636 
3637   // gpr_index
3638   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3639                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3640   InChain = GprIndex.getValue(1);
3641 
3642   if (VT == MVT::i64) {
3643     // Check if GprIndex is even
3644     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3645                                  DAG.getConstant(1, dl, MVT::i32));
3646     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3647                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3648     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3649                                           DAG.getConstant(1, dl, MVT::i32));
3650     // Align GprIndex to be even if it isn't
3651     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3652                            GprIndex);
3653   }
3654 
3655   // fpr index is 1 byte after gpr
3656   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3657                                DAG.getConstant(1, dl, MVT::i32));
3658 
3659   // fpr
3660   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3661                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3662   InChain = FprIndex.getValue(1);
3663 
3664   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3665                                        DAG.getConstant(8, dl, MVT::i32));
3666 
3667   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3668                                         DAG.getConstant(4, dl, MVT::i32));
3669 
3670   // areas
3671   SDValue OverflowArea =
3672       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3673   InChain = OverflowArea.getValue(1);
3674 
3675   SDValue RegSaveArea =
3676       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3677   InChain = RegSaveArea.getValue(1);
3678 
3679   // select overflow_area if index > 8
3680   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3681                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3682 
3683   // adjustment constant gpr_index * 4/8
3684   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3685                                     VT.isInteger() ? GprIndex : FprIndex,
3686                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3687                                                     MVT::i32));
3688 
3689   // OurReg = RegSaveArea + RegConstant
3690   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3691                                RegConstant);
3692 
3693   // Floating types are 32 bytes into RegSaveArea
3694   if (VT.isFloatingPoint())
3695     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3696                          DAG.getConstant(32, dl, MVT::i32));
3697 
3698   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3699   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3700                                    VT.isInteger() ? GprIndex : FprIndex,
3701                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3702                                                    MVT::i32));
3703 
3704   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3705                               VT.isInteger() ? VAListPtr : FprPtr,
3706                               MachinePointerInfo(SV), MVT::i8);
3707 
3708   // determine if we should load from reg_save_area or overflow_area
3709   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3710 
3711   // increase overflow_area by 4/8 if gpr/fpr > 8
3712   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3713                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3714                                           dl, MVT::i32));
3715 
3716   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3717                              OverflowAreaPlusN);
3718 
3719   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3720                               MachinePointerInfo(), MVT::i32);
3721 
3722   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3723 }
3724 
3725 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3726   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3727 
3728   // We have to copy the entire va_list struct:
3729   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3730   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3731                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3732                        false, true, false, MachinePointerInfo(),
3733                        MachinePointerInfo());
3734 }
3735 
3736 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3737                                                   SelectionDAG &DAG) const {
3738   if (Subtarget.isAIXABI())
3739     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3740 
3741   return Op.getOperand(0);
3742 }
3743 
3744 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3745   MachineFunction &MF = DAG.getMachineFunction();
3746   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3747 
3748   assert((Op.getOpcode() == ISD::INLINEASM ||
3749           Op.getOpcode() == ISD::INLINEASM_BR) &&
3750          "Expecting Inline ASM node.");
3751 
3752   // If an LR store is already known to be required then there is not point in
3753   // checking this ASM as well.
3754   if (MFI.isLRStoreRequired())
3755     return Op;
3756 
3757   // Inline ASM nodes have an optional last operand that is an incoming Flag of
3758   // type MVT::Glue. We want to ignore this last operand if that is the case.
3759   unsigned NumOps = Op.getNumOperands();
3760   if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3761     --NumOps;
3762 
3763   // Check all operands that may contain the LR.
3764   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3765     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3766     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3767     ++i; // Skip the ID value.
3768 
3769     switch (InlineAsm::getKind(Flags)) {
3770     default:
3771       llvm_unreachable("Bad flags!");
3772     case InlineAsm::Kind_RegUse:
3773     case InlineAsm::Kind_Imm:
3774     case InlineAsm::Kind_Mem:
3775       i += NumVals;
3776       break;
3777     case InlineAsm::Kind_Clobber:
3778     case InlineAsm::Kind_RegDef:
3779     case InlineAsm::Kind_RegDefEarlyClobber: {
3780       for (; NumVals; --NumVals, ++i) {
3781         Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3782         if (Reg != PPC::LR && Reg != PPC::LR8)
3783           continue;
3784         MFI.setLRStoreRequired();
3785         return Op;
3786       }
3787       break;
3788     }
3789     }
3790   }
3791 
3792   return Op;
3793 }
3794 
3795 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3796                                                 SelectionDAG &DAG) const {
3797   if (Subtarget.isAIXABI())
3798     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3799 
3800   SDValue Chain = Op.getOperand(0);
3801   SDValue Trmp = Op.getOperand(1); // trampoline
3802   SDValue FPtr = Op.getOperand(2); // nested function
3803   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3804   SDLoc dl(Op);
3805 
3806   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3807   bool isPPC64 = (PtrVT == MVT::i64);
3808   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3809 
3810   TargetLowering::ArgListTy Args;
3811   TargetLowering::ArgListEntry Entry;
3812 
3813   Entry.Ty = IntPtrTy;
3814   Entry.Node = Trmp; Args.push_back(Entry);
3815 
3816   // TrampSize == (isPPC64 ? 48 : 40);
3817   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3818                                isPPC64 ? MVT::i64 : MVT::i32);
3819   Args.push_back(Entry);
3820 
3821   Entry.Node = FPtr; Args.push_back(Entry);
3822   Entry.Node = Nest; Args.push_back(Entry);
3823 
3824   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3825   TargetLowering::CallLoweringInfo CLI(DAG);
3826   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3827       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3828       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3829 
3830   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3831   return CallResult.second;
3832 }
3833 
3834 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3835   MachineFunction &MF = DAG.getMachineFunction();
3836   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3837   EVT PtrVT = getPointerTy(MF.getDataLayout());
3838 
3839   SDLoc dl(Op);
3840 
3841   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3842     // vastart just stores the address of the VarArgsFrameIndex slot into the
3843     // memory location argument.
3844     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3845     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3846     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3847                         MachinePointerInfo(SV));
3848   }
3849 
3850   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3851   // We suppose the given va_list is already allocated.
3852   //
3853   // typedef struct {
3854   //  char gpr;     /* index into the array of 8 GPRs
3855   //                 * stored in the register save area
3856   //                 * gpr=0 corresponds to r3,
3857   //                 * gpr=1 to r4, etc.
3858   //                 */
3859   //  char fpr;     /* index into the array of 8 FPRs
3860   //                 * stored in the register save area
3861   //                 * fpr=0 corresponds to f1,
3862   //                 * fpr=1 to f2, etc.
3863   //                 */
3864   //  char *overflow_arg_area;
3865   //                /* location on stack that holds
3866   //                 * the next overflow argument
3867   //                 */
3868   //  char *reg_save_area;
3869   //               /* where r3:r10 and f1:f8 (if saved)
3870   //                * are stored
3871   //                */
3872   // } va_list[1];
3873 
3874   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3875   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3876   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3877                                             PtrVT);
3878   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3879                                  PtrVT);
3880 
3881   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3882   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3883 
3884   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3885   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3886 
3887   uint64_t FPROffset = 1;
3888   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3889 
3890   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3891 
3892   // Store first byte : number of int regs
3893   SDValue firstStore =
3894       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3895                         MachinePointerInfo(SV), MVT::i8);
3896   uint64_t nextOffset = FPROffset;
3897   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3898                                   ConstFPROffset);
3899 
3900   // Store second byte : number of float regs
3901   SDValue secondStore =
3902       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3903                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3904   nextOffset += StackOffset;
3905   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3906 
3907   // Store second word : arguments given on stack
3908   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3909                                     MachinePointerInfo(SV, nextOffset));
3910   nextOffset += FrameOffset;
3911   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3912 
3913   // Store third word : arguments given in registers
3914   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3915                       MachinePointerInfo(SV, nextOffset));
3916 }
3917 
3918 /// FPR - The set of FP registers that should be allocated for arguments
3919 /// on Darwin and AIX.
3920 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3921                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3922                                 PPC::F11, PPC::F12, PPC::F13};
3923 
3924 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3925 /// the stack.
3926 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3927                                        unsigned PtrByteSize) {
3928   unsigned ArgSize = ArgVT.getStoreSize();
3929   if (Flags.isByVal())
3930     ArgSize = Flags.getByValSize();
3931 
3932   // Round up to multiples of the pointer size, except for array members,
3933   // which are always packed.
3934   if (!Flags.isInConsecutiveRegs())
3935     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3936 
3937   return ArgSize;
3938 }
3939 
3940 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3941 /// on the stack.
3942 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3943                                          ISD::ArgFlagsTy Flags,
3944                                          unsigned PtrByteSize) {
3945   Align Alignment(PtrByteSize);
3946 
3947   // Altivec parameters are padded to a 16 byte boundary.
3948   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3949       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3950       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3951       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3952     Alignment = Align(16);
3953 
3954   // ByVal parameters are aligned as requested.
3955   if (Flags.isByVal()) {
3956     auto BVAlign = Flags.getNonZeroByValAlign();
3957     if (BVAlign > PtrByteSize) {
3958       if (BVAlign.value() % PtrByteSize != 0)
3959         llvm_unreachable(
3960             "ByVal alignment is not a multiple of the pointer size");
3961 
3962       Alignment = BVAlign;
3963     }
3964   }
3965 
3966   // Array members are always packed to their original alignment.
3967   if (Flags.isInConsecutiveRegs()) {
3968     // If the array member was split into multiple registers, the first
3969     // needs to be aligned to the size of the full type.  (Except for
3970     // ppcf128, which is only aligned as its f64 components.)
3971     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3972       Alignment = Align(OrigVT.getStoreSize());
3973     else
3974       Alignment = Align(ArgVT.getStoreSize());
3975   }
3976 
3977   return Alignment;
3978 }
3979 
3980 /// CalculateStackSlotUsed - Return whether this argument will use its
3981 /// stack slot (instead of being passed in registers).  ArgOffset,
3982 /// AvailableFPRs, and AvailableVRs must hold the current argument
3983 /// position, and will be updated to account for this argument.
3984 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3985                                    unsigned PtrByteSize, unsigned LinkageSize,
3986                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3987                                    unsigned &AvailableFPRs,
3988                                    unsigned &AvailableVRs) {
3989   bool UseMemory = false;
3990 
3991   // Respect alignment of argument on the stack.
3992   Align Alignment =
3993       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3994   ArgOffset = alignTo(ArgOffset, Alignment);
3995   // If there's no space left in the argument save area, we must
3996   // use memory (this check also catches zero-sized arguments).
3997   if (ArgOffset >= LinkageSize + ParamAreaSize)
3998     UseMemory = true;
3999 
4000   // Allocate argument on the stack.
4001   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4002   if (Flags.isInConsecutiveRegsLast())
4003     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4004   // If we overran the argument save area, we must use memory
4005   // (this check catches arguments passed partially in memory)
4006   if (ArgOffset > LinkageSize + ParamAreaSize)
4007     UseMemory = true;
4008 
4009   // However, if the argument is actually passed in an FPR or a VR,
4010   // we don't use memory after all.
4011   if (!Flags.isByVal()) {
4012     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4013       if (AvailableFPRs > 0) {
4014         --AvailableFPRs;
4015         return false;
4016       }
4017     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4018         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4019         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4020         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4021       if (AvailableVRs > 0) {
4022         --AvailableVRs;
4023         return false;
4024       }
4025   }
4026 
4027   return UseMemory;
4028 }
4029 
4030 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
4031 /// ensure minimum alignment required for target.
4032 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4033                                      unsigned NumBytes) {
4034   return alignTo(NumBytes, Lowering->getStackAlign());
4035 }
4036 
4037 SDValue PPCTargetLowering::LowerFormalArguments(
4038     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4039     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4040     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4041   if (Subtarget.isAIXABI())
4042     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4043                                     InVals);
4044   if (Subtarget.is64BitELFABI())
4045     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4046                                        InVals);
4047   assert(Subtarget.is32BitELFABI());
4048   return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4049                                      InVals);
4050 }
4051 
4052 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4053     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4054     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4055     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4056 
4057   // 32-bit SVR4 ABI Stack Frame Layout:
4058   //              +-----------------------------------+
4059   //        +-->  |            Back chain             |
4060   //        |     +-----------------------------------+
4061   //        |     | Floating-point register save area |
4062   //        |     +-----------------------------------+
4063   //        |     |    General register save area     |
4064   //        |     +-----------------------------------+
4065   //        |     |          CR save word             |
4066   //        |     +-----------------------------------+
4067   //        |     |         VRSAVE save word          |
4068   //        |     +-----------------------------------+
4069   //        |     |         Alignment padding         |
4070   //        |     +-----------------------------------+
4071   //        |     |     Vector register save area     |
4072   //        |     +-----------------------------------+
4073   //        |     |       Local variable space        |
4074   //        |     +-----------------------------------+
4075   //        |     |        Parameter list area        |
4076   //        |     +-----------------------------------+
4077   //        |     |           LR save word            |
4078   //        |     +-----------------------------------+
4079   // SP-->  +---  |            Back chain             |
4080   //              +-----------------------------------+
4081   //
4082   // Specifications:
4083   //   System V Application Binary Interface PowerPC Processor Supplement
4084   //   AltiVec Technology Programming Interface Manual
4085 
4086   MachineFunction &MF = DAG.getMachineFunction();
4087   MachineFrameInfo &MFI = MF.getFrameInfo();
4088   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4089 
4090   EVT PtrVT = getPointerTy(MF.getDataLayout());
4091   // Potential tail calls could cause overwriting of argument stack slots.
4092   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4093                        (CallConv == CallingConv::Fast));
4094   const Align PtrAlign(4);
4095 
4096   // Assign locations to all of the incoming arguments.
4097   SmallVector<CCValAssign, 16> ArgLocs;
4098   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4099                  *DAG.getContext());
4100 
4101   // Reserve space for the linkage area on the stack.
4102   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4103   CCInfo.AllocateStack(LinkageSize, PtrAlign);
4104   if (useSoftFloat())
4105     CCInfo.PreAnalyzeFormalArguments(Ins);
4106 
4107   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4108   CCInfo.clearWasPPCF128();
4109 
4110   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4111     CCValAssign &VA = ArgLocs[i];
4112 
4113     // Arguments stored in registers.
4114     if (VA.isRegLoc()) {
4115       const TargetRegisterClass *RC;
4116       EVT ValVT = VA.getValVT();
4117 
4118       switch (ValVT.getSimpleVT().SimpleTy) {
4119         default:
4120           llvm_unreachable("ValVT not supported by formal arguments Lowering");
4121         case MVT::i1:
4122         case MVT::i32:
4123           RC = &PPC::GPRCRegClass;
4124           break;
4125         case MVT::f32:
4126           if (Subtarget.hasP8Vector())
4127             RC = &PPC::VSSRCRegClass;
4128           else if (Subtarget.hasSPE())
4129             RC = &PPC::GPRCRegClass;
4130           else
4131             RC = &PPC::F4RCRegClass;
4132           break;
4133         case MVT::f64:
4134           if (Subtarget.hasVSX())
4135             RC = &PPC::VSFRCRegClass;
4136           else if (Subtarget.hasSPE())
4137             // SPE passes doubles in GPR pairs.
4138             RC = &PPC::GPRCRegClass;
4139           else
4140             RC = &PPC::F8RCRegClass;
4141           break;
4142         case MVT::v16i8:
4143         case MVT::v8i16:
4144         case MVT::v4i32:
4145           RC = &PPC::VRRCRegClass;
4146           break;
4147         case MVT::v4f32:
4148           RC = &PPC::VRRCRegClass;
4149           break;
4150         case MVT::v2f64:
4151         case MVT::v2i64:
4152           RC = &PPC::VRRCRegClass;
4153           break;
4154       }
4155 
4156       SDValue ArgValue;
4157       // Transform the arguments stored in physical registers into
4158       // virtual ones.
4159       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4160         assert(i + 1 < e && "No second half of double precision argument");
4161         Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4162         Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4163         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4164         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4165         if (!Subtarget.isLittleEndian())
4166           std::swap (ArgValueLo, ArgValueHi);
4167         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4168                                ArgValueHi);
4169       } else {
4170         Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4171         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4172                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
4173         if (ValVT == MVT::i1)
4174           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4175       }
4176 
4177       InVals.push_back(ArgValue);
4178     } else {
4179       // Argument stored in memory.
4180       assert(VA.isMemLoc());
4181 
4182       // Get the extended size of the argument type in stack
4183       unsigned ArgSize = VA.getLocVT().getStoreSize();
4184       // Get the actual size of the argument type
4185       unsigned ObjSize = VA.getValVT().getStoreSize();
4186       unsigned ArgOffset = VA.getLocMemOffset();
4187       // Stack objects in PPC32 are right justified.
4188       ArgOffset += ArgSize - ObjSize;
4189       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4190 
4191       // Create load nodes to retrieve arguments from the stack.
4192       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4193       InVals.push_back(
4194           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4195     }
4196   }
4197 
4198   // Assign locations to all of the incoming aggregate by value arguments.
4199   // Aggregates passed by value are stored in the local variable space of the
4200   // caller's stack frame, right above the parameter list area.
4201   SmallVector<CCValAssign, 16> ByValArgLocs;
4202   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4203                       ByValArgLocs, *DAG.getContext());
4204 
4205   // Reserve stack space for the allocations in CCInfo.
4206   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4207 
4208   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4209 
4210   // Area that is at least reserved in the caller of this function.
4211   unsigned MinReservedArea = CCByValInfo.getStackSize();
4212   MinReservedArea = std::max(MinReservedArea, LinkageSize);
4213 
4214   // Set the size that is at least reserved in caller of this function.  Tail
4215   // call optimized function's reserved stack space needs to be aligned so that
4216   // taking the difference between two stack areas will result in an aligned
4217   // stack.
4218   MinReservedArea =
4219       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4220   FuncInfo->setMinReservedArea(MinReservedArea);
4221 
4222   SmallVector<SDValue, 8> MemOps;
4223 
4224   // If the function takes variable number of arguments, make a frame index for
4225   // the start of the first vararg value... for expansion of llvm.va_start.
4226   if (isVarArg) {
4227     static const MCPhysReg GPArgRegs[] = {
4228       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4229       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4230     };
4231     const unsigned NumGPArgRegs = std::size(GPArgRegs);
4232 
4233     static const MCPhysReg FPArgRegs[] = {
4234       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4235       PPC::F8
4236     };
4237     unsigned NumFPArgRegs = std::size(FPArgRegs);
4238 
4239     if (useSoftFloat() || hasSPE())
4240        NumFPArgRegs = 0;
4241 
4242     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4243     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4244 
4245     // Make room for NumGPArgRegs and NumFPArgRegs.
4246     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4247                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4248 
4249     FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4250         PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4251 
4252     FuncInfo->setVarArgsFrameIndex(
4253         MFI.CreateStackObject(Depth, Align(8), false));
4254     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4255 
4256     // The fixed integer arguments of a variadic function are stored to the
4257     // VarArgsFrameIndex on the stack so that they may be loaded by
4258     // dereferencing the result of va_next.
4259     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4260       // Get an existing live-in vreg, or add a new one.
4261       Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4262       if (!VReg)
4263         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4264 
4265       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4266       SDValue Store =
4267           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4268       MemOps.push_back(Store);
4269       // Increment the address by four for the next argument to store
4270       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4271       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4272     }
4273 
4274     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4275     // is set.
4276     // The double arguments are stored to the VarArgsFrameIndex
4277     // on the stack.
4278     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4279       // Get an existing live-in vreg, or add a new one.
4280       Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4281       if (!VReg)
4282         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4283 
4284       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4285       SDValue Store =
4286           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4287       MemOps.push_back(Store);
4288       // Increment the address by eight for the next argument to store
4289       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4290                                          PtrVT);
4291       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4292     }
4293   }
4294 
4295   if (!MemOps.empty())
4296     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4297 
4298   return Chain;
4299 }
4300 
4301 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4302 // value to MVT::i64 and then truncate to the correct register size.
4303 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4304                                              EVT ObjectVT, SelectionDAG &DAG,
4305                                              SDValue ArgVal,
4306                                              const SDLoc &dl) const {
4307   if (Flags.isSExt())
4308     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4309                          DAG.getValueType(ObjectVT));
4310   else if (Flags.isZExt())
4311     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4312                          DAG.getValueType(ObjectVT));
4313 
4314   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4315 }
4316 
4317 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4318     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4319     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4320     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4321   // TODO: add description of PPC stack frame format, or at least some docs.
4322   //
4323   bool isELFv2ABI = Subtarget.isELFv2ABI();
4324   bool isLittleEndian = Subtarget.isLittleEndian();
4325   MachineFunction &MF = DAG.getMachineFunction();
4326   MachineFrameInfo &MFI = MF.getFrameInfo();
4327   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4328 
4329   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4330          "fastcc not supported on varargs functions");
4331 
4332   EVT PtrVT = getPointerTy(MF.getDataLayout());
4333   // Potential tail calls could cause overwriting of argument stack slots.
4334   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4335                        (CallConv == CallingConv::Fast));
4336   unsigned PtrByteSize = 8;
4337   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4338 
4339   static const MCPhysReg GPR[] = {
4340     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4341     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4342   };
4343   static const MCPhysReg VR[] = {
4344     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4345     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4346   };
4347 
4348   const unsigned Num_GPR_Regs = std::size(GPR);
4349   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4350   const unsigned Num_VR_Regs = std::size(VR);
4351 
4352   // Do a first pass over the arguments to determine whether the ABI
4353   // guarantees that our caller has allocated the parameter save area
4354   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4355   // in the ELFv2 ABI, it is true if this is a vararg function or if
4356   // any parameter is located in a stack slot.
4357 
4358   bool HasParameterArea = !isELFv2ABI || isVarArg;
4359   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4360   unsigned NumBytes = LinkageSize;
4361   unsigned AvailableFPRs = Num_FPR_Regs;
4362   unsigned AvailableVRs = Num_VR_Regs;
4363   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4364     if (Ins[i].Flags.isNest())
4365       continue;
4366 
4367     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4368                                PtrByteSize, LinkageSize, ParamAreaSize,
4369                                NumBytes, AvailableFPRs, AvailableVRs))
4370       HasParameterArea = true;
4371   }
4372 
4373   // Add DAG nodes to load the arguments or copy them out of registers.  On
4374   // entry to a function on PPC, the arguments start after the linkage area,
4375   // although the first ones are often in registers.
4376 
4377   unsigned ArgOffset = LinkageSize;
4378   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4379   SmallVector<SDValue, 8> MemOps;
4380   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4381   unsigned CurArgIdx = 0;
4382   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4383     SDValue ArgVal;
4384     bool needsLoad = false;
4385     EVT ObjectVT = Ins[ArgNo].VT;
4386     EVT OrigVT = Ins[ArgNo].ArgVT;
4387     unsigned ObjSize = ObjectVT.getStoreSize();
4388     unsigned ArgSize = ObjSize;
4389     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4390     if (Ins[ArgNo].isOrigArg()) {
4391       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4392       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4393     }
4394     // We re-align the argument offset for each argument, except when using the
4395     // fast calling convention, when we need to make sure we do that only when
4396     // we'll actually use a stack slot.
4397     unsigned CurArgOffset;
4398     Align Alignment;
4399     auto ComputeArgOffset = [&]() {
4400       /* Respect alignment of argument on the stack.  */
4401       Alignment =
4402           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4403       ArgOffset = alignTo(ArgOffset, Alignment);
4404       CurArgOffset = ArgOffset;
4405     };
4406 
4407     if (CallConv != CallingConv::Fast) {
4408       ComputeArgOffset();
4409 
4410       /* Compute GPR index associated with argument offset.  */
4411       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4412       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4413     }
4414 
4415     // FIXME the codegen can be much improved in some cases.
4416     // We do not have to keep everything in memory.
4417     if (Flags.isByVal()) {
4418       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4419 
4420       if (CallConv == CallingConv::Fast)
4421         ComputeArgOffset();
4422 
4423       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4424       ObjSize = Flags.getByValSize();
4425       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4426       // Empty aggregate parameters do not take up registers.  Examples:
4427       //   struct { } a;
4428       //   union  { } b;
4429       //   int c[0];
4430       // etc.  However, we have to provide a place-holder in InVals, so
4431       // pretend we have an 8-byte item at the current address for that
4432       // purpose.
4433       if (!ObjSize) {
4434         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4435         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4436         InVals.push_back(FIN);
4437         continue;
4438       }
4439 
4440       // Create a stack object covering all stack doublewords occupied
4441       // by the argument.  If the argument is (fully or partially) on
4442       // the stack, or if the argument is fully in registers but the
4443       // caller has allocated the parameter save anyway, we can refer
4444       // directly to the caller's stack frame.  Otherwise, create a
4445       // local copy in our own frame.
4446       int FI;
4447       if (HasParameterArea ||
4448           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4449         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4450       else
4451         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4452       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4453 
4454       // Handle aggregates smaller than 8 bytes.
4455       if (ObjSize < PtrByteSize) {
4456         // The value of the object is its address, which differs from the
4457         // address of the enclosing doubleword on big-endian systems.
4458         SDValue Arg = FIN;
4459         if (!isLittleEndian) {
4460           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4461           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4462         }
4463         InVals.push_back(Arg);
4464 
4465         if (GPR_idx != Num_GPR_Regs) {
4466           Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4467           FuncInfo->addLiveInAttr(VReg, Flags);
4468           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4469           EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4470           SDValue Store =
4471               DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4472                                 MachinePointerInfo(&*FuncArg), ObjType);
4473           MemOps.push_back(Store);
4474         }
4475         // Whether we copied from a register or not, advance the offset
4476         // into the parameter save area by a full doubleword.
4477         ArgOffset += PtrByteSize;
4478         continue;
4479       }
4480 
4481       // The value of the object is its address, which is the address of
4482       // its first stack doubleword.
4483       InVals.push_back(FIN);
4484 
4485       // Store whatever pieces of the object are in registers to memory.
4486       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4487         if (GPR_idx == Num_GPR_Regs)
4488           break;
4489 
4490         Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4491         FuncInfo->addLiveInAttr(VReg, Flags);
4492         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4493         SDValue Addr = FIN;
4494         if (j) {
4495           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4496           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4497         }
4498         unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4499         EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4500         SDValue Store =
4501             DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4502                               MachinePointerInfo(&*FuncArg, j), ObjType);
4503         MemOps.push_back(Store);
4504         ++GPR_idx;
4505       }
4506       ArgOffset += ArgSize;
4507       continue;
4508     }
4509 
4510     switch (ObjectVT.getSimpleVT().SimpleTy) {
4511     default: llvm_unreachable("Unhandled argument type!");
4512     case MVT::i1:
4513     case MVT::i32:
4514     case MVT::i64:
4515       if (Flags.isNest()) {
4516         // The 'nest' parameter, if any, is passed in R11.
4517         Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4518         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4519 
4520         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4521           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4522 
4523         break;
4524       }
4525 
4526       // These can be scalar arguments or elements of an integer array type
4527       // passed directly.  Clang may use those instead of "byval" aggregate
4528       // types to avoid forcing arguments to memory unnecessarily.
4529       if (GPR_idx != Num_GPR_Regs) {
4530         Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4531         FuncInfo->addLiveInAttr(VReg, Flags);
4532         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4533 
4534         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4535           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4536           // value to MVT::i64 and then truncate to the correct register size.
4537           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4538       } else {
4539         if (CallConv == CallingConv::Fast)
4540           ComputeArgOffset();
4541 
4542         needsLoad = true;
4543         ArgSize = PtrByteSize;
4544       }
4545       if (CallConv != CallingConv::Fast || needsLoad)
4546         ArgOffset += 8;
4547       break;
4548 
4549     case MVT::f32:
4550     case MVT::f64:
4551       // These can be scalar arguments or elements of a float array type
4552       // passed directly.  The latter are used to implement ELFv2 homogenous
4553       // float aggregates.
4554       if (FPR_idx != Num_FPR_Regs) {
4555         unsigned VReg;
4556 
4557         if (ObjectVT == MVT::f32)
4558           VReg = MF.addLiveIn(FPR[FPR_idx],
4559                               Subtarget.hasP8Vector()
4560                                   ? &PPC::VSSRCRegClass
4561                                   : &PPC::F4RCRegClass);
4562         else
4563           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4564                                                 ? &PPC::VSFRCRegClass
4565                                                 : &PPC::F8RCRegClass);
4566 
4567         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4568         ++FPR_idx;
4569       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4570         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4571         // once we support fp <-> gpr moves.
4572 
4573         // This can only ever happen in the presence of f32 array types,
4574         // since otherwise we never run out of FPRs before running out
4575         // of GPRs.
4576         Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4577         FuncInfo->addLiveInAttr(VReg, Flags);
4578         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4579 
4580         if (ObjectVT == MVT::f32) {
4581           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4582             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4583                                  DAG.getConstant(32, dl, MVT::i32));
4584           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4585         }
4586 
4587         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4588       } else {
4589         if (CallConv == CallingConv::Fast)
4590           ComputeArgOffset();
4591 
4592         needsLoad = true;
4593       }
4594 
4595       // When passing an array of floats, the array occupies consecutive
4596       // space in the argument area; only round up to the next doubleword
4597       // at the end of the array.  Otherwise, each float takes 8 bytes.
4598       if (CallConv != CallingConv::Fast || needsLoad) {
4599         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4600         ArgOffset += ArgSize;
4601         if (Flags.isInConsecutiveRegsLast())
4602           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4603       }
4604       break;
4605     case MVT::v4f32:
4606     case MVT::v4i32:
4607     case MVT::v8i16:
4608     case MVT::v16i8:
4609     case MVT::v2f64:
4610     case MVT::v2i64:
4611     case MVT::v1i128:
4612     case MVT::f128:
4613       // These can be scalar arguments or elements of a vector array type
4614       // passed directly.  The latter are used to implement ELFv2 homogenous
4615       // vector aggregates.
4616       if (VR_idx != Num_VR_Regs) {
4617         Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4618         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4619         ++VR_idx;
4620       } else {
4621         if (CallConv == CallingConv::Fast)
4622           ComputeArgOffset();
4623         needsLoad = true;
4624       }
4625       if (CallConv != CallingConv::Fast || needsLoad)
4626         ArgOffset += 16;
4627       break;
4628     }
4629 
4630     // We need to load the argument to a virtual register if we determined
4631     // above that we ran out of physical registers of the appropriate type.
4632     if (needsLoad) {
4633       if (ObjSize < ArgSize && !isLittleEndian)
4634         CurArgOffset += ArgSize - ObjSize;
4635       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4636       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4637       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4638     }
4639 
4640     InVals.push_back(ArgVal);
4641   }
4642 
4643   // Area that is at least reserved in the caller of this function.
4644   unsigned MinReservedArea;
4645   if (HasParameterArea)
4646     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4647   else
4648     MinReservedArea = LinkageSize;
4649 
4650   // Set the size that is at least reserved in caller of this function.  Tail
4651   // call optimized functions' reserved stack space needs to be aligned so that
4652   // taking the difference between two stack areas will result in an aligned
4653   // stack.
4654   MinReservedArea =
4655       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4656   FuncInfo->setMinReservedArea(MinReservedArea);
4657 
4658   // If the function takes variable number of arguments, make a frame index for
4659   // the start of the first vararg value... for expansion of llvm.va_start.
4660   // On ELFv2ABI spec, it writes:
4661   // C programs that are intended to be *portable* across different compilers
4662   // and architectures must use the header file <stdarg.h> to deal with variable
4663   // argument lists.
4664   if (isVarArg && MFI.hasVAStart()) {
4665     int Depth = ArgOffset;
4666 
4667     FuncInfo->setVarArgsFrameIndex(
4668       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4669     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4670 
4671     // If this function is vararg, store any remaining integer argument regs
4672     // to their spots on the stack so that they may be loaded by dereferencing
4673     // the result of va_next.
4674     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4675          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4676       Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4677       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4678       SDValue Store =
4679           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4680       MemOps.push_back(Store);
4681       // Increment the address by four for the next argument to store
4682       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4683       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4684     }
4685   }
4686 
4687   if (!MemOps.empty())
4688     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4689 
4690   return Chain;
4691 }
4692 
4693 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4694 /// adjusted to accommodate the arguments for the tailcall.
4695 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4696                                    unsigned ParamSize) {
4697 
4698   if (!isTailCall) return 0;
4699 
4700   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4701   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4702   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4703   // Remember only if the new adjustment is bigger.
4704   if (SPDiff < FI->getTailCallSPDelta())
4705     FI->setTailCallSPDelta(SPDiff);
4706 
4707   return SPDiff;
4708 }
4709 
4710 static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4711 
4712 static bool callsShareTOCBase(const Function *Caller,
4713                               const GlobalValue *CalleeGV,
4714                               const TargetMachine &TM) {
4715   // It does not make sense to call callsShareTOCBase() with a caller that
4716   // is PC Relative since PC Relative callers do not have a TOC.
4717 #ifndef NDEBUG
4718   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4719   assert(!STICaller->isUsingPCRelativeCalls() &&
4720          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4721 #endif
4722 
4723   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4724   // don't have enough information to determine if the caller and callee share
4725   // the same  TOC base, so we have to pessimistically assume they don't for
4726   // correctness.
4727   if (!CalleeGV)
4728     return false;
4729 
4730   // If the callee is preemptable, then the static linker will use a plt-stub
4731   // which saves the toc to the stack, and needs a nop after the call
4732   // instruction to convert to a toc-restore.
4733   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), CalleeGV))
4734     return false;
4735 
4736   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4737   // We may need a TOC restore in the situation where the caller requires a
4738   // valid TOC but the callee is PC Relative and does not.
4739   const Function *F = dyn_cast<Function>(CalleeGV);
4740   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4741 
4742   // If we have an Alias we can try to get the function from there.
4743   if (Alias) {
4744     const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4745     F = dyn_cast<Function>(GlobalObj);
4746   }
4747 
4748   // If we still have no valid function pointer we do not have enough
4749   // information to determine if the callee uses PC Relative calls so we must
4750   // assume that it does.
4751   if (!F)
4752     return false;
4753 
4754   // If the callee uses PC Relative we cannot guarantee that the callee won't
4755   // clobber the TOC of the caller and so we must assume that the two
4756   // functions do not share a TOC base.
4757   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4758   if (STICallee->isUsingPCRelativeCalls())
4759     return false;
4760 
4761   // If the GV is not a strong definition then we need to assume it can be
4762   // replaced by another function at link time. The function that replaces
4763   // it may not share the same TOC as the caller since the callee may be
4764   // replaced by a PC Relative version of the same function.
4765   if (!CalleeGV->isStrongDefinitionForLinker())
4766     return false;
4767 
4768   // The medium and large code models are expected to provide a sufficiently
4769   // large TOC to provide all data addressing needs of a module with a
4770   // single TOC.
4771   if (CodeModel::Medium == TM.getCodeModel() ||
4772       CodeModel::Large == TM.getCodeModel())
4773     return true;
4774 
4775   // Any explicitly-specified sections and section prefixes must also match.
4776   // Also, if we're using -ffunction-sections, then each function is always in
4777   // a different section (the same is true for COMDAT functions).
4778   if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4779       Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4780     return false;
4781   if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4782     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4783       return false;
4784   }
4785 
4786   return true;
4787 }
4788 
4789 static bool
4790 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4791                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4792   assert(Subtarget.is64BitELFABI());
4793 
4794   const unsigned PtrByteSize = 8;
4795   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4796 
4797   static const MCPhysReg GPR[] = {
4798     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4799     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4800   };
4801   static const MCPhysReg VR[] = {
4802     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4803     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4804   };
4805 
4806   const unsigned NumGPRs = std::size(GPR);
4807   const unsigned NumFPRs = 13;
4808   const unsigned NumVRs = std::size(VR);
4809   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4810 
4811   unsigned NumBytes = LinkageSize;
4812   unsigned AvailableFPRs = NumFPRs;
4813   unsigned AvailableVRs = NumVRs;
4814 
4815   for (const ISD::OutputArg& Param : Outs) {
4816     if (Param.Flags.isNest()) continue;
4817 
4818     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4819                                LinkageSize, ParamAreaSize, NumBytes,
4820                                AvailableFPRs, AvailableVRs))
4821       return true;
4822   }
4823   return false;
4824 }
4825 
4826 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4827   if (CB.arg_size() != CallerFn->arg_size())
4828     return false;
4829 
4830   auto CalleeArgIter = CB.arg_begin();
4831   auto CalleeArgEnd = CB.arg_end();
4832   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4833 
4834   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4835     const Value* CalleeArg = *CalleeArgIter;
4836     const Value* CallerArg = &(*CallerArgIter);
4837     if (CalleeArg == CallerArg)
4838       continue;
4839 
4840     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4841     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4842     //      }
4843     // 1st argument of callee is undef and has the same type as caller.
4844     if (CalleeArg->getType() == CallerArg->getType() &&
4845         isa<UndefValue>(CalleeArg))
4846       continue;
4847 
4848     return false;
4849   }
4850 
4851   return true;
4852 }
4853 
4854 // Returns true if TCO is possible between the callers and callees
4855 // calling conventions.
4856 static bool
4857 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4858                                     CallingConv::ID CalleeCC) {
4859   // Tail calls are possible with fastcc and ccc.
4860   auto isTailCallableCC  = [] (CallingConv::ID CC){
4861       return  CC == CallingConv::C || CC == CallingConv::Fast;
4862   };
4863   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4864     return false;
4865 
4866   // We can safely tail call both fastcc and ccc callees from a c calling
4867   // convention caller. If the caller is fastcc, we may have less stack space
4868   // than a non-fastcc caller with the same signature so disable tail-calls in
4869   // that case.
4870   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4871 }
4872 
4873 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4874     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4875     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4876     const SmallVectorImpl<ISD::OutputArg> &Outs,
4877     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4878     bool isCalleeExternalSymbol) const {
4879   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4880 
4881   if (DisableSCO && !TailCallOpt) return false;
4882 
4883   // Variadic argument functions are not supported.
4884   if (isVarArg) return false;
4885 
4886   // Check that the calling conventions are compatible for tco.
4887   if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4888     return false;
4889 
4890   // Caller contains any byval parameter is not supported.
4891   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4892     return false;
4893 
4894   // Callee contains any byval parameter is not supported, too.
4895   // Note: This is a quick work around, because in some cases, e.g.
4896   // caller's stack size > callee's stack size, we are still able to apply
4897   // sibling call optimization. For example, gcc is able to do SCO for caller1
4898   // in the following example, but not for caller2.
4899   //   struct test {
4900   //     long int a;
4901   //     char ary[56];
4902   //   } gTest;
4903   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4904   //     b->a = v.a;
4905   //     return 0;
4906   //   }
4907   //   void caller1(struct test a, struct test c, struct test *b) {
4908   //     callee(gTest, b); }
4909   //   void caller2(struct test *b) { callee(gTest, b); }
4910   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4911     return false;
4912 
4913   // If callee and caller use different calling conventions, we cannot pass
4914   // parameters on stack since offsets for the parameter area may be different.
4915   if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4916     return false;
4917 
4918   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4919   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4920   // callee potentially have different TOC bases then we cannot tail call since
4921   // we need to restore the TOC pointer after the call.
4922   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4923   // We cannot guarantee this for indirect calls or calls to external functions.
4924   // When PC-Relative addressing is used, the concept of the TOC is no longer
4925   // applicable so this check is not required.
4926   // Check first for indirect calls.
4927   if (!Subtarget.isUsingPCRelativeCalls() &&
4928       !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
4929     return false;
4930 
4931   // Check if we share the TOC base.
4932   if (!Subtarget.isUsingPCRelativeCalls() &&
4933       !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
4934     return false;
4935 
4936   // TCO allows altering callee ABI, so we don't have to check further.
4937   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4938     return true;
4939 
4940   if (DisableSCO) return false;
4941 
4942   // If callee use the same argument list that caller is using, then we can
4943   // apply SCO on this case. If it is not, then we need to check if callee needs
4944   // stack for passing arguments.
4945   // PC Relative tail calls may not have a CallBase.
4946   // If there is no CallBase we cannot verify if we have the same argument
4947   // list so assume that we don't have the same argument list.
4948   if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
4949       needStackSlotPassParameters(Subtarget, Outs))
4950     return false;
4951   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4952     return false;
4953 
4954   return true;
4955 }
4956 
4957 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4958 /// for tail call optimization. Targets which want to do tail call
4959 /// optimization should implement this function.
4960 bool PPCTargetLowering::IsEligibleForTailCallOptimization(
4961     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4962     CallingConv::ID CallerCC, bool isVarArg,
4963     const SmallVectorImpl<ISD::InputArg> &Ins) const {
4964   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4965     return false;
4966 
4967   // Variable argument functions are not supported.
4968   if (isVarArg)
4969     return false;
4970 
4971   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4972     // Functions containing by val parameters are not supported.
4973     if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4974       return false;
4975 
4976     // Non-PIC/GOT tail calls are supported.
4977     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4978       return true;
4979 
4980     // At the moment we can only do local tail calls (in same module, hidden
4981     // or protected) if we are generating PIC.
4982     if (CalleeGV)
4983       return CalleeGV->hasHiddenVisibility() ||
4984              CalleeGV->hasProtectedVisibility();
4985   }
4986 
4987   return false;
4988 }
4989 
4990 /// isCallCompatibleAddress - Return the immediate to use if the specified
4991 /// 32-bit value is representable in the immediate field of a BxA instruction.
4992 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4993   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4994   if (!C) return nullptr;
4995 
4996   int Addr = C->getZExtValue();
4997   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4998       SignExtend32<26>(Addr) != Addr)
4999     return nullptr;  // Top 6 bits have to be sext of immediate.
5000 
5001   return DAG
5002       .getConstant(
5003           (int)C->getZExtValue() >> 2, SDLoc(Op),
5004           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5005       .getNode();
5006 }
5007 
5008 namespace {
5009 
5010 struct TailCallArgumentInfo {
5011   SDValue Arg;
5012   SDValue FrameIdxOp;
5013   int FrameIdx = 0;
5014 
5015   TailCallArgumentInfo() = default;
5016 };
5017 
5018 } // end anonymous namespace
5019 
5020 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5021 static void StoreTailCallArgumentsToStackSlot(
5022     SelectionDAG &DAG, SDValue Chain,
5023     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5024     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5025   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5026     SDValue Arg = TailCallArgs[i].Arg;
5027     SDValue FIN = TailCallArgs[i].FrameIdxOp;
5028     int FI = TailCallArgs[i].FrameIdx;
5029     // Store relative to framepointer.
5030     MemOpChains.push_back(DAG.getStore(
5031         Chain, dl, Arg, FIN,
5032         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5033   }
5034 }
5035 
5036 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5037 /// the appropriate stack slot for the tail call optimized function call.
5038 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5039                                              SDValue OldRetAddr, SDValue OldFP,
5040                                              int SPDiff, const SDLoc &dl) {
5041   if (SPDiff) {
5042     // Calculate the new stack slot for the return address.
5043     MachineFunction &MF = DAG.getMachineFunction();
5044     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5045     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5046     bool isPPC64 = Subtarget.isPPC64();
5047     int SlotSize = isPPC64 ? 8 : 4;
5048     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5049     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5050                                                          NewRetAddrLoc, true);
5051     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5052     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5053     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5054                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5055   }
5056   return Chain;
5057 }
5058 
5059 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5060 /// the position of the argument.
5061 static void
5062 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5063                          SDValue Arg, int SPDiff, unsigned ArgOffset,
5064                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5065   int Offset = ArgOffset + SPDiff;
5066   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5067   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5068   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5069   SDValue FIN = DAG.getFrameIndex(FI, VT);
5070   TailCallArgumentInfo Info;
5071   Info.Arg = Arg;
5072   Info.FrameIdxOp = FIN;
5073   Info.FrameIdx = FI;
5074   TailCallArguments.push_back(Info);
5075 }
5076 
5077 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5078 /// stack slot. Returns the chain as result and the loaded frame pointers in
5079 /// LROpOut/FPOpout. Used when tail calling.
5080 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5081     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5082     SDValue &FPOpOut, const SDLoc &dl) const {
5083   if (SPDiff) {
5084     // Load the LR and FP stack slot for later adjusting.
5085     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5086     LROpOut = getReturnAddrFrameIndex(DAG);
5087     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5088     Chain = SDValue(LROpOut.getNode(), 1);
5089   }
5090   return Chain;
5091 }
5092 
5093 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5094 /// by "Src" to address "Dst" of size "Size".  Alignment information is
5095 /// specified by the specific parameter attribute. The copy will be passed as
5096 /// a byval function parameter.
5097 /// Sometimes what we are copying is the end of a larger object, the part that
5098 /// does not fit in registers.
5099 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5100                                          SDValue Chain, ISD::ArgFlagsTy Flags,
5101                                          SelectionDAG &DAG, const SDLoc &dl) {
5102   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5103   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5104                        Flags.getNonZeroByValAlign(), false, false, false,
5105                        MachinePointerInfo(), MachinePointerInfo());
5106 }
5107 
5108 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5109 /// tail calls.
5110 static void LowerMemOpCallTo(
5111     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5112     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5113     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5114     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5115   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5116   if (!isTailCall) {
5117     if (isVector) {
5118       SDValue StackPtr;
5119       if (isPPC64)
5120         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5121       else
5122         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5123       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5124                            DAG.getConstant(ArgOffset, dl, PtrVT));
5125     }
5126     MemOpChains.push_back(
5127         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5128     // Calculate and remember argument location.
5129   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5130                                   TailCallArguments);
5131 }
5132 
5133 static void
5134 PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5135                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5136                 SDValue FPOp,
5137                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5138   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5139   // might overwrite each other in case of tail call optimization.
5140   SmallVector<SDValue, 8> MemOpChains2;
5141   // Do not flag preceding copytoreg stuff together with the following stuff.
5142   InGlue = SDValue();
5143   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5144                                     MemOpChains2, dl);
5145   if (!MemOpChains2.empty())
5146     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5147 
5148   // Store the return address to the appropriate stack slot.
5149   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5150 
5151   // Emit callseq_end just before tailcall node.
5152   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5153   InGlue = Chain.getValue(1);
5154 }
5155 
5156 // Is this global address that of a function that can be called by name? (as
5157 // opposed to something that must hold a descriptor for an indirect call).
5158 static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5159   if (GV) {
5160     if (GV->isThreadLocal())
5161       return false;
5162 
5163     return GV->getValueType()->isFunctionTy();
5164   }
5165 
5166   return false;
5167 }
5168 
5169 SDValue PPCTargetLowering::LowerCallResult(
5170     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5171     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5172     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5173   SmallVector<CCValAssign, 16> RVLocs;
5174   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5175                     *DAG.getContext());
5176 
5177   CCRetInfo.AnalyzeCallResult(
5178       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5179                ? RetCC_PPC_Cold
5180                : RetCC_PPC);
5181 
5182   // Copy all of the result registers out of their specified physreg.
5183   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5184     CCValAssign &VA = RVLocs[i];
5185     assert(VA.isRegLoc() && "Can only return in registers!");
5186 
5187     SDValue Val;
5188 
5189     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5190       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5191                                       InGlue);
5192       Chain = Lo.getValue(1);
5193       InGlue = Lo.getValue(2);
5194       VA = RVLocs[++i]; // skip ahead to next loc
5195       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5196                                       InGlue);
5197       Chain = Hi.getValue(1);
5198       InGlue = Hi.getValue(2);
5199       if (!Subtarget.isLittleEndian())
5200         std::swap (Lo, Hi);
5201       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5202     } else {
5203       Val = DAG.getCopyFromReg(Chain, dl,
5204                                VA.getLocReg(), VA.getLocVT(), InGlue);
5205       Chain = Val.getValue(1);
5206       InGlue = Val.getValue(2);
5207     }
5208 
5209     switch (VA.getLocInfo()) {
5210     default: llvm_unreachable("Unknown loc info!");
5211     case CCValAssign::Full: break;
5212     case CCValAssign::AExt:
5213       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5214       break;
5215     case CCValAssign::ZExt:
5216       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5217                         DAG.getValueType(VA.getValVT()));
5218       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5219       break;
5220     case CCValAssign::SExt:
5221       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5222                         DAG.getValueType(VA.getValVT()));
5223       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5224       break;
5225     }
5226 
5227     InVals.push_back(Val);
5228   }
5229 
5230   return Chain;
5231 }
5232 
5233 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5234                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
5235   auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5236   const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5237 
5238   // PatchPoint calls are not indirect.
5239   if (isPatchPoint)
5240     return false;
5241 
5242   if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5243     return false;
5244 
5245   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
5246   // becuase the immediate function pointer points to a descriptor instead of
5247   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5248   // pointer immediate points to the global entry point, while the BLA would
5249   // need to jump to the local entry point (see rL211174).
5250   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5251       isBLACompatibleAddress(Callee, DAG))
5252     return false;
5253 
5254   return true;
5255 }
5256 
5257 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5258 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5259   return Subtarget.isAIXABI() ||
5260          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5261 }
5262 
5263 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5264                               const Function &Caller, const SDValue &Callee,
5265                               const PPCSubtarget &Subtarget,
5266                               const TargetMachine &TM,
5267                               bool IsStrictFPCall = false) {
5268   if (CFlags.IsTailCall)
5269     return PPCISD::TC_RETURN;
5270 
5271   unsigned RetOpc = 0;
5272   // This is a call through a function pointer.
5273   if (CFlags.IsIndirect) {
5274     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5275     // indirect calls. The save of the caller's TOC pointer to the stack will be
5276     // inserted into the DAG as part of call lowering. The restore of the TOC
5277     // pointer is modeled by using a pseudo instruction for the call opcode that
5278     // represents the 2 instruction sequence of an indirect branch and link,
5279     // immediately followed by a load of the TOC pointer from the the stack save
5280     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5281     // as it is not saved or used.
5282     RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5283                                                  : PPCISD::BCTRL;
5284   } else if (Subtarget.isUsingPCRelativeCalls()) {
5285     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5286     RetOpc = PPCISD::CALL_NOTOC;
5287   } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5288     // The ABIs that maintain a TOC pointer accross calls need to have a nop
5289     // immediately following the call instruction if the caller and callee may
5290     // have different TOC bases. At link time if the linker determines the calls
5291     // may not share a TOC base, the call is redirected to a trampoline inserted
5292     // by the linker. The trampoline will (among other things) save the callers
5293     // TOC pointer at an ABI designated offset in the linkage area and the
5294     // linker will rewrite the nop to be a load of the TOC pointer from the
5295     // linkage area into gpr2.
5296     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5297     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5298     RetOpc =
5299         callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5300   } else
5301     RetOpc = PPCISD::CALL;
5302   if (IsStrictFPCall) {
5303     switch (RetOpc) {
5304     default:
5305       llvm_unreachable("Unknown call opcode");
5306     case PPCISD::BCTRL_LOAD_TOC:
5307       RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5308       break;
5309     case PPCISD::BCTRL:
5310       RetOpc = PPCISD::BCTRL_RM;
5311       break;
5312     case PPCISD::CALL_NOTOC:
5313       RetOpc = PPCISD::CALL_NOTOC_RM;
5314       break;
5315     case PPCISD::CALL:
5316       RetOpc = PPCISD::CALL_RM;
5317       break;
5318     case PPCISD::CALL_NOP:
5319       RetOpc = PPCISD::CALL_NOP_RM;
5320       break;
5321     }
5322   }
5323   return RetOpc;
5324 }
5325 
5326 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5327                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5328   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5329     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5330       return SDValue(Dest, 0);
5331 
5332   // Returns true if the callee is local, and false otherwise.
5333   auto isLocalCallee = [&]() {
5334     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5335     const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5336     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5337 
5338     return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5339            !isa_and_nonnull<GlobalIFunc>(GV);
5340   };
5341 
5342   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5343   // a static relocation model causes some versions of GNU LD (2.17.50, at
5344   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5345   // built with secure-PLT.
5346   bool UsePlt =
5347       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5348       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5349 
5350   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5351     const TargetMachine &TM = Subtarget.getTargetMachine();
5352     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5353     MCSymbolXCOFF *S =
5354         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5355 
5356     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5357     return DAG.getMCSymbol(S, PtrVT);
5358   };
5359 
5360   auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5361   const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5362   if (isFunctionGlobalAddress(GV)) {
5363     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5364 
5365     if (Subtarget.isAIXABI()) {
5366       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5367       return getAIXFuncEntryPointSymbolSDNode(GV);
5368     }
5369     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5370                                       UsePlt ? PPCII::MO_PLT : 0);
5371   }
5372 
5373   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5374     const char *SymName = S->getSymbol();
5375     if (Subtarget.isAIXABI()) {
5376       // If there exists a user-declared function whose name is the same as the
5377       // ExternalSymbol's, then we pick up the user-declared version.
5378       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5379       if (const Function *F =
5380               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5381         return getAIXFuncEntryPointSymbolSDNode(F);
5382 
5383       // On AIX, direct function calls reference the symbol for the function's
5384       // entry point, which is named by prepending a "." before the function's
5385       // C-linkage name. A Qualname is returned here because an external
5386       // function entry point is a csect with XTY_ER property.
5387       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5388         auto &Context = DAG.getMachineFunction().getMMI().getContext();
5389         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5390             (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5391             XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5392         return Sec->getQualNameSymbol();
5393       };
5394 
5395       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5396     }
5397     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5398                                        UsePlt ? PPCII::MO_PLT : 0);
5399   }
5400 
5401   // No transformation needed.
5402   assert(Callee.getNode() && "What no callee?");
5403   return Callee;
5404 }
5405 
5406 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5407   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5408          "Expected a CALLSEQ_STARTSDNode.");
5409 
5410   // The last operand is the chain, except when the node has glue. If the node
5411   // has glue, then the last operand is the glue, and the chain is the second
5412   // last operand.
5413   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5414   if (LastValue.getValueType() != MVT::Glue)
5415     return LastValue;
5416 
5417   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5418 }
5419 
5420 // Creates the node that moves a functions address into the count register
5421 // to prepare for an indirect call instruction.
5422 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5423                                 SDValue &Glue, SDValue &Chain,
5424                                 const SDLoc &dl) {
5425   SDValue MTCTROps[] = {Chain, Callee, Glue};
5426   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5427   Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5428                       ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5429   // The glue is the second value produced.
5430   Glue = Chain.getValue(1);
5431 }
5432 
5433 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5434                                           SDValue &Glue, SDValue &Chain,
5435                                           SDValue CallSeqStart,
5436                                           const CallBase *CB, const SDLoc &dl,
5437                                           bool hasNest,
5438                                           const PPCSubtarget &Subtarget) {
5439   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5440   // entry point, but to the function descriptor (the function entry point
5441   // address is part of the function descriptor though).
5442   // The function descriptor is a three doubleword structure with the
5443   // following fields: function entry point, TOC base address and
5444   // environment pointer.
5445   // Thus for a call through a function pointer, the following actions need
5446   // to be performed:
5447   //   1. Save the TOC of the caller in the TOC save area of its stack
5448   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5449   //   2. Load the address of the function entry point from the function
5450   //      descriptor.
5451   //   3. Load the TOC of the callee from the function descriptor into r2.
5452   //   4. Load the environment pointer from the function descriptor into
5453   //      r11.
5454   //   5. Branch to the function entry point address.
5455   //   6. On return of the callee, the TOC of the caller needs to be
5456   //      restored (this is done in FinishCall()).
5457   //
5458   // The loads are scheduled at the beginning of the call sequence, and the
5459   // register copies are flagged together to ensure that no other
5460   // operations can be scheduled in between. E.g. without flagging the
5461   // copies together, a TOC access in the caller could be scheduled between
5462   // the assignment of the callee TOC and the branch to the callee, which leads
5463   // to incorrect code.
5464 
5465   // Start by loading the function address from the descriptor.
5466   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5467   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5468                       ? (MachineMemOperand::MODereferenceable |
5469                          MachineMemOperand::MOInvariant)
5470                       : MachineMemOperand::MONone;
5471 
5472   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5473 
5474   // Registers used in building the DAG.
5475   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5476   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5477 
5478   // Offsets of descriptor members.
5479   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5480   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5481 
5482   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5483   const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5484 
5485   // One load for the functions entry point address.
5486   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5487                                     Alignment, MMOFlags);
5488 
5489   // One for loading the TOC anchor for the module that contains the called
5490   // function.
5491   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5492   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5493   SDValue TOCPtr =
5494       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5495                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5496 
5497   // One for loading the environment pointer.
5498   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5499   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5500   SDValue LoadEnvPtr =
5501       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5502                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5503 
5504 
5505   // Then copy the newly loaded TOC anchor to the TOC pointer.
5506   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5507   Chain = TOCVal.getValue(0);
5508   Glue = TOCVal.getValue(1);
5509 
5510   // If the function call has an explicit 'nest' parameter, it takes the
5511   // place of the environment pointer.
5512   assert((!hasNest || !Subtarget.isAIXABI()) &&
5513          "Nest parameter is not supported on AIX.");
5514   if (!hasNest) {
5515     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5516     Chain = EnvVal.getValue(0);
5517     Glue = EnvVal.getValue(1);
5518   }
5519 
5520   // The rest of the indirect call sequence is the same as the non-descriptor
5521   // DAG.
5522   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5523 }
5524 
5525 static void
5526 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5527                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5528                   SelectionDAG &DAG,
5529                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5530                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5531                   const PPCSubtarget &Subtarget) {
5532   const bool IsPPC64 = Subtarget.isPPC64();
5533   // MVT for a general purpose register.
5534   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5535 
5536   // First operand is always the chain.
5537   Ops.push_back(Chain);
5538 
5539   // If it's a direct call pass the callee as the second operand.
5540   if (!CFlags.IsIndirect)
5541     Ops.push_back(Callee);
5542   else {
5543     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5544 
5545     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5546     // on the stack (this would have been done in `LowerCall_64SVR4` or
5547     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5548     // represents both the indirect branch and a load that restores the TOC
5549     // pointer from the linkage area. The operand for the TOC restore is an add
5550     // of the TOC save offset to the stack pointer. This must be the second
5551     // operand: after the chain input but before any other variadic arguments.
5552     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5553     // saved or used.
5554     if (isTOCSaveRestoreRequired(Subtarget)) {
5555       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5556 
5557       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5558       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5559       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5560       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5561       Ops.push_back(AddTOC);
5562     }
5563 
5564     // Add the register used for the environment pointer.
5565     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5566       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5567                                     RegVT));
5568 
5569 
5570     // Add CTR register as callee so a bctr can be emitted later.
5571     if (CFlags.IsTailCall)
5572       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5573   }
5574 
5575   // If this is a tail call add stack pointer delta.
5576   if (CFlags.IsTailCall)
5577     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5578 
5579   // Add argument registers to the end of the list so that they are known live
5580   // into the call.
5581   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5582     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5583                                   RegsToPass[i].second.getValueType()));
5584 
5585   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5586   // no way to mark dependencies as implicit here.
5587   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5588   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5589        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5590     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5591 
5592   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5593   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5594     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5595 
5596   // Add a register mask operand representing the call-preserved registers.
5597   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5598   const uint32_t *Mask =
5599       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5600   assert(Mask && "Missing call preserved mask for calling convention");
5601   Ops.push_back(DAG.getRegisterMask(Mask));
5602 
5603   // If the glue is valid, it is the last operand.
5604   if (Glue.getNode())
5605     Ops.push_back(Glue);
5606 }
5607 
5608 SDValue PPCTargetLowering::FinishCall(
5609     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5610     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5611     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5612     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5613     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5614 
5615   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5616       Subtarget.isAIXABI())
5617     setUsesTOCBasePtr(DAG);
5618 
5619   unsigned CallOpc =
5620       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5621                     Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5622 
5623   if (!CFlags.IsIndirect)
5624     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5625   else if (Subtarget.usesFunctionDescriptors())
5626     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5627                                   dl, CFlags.HasNest, Subtarget);
5628   else
5629     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5630 
5631   // Build the operand list for the call instruction.
5632   SmallVector<SDValue, 8> Ops;
5633   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5634                     SPDiff, Subtarget);
5635 
5636   // Emit tail call.
5637   if (CFlags.IsTailCall) {
5638     // Indirect tail call when using PC Relative calls do not have the same
5639     // constraints.
5640     assert(((Callee.getOpcode() == ISD::Register &&
5641              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5642             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5643             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5644             isa<ConstantSDNode>(Callee) ||
5645             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5646            "Expecting a global address, external symbol, absolute value, "
5647            "register or an indirect tail call when PC Relative calls are "
5648            "used.");
5649     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5650     assert(CallOpc == PPCISD::TC_RETURN &&
5651            "Unexpected call opcode for a tail call.");
5652     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5653     SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5654     DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5655     return Ret;
5656   }
5657 
5658   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5659   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5660   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5661   Glue = Chain.getValue(1);
5662 
5663   // When performing tail call optimization the callee pops its arguments off
5664   // the stack. Account for this here so these bytes can be pushed back on in
5665   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5666   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5667                          getTargetMachine().Options.GuaranteedTailCallOpt)
5668                             ? NumBytes
5669                             : 0;
5670 
5671   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5672   Glue = Chain.getValue(1);
5673 
5674   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5675                          DAG, InVals);
5676 }
5677 
5678 bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5679   CallingConv::ID CalleeCC = CB->getCallingConv();
5680   const Function *CallerFunc = CB->getCaller();
5681   CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5682   const Function *CalleeFunc = CB->getCalledFunction();
5683   if (!CalleeFunc)
5684     return false;
5685   const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5686 
5687   SmallVector<ISD::OutputArg, 2> Outs;
5688   SmallVector<ISD::InputArg, 2> Ins;
5689 
5690   GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5691                 CalleeFunc->getAttributes(), Outs, *this,
5692                 CalleeFunc->getParent()->getDataLayout());
5693 
5694   return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5695                           CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5696                           false /*isCalleeExternalSymbol*/);
5697 }
5698 
5699 bool PPCTargetLowering::isEligibleForTCO(
5700     const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5701     CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5702     const SmallVectorImpl<ISD::OutputArg> &Outs,
5703     const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5704     bool isCalleeExternalSymbol) const {
5705   if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5706     return false;
5707 
5708   if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5709     return IsEligibleForTailCallOptimization_64SVR4(
5710         CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5711         isCalleeExternalSymbol);
5712   else
5713     return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5714                                              isVarArg, Ins);
5715 }
5716 
5717 SDValue
5718 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5719                              SmallVectorImpl<SDValue> &InVals) const {
5720   SelectionDAG &DAG                     = CLI.DAG;
5721   SDLoc &dl                             = CLI.DL;
5722   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5723   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5724   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5725   SDValue Chain                         = CLI.Chain;
5726   SDValue Callee                        = CLI.Callee;
5727   bool &isTailCall                      = CLI.IsTailCall;
5728   CallingConv::ID CallConv              = CLI.CallConv;
5729   bool isVarArg                         = CLI.IsVarArg;
5730   bool isPatchPoint                     = CLI.IsPatchPoint;
5731   const CallBase *CB                    = CLI.CB;
5732 
5733   if (isTailCall) {
5734     MachineFunction &MF = DAG.getMachineFunction();
5735     CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5736     auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5737     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5738     bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5739 
5740     isTailCall =
5741         isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5742                          &(MF.getFunction()), IsCalleeExternalSymbol);
5743     if (isTailCall) {
5744       ++NumTailCalls;
5745       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5746         ++NumSiblingCalls;
5747 
5748       // PC Relative calls no longer guarantee that the callee is a Global
5749       // Address Node. The callee could be an indirect tail call in which
5750       // case the SDValue for the callee could be a load (to load the address
5751       // of a function pointer) or it may be a register copy (to move the
5752       // address of the callee from a function parameter into a virtual
5753       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5754       assert((Subtarget.isUsingPCRelativeCalls() ||
5755               isa<GlobalAddressSDNode>(Callee)) &&
5756              "Callee should be an llvm::Function object.");
5757 
5758       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5759                         << "\nTCO callee: ");
5760       LLVM_DEBUG(Callee.dump());
5761     }
5762   }
5763 
5764   if (!isTailCall && CB && CB->isMustTailCall())
5765     report_fatal_error("failed to perform tail call elimination on a call "
5766                        "site marked musttail");
5767 
5768   // When long calls (i.e. indirect calls) are always used, calls are always
5769   // made via function pointer. If we have a function name, first translate it
5770   // into a pointer.
5771   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5772       !isTailCall)
5773     Callee = LowerGlobalAddress(Callee, DAG);
5774 
5775   CallFlags CFlags(
5776       CallConv, isTailCall, isVarArg, isPatchPoint,
5777       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5778       // hasNest
5779       Subtarget.is64BitELFABI() &&
5780           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5781       CLI.NoMerge);
5782 
5783   if (Subtarget.isAIXABI())
5784     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5785                          InVals, CB);
5786 
5787   assert(Subtarget.isSVR4ABI());
5788   if (Subtarget.isPPC64())
5789     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5790                             InVals, CB);
5791   return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5792                           InVals, CB);
5793 }
5794 
5795 SDValue PPCTargetLowering::LowerCall_32SVR4(
5796     SDValue Chain, SDValue Callee, CallFlags CFlags,
5797     const SmallVectorImpl<ISD::OutputArg> &Outs,
5798     const SmallVectorImpl<SDValue> &OutVals,
5799     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5800     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5801     const CallBase *CB) const {
5802   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5803   // of the 32-bit SVR4 ABI stack frame layout.
5804 
5805   const CallingConv::ID CallConv = CFlags.CallConv;
5806   const bool IsVarArg = CFlags.IsVarArg;
5807   const bool IsTailCall = CFlags.IsTailCall;
5808 
5809   assert((CallConv == CallingConv::C ||
5810           CallConv == CallingConv::Cold ||
5811           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5812 
5813   const Align PtrAlign(4);
5814 
5815   MachineFunction &MF = DAG.getMachineFunction();
5816 
5817   // Mark this function as potentially containing a function that contains a
5818   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5819   // and restoring the callers stack pointer in this functions epilog. This is
5820   // done because by tail calling the called function might overwrite the value
5821   // in this function's (MF) stack pointer stack slot 0(SP).
5822   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5823       CallConv == CallingConv::Fast)
5824     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5825 
5826   // Count how many bytes are to be pushed on the stack, including the linkage
5827   // area, parameter list area and the part of the local variable space which
5828   // contains copies of aggregates which are passed by value.
5829 
5830   // Assign locations to all of the outgoing arguments.
5831   SmallVector<CCValAssign, 16> ArgLocs;
5832   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5833 
5834   // Reserve space for the linkage area on the stack.
5835   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5836                        PtrAlign);
5837   if (useSoftFloat())
5838     CCInfo.PreAnalyzeCallOperands(Outs);
5839 
5840   if (IsVarArg) {
5841     // Handle fixed and variable vector arguments differently.
5842     // Fixed vector arguments go into registers as long as registers are
5843     // available. Variable vector arguments always go into memory.
5844     unsigned NumArgs = Outs.size();
5845 
5846     for (unsigned i = 0; i != NumArgs; ++i) {
5847       MVT ArgVT = Outs[i].VT;
5848       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5849       bool Result;
5850 
5851       if (Outs[i].IsFixed) {
5852         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5853                                CCInfo);
5854       } else {
5855         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5856                                       ArgFlags, CCInfo);
5857       }
5858 
5859       if (Result) {
5860 #ifndef NDEBUG
5861         errs() << "Call operand #" << i << " has unhandled type "
5862                << ArgVT << "\n";
5863 #endif
5864         llvm_unreachable(nullptr);
5865       }
5866     }
5867   } else {
5868     // All arguments are treated the same.
5869     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5870   }
5871   CCInfo.clearWasPPCF128();
5872 
5873   // Assign locations to all of the outgoing aggregate by value arguments.
5874   SmallVector<CCValAssign, 16> ByValArgLocs;
5875   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5876 
5877   // Reserve stack space for the allocations in CCInfo.
5878   CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5879 
5880   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5881 
5882   // Size of the linkage area, parameter list area and the part of the local
5883   // space variable where copies of aggregates which are passed by value are
5884   // stored.
5885   unsigned NumBytes = CCByValInfo.getStackSize();
5886 
5887   // Calculate by how many bytes the stack has to be adjusted in case of tail
5888   // call optimization.
5889   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5890 
5891   // Adjust the stack pointer for the new arguments...
5892   // These operations are automatically eliminated by the prolog/epilog pass
5893   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5894   SDValue CallSeqStart = Chain;
5895 
5896   // Load the return address and frame pointer so it can be moved somewhere else
5897   // later.
5898   SDValue LROp, FPOp;
5899   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5900 
5901   // Set up a copy of the stack pointer for use loading and storing any
5902   // arguments that may not fit in the registers available for argument
5903   // passing.
5904   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5905 
5906   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5907   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5908   SmallVector<SDValue, 8> MemOpChains;
5909 
5910   bool seenFloatArg = false;
5911   // Walk the register/memloc assignments, inserting copies/loads.
5912   // i - Tracks the index into the list of registers allocated for the call
5913   // RealArgIdx - Tracks the index into the list of actual function arguments
5914   // j - Tracks the index into the list of byval arguments
5915   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5916        i != e;
5917        ++i, ++RealArgIdx) {
5918     CCValAssign &VA = ArgLocs[i];
5919     SDValue Arg = OutVals[RealArgIdx];
5920     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5921 
5922     if (Flags.isByVal()) {
5923       // Argument is an aggregate which is passed by value, thus we need to
5924       // create a copy of it in the local variable space of the current stack
5925       // frame (which is the stack frame of the caller) and pass the address of
5926       // this copy to the callee.
5927       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5928       CCValAssign &ByValVA = ByValArgLocs[j++];
5929       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5930 
5931       // Memory reserved in the local variable space of the callers stack frame.
5932       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5933 
5934       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5935       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5936                            StackPtr, PtrOff);
5937 
5938       // Create a copy of the argument in the local area of the current
5939       // stack frame.
5940       SDValue MemcpyCall =
5941         CreateCopyOfByValArgument(Arg, PtrOff,
5942                                   CallSeqStart.getNode()->getOperand(0),
5943                                   Flags, DAG, dl);
5944 
5945       // This must go outside the CALLSEQ_START..END.
5946       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5947                                                      SDLoc(MemcpyCall));
5948       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5949                              NewCallSeqStart.getNode());
5950       Chain = CallSeqStart = NewCallSeqStart;
5951 
5952       // Pass the address of the aggregate copy on the stack either in a
5953       // physical register or in the parameter list area of the current stack
5954       // frame to the callee.
5955       Arg = PtrOff;
5956     }
5957 
5958     // When useCRBits() is true, there can be i1 arguments.
5959     // It is because getRegisterType(MVT::i1) => MVT::i1,
5960     // and for other integer types getRegisterType() => MVT::i32.
5961     // Extend i1 and ensure callee will get i32.
5962     if (Arg.getValueType() == MVT::i1)
5963       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5964                         dl, MVT::i32, Arg);
5965 
5966     if (VA.isRegLoc()) {
5967       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5968       // Put argument in a physical register.
5969       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5970         bool IsLE = Subtarget.isLittleEndian();
5971         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5972                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5973         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5974         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5975                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5976         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5977                              SVal.getValue(0)));
5978       } else
5979         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5980     } else {
5981       // Put argument in the parameter list area of the current stack frame.
5982       assert(VA.isMemLoc());
5983       unsigned LocMemOffset = VA.getLocMemOffset();
5984 
5985       if (!IsTailCall) {
5986         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5987         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5988                              StackPtr, PtrOff);
5989 
5990         MemOpChains.push_back(
5991             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5992       } else {
5993         // Calculate and remember argument location.
5994         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5995                                  TailCallArguments);
5996       }
5997     }
5998   }
5999 
6000   if (!MemOpChains.empty())
6001     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6002 
6003   // Build a sequence of copy-to-reg nodes chained together with token chain
6004   // and flag operands which copy the outgoing args into the appropriate regs.
6005   SDValue InGlue;
6006   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6007     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6008                              RegsToPass[i].second, InGlue);
6009     InGlue = Chain.getValue(1);
6010   }
6011 
6012   // Set CR bit 6 to true if this is a vararg call with floating args passed in
6013   // registers.
6014   if (IsVarArg) {
6015     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6016     SDValue Ops[] = { Chain, InGlue };
6017 
6018     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6019                         VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6020 
6021     InGlue = Chain.getValue(1);
6022   }
6023 
6024   if (IsTailCall)
6025     PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6026                     TailCallArguments);
6027 
6028   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6029                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6030 }
6031 
6032 // Copy an argument into memory, being careful to do this outside the
6033 // call sequence for the call to which the argument belongs.
6034 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6035     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6036     SelectionDAG &DAG, const SDLoc &dl) const {
6037   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6038                         CallSeqStart.getNode()->getOperand(0),
6039                         Flags, DAG, dl);
6040   // The MEMCPY must go outside the CALLSEQ_START..END.
6041   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6042   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6043                                                  SDLoc(MemcpyCall));
6044   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6045                          NewCallSeqStart.getNode());
6046   return NewCallSeqStart;
6047 }
6048 
6049 SDValue PPCTargetLowering::LowerCall_64SVR4(
6050     SDValue Chain, SDValue Callee, CallFlags CFlags,
6051     const SmallVectorImpl<ISD::OutputArg> &Outs,
6052     const SmallVectorImpl<SDValue> &OutVals,
6053     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6054     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6055     const CallBase *CB) const {
6056   bool isELFv2ABI = Subtarget.isELFv2ABI();
6057   bool isLittleEndian = Subtarget.isLittleEndian();
6058   unsigned NumOps = Outs.size();
6059   bool IsSibCall = false;
6060   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6061 
6062   EVT PtrVT = getPointerTy(DAG.getDataLayout());
6063   unsigned PtrByteSize = 8;
6064 
6065   MachineFunction &MF = DAG.getMachineFunction();
6066 
6067   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6068     IsSibCall = true;
6069 
6070   // Mark this function as potentially containing a function that contains a
6071   // tail call. As a consequence the frame pointer will be used for dynamicalloc
6072   // and restoring the callers stack pointer in this functions epilog. This is
6073   // done because by tail calling the called function might overwrite the value
6074   // in this function's (MF) stack pointer stack slot 0(SP).
6075   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6076     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6077 
6078   assert(!(IsFastCall && CFlags.IsVarArg) &&
6079          "fastcc not supported on varargs functions");
6080 
6081   // Count how many bytes are to be pushed on the stack, including the linkage
6082   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
6083   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6084   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6085   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6086   unsigned NumBytes = LinkageSize;
6087   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6088 
6089   static const MCPhysReg GPR[] = {
6090     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6091     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6092   };
6093   static const MCPhysReg VR[] = {
6094     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6095     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6096   };
6097 
6098   const unsigned NumGPRs = std::size(GPR);
6099   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6100   const unsigned NumVRs = std::size(VR);
6101 
6102   // On ELFv2, we can avoid allocating the parameter area if all the arguments
6103   // can be passed to the callee in registers.
6104   // For the fast calling convention, there is another check below.
6105   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6106   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6107   if (!HasParameterArea) {
6108     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6109     unsigned AvailableFPRs = NumFPRs;
6110     unsigned AvailableVRs = NumVRs;
6111     unsigned NumBytesTmp = NumBytes;
6112     for (unsigned i = 0; i != NumOps; ++i) {
6113       if (Outs[i].Flags.isNest()) continue;
6114       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6115                                  PtrByteSize, LinkageSize, ParamAreaSize,
6116                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
6117         HasParameterArea = true;
6118     }
6119   }
6120 
6121   // When using the fast calling convention, we don't provide backing for
6122   // arguments that will be in registers.
6123   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6124 
6125   // Avoid allocating parameter area for fastcc functions if all the arguments
6126   // can be passed in the registers.
6127   if (IsFastCall)
6128     HasParameterArea = false;
6129 
6130   // Add up all the space actually used.
6131   for (unsigned i = 0; i != NumOps; ++i) {
6132     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6133     EVT ArgVT = Outs[i].VT;
6134     EVT OrigVT = Outs[i].ArgVT;
6135 
6136     if (Flags.isNest())
6137       continue;
6138 
6139     if (IsFastCall) {
6140       if (Flags.isByVal()) {
6141         NumGPRsUsed += (Flags.getByValSize()+7)/8;
6142         if (NumGPRsUsed > NumGPRs)
6143           HasParameterArea = true;
6144       } else {
6145         switch (ArgVT.getSimpleVT().SimpleTy) {
6146         default: llvm_unreachable("Unexpected ValueType for argument!");
6147         case MVT::i1:
6148         case MVT::i32:
6149         case MVT::i64:
6150           if (++NumGPRsUsed <= NumGPRs)
6151             continue;
6152           break;
6153         case MVT::v4i32:
6154         case MVT::v8i16:
6155         case MVT::v16i8:
6156         case MVT::v2f64:
6157         case MVT::v2i64:
6158         case MVT::v1i128:
6159         case MVT::f128:
6160           if (++NumVRsUsed <= NumVRs)
6161             continue;
6162           break;
6163         case MVT::v4f32:
6164           if (++NumVRsUsed <= NumVRs)
6165             continue;
6166           break;
6167         case MVT::f32:
6168         case MVT::f64:
6169           if (++NumFPRsUsed <= NumFPRs)
6170             continue;
6171           break;
6172         }
6173         HasParameterArea = true;
6174       }
6175     }
6176 
6177     /* Respect alignment of argument on the stack.  */
6178     auto Alignement =
6179         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6180     NumBytes = alignTo(NumBytes, Alignement);
6181 
6182     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6183     if (Flags.isInConsecutiveRegsLast())
6184       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6185   }
6186 
6187   unsigned NumBytesActuallyUsed = NumBytes;
6188 
6189   // In the old ELFv1 ABI,
6190   // the prolog code of the callee may store up to 8 GPR argument registers to
6191   // the stack, allowing va_start to index over them in memory if its varargs.
6192   // Because we cannot tell if this is needed on the caller side, we have to
6193   // conservatively assume that it is needed.  As such, make sure we have at
6194   // least enough stack space for the caller to store the 8 GPRs.
6195   // In the ELFv2 ABI, we allocate the parameter area iff a callee
6196   // really requires memory operands, e.g. a vararg function.
6197   if (HasParameterArea)
6198     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6199   else
6200     NumBytes = LinkageSize;
6201 
6202   // Tail call needs the stack to be aligned.
6203   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6204     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6205 
6206   int SPDiff = 0;
6207 
6208   // Calculate by how many bytes the stack has to be adjusted in case of tail
6209   // call optimization.
6210   if (!IsSibCall)
6211     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6212 
6213   // To protect arguments on the stack from being clobbered in a tail call,
6214   // force all the loads to happen before doing any other lowering.
6215   if (CFlags.IsTailCall)
6216     Chain = DAG.getStackArgumentTokenFactor(Chain);
6217 
6218   // Adjust the stack pointer for the new arguments...
6219   // These operations are automatically eliminated by the prolog/epilog pass
6220   if (!IsSibCall)
6221     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6222   SDValue CallSeqStart = Chain;
6223 
6224   // Load the return address and frame pointer so it can be move somewhere else
6225   // later.
6226   SDValue LROp, FPOp;
6227   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6228 
6229   // Set up a copy of the stack pointer for use loading and storing any
6230   // arguments that may not fit in the registers available for argument
6231   // passing.
6232   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6233 
6234   // Figure out which arguments are going to go in registers, and which in
6235   // memory.  Also, if this is a vararg function, floating point operations
6236   // must be stored to our stack, and loaded into integer regs as well, if
6237   // any integer regs are available for argument passing.
6238   unsigned ArgOffset = LinkageSize;
6239 
6240   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6241   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6242 
6243   SmallVector<SDValue, 8> MemOpChains;
6244   for (unsigned i = 0; i != NumOps; ++i) {
6245     SDValue Arg = OutVals[i];
6246     ISD::ArgFlagsTy Flags = Outs[i].Flags;
6247     EVT ArgVT = Outs[i].VT;
6248     EVT OrigVT = Outs[i].ArgVT;
6249 
6250     // PtrOff will be used to store the current argument to the stack if a
6251     // register cannot be found for it.
6252     SDValue PtrOff;
6253 
6254     // We re-align the argument offset for each argument, except when using the
6255     // fast calling convention, when we need to make sure we do that only when
6256     // we'll actually use a stack slot.
6257     auto ComputePtrOff = [&]() {
6258       /* Respect alignment of argument on the stack.  */
6259       auto Alignment =
6260           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6261       ArgOffset = alignTo(ArgOffset, Alignment);
6262 
6263       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6264 
6265       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6266     };
6267 
6268     if (!IsFastCall) {
6269       ComputePtrOff();
6270 
6271       /* Compute GPR index associated with argument offset.  */
6272       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6273       GPR_idx = std::min(GPR_idx, NumGPRs);
6274     }
6275 
6276     // Promote integers to 64-bit values.
6277     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6278       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6279       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6280       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6281     }
6282 
6283     // FIXME memcpy is used way more than necessary.  Correctness first.
6284     // Note: "by value" is code for passing a structure by value, not
6285     // basic types.
6286     if (Flags.isByVal()) {
6287       // Note: Size includes alignment padding, so
6288       //   struct x { short a; char b; }
6289       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
6290       // These are the proper values we need for right-justifying the
6291       // aggregate in a parameter register.
6292       unsigned Size = Flags.getByValSize();
6293 
6294       // An empty aggregate parameter takes up no storage and no
6295       // registers.
6296       if (Size == 0)
6297         continue;
6298 
6299       if (IsFastCall)
6300         ComputePtrOff();
6301 
6302       // All aggregates smaller than 8 bytes must be passed right-justified.
6303       if (Size==1 || Size==2 || Size==4) {
6304         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6305         if (GPR_idx != NumGPRs) {
6306           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6307                                         MachinePointerInfo(), VT);
6308           MemOpChains.push_back(Load.getValue(1));
6309           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6310 
6311           ArgOffset += PtrByteSize;
6312           continue;
6313         }
6314       }
6315 
6316       if (GPR_idx == NumGPRs && Size < 8) {
6317         SDValue AddPtr = PtrOff;
6318         if (!isLittleEndian) {
6319           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6320                                           PtrOff.getValueType());
6321           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6322         }
6323         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6324                                                           CallSeqStart,
6325                                                           Flags, DAG, dl);
6326         ArgOffset += PtrByteSize;
6327         continue;
6328       }
6329       // Copy the object to parameter save area if it can not be entirely passed
6330       // by registers.
6331       // FIXME: we only need to copy the parts which need to be passed in
6332       // parameter save area. For the parts passed by registers, we don't need
6333       // to copy them to the stack although we need to allocate space for them
6334       // in parameter save area.
6335       if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6336         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6337                                                           CallSeqStart,
6338                                                           Flags, DAG, dl);
6339 
6340       // When a register is available, pass a small aggregate right-justified.
6341       if (Size < 8 && GPR_idx != NumGPRs) {
6342         // The easiest way to get this right-justified in a register
6343         // is to copy the structure into the rightmost portion of a
6344         // local variable slot, then load the whole slot into the
6345         // register.
6346         // FIXME: The memcpy seems to produce pretty awful code for
6347         // small aggregates, particularly for packed ones.
6348         // FIXME: It would be preferable to use the slot in the
6349         // parameter save area instead of a new local variable.
6350         SDValue AddPtr = PtrOff;
6351         if (!isLittleEndian) {
6352           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6353           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6354         }
6355         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6356                                                           CallSeqStart,
6357                                                           Flags, DAG, dl);
6358 
6359         // Load the slot into the register.
6360         SDValue Load =
6361             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6362         MemOpChains.push_back(Load.getValue(1));
6363         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6364 
6365         // Done with this argument.
6366         ArgOffset += PtrByteSize;
6367         continue;
6368       }
6369 
6370       // For aggregates larger than PtrByteSize, copy the pieces of the
6371       // object that fit into registers from the parameter save area.
6372       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6373         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6374         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6375         if (GPR_idx != NumGPRs) {
6376           unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6377           EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6378           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6379                                         MachinePointerInfo(), ObjType);
6380 
6381           MemOpChains.push_back(Load.getValue(1));
6382           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6383           ArgOffset += PtrByteSize;
6384         } else {
6385           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6386           break;
6387         }
6388       }
6389       continue;
6390     }
6391 
6392     switch (Arg.getSimpleValueType().SimpleTy) {
6393     default: llvm_unreachable("Unexpected ValueType for argument!");
6394     case MVT::i1:
6395     case MVT::i32:
6396     case MVT::i64:
6397       if (Flags.isNest()) {
6398         // The 'nest' parameter, if any, is passed in R11.
6399         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6400         break;
6401       }
6402 
6403       // These can be scalar arguments or elements of an integer array type
6404       // passed directly.  Clang may use those instead of "byval" aggregate
6405       // types to avoid forcing arguments to memory unnecessarily.
6406       if (GPR_idx != NumGPRs) {
6407         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6408       } else {
6409         if (IsFastCall)
6410           ComputePtrOff();
6411 
6412         assert(HasParameterArea &&
6413                "Parameter area must exist to pass an argument in memory.");
6414         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6415                          true, CFlags.IsTailCall, false, MemOpChains,
6416                          TailCallArguments, dl);
6417         if (IsFastCall)
6418           ArgOffset += PtrByteSize;
6419       }
6420       if (!IsFastCall)
6421         ArgOffset += PtrByteSize;
6422       break;
6423     case MVT::f32:
6424     case MVT::f64: {
6425       // These can be scalar arguments or elements of a float array type
6426       // passed directly.  The latter are used to implement ELFv2 homogenous
6427       // float aggregates.
6428 
6429       // Named arguments go into FPRs first, and once they overflow, the
6430       // remaining arguments go into GPRs and then the parameter save area.
6431       // Unnamed arguments for vararg functions always go to GPRs and
6432       // then the parameter save area.  For now, put all arguments to vararg
6433       // routines always in both locations (FPR *and* GPR or stack slot).
6434       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6435       bool NeededLoad = false;
6436 
6437       // First load the argument into the next available FPR.
6438       if (FPR_idx != NumFPRs)
6439         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6440 
6441       // Next, load the argument into GPR or stack slot if needed.
6442       if (!NeedGPROrStack)
6443         ;
6444       else if (GPR_idx != NumGPRs && !IsFastCall) {
6445         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6446         // once we support fp <-> gpr moves.
6447 
6448         // In the non-vararg case, this can only ever happen in the
6449         // presence of f32 array types, since otherwise we never run
6450         // out of FPRs before running out of GPRs.
6451         SDValue ArgVal;
6452 
6453         // Double values are always passed in a single GPR.
6454         if (Arg.getValueType() != MVT::f32) {
6455           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6456 
6457         // Non-array float values are extended and passed in a GPR.
6458         } else if (!Flags.isInConsecutiveRegs()) {
6459           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6460           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6461 
6462         // If we have an array of floats, we collect every odd element
6463         // together with its predecessor into one GPR.
6464         } else if (ArgOffset % PtrByteSize != 0) {
6465           SDValue Lo, Hi;
6466           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6467           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6468           if (!isLittleEndian)
6469             std::swap(Lo, Hi);
6470           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6471 
6472         // The final element, if even, goes into the first half of a GPR.
6473         } else if (Flags.isInConsecutiveRegsLast()) {
6474           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6475           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6476           if (!isLittleEndian)
6477             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6478                                  DAG.getConstant(32, dl, MVT::i32));
6479 
6480         // Non-final even elements are skipped; they will be handled
6481         // together the with subsequent argument on the next go-around.
6482         } else
6483           ArgVal = SDValue();
6484 
6485         if (ArgVal.getNode())
6486           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6487       } else {
6488         if (IsFastCall)
6489           ComputePtrOff();
6490 
6491         // Single-precision floating-point values are mapped to the
6492         // second (rightmost) word of the stack doubleword.
6493         if (Arg.getValueType() == MVT::f32 &&
6494             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6495           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6496           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6497         }
6498 
6499         assert(HasParameterArea &&
6500                "Parameter area must exist to pass an argument in memory.");
6501         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6502                          true, CFlags.IsTailCall, false, MemOpChains,
6503                          TailCallArguments, dl);
6504 
6505         NeededLoad = true;
6506       }
6507       // When passing an array of floats, the array occupies consecutive
6508       // space in the argument area; only round up to the next doubleword
6509       // at the end of the array.  Otherwise, each float takes 8 bytes.
6510       if (!IsFastCall || NeededLoad) {
6511         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6512                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6513         if (Flags.isInConsecutiveRegsLast())
6514           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6515       }
6516       break;
6517     }
6518     case MVT::v4f32:
6519     case MVT::v4i32:
6520     case MVT::v8i16:
6521     case MVT::v16i8:
6522     case MVT::v2f64:
6523     case MVT::v2i64:
6524     case MVT::v1i128:
6525     case MVT::f128:
6526       // These can be scalar arguments or elements of a vector array type
6527       // passed directly.  The latter are used to implement ELFv2 homogenous
6528       // vector aggregates.
6529 
6530       // For a varargs call, named arguments go into VRs or on the stack as
6531       // usual; unnamed arguments always go to the stack or the corresponding
6532       // GPRs when within range.  For now, we always put the value in both
6533       // locations (or even all three).
6534       if (CFlags.IsVarArg) {
6535         assert(HasParameterArea &&
6536                "Parameter area must exist if we have a varargs call.");
6537         // We could elide this store in the case where the object fits
6538         // entirely in R registers.  Maybe later.
6539         SDValue Store =
6540             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6541         MemOpChains.push_back(Store);
6542         if (VR_idx != NumVRs) {
6543           SDValue Load =
6544               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6545           MemOpChains.push_back(Load.getValue(1));
6546           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6547         }
6548         ArgOffset += 16;
6549         for (unsigned i=0; i<16; i+=PtrByteSize) {
6550           if (GPR_idx == NumGPRs)
6551             break;
6552           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6553                                    DAG.getConstant(i, dl, PtrVT));
6554           SDValue Load =
6555               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6556           MemOpChains.push_back(Load.getValue(1));
6557           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6558         }
6559         break;
6560       }
6561 
6562       // Non-varargs Altivec params go into VRs or on the stack.
6563       if (VR_idx != NumVRs) {
6564         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6565       } else {
6566         if (IsFastCall)
6567           ComputePtrOff();
6568 
6569         assert(HasParameterArea &&
6570                "Parameter area must exist to pass an argument in memory.");
6571         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6572                          true, CFlags.IsTailCall, true, MemOpChains,
6573                          TailCallArguments, dl);
6574         if (IsFastCall)
6575           ArgOffset += 16;
6576       }
6577 
6578       if (!IsFastCall)
6579         ArgOffset += 16;
6580       break;
6581     }
6582   }
6583 
6584   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6585          "mismatch in size of parameter area");
6586   (void)NumBytesActuallyUsed;
6587 
6588   if (!MemOpChains.empty())
6589     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6590 
6591   // Check if this is an indirect call (MTCTR/BCTRL).
6592   // See prepareDescriptorIndirectCall and buildCallOperands for more
6593   // information about calls through function pointers in the 64-bit SVR4 ABI.
6594   if (CFlags.IsIndirect) {
6595     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6596     // caller in the TOC save area.
6597     if (isTOCSaveRestoreRequired(Subtarget)) {
6598       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6599       // Load r2 into a virtual register and store it to the TOC save area.
6600       setUsesTOCBasePtr(DAG);
6601       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6602       // TOC save area offset.
6603       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6604       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6605       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6606       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6607                            MachinePointerInfo::getStack(
6608                                DAG.getMachineFunction(), TOCSaveOffset));
6609     }
6610     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6611     // This does not mean the MTCTR instruction must use R12; it's easier
6612     // to model this as an extra parameter, so do that.
6613     if (isELFv2ABI && !CFlags.IsPatchPoint)
6614       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6615   }
6616 
6617   // Build a sequence of copy-to-reg nodes chained together with token chain
6618   // and flag operands which copy the outgoing args into the appropriate regs.
6619   SDValue InGlue;
6620   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6621     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6622                              RegsToPass[i].second, InGlue);
6623     InGlue = Chain.getValue(1);
6624   }
6625 
6626   if (CFlags.IsTailCall && !IsSibCall)
6627     PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6628                     TailCallArguments);
6629 
6630   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6631                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6632 }
6633 
6634 // Returns true when the shadow of a general purpose argument register
6635 // in the parameter save area is aligned to at least 'RequiredAlign'.
6636 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6637   assert(RequiredAlign.value() <= 16 &&
6638          "Required alignment greater than stack alignment.");
6639   switch (Reg) {
6640   default:
6641     report_fatal_error("called on invalid register.");
6642   case PPC::R5:
6643   case PPC::R9:
6644   case PPC::X3:
6645   case PPC::X5:
6646   case PPC::X7:
6647   case PPC::X9:
6648     // These registers are 16 byte aligned which is the most strict aligment
6649     // we can support.
6650     return true;
6651   case PPC::R3:
6652   case PPC::R7:
6653   case PPC::X4:
6654   case PPC::X6:
6655   case PPC::X8:
6656   case PPC::X10:
6657     // The shadow of these registers in the PSA is 8 byte aligned.
6658     return RequiredAlign <= 8;
6659   case PPC::R4:
6660   case PPC::R6:
6661   case PPC::R8:
6662   case PPC::R10:
6663     return RequiredAlign <= 4;
6664   }
6665 }
6666 
6667 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6668                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6669                    CCState &S) {
6670   AIXCCState &State = static_cast<AIXCCState &>(S);
6671   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6672       State.getMachineFunction().getSubtarget());
6673   const bool IsPPC64 = Subtarget.isPPC64();
6674   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6675   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6676 
6677   if (ValVT == MVT::f128)
6678     report_fatal_error("f128 is unimplemented on AIX.");
6679 
6680   if (ArgFlags.isNest())
6681     report_fatal_error("Nest arguments are unimplemented.");
6682 
6683   static const MCPhysReg GPR_32[] = {// 32-bit registers.
6684                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6685                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6686   static const MCPhysReg GPR_64[] = {// 64-bit registers.
6687                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6688                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6689 
6690   static const MCPhysReg VR[] = {// Vector registers.
6691                                  PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
6692                                  PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
6693                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6694 
6695   if (ArgFlags.isByVal()) {
6696     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6697       report_fatal_error("Pass-by-value arguments with alignment greater than "
6698                          "register width are not supported.");
6699 
6700     const unsigned ByValSize = ArgFlags.getByValSize();
6701 
6702     // An empty aggregate parameter takes up no storage and no registers,
6703     // but needs a MemLoc for a stack slot for the formal arguments side.
6704     if (ByValSize == 0) {
6705       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6706                                        State.getStackSize(), RegVT, LocInfo));
6707       return false;
6708     }
6709 
6710     const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6711     unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6712     for (const unsigned E = Offset + StackSize; Offset < E;
6713          Offset += PtrAlign.value()) {
6714       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6715         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6716       else {
6717         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6718                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6719                                          LocInfo));
6720         break;
6721       }
6722     }
6723     return false;
6724   }
6725 
6726   // Arguments always reserve parameter save area.
6727   switch (ValVT.SimpleTy) {
6728   default:
6729     report_fatal_error("Unhandled value type for argument.");
6730   case MVT::i64:
6731     // i64 arguments should have been split to i32 for PPC32.
6732     assert(IsPPC64 && "PPC32 should have split i64 values.");
6733     [[fallthrough]];
6734   case MVT::i1:
6735   case MVT::i32: {
6736     const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6737     // AIX integer arguments are always passed in register width.
6738     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6739       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6740                                   : CCValAssign::LocInfo::ZExt;
6741     if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6742       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6743     else
6744       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6745 
6746     return false;
6747   }
6748   case MVT::f32:
6749   case MVT::f64: {
6750     // Parameter save area (PSA) is reserved even if the float passes in fpr.
6751     const unsigned StoreSize = LocVT.getStoreSize();
6752     // Floats are always 4-byte aligned in the PSA on AIX.
6753     // This includes f64 in 64-bit mode for ABI compatibility.
6754     const unsigned Offset =
6755         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6756     unsigned FReg = State.AllocateReg(FPR);
6757     if (FReg)
6758       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6759 
6760     // Reserve and initialize GPRs or initialize the PSA as required.
6761     for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6762       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6763         assert(FReg && "An FPR should be available when a GPR is reserved.");
6764         if (State.isVarArg()) {
6765           // Successfully reserved GPRs are only initialized for vararg calls.
6766           // Custom handling is required for:
6767           //   f64 in PPC32 needs to be split into 2 GPRs.
6768           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6769           State.addLoc(
6770               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6771         }
6772       } else {
6773         // If there are insufficient GPRs, the PSA needs to be initialized.
6774         // Initialization occurs even if an FPR was initialized for
6775         // compatibility with the AIX XL compiler. The full memory for the
6776         // argument will be initialized even if a prior word is saved in GPR.
6777         // A custom memLoc is used when the argument also passes in FPR so
6778         // that the callee handling can skip over it easily.
6779         State.addLoc(
6780             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6781                                              LocInfo)
6782                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6783         break;
6784       }
6785     }
6786 
6787     return false;
6788   }
6789   case MVT::v4f32:
6790   case MVT::v4i32:
6791   case MVT::v8i16:
6792   case MVT::v16i8:
6793   case MVT::v2i64:
6794   case MVT::v2f64:
6795   case MVT::v1i128: {
6796     const unsigned VecSize = 16;
6797     const Align VecAlign(VecSize);
6798 
6799     if (!State.isVarArg()) {
6800       // If there are vector registers remaining we don't consume any stack
6801       // space.
6802       if (unsigned VReg = State.AllocateReg(VR)) {
6803         State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6804         return false;
6805       }
6806       // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6807       // might be allocated in the portion of the PSA that is shadowed by the
6808       // GPRs.
6809       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6810       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6811       return false;
6812     }
6813 
6814     const unsigned PtrSize = IsPPC64 ? 8 : 4;
6815     ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6816 
6817     unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6818     // Burn any underaligned registers and their shadowed stack space until
6819     // we reach the required alignment.
6820     while (NextRegIndex != GPRs.size() &&
6821            !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6822       // Shadow allocate register and its stack shadow.
6823       unsigned Reg = State.AllocateReg(GPRs);
6824       State.AllocateStack(PtrSize, PtrAlign);
6825       assert(Reg && "Allocating register unexpectedly failed.");
6826       (void)Reg;
6827       NextRegIndex = State.getFirstUnallocated(GPRs);
6828     }
6829 
6830     // Vectors that are passed as fixed arguments are handled differently.
6831     // They are passed in VRs if any are available (unlike arguments passed
6832     // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
6833     // functions)
6834     if (State.isFixed(ValNo)) {
6835       if (unsigned VReg = State.AllocateReg(VR)) {
6836         State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6837         // Shadow allocate GPRs and stack space even though we pass in a VR.
6838         for (unsigned I = 0; I != VecSize; I += PtrSize)
6839           State.AllocateReg(GPRs);
6840         State.AllocateStack(VecSize, VecAlign);
6841         return false;
6842       }
6843       // No vector registers remain so pass on the stack.
6844       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6845       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6846       return false;
6847     }
6848 
6849     // If all GPRS are consumed then we pass the argument fully on the stack.
6850     if (NextRegIndex == GPRs.size()) {
6851       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6852       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6853       return false;
6854     }
6855 
6856     // Corner case for 32-bit codegen. We have 2 registers to pass the first
6857     // half of the argument, and then need to pass the remaining half on the
6858     // stack.
6859     if (GPRs[NextRegIndex] == PPC::R9) {
6860       const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6861       State.addLoc(
6862           CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6863 
6864       const unsigned FirstReg = State.AllocateReg(PPC::R9);
6865       const unsigned SecondReg = State.AllocateReg(PPC::R10);
6866       assert(FirstReg && SecondReg &&
6867              "Allocating R9 or R10 unexpectedly failed.");
6868       State.addLoc(
6869           CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6870       State.addLoc(
6871           CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6872       return false;
6873     }
6874 
6875     // We have enough GPRs to fully pass the vector argument, and we have
6876     // already consumed any underaligned registers. Start with the custom
6877     // MemLoc and then the custom RegLocs.
6878     const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6879     State.addLoc(
6880         CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6881     for (unsigned I = 0; I != VecSize; I += PtrSize) {
6882       const unsigned Reg = State.AllocateReg(GPRs);
6883       assert(Reg && "Failed to allocated register for vararg vector argument");
6884       State.addLoc(
6885           CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6886     }
6887     return false;
6888   }
6889   }
6890   return true;
6891 }
6892 
6893 // So far, this function is only used by LowerFormalArguments_AIX()
6894 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6895                                                     bool IsPPC64,
6896                                                     bool HasP8Vector,
6897                                                     bool HasVSX) {
6898   assert((IsPPC64 || SVT != MVT::i64) &&
6899          "i64 should have been split for 32-bit codegen.");
6900 
6901   switch (SVT) {
6902   default:
6903     report_fatal_error("Unexpected value type for formal argument");
6904   case MVT::i1:
6905   case MVT::i32:
6906   case MVT::i64:
6907     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6908   case MVT::f32:
6909     return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6910   case MVT::f64:
6911     return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6912   case MVT::v4f32:
6913   case MVT::v4i32:
6914   case MVT::v8i16:
6915   case MVT::v16i8:
6916   case MVT::v2i64:
6917   case MVT::v2f64:
6918   case MVT::v1i128:
6919     return &PPC::VRRCRegClass;
6920   }
6921 }
6922 
6923 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6924                                         SelectionDAG &DAG, SDValue ArgValue,
6925                                         MVT LocVT, const SDLoc &dl) {
6926   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6927   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6928 
6929   if (Flags.isSExt())
6930     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6931                            DAG.getValueType(ValVT));
6932   else if (Flags.isZExt())
6933     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6934                            DAG.getValueType(ValVT));
6935 
6936   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6937 }
6938 
6939 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6940   const unsigned LASize = FL->getLinkageSize();
6941 
6942   if (PPC::GPRCRegClass.contains(Reg)) {
6943     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6944            "Reg must be a valid argument register!");
6945     return LASize + 4 * (Reg - PPC::R3);
6946   }
6947 
6948   if (PPC::G8RCRegClass.contains(Reg)) {
6949     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6950            "Reg must be a valid argument register!");
6951     return LASize + 8 * (Reg - PPC::X3);
6952   }
6953 
6954   llvm_unreachable("Only general purpose registers expected.");
6955 }
6956 
6957 //   AIX ABI Stack Frame Layout:
6958 //
6959 //   Low Memory +--------------------------------------------+
6960 //   SP   +---> | Back chain                                 | ---+
6961 //        |     +--------------------------------------------+    |
6962 //        |     | Saved Condition Register                   |    |
6963 //        |     +--------------------------------------------+    |
6964 //        |     | Saved Linkage Register                     |    |
6965 //        |     +--------------------------------------------+    | Linkage Area
6966 //        |     | Reserved for compilers                     |    |
6967 //        |     +--------------------------------------------+    |
6968 //        |     | Reserved for binders                       |    |
6969 //        |     +--------------------------------------------+    |
6970 //        |     | Saved TOC pointer                          | ---+
6971 //        |     +--------------------------------------------+
6972 //        |     | Parameter save area                        |
6973 //        |     +--------------------------------------------+
6974 //        |     | Alloca space                               |
6975 //        |     +--------------------------------------------+
6976 //        |     | Local variable space                       |
6977 //        |     +--------------------------------------------+
6978 //        |     | Float/int conversion temporary             |
6979 //        |     +--------------------------------------------+
6980 //        |     | Save area for AltiVec registers            |
6981 //        |     +--------------------------------------------+
6982 //        |     | AltiVec alignment padding                  |
6983 //        |     +--------------------------------------------+
6984 //        |     | Save area for VRSAVE register              |
6985 //        |     +--------------------------------------------+
6986 //        |     | Save area for General Purpose registers    |
6987 //        |     +--------------------------------------------+
6988 //        |     | Save area for Floating Point registers     |
6989 //        |     +--------------------------------------------+
6990 //        +---- | Back chain                                 |
6991 // High Memory  +--------------------------------------------+
6992 //
6993 //  Specifications:
6994 //  AIX 7.2 Assembler Language Reference
6995 //  Subroutine linkage convention
6996 
6997 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6998     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6999     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7000     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7001 
7002   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7003           CallConv == CallingConv::Fast) &&
7004          "Unexpected calling convention!");
7005 
7006   if (getTargetMachine().Options.GuaranteedTailCallOpt)
7007     report_fatal_error("Tail call support is unimplemented on AIX.");
7008 
7009   if (useSoftFloat())
7010     report_fatal_error("Soft float support is unimplemented on AIX.");
7011 
7012   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7013 
7014   const bool IsPPC64 = Subtarget.isPPC64();
7015   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7016 
7017   // Assign locations to all of the incoming arguments.
7018   SmallVector<CCValAssign, 16> ArgLocs;
7019   MachineFunction &MF = DAG.getMachineFunction();
7020   MachineFrameInfo &MFI = MF.getFrameInfo();
7021   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7022   AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7023 
7024   const EVT PtrVT = getPointerTy(MF.getDataLayout());
7025   // Reserve space for the linkage area on the stack.
7026   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7027   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7028   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7029 
7030   SmallVector<SDValue, 8> MemOps;
7031 
7032   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7033     CCValAssign &VA = ArgLocs[I++];
7034     MVT LocVT = VA.getLocVT();
7035     MVT ValVT = VA.getValVT();
7036     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7037     // For compatibility with the AIX XL compiler, the float args in the
7038     // parameter save area are initialized even if the argument is available
7039     // in register.  The caller is required to initialize both the register
7040     // and memory, however, the callee can choose to expect it in either.
7041     // The memloc is dismissed here because the argument is retrieved from
7042     // the register.
7043     if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7044       continue;
7045 
7046     auto HandleMemLoc = [&]() {
7047       const unsigned LocSize = LocVT.getStoreSize();
7048       const unsigned ValSize = ValVT.getStoreSize();
7049       assert((ValSize <= LocSize) &&
7050              "Object size is larger than size of MemLoc");
7051       int CurArgOffset = VA.getLocMemOffset();
7052       // Objects are right-justified because AIX is big-endian.
7053       if (LocSize > ValSize)
7054         CurArgOffset += LocSize - ValSize;
7055       // Potential tail calls could cause overwriting of argument stack slots.
7056       const bool IsImmutable =
7057           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7058             (CallConv == CallingConv::Fast));
7059       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7060       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7061       SDValue ArgValue =
7062           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7063       InVals.push_back(ArgValue);
7064     };
7065 
7066     // Vector arguments to VaArg functions are passed both on the stack, and
7067     // in any available GPRs. Load the value from the stack and add the GPRs
7068     // as live ins.
7069     if (VA.isMemLoc() && VA.needsCustom()) {
7070       assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7071       assert(isVarArg && "Only use custom memloc for vararg.");
7072       // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7073       // matching custom RegLocs.
7074       const unsigned OriginalValNo = VA.getValNo();
7075       (void)OriginalValNo;
7076 
7077       auto HandleCustomVecRegLoc = [&]() {
7078         assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7079                "Missing custom RegLoc.");
7080         VA = ArgLocs[I++];
7081         assert(VA.getValVT().isVector() &&
7082                "Unexpected Val type for custom RegLoc.");
7083         assert(VA.getValNo() == OriginalValNo &&
7084                "ValNo mismatch between custom MemLoc and RegLoc.");
7085         MVT::SimpleValueType SVT = VA.getLocVT().SimpleTy;
7086         MF.addLiveIn(VA.getLocReg(),
7087                      getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7088                                        Subtarget.hasVSX()));
7089       };
7090 
7091       HandleMemLoc();
7092       // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7093       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7094       // R10.
7095       HandleCustomVecRegLoc();
7096       HandleCustomVecRegLoc();
7097 
7098       // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7099       // we passed the vector in R5, R6, R7 and R8.
7100       if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7101         assert(!IsPPC64 &&
7102                "Only 2 custom RegLocs expected for 64-bit codegen.");
7103         HandleCustomVecRegLoc();
7104         HandleCustomVecRegLoc();
7105       }
7106 
7107       continue;
7108     }
7109 
7110     if (VA.isRegLoc()) {
7111       if (VA.getValVT().isScalarInteger())
7112         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7113       else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7114         switch (VA.getValVT().SimpleTy) {
7115         default:
7116           report_fatal_error("Unhandled value type for argument.");
7117         case MVT::f32:
7118           FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7119           break;
7120         case MVT::f64:
7121           FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7122           break;
7123         }
7124       } else if (VA.getValVT().isVector()) {
7125         switch (VA.getValVT().SimpleTy) {
7126         default:
7127           report_fatal_error("Unhandled value type for argument.");
7128         case MVT::v16i8:
7129           FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7130           break;
7131         case MVT::v8i16:
7132           FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7133           break;
7134         case MVT::v4i32:
7135         case MVT::v2i64:
7136         case MVT::v1i128:
7137           FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7138           break;
7139         case MVT::v4f32:
7140         case MVT::v2f64:
7141           FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7142           break;
7143         }
7144       }
7145     }
7146 
7147     if (Flags.isByVal() && VA.isMemLoc()) {
7148       const unsigned Size =
7149           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7150                   PtrByteSize);
7151       const int FI = MF.getFrameInfo().CreateFixedObject(
7152           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7153           /* IsAliased */ true);
7154       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7155       InVals.push_back(FIN);
7156 
7157       continue;
7158     }
7159 
7160     if (Flags.isByVal()) {
7161       assert(VA.isRegLoc() && "MemLocs should already be handled.");
7162 
7163       const MCPhysReg ArgReg = VA.getLocReg();
7164       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7165 
7166       if (Flags.getNonZeroByValAlign() > PtrByteSize)
7167         report_fatal_error("Over aligned byvals not supported yet.");
7168 
7169       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7170       const int FI = MF.getFrameInfo().CreateFixedObject(
7171           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7172           /* IsAliased */ true);
7173       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7174       InVals.push_back(FIN);
7175 
7176       // Add live ins for all the RegLocs for the same ByVal.
7177       const TargetRegisterClass *RegClass =
7178           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7179 
7180       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7181                                                unsigned Offset) {
7182         const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7183         // Since the callers side has left justified the aggregate in the
7184         // register, we can simply store the entire register into the stack
7185         // slot.
7186         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7187         // The store to the fixedstack object is needed becuase accessing a
7188         // field of the ByVal will use a gep and load. Ideally we will optimize
7189         // to extracting the value from the register directly, and elide the
7190         // stores when the arguments address is not taken, but that will need to
7191         // be future work.
7192         SDValue Store = DAG.getStore(
7193             CopyFrom.getValue(1), dl, CopyFrom,
7194             DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
7195             MachinePointerInfo::getFixedStack(MF, FI, Offset));
7196 
7197         MemOps.push_back(Store);
7198       };
7199 
7200       unsigned Offset = 0;
7201       HandleRegLoc(VA.getLocReg(), Offset);
7202       Offset += PtrByteSize;
7203       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7204            Offset += PtrByteSize) {
7205         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7206                "RegLocs should be for ByVal argument.");
7207 
7208         const CCValAssign RL = ArgLocs[I++];
7209         HandleRegLoc(RL.getLocReg(), Offset);
7210         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7211       }
7212 
7213       if (Offset != StackSize) {
7214         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7215                "Expected MemLoc for remaining bytes.");
7216         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7217         // Consume the MemLoc.The InVal has already been emitted, so nothing
7218         // more needs to be done.
7219         ++I;
7220       }
7221 
7222       continue;
7223     }
7224 
7225     if (VA.isRegLoc() && !VA.needsCustom()) {
7226       MVT::SimpleValueType SVT = ValVT.SimpleTy;
7227       Register VReg =
7228           MF.addLiveIn(VA.getLocReg(),
7229                        getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7230                                          Subtarget.hasVSX()));
7231       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7232       if (ValVT.isScalarInteger() &&
7233           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7234         ArgValue =
7235             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7236       }
7237       InVals.push_back(ArgValue);
7238       continue;
7239     }
7240     if (VA.isMemLoc()) {
7241       HandleMemLoc();
7242       continue;
7243     }
7244   }
7245 
7246   // On AIX a minimum of 8 words is saved to the parameter save area.
7247   const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7248   // Area that is at least reserved in the caller of this function.
7249   unsigned CallerReservedArea = std::max<unsigned>(
7250       CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7251 
7252   // Set the size that is at least reserved in caller of this function. Tail
7253   // call optimized function's reserved stack space needs to be aligned so
7254   // that taking the difference between two stack areas will result in an
7255   // aligned stack.
7256   CallerReservedArea =
7257       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7258   FuncInfo->setMinReservedArea(CallerReservedArea);
7259 
7260   if (isVarArg) {
7261     FuncInfo->setVarArgsFrameIndex(
7262         MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7263     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7264 
7265     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7266                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7267 
7268     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7269                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7270     const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7271 
7272     // The fixed integer arguments of a variadic function are stored to the
7273     // VarArgsFrameIndex on the stack so that they may be loaded by
7274     // dereferencing the result of va_next.
7275     for (unsigned GPRIndex =
7276              (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7277          GPRIndex < NumGPArgRegs; ++GPRIndex) {
7278 
7279       const Register VReg =
7280           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7281                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7282 
7283       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7284       SDValue Store =
7285           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7286       MemOps.push_back(Store);
7287       // Increment the address for the next argument to store.
7288       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7289       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7290     }
7291   }
7292 
7293   if (!MemOps.empty())
7294     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7295 
7296   return Chain;
7297 }
7298 
7299 SDValue PPCTargetLowering::LowerCall_AIX(
7300     SDValue Chain, SDValue Callee, CallFlags CFlags,
7301     const SmallVectorImpl<ISD::OutputArg> &Outs,
7302     const SmallVectorImpl<SDValue> &OutVals,
7303     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7304     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7305     const CallBase *CB) const {
7306   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7307   // AIX ABI stack frame layout.
7308 
7309   assert((CFlags.CallConv == CallingConv::C ||
7310           CFlags.CallConv == CallingConv::Cold ||
7311           CFlags.CallConv == CallingConv::Fast) &&
7312          "Unexpected calling convention!");
7313 
7314   if (CFlags.IsPatchPoint)
7315     report_fatal_error("This call type is unimplemented on AIX.");
7316 
7317   const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7318 
7319   MachineFunction &MF = DAG.getMachineFunction();
7320   SmallVector<CCValAssign, 16> ArgLocs;
7321   AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7322                     *DAG.getContext());
7323 
7324   // Reserve space for the linkage save area (LSA) on the stack.
7325   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7326   //   [SP][CR][LR][2 x reserved][TOC].
7327   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7328   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7329   const bool IsPPC64 = Subtarget.isPPC64();
7330   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7331   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7332   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7333   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7334 
7335   // The prolog code of the callee may store up to 8 GPR argument registers to
7336   // the stack, allowing va_start to index over them in memory if the callee
7337   // is variadic.
7338   // Because we cannot tell if this is needed on the caller side, we have to
7339   // conservatively assume that it is needed.  As such, make sure we have at
7340   // least enough stack space for the caller to store the 8 GPRs.
7341   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7342   const unsigned NumBytes = std::max<unsigned>(
7343       LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7344 
7345   // Adjust the stack pointer for the new arguments...
7346   // These operations are automatically eliminated by the prolog/epilog pass.
7347   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7348   SDValue CallSeqStart = Chain;
7349 
7350   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7351   SmallVector<SDValue, 8> MemOpChains;
7352 
7353   // Set up a copy of the stack pointer for loading and storing any
7354   // arguments that may not fit in the registers available for argument
7355   // passing.
7356   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7357                                    : DAG.getRegister(PPC::R1, MVT::i32);
7358 
7359   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7360     const unsigned ValNo = ArgLocs[I].getValNo();
7361     SDValue Arg = OutVals[ValNo];
7362     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7363 
7364     if (Flags.isByVal()) {
7365       const unsigned ByValSize = Flags.getByValSize();
7366 
7367       // Nothing to do for zero-sized ByVals on the caller side.
7368       if (!ByValSize) {
7369         ++I;
7370         continue;
7371       }
7372 
7373       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7374         return DAG.getExtLoad(
7375             ISD::ZEXTLOAD, dl, PtrVT, Chain,
7376             (LoadOffset != 0)
7377                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7378                 : Arg,
7379             MachinePointerInfo(), VT);
7380       };
7381 
7382       unsigned LoadOffset = 0;
7383 
7384       // Initialize registers, which are fully occupied by the by-val argument.
7385       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7386         SDValue Load = GetLoad(PtrVT, LoadOffset);
7387         MemOpChains.push_back(Load.getValue(1));
7388         LoadOffset += PtrByteSize;
7389         const CCValAssign &ByValVA = ArgLocs[I++];
7390         assert(ByValVA.getValNo() == ValNo &&
7391                "Unexpected location for pass-by-value argument.");
7392         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7393       }
7394 
7395       if (LoadOffset == ByValSize)
7396         continue;
7397 
7398       // There must be one more loc to handle the remainder.
7399       assert(ArgLocs[I].getValNo() == ValNo &&
7400              "Expected additional location for by-value argument.");
7401 
7402       if (ArgLocs[I].isMemLoc()) {
7403         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7404         const CCValAssign &ByValVA = ArgLocs[I++];
7405         ISD::ArgFlagsTy MemcpyFlags = Flags;
7406         // Only memcpy the bytes that don't pass in register.
7407         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7408         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7409             (LoadOffset != 0)
7410                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7411                 : Arg,
7412             DAG.getObjectPtrOffset(dl, StackPtr,
7413                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
7414             CallSeqStart, MemcpyFlags, DAG, dl);
7415         continue;
7416       }
7417 
7418       // Initialize the final register residue.
7419       // Any residue that occupies the final by-val arg register must be
7420       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7421       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7422       // 2 and 1 byte loads.
7423       const unsigned ResidueBytes = ByValSize % PtrByteSize;
7424       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7425              "Unexpected register residue for by-value argument.");
7426       SDValue ResidueVal;
7427       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7428         const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7429         const MVT VT =
7430             N == 1 ? MVT::i8
7431                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7432         SDValue Load = GetLoad(VT, LoadOffset);
7433         MemOpChains.push_back(Load.getValue(1));
7434         LoadOffset += N;
7435         Bytes += N;
7436 
7437         // By-val arguments are passed left-justfied in register.
7438         // Every load here needs to be shifted, otherwise a full register load
7439         // should have been used.
7440         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7441                "Unexpected load emitted during handling of pass-by-value "
7442                "argument.");
7443         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7444         EVT ShiftAmountTy =
7445             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7446         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7447         SDValue ShiftedLoad =
7448             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7449         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7450                                               ShiftedLoad)
7451                                 : ShiftedLoad;
7452       }
7453 
7454       const CCValAssign &ByValVA = ArgLocs[I++];
7455       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7456       continue;
7457     }
7458 
7459     CCValAssign &VA = ArgLocs[I++];
7460     const MVT LocVT = VA.getLocVT();
7461     const MVT ValVT = VA.getValVT();
7462 
7463     switch (VA.getLocInfo()) {
7464     default:
7465       report_fatal_error("Unexpected argument extension type.");
7466     case CCValAssign::Full:
7467       break;
7468     case CCValAssign::ZExt:
7469       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7470       break;
7471     case CCValAssign::SExt:
7472       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7473       break;
7474     }
7475 
7476     if (VA.isRegLoc() && !VA.needsCustom()) {
7477       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7478       continue;
7479     }
7480 
7481     // Vector arguments passed to VarArg functions need custom handling when
7482     // they are passed (at least partially) in GPRs.
7483     if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7484       assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7485       // Store value to its stack slot.
7486       SDValue PtrOff =
7487           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7488       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7489       SDValue Store =
7490           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7491       MemOpChains.push_back(Store);
7492       const unsigned OriginalValNo = VA.getValNo();
7493       // Then load the GPRs from the stack
7494       unsigned LoadOffset = 0;
7495       auto HandleCustomVecRegLoc = [&]() {
7496         assert(I != E && "Unexpected end of CCvalAssigns.");
7497         assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7498                "Expected custom RegLoc.");
7499         CCValAssign RegVA = ArgLocs[I++];
7500         assert(RegVA.getValNo() == OriginalValNo &&
7501                "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7502         SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7503                                   DAG.getConstant(LoadOffset, dl, PtrVT));
7504         SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7505         MemOpChains.push_back(Load.getValue(1));
7506         RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7507         LoadOffset += PtrByteSize;
7508       };
7509 
7510       // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7511       // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7512       // R10.
7513       HandleCustomVecRegLoc();
7514       HandleCustomVecRegLoc();
7515 
7516       if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7517           ArgLocs[I].getValNo() == OriginalValNo) {
7518         assert(!IsPPC64 &&
7519                "Only 2 custom RegLocs expected for 64-bit codegen.");
7520         HandleCustomVecRegLoc();
7521         HandleCustomVecRegLoc();
7522       }
7523 
7524       continue;
7525     }
7526 
7527     if (VA.isMemLoc()) {
7528       SDValue PtrOff =
7529           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7530       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7531       MemOpChains.push_back(
7532           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7533 
7534       continue;
7535     }
7536 
7537     if (!ValVT.isFloatingPoint())
7538       report_fatal_error(
7539           "Unexpected register handling for calling convention.");
7540 
7541     // Custom handling is used for GPR initializations for vararg float
7542     // arguments.
7543     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7544            LocVT.isInteger() &&
7545            "Custom register handling only expected for VarArg.");
7546 
7547     SDValue ArgAsInt =
7548         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7549 
7550     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7551       // f32 in 32-bit GPR
7552       // f64 in 64-bit GPR
7553       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7554     else if (Arg.getValueType().getFixedSizeInBits() <
7555              LocVT.getFixedSizeInBits())
7556       // f32 in 64-bit GPR.
7557       RegsToPass.push_back(std::make_pair(
7558           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7559     else {
7560       // f64 in two 32-bit GPRs
7561       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7562       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7563              "Unexpected custom register for argument!");
7564       CCValAssign &GPR1 = VA;
7565       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7566                                      DAG.getConstant(32, dl, MVT::i8));
7567       RegsToPass.push_back(std::make_pair(
7568           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7569 
7570       if (I != E) {
7571         // If only 1 GPR was available, there will only be one custom GPR and
7572         // the argument will also pass in memory.
7573         CCValAssign &PeekArg = ArgLocs[I];
7574         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7575           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7576           CCValAssign &GPR2 = ArgLocs[I++];
7577           RegsToPass.push_back(std::make_pair(
7578               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7579         }
7580       }
7581     }
7582   }
7583 
7584   if (!MemOpChains.empty())
7585     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7586 
7587   // For indirect calls, we need to save the TOC base to the stack for
7588   // restoration after the call.
7589   if (CFlags.IsIndirect) {
7590     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7591     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7592     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7593     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7594     const unsigned TOCSaveOffset =
7595         Subtarget.getFrameLowering()->getTOCSaveOffset();
7596 
7597     setUsesTOCBasePtr(DAG);
7598     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7599     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7600     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7601     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7602     Chain = DAG.getStore(
7603         Val.getValue(1), dl, Val, AddPtr,
7604         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7605   }
7606 
7607   // Build a sequence of copy-to-reg nodes chained together with token chain
7608   // and flag operands which copy the outgoing args into the appropriate regs.
7609   SDValue InGlue;
7610   for (auto Reg : RegsToPass) {
7611     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7612     InGlue = Chain.getValue(1);
7613   }
7614 
7615   const int SPDiff = 0;
7616   return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7617                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7618 }
7619 
7620 bool
7621 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7622                                   MachineFunction &MF, bool isVarArg,
7623                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7624                                   LLVMContext &Context) const {
7625   SmallVector<CCValAssign, 16> RVLocs;
7626   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7627   return CCInfo.CheckReturn(
7628       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7629                 ? RetCC_PPC_Cold
7630                 : RetCC_PPC);
7631 }
7632 
7633 SDValue
7634 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7635                                bool isVarArg,
7636                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7637                                const SmallVectorImpl<SDValue> &OutVals,
7638                                const SDLoc &dl, SelectionDAG &DAG) const {
7639   SmallVector<CCValAssign, 16> RVLocs;
7640   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7641                  *DAG.getContext());
7642   CCInfo.AnalyzeReturn(Outs,
7643                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7644                            ? RetCC_PPC_Cold
7645                            : RetCC_PPC);
7646 
7647   SDValue Glue;
7648   SmallVector<SDValue, 4> RetOps(1, Chain);
7649 
7650   // Copy the result values into the output registers.
7651   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7652     CCValAssign &VA = RVLocs[i];
7653     assert(VA.isRegLoc() && "Can only return in registers!");
7654 
7655     SDValue Arg = OutVals[RealResIdx];
7656 
7657     switch (VA.getLocInfo()) {
7658     default: llvm_unreachable("Unknown loc info!");
7659     case CCValAssign::Full: break;
7660     case CCValAssign::AExt:
7661       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7662       break;
7663     case CCValAssign::ZExt:
7664       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7665       break;
7666     case CCValAssign::SExt:
7667       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7668       break;
7669     }
7670     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7671       bool isLittleEndian = Subtarget.isLittleEndian();
7672       // Legalize ret f64 -> ret 2 x i32.
7673       SDValue SVal =
7674           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7675                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7676       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7677       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7678       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7679                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7680       Glue = Chain.getValue(1);
7681       VA = RVLocs[++i]; // skip ahead to next loc
7682       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7683     } else
7684       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7685     Glue = Chain.getValue(1);
7686     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7687   }
7688 
7689   RetOps[0] = Chain;  // Update chain.
7690 
7691   // Add the glue if we have it.
7692   if (Glue.getNode())
7693     RetOps.push_back(Glue);
7694 
7695   return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7696 }
7697 
7698 SDValue
7699 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7700                                                 SelectionDAG &DAG) const {
7701   SDLoc dl(Op);
7702 
7703   // Get the correct type for integers.
7704   EVT IntVT = Op.getValueType();
7705 
7706   // Get the inputs.
7707   SDValue Chain = Op.getOperand(0);
7708   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7709   // Build a DYNAREAOFFSET node.
7710   SDValue Ops[2] = {Chain, FPSIdx};
7711   SDVTList VTs = DAG.getVTList(IntVT);
7712   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7713 }
7714 
7715 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7716                                              SelectionDAG &DAG) const {
7717   // When we pop the dynamic allocation we need to restore the SP link.
7718   SDLoc dl(Op);
7719 
7720   // Get the correct type for pointers.
7721   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7722 
7723   // Construct the stack pointer operand.
7724   bool isPPC64 = Subtarget.isPPC64();
7725   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7726   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7727 
7728   // Get the operands for the STACKRESTORE.
7729   SDValue Chain = Op.getOperand(0);
7730   SDValue SaveSP = Op.getOperand(1);
7731 
7732   // Load the old link SP.
7733   SDValue LoadLinkSP =
7734       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7735 
7736   // Restore the stack pointer.
7737   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7738 
7739   // Store the old link SP.
7740   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7741 }
7742 
7743 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7744   MachineFunction &MF = DAG.getMachineFunction();
7745   bool isPPC64 = Subtarget.isPPC64();
7746   EVT PtrVT = getPointerTy(MF.getDataLayout());
7747 
7748   // Get current frame pointer save index.  The users of this index will be
7749   // primarily DYNALLOC instructions.
7750   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7751   int RASI = FI->getReturnAddrSaveIndex();
7752 
7753   // If the frame pointer save index hasn't been defined yet.
7754   if (!RASI) {
7755     // Find out what the fix offset of the frame pointer save area.
7756     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7757     // Allocate the frame index for frame pointer save area.
7758     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7759     // Save the result.
7760     FI->setReturnAddrSaveIndex(RASI);
7761   }
7762   return DAG.getFrameIndex(RASI, PtrVT);
7763 }
7764 
7765 SDValue
7766 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7767   MachineFunction &MF = DAG.getMachineFunction();
7768   bool isPPC64 = Subtarget.isPPC64();
7769   EVT PtrVT = getPointerTy(MF.getDataLayout());
7770 
7771   // Get current frame pointer save index.  The users of this index will be
7772   // primarily DYNALLOC instructions.
7773   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7774   int FPSI = FI->getFramePointerSaveIndex();
7775 
7776   // If the frame pointer save index hasn't been defined yet.
7777   if (!FPSI) {
7778     // Find out what the fix offset of the frame pointer save area.
7779     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7780     // Allocate the frame index for frame pointer save area.
7781     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7782     // Save the result.
7783     FI->setFramePointerSaveIndex(FPSI);
7784   }
7785   return DAG.getFrameIndex(FPSI, PtrVT);
7786 }
7787 
7788 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7789                                                    SelectionDAG &DAG) const {
7790   MachineFunction &MF = DAG.getMachineFunction();
7791   // Get the inputs.
7792   SDValue Chain = Op.getOperand(0);
7793   SDValue Size  = Op.getOperand(1);
7794   SDLoc dl(Op);
7795 
7796   // Get the correct type for pointers.
7797   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7798   // Negate the size.
7799   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7800                                 DAG.getConstant(0, dl, PtrVT), Size);
7801   // Construct a node for the frame pointer save index.
7802   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7803   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7804   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7805   if (hasInlineStackProbe(MF))
7806     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7807   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7808 }
7809 
7810 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7811                                                      SelectionDAG &DAG) const {
7812   MachineFunction &MF = DAG.getMachineFunction();
7813 
7814   bool isPPC64 = Subtarget.isPPC64();
7815   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7816 
7817   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7818   return DAG.getFrameIndex(FI, PtrVT);
7819 }
7820 
7821 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7822                                                SelectionDAG &DAG) const {
7823   SDLoc DL(Op);
7824   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7825                      DAG.getVTList(MVT::i32, MVT::Other),
7826                      Op.getOperand(0), Op.getOperand(1));
7827 }
7828 
7829 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7830                                                 SelectionDAG &DAG) const {
7831   SDLoc DL(Op);
7832   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7833                      Op.getOperand(0), Op.getOperand(1));
7834 }
7835 
7836 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7837   if (Op.getValueType().isVector())
7838     return LowerVectorLoad(Op, DAG);
7839 
7840   assert(Op.getValueType() == MVT::i1 &&
7841          "Custom lowering only for i1 loads");
7842 
7843   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7844 
7845   SDLoc dl(Op);
7846   LoadSDNode *LD = cast<LoadSDNode>(Op);
7847 
7848   SDValue Chain = LD->getChain();
7849   SDValue BasePtr = LD->getBasePtr();
7850   MachineMemOperand *MMO = LD->getMemOperand();
7851 
7852   SDValue NewLD =
7853       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7854                      BasePtr, MVT::i8, MMO);
7855   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7856 
7857   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7858   return DAG.getMergeValues(Ops, dl);
7859 }
7860 
7861 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7862   if (Op.getOperand(1).getValueType().isVector())
7863     return LowerVectorStore(Op, DAG);
7864 
7865   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7866          "Custom lowering only for i1 stores");
7867 
7868   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7869 
7870   SDLoc dl(Op);
7871   StoreSDNode *ST = cast<StoreSDNode>(Op);
7872 
7873   SDValue Chain = ST->getChain();
7874   SDValue BasePtr = ST->getBasePtr();
7875   SDValue Value = ST->getValue();
7876   MachineMemOperand *MMO = ST->getMemOperand();
7877 
7878   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7879                       Value);
7880   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7881 }
7882 
7883 // FIXME: Remove this once the ANDI glue bug is fixed:
7884 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7885   assert(Op.getValueType() == MVT::i1 &&
7886          "Custom lowering only for i1 results");
7887 
7888   SDLoc DL(Op);
7889   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7890 }
7891 
7892 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7893                                                SelectionDAG &DAG) const {
7894 
7895   // Implements a vector truncate that fits in a vector register as a shuffle.
7896   // We want to legalize vector truncates down to where the source fits in
7897   // a vector register (and target is therefore smaller than vector register
7898   // size).  At that point legalization will try to custom lower the sub-legal
7899   // result and get here - where we can contain the truncate as a single target
7900   // operation.
7901 
7902   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7903   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7904   //
7905   // We will implement it for big-endian ordering as this (where x denotes
7906   // undefined):
7907   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7908   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7909   //
7910   // The same operation in little-endian ordering will be:
7911   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7912   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7913 
7914   EVT TrgVT = Op.getValueType();
7915   assert(TrgVT.isVector() && "Vector type expected.");
7916   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7917   EVT EltVT = TrgVT.getVectorElementType();
7918   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7919       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7920       !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
7921     return SDValue();
7922 
7923   SDValue N1 = Op.getOperand(0);
7924   EVT SrcVT = N1.getValueType();
7925   unsigned SrcSize = SrcVT.getSizeInBits();
7926   if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7927       !llvm::has_single_bit<uint32_t>(
7928           SrcVT.getVectorElementType().getSizeInBits()))
7929     return SDValue();
7930   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7931     return SDValue();
7932 
7933   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7934   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7935 
7936   SDLoc DL(Op);
7937   SDValue Op1, Op2;
7938   if (SrcSize == 256) {
7939     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7940     EVT SplitVT =
7941         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7942     unsigned SplitNumElts = SplitVT.getVectorNumElements();
7943     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7944                       DAG.getConstant(0, DL, VecIdxTy));
7945     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7946                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7947   }
7948   else {
7949     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7950     Op2 = DAG.getUNDEF(WideVT);
7951   }
7952 
7953   // First list the elements we want to keep.
7954   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7955   SmallVector<int, 16> ShuffV;
7956   if (Subtarget.isLittleEndian())
7957     for (unsigned i = 0; i < TrgNumElts; ++i)
7958       ShuffV.push_back(i * SizeMult);
7959   else
7960     for (unsigned i = 1; i <= TrgNumElts; ++i)
7961       ShuffV.push_back(i * SizeMult - 1);
7962 
7963   // Populate the remaining elements with undefs.
7964   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7965     // ShuffV.push_back(i + WideNumElts);
7966     ShuffV.push_back(WideNumElts + 1);
7967 
7968   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7969   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7970   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7971 }
7972 
7973 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7974 /// possible.
7975 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7976   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7977   EVT ResVT = Op.getValueType();
7978   EVT CmpVT = Op.getOperand(0).getValueType();
7979   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7980   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
7981   SDLoc dl(Op);
7982 
7983   // Without power9-vector, we don't have native instruction for f128 comparison.
7984   // Following transformation to libcall is needed for setcc:
7985   // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7986   if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7987     SDValue Z = DAG.getSetCC(
7988         dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7989         LHS, RHS, CC);
7990     SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7991     return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7992   }
7993 
7994   // Not FP, or using SPE? Not a fsel.
7995   if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7996       Subtarget.hasSPE())
7997     return Op;
7998 
7999   SDNodeFlags Flags = Op.getNode()->getFlags();
8000 
8001   // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8002   // presence of infinities.
8003   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8004     switch (CC) {
8005     default:
8006       break;
8007     case ISD::SETOGT:
8008     case ISD::SETGT:
8009       return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8010     case ISD::SETOLT:
8011     case ISD::SETLT:
8012       return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8013     }
8014   }
8015 
8016   // We might be able to do better than this under some circumstances, but in
8017   // general, fsel-based lowering of select is a finite-math-only optimization.
8018   // For more information, see section F.3 of the 2.06 ISA specification.
8019   // With ISA 3.0
8020   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8021       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
8022     return Op;
8023 
8024   // If the RHS of the comparison is a 0.0, we don't need to do the
8025   // subtraction at all.
8026   SDValue Sel1;
8027   if (isFloatingPointZero(RHS))
8028     switch (CC) {
8029     default: break;       // SETUO etc aren't handled by fsel.
8030     case ISD::SETNE:
8031       std::swap(TV, FV);
8032       [[fallthrough]];
8033     case ISD::SETEQ:
8034       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8035         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8036       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8037       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8038         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8039       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8040                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8041     case ISD::SETULT:
8042     case ISD::SETLT:
8043       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8044       [[fallthrough]];
8045     case ISD::SETOGE:
8046     case ISD::SETGE:
8047       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8048         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8049       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8050     case ISD::SETUGT:
8051     case ISD::SETGT:
8052       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
8053       [[fallthrough]];
8054     case ISD::SETOLE:
8055     case ISD::SETLE:
8056       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
8057         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8058       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8059                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8060     }
8061 
8062   SDValue Cmp;
8063   switch (CC) {
8064   default: break;       // SETUO etc aren't handled by fsel.
8065   case ISD::SETNE:
8066     std::swap(TV, FV);
8067     [[fallthrough]];
8068   case ISD::SETEQ:
8069     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8070     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8071       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8072     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8073     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
8074       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8075     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8076                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8077   case ISD::SETULT:
8078   case ISD::SETLT:
8079     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8080     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8081       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8082     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8083   case ISD::SETOGE:
8084   case ISD::SETGE:
8085     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8086     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8087       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8088     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8089   case ISD::SETUGT:
8090   case ISD::SETGT:
8091     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8092     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8093       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8094     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8095   case ISD::SETOLE:
8096   case ISD::SETLE:
8097     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8098     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
8099       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8100     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8101   }
8102   return Op;
8103 }
8104 
8105 static unsigned getPPCStrictOpcode(unsigned Opc) {
8106   switch (Opc) {
8107   default:
8108     llvm_unreachable("No strict version of this opcode!");
8109   case PPCISD::FCTIDZ:
8110     return PPCISD::STRICT_FCTIDZ;
8111   case PPCISD::FCTIWZ:
8112     return PPCISD::STRICT_FCTIWZ;
8113   case PPCISD::FCTIDUZ:
8114     return PPCISD::STRICT_FCTIDUZ;
8115   case PPCISD::FCTIWUZ:
8116     return PPCISD::STRICT_FCTIWUZ;
8117   case PPCISD::FCFID:
8118     return PPCISD::STRICT_FCFID;
8119   case PPCISD::FCFIDU:
8120     return PPCISD::STRICT_FCFIDU;
8121   case PPCISD::FCFIDS:
8122     return PPCISD::STRICT_FCFIDS;
8123   case PPCISD::FCFIDUS:
8124     return PPCISD::STRICT_FCFIDUS;
8125   }
8126 }
8127 
8128 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8129                               const PPCSubtarget &Subtarget) {
8130   SDLoc dl(Op);
8131   bool IsStrict = Op->isStrictFPOpcode();
8132   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8133                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8134 
8135   // TODO: Any other flags to propagate?
8136   SDNodeFlags Flags;
8137   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8138 
8139   // For strict nodes, source is the second operand.
8140   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8141   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8142   MVT DestTy = Op.getSimpleValueType();
8143   assert(Src.getValueType().isFloatingPoint() &&
8144          (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8145           DestTy == MVT::i64) &&
8146          "Invalid FP_TO_INT types");
8147   if (Src.getValueType() == MVT::f32) {
8148     if (IsStrict) {
8149       Src =
8150           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8151                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8152       Chain = Src.getValue(1);
8153     } else
8154       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8155   }
8156   if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8157     DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8158   unsigned Opc = ISD::DELETED_NODE;
8159   switch (DestTy.SimpleTy) {
8160   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8161   case MVT::i32:
8162     Opc = IsSigned ? PPCISD::FCTIWZ
8163                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8164     break;
8165   case MVT::i64:
8166     assert((IsSigned || Subtarget.hasFPCVT()) &&
8167            "i64 FP_TO_UINT is supported only with FPCVT");
8168     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8169   }
8170   EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8171   SDValue Conv;
8172   if (IsStrict) {
8173     Opc = getPPCStrictOpcode(Opc);
8174     Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8175                        Flags);
8176   } else {
8177     Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8178   }
8179   return Conv;
8180 }
8181 
8182 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8183                                                SelectionDAG &DAG,
8184                                                const SDLoc &dl) const {
8185   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8186   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8187                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8188   bool IsStrict = Op->isStrictFPOpcode();
8189 
8190   // Convert the FP value to an int value through memory.
8191   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8192                   (IsSigned || Subtarget.hasFPCVT());
8193   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8194   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8195   MachinePointerInfo MPI =
8196       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8197 
8198   // Emit a store to the stack slot.
8199   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8200   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8201   if (i32Stack) {
8202     MachineFunction &MF = DAG.getMachineFunction();
8203     Alignment = Align(4);
8204     MachineMemOperand *MMO =
8205         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8206     SDValue Ops[] = { Chain, Tmp, FIPtr };
8207     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8208               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8209   } else
8210     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8211 
8212   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
8213   // add in a bias on big endian.
8214   if (Op.getValueType() == MVT::i32 && !i32Stack) {
8215     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8216                         DAG.getConstant(4, dl, FIPtr.getValueType()));
8217     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8218   }
8219 
8220   RLI.Chain = Chain;
8221   RLI.Ptr = FIPtr;
8222   RLI.MPI = MPI;
8223   RLI.Alignment = Alignment;
8224 }
8225 
8226 /// Custom lowers floating point to integer conversions to use
8227 /// the direct move instructions available in ISA 2.07 to avoid the
8228 /// need for load/store combinations.
8229 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8230                                                     SelectionDAG &DAG,
8231                                                     const SDLoc &dl) const {
8232   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8233   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8234   if (Op->isStrictFPOpcode())
8235     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8236   else
8237     return Mov;
8238 }
8239 
8240 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8241                                           const SDLoc &dl) const {
8242   bool IsStrict = Op->isStrictFPOpcode();
8243   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8244                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8245   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8246   EVT SrcVT = Src.getValueType();
8247   EVT DstVT = Op.getValueType();
8248 
8249   // FP to INT conversions are legal for f128.
8250   if (SrcVT == MVT::f128)
8251     return Subtarget.hasP9Vector() ? Op : SDValue();
8252 
8253   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8254   // PPC (the libcall is not available).
8255   if (SrcVT == MVT::ppcf128) {
8256     if (DstVT == MVT::i32) {
8257       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8258       // set other fast-math flags to FP operations in both strict and
8259       // non-strict cases. (FP_TO_SINT, FSUB)
8260       SDNodeFlags Flags;
8261       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8262 
8263       if (IsSigned) {
8264         SDValue Lo, Hi;
8265         std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8266 
8267         // Add the two halves of the long double in round-to-zero mode, and use
8268         // a smaller FP_TO_SINT.
8269         if (IsStrict) {
8270           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8271                                     DAG.getVTList(MVT::f64, MVT::Other),
8272                                     {Op.getOperand(0), Lo, Hi}, Flags);
8273           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8274                              DAG.getVTList(MVT::i32, MVT::Other),
8275                              {Res.getValue(1), Res}, Flags);
8276         } else {
8277           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8278           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8279         }
8280       } else {
8281         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8282         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8283         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8284         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8285         if (IsStrict) {
8286           // Sel = Src < 0x80000000
8287           // FltOfs = select Sel, 0.0, 0x80000000
8288           // IntOfs = select Sel, 0, 0x80000000
8289           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8290           SDValue Chain = Op.getOperand(0);
8291           EVT SetCCVT =
8292               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8293           EVT DstSetCCVT =
8294               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8295           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8296                                      Chain, true);
8297           Chain = Sel.getValue(1);
8298 
8299           SDValue FltOfs = DAG.getSelect(
8300               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8301           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8302 
8303           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8304                                     DAG.getVTList(SrcVT, MVT::Other),
8305                                     {Chain, Src, FltOfs}, Flags);
8306           Chain = Val.getValue(1);
8307           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8308                                      DAG.getVTList(DstVT, MVT::Other),
8309                                      {Chain, Val}, Flags);
8310           Chain = SInt.getValue(1);
8311           SDValue IntOfs = DAG.getSelect(
8312               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8313           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8314           return DAG.getMergeValues({Result, Chain}, dl);
8315         } else {
8316           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8317           // FIXME: generated code sucks.
8318           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8319           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8320           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8321           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8322           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8323         }
8324       }
8325     }
8326 
8327     return SDValue();
8328   }
8329 
8330   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8331     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8332 
8333   ReuseLoadInfo RLI;
8334   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8335 
8336   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8337                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8338 }
8339 
8340 // We're trying to insert a regular store, S, and then a load, L. If the
8341 // incoming value, O, is a load, we might just be able to have our load use the
8342 // address used by O. However, we don't know if anything else will store to
8343 // that address before we can load from it. To prevent this situation, we need
8344 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8345 // the same chain operand as O, we create a token factor from the chain results
8346 // of O and L, and we replace all uses of O's chain result with that token
8347 // factor (see spliceIntoChain below for this last part).
8348 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8349                                             ReuseLoadInfo &RLI,
8350                                             SelectionDAG &DAG,
8351                                             ISD::LoadExtType ET) const {
8352   // Conservatively skip reusing for constrained FP nodes.
8353   if (Op->isStrictFPOpcode())
8354     return false;
8355 
8356   SDLoc dl(Op);
8357   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8358                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8359   if (ET == ISD::NON_EXTLOAD &&
8360       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8361       isOperationLegalOrCustom(Op.getOpcode(),
8362                                Op.getOperand(0).getValueType())) {
8363 
8364     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8365     return true;
8366   }
8367 
8368   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8369   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8370       LD->isNonTemporal())
8371     return false;
8372   if (LD->getMemoryVT() != MemVT)
8373     return false;
8374 
8375   // If the result of the load is an illegal type, then we can't build a
8376   // valid chain for reuse since the legalised loads and token factor node that
8377   // ties the legalised loads together uses a different output chain then the
8378   // illegal load.
8379   if (!isTypeLegal(LD->getValueType(0)))
8380     return false;
8381 
8382   RLI.Ptr = LD->getBasePtr();
8383   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8384     assert(LD->getAddressingMode() == ISD::PRE_INC &&
8385            "Non-pre-inc AM on PPC?");
8386     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8387                           LD->getOffset());
8388   }
8389 
8390   RLI.Chain = LD->getChain();
8391   RLI.MPI = LD->getPointerInfo();
8392   RLI.IsDereferenceable = LD->isDereferenceable();
8393   RLI.IsInvariant = LD->isInvariant();
8394   RLI.Alignment = LD->getAlign();
8395   RLI.AAInfo = LD->getAAInfo();
8396   RLI.Ranges = LD->getRanges();
8397 
8398   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8399   return true;
8400 }
8401 
8402 // Given the head of the old chain, ResChain, insert a token factor containing
8403 // it and NewResChain, and make users of ResChain now be users of that token
8404 // factor.
8405 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8406 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8407                                         SDValue NewResChain,
8408                                         SelectionDAG &DAG) const {
8409   if (!ResChain)
8410     return;
8411 
8412   SDLoc dl(NewResChain);
8413 
8414   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8415                            NewResChain, DAG.getUNDEF(MVT::Other));
8416   assert(TF.getNode() != NewResChain.getNode() &&
8417          "A new TF really is required here");
8418 
8419   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8420   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8421 }
8422 
8423 /// Analyze profitability of direct move
8424 /// prefer float load to int load plus direct move
8425 /// when there is no integer use of int load
8426 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8427   SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8428   if (Origin->getOpcode() != ISD::LOAD)
8429     return true;
8430 
8431   // If there is no LXSIBZX/LXSIHZX, like Power8,
8432   // prefer direct move if the memory size is 1 or 2 bytes.
8433   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8434   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8435     return true;
8436 
8437   for (SDNode::use_iterator UI = Origin->use_begin(),
8438                             UE = Origin->use_end();
8439        UI != UE; ++UI) {
8440 
8441     // Only look at the users of the loaded value.
8442     if (UI.getUse().get().getResNo() != 0)
8443       continue;
8444 
8445     if (UI->getOpcode() != ISD::SINT_TO_FP &&
8446         UI->getOpcode() != ISD::UINT_TO_FP &&
8447         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8448         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8449       return true;
8450   }
8451 
8452   return false;
8453 }
8454 
8455 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8456                               const PPCSubtarget &Subtarget,
8457                               SDValue Chain = SDValue()) {
8458   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8459                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8460   SDLoc dl(Op);
8461 
8462   // TODO: Any other flags to propagate?
8463   SDNodeFlags Flags;
8464   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8465 
8466   // If we have FCFIDS, then use it when converting to single-precision.
8467   // Otherwise, convert to double-precision and then round.
8468   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8469   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8470                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8471   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8472   if (Op->isStrictFPOpcode()) {
8473     if (!Chain)
8474       Chain = Op.getOperand(0);
8475     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8476                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8477   } else
8478     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8479 }
8480 
8481 /// Custom lowers integer to floating point conversions to use
8482 /// the direct move instructions available in ISA 2.07 to avoid the
8483 /// need for load/store combinations.
8484 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8485                                                     SelectionDAG &DAG,
8486                                                     const SDLoc &dl) const {
8487   assert((Op.getValueType() == MVT::f32 ||
8488           Op.getValueType() == MVT::f64) &&
8489          "Invalid floating point type as target of conversion");
8490   assert(Subtarget.hasFPCVT() &&
8491          "Int to FP conversions with direct moves require FPCVT");
8492   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8493   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8494   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8495                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8496   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8497   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8498   return convertIntToFP(Op, Mov, DAG, Subtarget);
8499 }
8500 
8501 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8502 
8503   EVT VecVT = Vec.getValueType();
8504   assert(VecVT.isVector() && "Expected a vector type.");
8505   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8506 
8507   EVT EltVT = VecVT.getVectorElementType();
8508   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8509   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8510 
8511   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8512   SmallVector<SDValue, 16> Ops(NumConcat);
8513   Ops[0] = Vec;
8514   SDValue UndefVec = DAG.getUNDEF(VecVT);
8515   for (unsigned i = 1; i < NumConcat; ++i)
8516     Ops[i] = UndefVec;
8517 
8518   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8519 }
8520 
8521 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8522                                                 const SDLoc &dl) const {
8523   bool IsStrict = Op->isStrictFPOpcode();
8524   unsigned Opc = Op.getOpcode();
8525   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8526   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8527           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8528          "Unexpected conversion type");
8529   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8530          "Supports conversions to v2f64/v4f32 only.");
8531 
8532   // TODO: Any other flags to propagate?
8533   SDNodeFlags Flags;
8534   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8535 
8536   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8537   bool FourEltRes = Op.getValueType() == MVT::v4f32;
8538 
8539   SDValue Wide = widenVec(DAG, Src, dl);
8540   EVT WideVT = Wide.getValueType();
8541   unsigned WideNumElts = WideVT.getVectorNumElements();
8542   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8543 
8544   SmallVector<int, 16> ShuffV;
8545   for (unsigned i = 0; i < WideNumElts; ++i)
8546     ShuffV.push_back(i + WideNumElts);
8547 
8548   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8549   int SaveElts = FourEltRes ? 4 : 2;
8550   if (Subtarget.isLittleEndian())
8551     for (int i = 0; i < SaveElts; i++)
8552       ShuffV[i * Stride] = i;
8553   else
8554     for (int i = 1; i <= SaveElts; i++)
8555       ShuffV[i * Stride - 1] = i - 1;
8556 
8557   SDValue ShuffleSrc2 =
8558       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8559   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8560 
8561   SDValue Extend;
8562   if (SignedConv) {
8563     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8564     EVT ExtVT = Src.getValueType();
8565     if (Subtarget.hasP9Altivec())
8566       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8567                                IntermediateVT.getVectorNumElements());
8568 
8569     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8570                          DAG.getValueType(ExtVT));
8571   } else
8572     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8573 
8574   if (IsStrict)
8575     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8576                        {Op.getOperand(0), Extend}, Flags);
8577 
8578   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8579 }
8580 
8581 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8582                                           SelectionDAG &DAG) const {
8583   SDLoc dl(Op);
8584   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8585                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8586   bool IsStrict = Op->isStrictFPOpcode();
8587   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8588   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8589 
8590   // TODO: Any other flags to propagate?
8591   SDNodeFlags Flags;
8592   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8593 
8594   EVT InVT = Src.getValueType();
8595   EVT OutVT = Op.getValueType();
8596   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8597       isOperationCustom(Op.getOpcode(), InVT))
8598     return LowerINT_TO_FPVector(Op, DAG, dl);
8599 
8600   // Conversions to f128 are legal.
8601   if (Op.getValueType() == MVT::f128)
8602     return Subtarget.hasP9Vector() ? Op : SDValue();
8603 
8604   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8605   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8606     return SDValue();
8607 
8608   if (Src.getValueType() == MVT::i1) {
8609     SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8610                               DAG.getConstantFP(1.0, dl, Op.getValueType()),
8611                               DAG.getConstantFP(0.0, dl, Op.getValueType()));
8612     if (IsStrict)
8613       return DAG.getMergeValues({Sel, Chain}, dl);
8614     else
8615       return Sel;
8616   }
8617 
8618   // If we have direct moves, we can do all the conversion, skip the store/load
8619   // however, without FPCVT we can't do most conversions.
8620   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8621       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8622     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8623 
8624   assert((IsSigned || Subtarget.hasFPCVT()) &&
8625          "UINT_TO_FP is supported only with FPCVT");
8626 
8627   if (Src.getValueType() == MVT::i64) {
8628     SDValue SINT = Src;
8629     // When converting to single-precision, we actually need to convert
8630     // to double-precision first and then round to single-precision.
8631     // To avoid double-rounding effects during that operation, we have
8632     // to prepare the input operand.  Bits that might be truncated when
8633     // converting to double-precision are replaced by a bit that won't
8634     // be lost at this stage, but is below the single-precision rounding
8635     // position.
8636     //
8637     // However, if -enable-unsafe-fp-math is in effect, accept double
8638     // rounding to avoid the extra overhead.
8639     if (Op.getValueType() == MVT::f32 &&
8640         !Subtarget.hasFPCVT() &&
8641         !DAG.getTarget().Options.UnsafeFPMath) {
8642 
8643       // Twiddle input to make sure the low 11 bits are zero.  (If this
8644       // is the case, we are guaranteed the value will fit into the 53 bit
8645       // mantissa of an IEEE double-precision value without rounding.)
8646       // If any of those low 11 bits were not zero originally, make sure
8647       // bit 12 (value 2048) is set instead, so that the final rounding
8648       // to single-precision gets the correct result.
8649       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8650                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8651       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8652                           Round, DAG.getConstant(2047, dl, MVT::i64));
8653       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8654       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8655                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8656 
8657       // However, we cannot use that value unconditionally: if the magnitude
8658       // of the input value is small, the bit-twiddling we did above might
8659       // end up visibly changing the output.  Fortunately, in that case, we
8660       // don't need to twiddle bits since the original input will convert
8661       // exactly to double-precision floating-point already.  Therefore,
8662       // construct a conditional to use the original value if the top 11
8663       // bits are all sign-bit copies, and use the rounded value computed
8664       // above otherwise.
8665       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8666                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8667       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8668                          Cond, DAG.getConstant(1, dl, MVT::i64));
8669       Cond = DAG.getSetCC(
8670           dl,
8671           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8672           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8673 
8674       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8675     }
8676 
8677     ReuseLoadInfo RLI;
8678     SDValue Bits;
8679 
8680     MachineFunction &MF = DAG.getMachineFunction();
8681     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8682       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8683                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8684       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8685     } else if (Subtarget.hasLFIWAX() &&
8686                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8687       MachineMemOperand *MMO =
8688         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8689                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8690       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8691       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8692                                      DAG.getVTList(MVT::f64, MVT::Other),
8693                                      Ops, MVT::i32, MMO);
8694       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8695     } else if (Subtarget.hasFPCVT() &&
8696                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8697       MachineMemOperand *MMO =
8698         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8699                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8700       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8701       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8702                                      DAG.getVTList(MVT::f64, MVT::Other),
8703                                      Ops, MVT::i32, MMO);
8704       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8705     } else if (((Subtarget.hasLFIWAX() &&
8706                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8707                 (Subtarget.hasFPCVT() &&
8708                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8709                SINT.getOperand(0).getValueType() == MVT::i32) {
8710       MachineFrameInfo &MFI = MF.getFrameInfo();
8711       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8712 
8713       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8714       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8715 
8716       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8717                                    MachinePointerInfo::getFixedStack(
8718                                        DAG.getMachineFunction(), FrameIdx));
8719       Chain = Store;
8720 
8721       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8722              "Expected an i32 store");
8723 
8724       RLI.Ptr = FIdx;
8725       RLI.Chain = Chain;
8726       RLI.MPI =
8727           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8728       RLI.Alignment = Align(4);
8729 
8730       MachineMemOperand *MMO =
8731         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8732                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8733       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8734       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8735                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8736                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8737                                      Ops, MVT::i32, MMO);
8738       Chain = Bits.getValue(1);
8739     } else
8740       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8741 
8742     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8743     if (IsStrict)
8744       Chain = FP.getValue(1);
8745 
8746     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8747       if (IsStrict)
8748         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8749                          DAG.getVTList(MVT::f32, MVT::Other),
8750                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8751       else
8752         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8753                          DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8754     }
8755     return FP;
8756   }
8757 
8758   assert(Src.getValueType() == MVT::i32 &&
8759          "Unhandled INT_TO_FP type in custom expander!");
8760   // Since we only generate this in 64-bit mode, we can take advantage of
8761   // 64-bit registers.  In particular, sign extend the input value into the
8762   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8763   // then lfd it and fcfid it.
8764   MachineFunction &MF = DAG.getMachineFunction();
8765   MachineFrameInfo &MFI = MF.getFrameInfo();
8766   EVT PtrVT = getPointerTy(MF.getDataLayout());
8767 
8768   SDValue Ld;
8769   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8770     ReuseLoadInfo RLI;
8771     bool ReusingLoad;
8772     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8773       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8774       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8775 
8776       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8777                                    MachinePointerInfo::getFixedStack(
8778                                        DAG.getMachineFunction(), FrameIdx));
8779       Chain = Store;
8780 
8781       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8782              "Expected an i32 store");
8783 
8784       RLI.Ptr = FIdx;
8785       RLI.Chain = Chain;
8786       RLI.MPI =
8787           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8788       RLI.Alignment = Align(4);
8789     }
8790 
8791     MachineMemOperand *MMO =
8792       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8793                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8794     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8795     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8796                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8797                                  MVT::i32, MMO);
8798     Chain = Ld.getValue(1);
8799     if (ReusingLoad)
8800       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8801   } else {
8802     assert(Subtarget.isPPC64() &&
8803            "i32->FP without LFIWAX supported only on PPC64");
8804 
8805     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8806     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8807 
8808     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8809 
8810     // STD the extended value into the stack slot.
8811     SDValue Store = DAG.getStore(
8812         Chain, dl, Ext64, FIdx,
8813         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8814     Chain = Store;
8815 
8816     // Load the value as a double.
8817     Ld = DAG.getLoad(
8818         MVT::f64, dl, Chain, FIdx,
8819         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8820     Chain = Ld.getValue(1);
8821   }
8822 
8823   // FCFID it and return it.
8824   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8825   if (IsStrict)
8826     Chain = FP.getValue(1);
8827   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8828     if (IsStrict)
8829       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8830                        DAG.getVTList(MVT::f32, MVT::Other),
8831                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8832     else
8833       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8834                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8835   }
8836   return FP;
8837 }
8838 
8839 SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
8840                                              SelectionDAG &DAG) const {
8841   SDLoc dl(Op);
8842   /*
8843    The rounding mode is in bits 30:31 of FPSR, and has the following
8844    settings:
8845      00 Round to nearest
8846      01 Round to 0
8847      10 Round to +inf
8848      11 Round to -inf
8849 
8850   GET_ROUNDING, on the other hand, expects the following:
8851     -1 Undefined
8852      0 Round to 0
8853      1 Round to nearest
8854      2 Round to +inf
8855      3 Round to -inf
8856 
8857   To perform the conversion, we do:
8858     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8859   */
8860 
8861   MachineFunction &MF = DAG.getMachineFunction();
8862   EVT VT = Op.getValueType();
8863   EVT PtrVT = getPointerTy(MF.getDataLayout());
8864 
8865   // Save FP Control Word to register
8866   SDValue Chain = Op.getOperand(0);
8867   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8868   Chain = MFFS.getValue(1);
8869 
8870   SDValue CWD;
8871   if (isTypeLegal(MVT::i64)) {
8872     CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8873                       DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8874   } else {
8875     // Save FP register to stack slot
8876     int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8877     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8878     Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8879 
8880     // Load FP Control Word from low 32 bits of stack slot.
8881     assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
8882            "Stack slot adjustment is valid only on big endian subtargets!");
8883     SDValue Four = DAG.getConstant(4, dl, PtrVT);
8884     SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8885     CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8886     Chain = CWD.getValue(1);
8887   }
8888 
8889   // Transform as necessary
8890   SDValue CWD1 =
8891     DAG.getNode(ISD::AND, dl, MVT::i32,
8892                 CWD, DAG.getConstant(3, dl, MVT::i32));
8893   SDValue CWD2 =
8894     DAG.getNode(ISD::SRL, dl, MVT::i32,
8895                 DAG.getNode(ISD::AND, dl, MVT::i32,
8896                             DAG.getNode(ISD::XOR, dl, MVT::i32,
8897                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
8898                             DAG.getConstant(3, dl, MVT::i32)),
8899                 DAG.getConstant(1, dl, MVT::i32));
8900 
8901   SDValue RetVal =
8902     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8903 
8904   RetVal =
8905       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8906                   dl, VT, RetVal);
8907 
8908   return DAG.getMergeValues({RetVal, Chain}, dl);
8909 }
8910 
8911 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8912   EVT VT = Op.getValueType();
8913   unsigned BitWidth = VT.getSizeInBits();
8914   SDLoc dl(Op);
8915   assert(Op.getNumOperands() == 3 &&
8916          VT == Op.getOperand(1).getValueType() &&
8917          "Unexpected SHL!");
8918 
8919   // Expand into a bunch of logical ops.  Note that these ops
8920   // depend on the PPC behavior for oversized shift amounts.
8921   SDValue Lo = Op.getOperand(0);
8922   SDValue Hi = Op.getOperand(1);
8923   SDValue Amt = Op.getOperand(2);
8924   EVT AmtVT = Amt.getValueType();
8925 
8926   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8927                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8928   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8929   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8930   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8931   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8932                              DAG.getConstant(-BitWidth, dl, AmtVT));
8933   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8934   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8935   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8936   SDValue OutOps[] = { OutLo, OutHi };
8937   return DAG.getMergeValues(OutOps, dl);
8938 }
8939 
8940 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8941   EVT VT = Op.getValueType();
8942   SDLoc dl(Op);
8943   unsigned BitWidth = VT.getSizeInBits();
8944   assert(Op.getNumOperands() == 3 &&
8945          VT == Op.getOperand(1).getValueType() &&
8946          "Unexpected SRL!");
8947 
8948   // Expand into a bunch of logical ops.  Note that these ops
8949   // depend on the PPC behavior for oversized shift amounts.
8950   SDValue Lo = Op.getOperand(0);
8951   SDValue Hi = Op.getOperand(1);
8952   SDValue Amt = Op.getOperand(2);
8953   EVT AmtVT = Amt.getValueType();
8954 
8955   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8956                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8957   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8958   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8959   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8960   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8961                              DAG.getConstant(-BitWidth, dl, AmtVT));
8962   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8963   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8964   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8965   SDValue OutOps[] = { OutLo, OutHi };
8966   return DAG.getMergeValues(OutOps, dl);
8967 }
8968 
8969 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8970   SDLoc dl(Op);
8971   EVT VT = Op.getValueType();
8972   unsigned BitWidth = VT.getSizeInBits();
8973   assert(Op.getNumOperands() == 3 &&
8974          VT == Op.getOperand(1).getValueType() &&
8975          "Unexpected SRA!");
8976 
8977   // Expand into a bunch of logical ops, followed by a select_cc.
8978   SDValue Lo = Op.getOperand(0);
8979   SDValue Hi = Op.getOperand(1);
8980   SDValue Amt = Op.getOperand(2);
8981   EVT AmtVT = Amt.getValueType();
8982 
8983   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8984                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8985   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8986   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8987   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8988   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8989                              DAG.getConstant(-BitWidth, dl, AmtVT));
8990   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8991   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8992   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8993                                   Tmp4, Tmp6, ISD::SETLE);
8994   SDValue OutOps[] = { OutLo, OutHi };
8995   return DAG.getMergeValues(OutOps, dl);
8996 }
8997 
8998 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8999                                             SelectionDAG &DAG) const {
9000   SDLoc dl(Op);
9001   EVT VT = Op.getValueType();
9002   unsigned BitWidth = VT.getSizeInBits();
9003 
9004   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9005   SDValue X = Op.getOperand(0);
9006   SDValue Y = Op.getOperand(1);
9007   SDValue Z = Op.getOperand(2);
9008   EVT AmtVT = Z.getValueType();
9009 
9010   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9011   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9012   // This is simpler than TargetLowering::expandFunnelShift because we can rely
9013   // on PowerPC shift by BW being well defined.
9014   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9015                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
9016   SDValue SubZ =
9017       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9018   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9019   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9020   return DAG.getNode(ISD::OR, dl, VT, X, Y);
9021 }
9022 
9023 //===----------------------------------------------------------------------===//
9024 // Vector related lowering.
9025 //
9026 
9027 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9028 /// element size of SplatSize. Cast the result to VT.
9029 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9030                                       SelectionDAG &DAG, const SDLoc &dl) {
9031   static const MVT VTys[] = { // canonical VT to use for each size.
9032     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9033   };
9034 
9035   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9036 
9037   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
9038   if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9039     SplatSize = 1;
9040     Val = 0xFF;
9041   }
9042 
9043   EVT CanonicalVT = VTys[SplatSize-1];
9044 
9045   // Build a canonical splat for this value.
9046   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9047 }
9048 
9049 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9050 /// specified intrinsic ID.
9051 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9052                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9053   if (DestVT == MVT::Other) DestVT = Op.getValueType();
9054   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9055                      DAG.getConstant(IID, dl, MVT::i32), Op);
9056 }
9057 
9058 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9059 /// specified intrinsic ID.
9060 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9061                                 SelectionDAG &DAG, const SDLoc &dl,
9062                                 EVT DestVT = MVT::Other) {
9063   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9064   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9065                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9066 }
9067 
9068 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9069 /// specified intrinsic ID.
9070 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9071                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9072                                 EVT DestVT = MVT::Other) {
9073   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9074   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9075                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9076 }
9077 
9078 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9079 /// amount.  The result has the specified value type.
9080 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9081                            SelectionDAG &DAG, const SDLoc &dl) {
9082   // Force LHS/RHS to be the right type.
9083   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9084   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9085 
9086   int Ops[16];
9087   for (unsigned i = 0; i != 16; ++i)
9088     Ops[i] = i + Amt;
9089   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9090   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9091 }
9092 
9093 /// Do we have an efficient pattern in a .td file for this node?
9094 ///
9095 /// \param V - pointer to the BuildVectorSDNode being matched
9096 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9097 ///
9098 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9099 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9100 /// the opposite is true (expansion is beneficial) are:
9101 /// - The node builds a vector out of integers that are not 32 or 64-bits
9102 /// - The node builds a vector out of constants
9103 /// - The node is a "load-and-splat"
9104 /// In all other cases, we will choose to keep the BUILD_VECTOR.
9105 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9106                                             bool HasDirectMove,
9107                                             bool HasP8Vector) {
9108   EVT VecVT = V->getValueType(0);
9109   bool RightType = VecVT == MVT::v2f64 ||
9110     (HasP8Vector && VecVT == MVT::v4f32) ||
9111     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9112   if (!RightType)
9113     return false;
9114 
9115   bool IsSplat = true;
9116   bool IsLoad = false;
9117   SDValue Op0 = V->getOperand(0);
9118 
9119   // This function is called in a block that confirms the node is not a constant
9120   // splat. So a constant BUILD_VECTOR here means the vector is built out of
9121   // different constants.
9122   if (V->isConstant())
9123     return false;
9124   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9125     if (V->getOperand(i).isUndef())
9126       return false;
9127     // We want to expand nodes that represent load-and-splat even if the
9128     // loaded value is a floating point truncation or conversion to int.
9129     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9130         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9131          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9132         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9133          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9134         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9135          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9136       IsLoad = true;
9137     // If the operands are different or the input is not a load and has more
9138     // uses than just this BV node, then it isn't a splat.
9139     if (V->getOperand(i) != Op0 ||
9140         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9141       IsSplat = false;
9142   }
9143   return !(IsSplat && IsLoad);
9144 }
9145 
9146 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9147 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9148 
9149   SDLoc dl(Op);
9150   SDValue Op0 = Op->getOperand(0);
9151 
9152   if ((Op.getValueType() != MVT::f128) ||
9153       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9154       (Op0.getOperand(0).getValueType() != MVT::i64) ||
9155       (Op0.getOperand(1).getValueType() != MVT::i64))
9156     return SDValue();
9157 
9158   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9159                      Op0.getOperand(1));
9160 }
9161 
9162 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9163   const SDValue *InputLoad = &Op;
9164   while (InputLoad->getOpcode() == ISD::BITCAST)
9165     InputLoad = &InputLoad->getOperand(0);
9166   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9167       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9168     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9169     InputLoad = &InputLoad->getOperand(0);
9170   }
9171   if (InputLoad->getOpcode() != ISD::LOAD)
9172     return nullptr;
9173   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9174   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9175 }
9176 
9177 // Convert the argument APFloat to a single precision APFloat if there is no
9178 // loss in information during the conversion to single precision APFloat and the
9179 // resulting number is not a denormal number. Return true if successful.
9180 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9181   APFloat APFloatToConvert = ArgAPFloat;
9182   bool LosesInfo = true;
9183   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9184                            &LosesInfo);
9185   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9186   if (Success)
9187     ArgAPFloat = APFloatToConvert;
9188   return Success;
9189 }
9190 
9191 // Bitcast the argument APInt to a double and convert it to a single precision
9192 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9193 // argument if there is no loss in information during the conversion from
9194 // double to single precision APFloat and the resulting number is not a denormal
9195 // number. Return true if successful.
9196 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9197   double DpValue = ArgAPInt.bitsToDouble();
9198   APFloat APFloatDp(DpValue);
9199   bool Success = convertToNonDenormSingle(APFloatDp);
9200   if (Success)
9201     ArgAPInt = APFloatDp.bitcastToAPInt();
9202   return Success;
9203 }
9204 
9205 // Nondestructive check for convertTonNonDenormSingle.
9206 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9207   // Only convert if it loses info, since XXSPLTIDP should
9208   // handle the other case.
9209   APFloat APFloatToConvert = ArgAPFloat;
9210   bool LosesInfo = true;
9211   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9212                            &LosesInfo);
9213 
9214   return (!LosesInfo && !APFloatToConvert.isDenormal());
9215 }
9216 
9217 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9218                              unsigned &Opcode) {
9219   LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9220   if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9221     return false;
9222 
9223   EVT Ty = Op->getValueType(0);
9224   // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9225   // as we cannot handle extending loads for these types.
9226   if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9227       ISD::isNON_EXTLoad(InputNode))
9228     return true;
9229 
9230   EVT MemVT = InputNode->getMemoryVT();
9231   // For v8i16 and v16i8 types, extending loads can be handled as long as the
9232   // memory VT is the same vector element VT type.
9233   // The loads feeding into the v8i16 and v16i8 types will be extending because
9234   // scalar i8/i16 are not legal types.
9235   if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9236       (MemVT == Ty.getVectorElementType()))
9237     return true;
9238 
9239   if (Ty == MVT::v2i64) {
9240     // Check the extend type, when the input type is i32, and the output vector
9241     // type is v2i64.
9242     if (MemVT == MVT::i32) {
9243       if (ISD::isZEXTLoad(InputNode))
9244         Opcode = PPCISD::ZEXT_LD_SPLAT;
9245       if (ISD::isSEXTLoad(InputNode))
9246         Opcode = PPCISD::SEXT_LD_SPLAT;
9247     }
9248     return true;
9249   }
9250   return false;
9251 }
9252 
9253 // If this is a case we can't handle, return null and let the default
9254 // expansion code take care of it.  If we CAN select this case, and if it
9255 // selects to a single instruction, return Op.  Otherwise, if we can codegen
9256 // this case more efficiently than a constant pool load, lower it to the
9257 // sequence of ops that should be used.
9258 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9259                                              SelectionDAG &DAG) const {
9260   SDLoc dl(Op);
9261   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9262   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9263 
9264   // Check if this is a splat of a constant value.
9265   APInt APSplatBits, APSplatUndef;
9266   unsigned SplatBitSize;
9267   bool HasAnyUndefs;
9268   bool BVNIsConstantSplat =
9269       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9270                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9271 
9272   // If it is a splat of a double, check if we can shrink it to a 32 bit
9273   // non-denormal float which when converted back to double gives us the same
9274   // double. This is to exploit the XXSPLTIDP instruction.
9275   // If we lose precision, we use XXSPLTI32DX.
9276   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9277       Subtarget.hasPrefixInstrs()) {
9278     // Check the type first to short-circuit so we don't modify APSplatBits if
9279     // this block isn't executed.
9280     if ((Op->getValueType(0) == MVT::v2f64) &&
9281         convertToNonDenormSingle(APSplatBits)) {
9282       SDValue SplatNode = DAG.getNode(
9283           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9284           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9285       return DAG.getBitcast(Op.getValueType(), SplatNode);
9286     } else {
9287       // We may lose precision, so we have to use XXSPLTI32DX.
9288 
9289       uint32_t Hi =
9290           (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9291       uint32_t Lo =
9292           (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9293       SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9294 
9295       if (!Hi || !Lo)
9296         // If either load is 0, then we should generate XXLXOR to set to 0.
9297         SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9298 
9299       if (Hi)
9300         SplatNode = DAG.getNode(
9301             PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9302             DAG.getTargetConstant(0, dl, MVT::i32),
9303             DAG.getTargetConstant(Hi, dl, MVT::i32));
9304 
9305       if (Lo)
9306         SplatNode =
9307             DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9308                         DAG.getTargetConstant(1, dl, MVT::i32),
9309                         DAG.getTargetConstant(Lo, dl, MVT::i32));
9310 
9311       return DAG.getBitcast(Op.getValueType(), SplatNode);
9312     }
9313   }
9314 
9315   if (!BVNIsConstantSplat || SplatBitSize > 32) {
9316     unsigned NewOpcode = PPCISD::LD_SPLAT;
9317 
9318     // Handle load-and-splat patterns as we have instructions that will do this
9319     // in one go.
9320     if (DAG.isSplatValue(Op, true) &&
9321         isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9322       const SDValue *InputLoad = &Op.getOperand(0);
9323       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9324 
9325       // If the input load is an extending load, it will be an i32 -> i64
9326       // extending load and isValidSplatLoad() will update NewOpcode.
9327       unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9328       unsigned ElementSize =
9329           MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9330 
9331       assert(((ElementSize == 2 * MemorySize)
9332                   ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9333                      NewOpcode == PPCISD::SEXT_LD_SPLAT)
9334                   : (NewOpcode == PPCISD::LD_SPLAT)) &&
9335              "Unmatched element size and opcode!\n");
9336 
9337       // Checking for a single use of this load, we have to check for vector
9338       // width (128 bits) / ElementSize uses (since each operand of the
9339       // BUILD_VECTOR is a separate use of the value.
9340       unsigned NumUsesOfInputLD = 128 / ElementSize;
9341       for (SDValue BVInOp : Op->ops())
9342         if (BVInOp.isUndef())
9343           NumUsesOfInputLD--;
9344 
9345       // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9346       // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9347       // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9348       // 15", but function IsValidSplatLoad() now will only return true when
9349       // the data at index 0 is not nullptr. So we will not get into trouble for
9350       // these cases.
9351       //
9352       // case 1 - lfiwzx/lfiwax
9353       // 1.1: load result is i32 and is sign/zero extend to i64;
9354       // 1.2: build a v2i64 vector type with above loaded value;
9355       // 1.3: the vector has only one value at index 0, others are all undef;
9356       // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9357       if (NumUsesOfInputLD == 1 &&
9358           (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9359            !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9360            Subtarget.hasLFIWAX()))
9361         return SDValue();
9362 
9363       // case 2 - lxvr[hb]x
9364       // 2.1: load result is at most i16;
9365       // 2.2: build a vector with above loaded value;
9366       // 2.3: the vector has only one value at index 0, others are all undef;
9367       // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9368       if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9369           Subtarget.isISA3_1() && ElementSize <= 16)
9370         return SDValue();
9371 
9372       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9373       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9374           Subtarget.hasVSX()) {
9375         SDValue Ops[] = {
9376           LD->getChain(),    // Chain
9377           LD->getBasePtr(),  // Ptr
9378           DAG.getValueType(Op.getValueType()) // VT
9379         };
9380         SDValue LdSplt = DAG.getMemIntrinsicNode(
9381             NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9382             LD->getMemoryVT(), LD->getMemOperand());
9383         // Replace all uses of the output chain of the original load with the
9384         // output chain of the new load.
9385         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9386                                       LdSplt.getValue(1));
9387         return LdSplt;
9388       }
9389     }
9390 
9391     // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9392     // 32-bits can be lowered to VSX instructions under certain conditions.
9393     // Without VSX, there is no pattern more efficient than expanding the node.
9394     if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9395         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9396                                         Subtarget.hasP8Vector()))
9397       return Op;
9398     return SDValue();
9399   }
9400 
9401   uint64_t SplatBits = APSplatBits.getZExtValue();
9402   uint64_t SplatUndef = APSplatUndef.getZExtValue();
9403   unsigned SplatSize = SplatBitSize / 8;
9404 
9405   // First, handle single instruction cases.
9406 
9407   // All zeros?
9408   if (SplatBits == 0) {
9409     // Canonicalize all zero vectors to be v4i32.
9410     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9411       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9412       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9413     }
9414     return Op;
9415   }
9416 
9417   // We have XXSPLTIW for constant splats four bytes wide.
9418   // Given vector length is a multiple of 4, 2-byte splats can be replaced
9419   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9420   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9421   // turned into a 4-byte splat of 0xABABABAB.
9422   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9423     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9424                                   Op.getValueType(), DAG, dl);
9425 
9426   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9427     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9428                                   dl);
9429 
9430   // We have XXSPLTIB for constant splats one byte wide.
9431   if (Subtarget.hasP9Vector() && SplatSize == 1)
9432     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9433                                   dl);
9434 
9435   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9436   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9437                     (32-SplatBitSize));
9438   if (SextVal >= -16 && SextVal <= 15)
9439     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9440                                   dl);
9441 
9442   // Two instruction sequences.
9443 
9444   // If this value is in the range [-32,30] and is even, use:
9445   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9446   // If this value is in the range [17,31] and is odd, use:
9447   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9448   // If this value is in the range [-31,-17] and is odd, use:
9449   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9450   // Note the last two are three-instruction sequences.
9451   if (SextVal >= -32 && SextVal <= 31) {
9452     // To avoid having these optimizations undone by constant folding,
9453     // we convert to a pseudo that will be expanded later into one of
9454     // the above forms.
9455     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9456     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9457               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9458     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9459     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9460     if (VT == Op.getValueType())
9461       return RetVal;
9462     else
9463       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9464   }
9465 
9466   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
9467   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
9468   // for fneg/fabs.
9469   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9470     // Make -1 and vspltisw -1:
9471     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9472 
9473     // Make the VSLW intrinsic, computing 0x8000_0000.
9474     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9475                                    OnesV, DAG, dl);
9476 
9477     // xor by OnesV to invert it.
9478     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9479     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9480   }
9481 
9482   // Check to see if this is a wide variety of vsplti*, binop self cases.
9483   static const signed char SplatCsts[] = {
9484     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9485     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9486   };
9487 
9488   for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9489     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9490     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
9491     int i = SplatCsts[idx];
9492 
9493     // Figure out what shift amount will be used by altivec if shifted by i in
9494     // this splat size.
9495     unsigned TypeShiftAmt = i & (SplatBitSize-1);
9496 
9497     // vsplti + shl self.
9498     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9499       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9500       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9501         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9502         Intrinsic::ppc_altivec_vslw
9503       };
9504       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9505       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9506     }
9507 
9508     // vsplti + srl self.
9509     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9510       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9511       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9512         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9513         Intrinsic::ppc_altivec_vsrw
9514       };
9515       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9516       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9517     }
9518 
9519     // vsplti + rol self.
9520     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9521                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9522       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9523       static const unsigned IIDs[] = { // Intrinsic to use for each size.
9524         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9525         Intrinsic::ppc_altivec_vrlw
9526       };
9527       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9528       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9529     }
9530 
9531     // t = vsplti c, result = vsldoi t, t, 1
9532     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9533       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9534       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9535       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9536     }
9537     // t = vsplti c, result = vsldoi t, t, 2
9538     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9539       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9540       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9541       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9542     }
9543     // t = vsplti c, result = vsldoi t, t, 3
9544     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9545       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9546       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9547       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9548     }
9549   }
9550 
9551   return SDValue();
9552 }
9553 
9554 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9555 /// the specified operations to build the shuffle.
9556 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9557                                       SDValue RHS, SelectionDAG &DAG,
9558                                       const SDLoc &dl) {
9559   unsigned OpNum = (PFEntry >> 26) & 0x0F;
9560   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9561   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
9562 
9563   enum {
9564     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9565     OP_VMRGHW,
9566     OP_VMRGLW,
9567     OP_VSPLTISW0,
9568     OP_VSPLTISW1,
9569     OP_VSPLTISW2,
9570     OP_VSPLTISW3,
9571     OP_VSLDOI4,
9572     OP_VSLDOI8,
9573     OP_VSLDOI12
9574   };
9575 
9576   if (OpNum == OP_COPY) {
9577     if (LHSID == (1*9+2)*9+3) return LHS;
9578     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9579     return RHS;
9580   }
9581 
9582   SDValue OpLHS, OpRHS;
9583   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9584   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9585 
9586   int ShufIdxs[16];
9587   switch (OpNum) {
9588   default: llvm_unreachable("Unknown i32 permute!");
9589   case OP_VMRGHW:
9590     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
9591     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9592     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
9593     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9594     break;
9595   case OP_VMRGLW:
9596     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9597     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9598     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9599     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9600     break;
9601   case OP_VSPLTISW0:
9602     for (unsigned i = 0; i != 16; ++i)
9603       ShufIdxs[i] = (i&3)+0;
9604     break;
9605   case OP_VSPLTISW1:
9606     for (unsigned i = 0; i != 16; ++i)
9607       ShufIdxs[i] = (i&3)+4;
9608     break;
9609   case OP_VSPLTISW2:
9610     for (unsigned i = 0; i != 16; ++i)
9611       ShufIdxs[i] = (i&3)+8;
9612     break;
9613   case OP_VSPLTISW3:
9614     for (unsigned i = 0; i != 16; ++i)
9615       ShufIdxs[i] = (i&3)+12;
9616     break;
9617   case OP_VSLDOI4:
9618     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9619   case OP_VSLDOI8:
9620     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9621   case OP_VSLDOI12:
9622     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9623   }
9624   EVT VT = OpLHS.getValueType();
9625   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9626   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9627   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9628   return DAG.getNode(ISD::BITCAST, dl, VT, T);
9629 }
9630 
9631 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9632 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9633 /// SDValue.
9634 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9635                                            SelectionDAG &DAG) const {
9636   const unsigned BytesInVector = 16;
9637   bool IsLE = Subtarget.isLittleEndian();
9638   SDLoc dl(N);
9639   SDValue V1 = N->getOperand(0);
9640   SDValue V2 = N->getOperand(1);
9641   unsigned ShiftElts = 0, InsertAtByte = 0;
9642   bool Swap = false;
9643 
9644   // Shifts required to get the byte we want at element 7.
9645   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
9646                                    0, 15, 14, 13, 12, 11, 10, 9};
9647   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9648                                 1, 2,  3,  4,  5,  6,  7,  8};
9649 
9650   ArrayRef<int> Mask = N->getMask();
9651   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9652 
9653   // For each mask element, find out if we're just inserting something
9654   // from V2 into V1 or vice versa.
9655   // Possible permutations inserting an element from V2 into V1:
9656   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9657   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9658   //   ...
9659   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9660   // Inserting from V1 into V2 will be similar, except mask range will be
9661   // [16,31].
9662 
9663   bool FoundCandidate = false;
9664   // If both vector operands for the shuffle are the same vector, the mask
9665   // will contain only elements from the first one and the second one will be
9666   // undef.
9667   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9668   // Go through the mask of half-words to find an element that's being moved
9669   // from one vector to the other.
9670   for (unsigned i = 0; i < BytesInVector; ++i) {
9671     unsigned CurrentElement = Mask[i];
9672     // If 2nd operand is undefined, we should only look for element 7 in the
9673     // Mask.
9674     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9675       continue;
9676 
9677     bool OtherElementsInOrder = true;
9678     // Examine the other elements in the Mask to see if they're in original
9679     // order.
9680     for (unsigned j = 0; j < BytesInVector; ++j) {
9681       if (j == i)
9682         continue;
9683       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
9684       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
9685       // in which we always assume we're always picking from the 1st operand.
9686       int MaskOffset =
9687           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9688       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9689         OtherElementsInOrder = false;
9690         break;
9691       }
9692     }
9693     // If other elements are in original order, we record the number of shifts
9694     // we need to get the element we want into element 7. Also record which byte
9695     // in the vector we should insert into.
9696     if (OtherElementsInOrder) {
9697       // If 2nd operand is undefined, we assume no shifts and no swapping.
9698       if (V2.isUndef()) {
9699         ShiftElts = 0;
9700         Swap = false;
9701       } else {
9702         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9703         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9704                          : BigEndianShifts[CurrentElement & 0xF];
9705         Swap = CurrentElement < BytesInVector;
9706       }
9707       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9708       FoundCandidate = true;
9709       break;
9710     }
9711   }
9712 
9713   if (!FoundCandidate)
9714     return SDValue();
9715 
9716   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9717   // optionally with VECSHL if shift is required.
9718   if (Swap)
9719     std::swap(V1, V2);
9720   if (V2.isUndef())
9721     V2 = V1;
9722   if (ShiftElts) {
9723     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9724                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9725     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9726                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9727   }
9728   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9729                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9730 }
9731 
9732 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9733 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9734 /// SDValue.
9735 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9736                                            SelectionDAG &DAG) const {
9737   const unsigned NumHalfWords = 8;
9738   const unsigned BytesInVector = NumHalfWords * 2;
9739   // Check that the shuffle is on half-words.
9740   if (!isNByteElemShuffleMask(N, 2, 1))
9741     return SDValue();
9742 
9743   bool IsLE = Subtarget.isLittleEndian();
9744   SDLoc dl(N);
9745   SDValue V1 = N->getOperand(0);
9746   SDValue V2 = N->getOperand(1);
9747   unsigned ShiftElts = 0, InsertAtByte = 0;
9748   bool Swap = false;
9749 
9750   // Shifts required to get the half-word we want at element 3.
9751   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9752   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9753 
9754   uint32_t Mask = 0;
9755   uint32_t OriginalOrderLow = 0x1234567;
9756   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9757   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9758   // 32-bit space, only need 4-bit nibbles per element.
9759   for (unsigned i = 0; i < NumHalfWords; ++i) {
9760     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9761     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9762   }
9763 
9764   // For each mask element, find out if we're just inserting something
9765   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9766   // from V2 into V1:
9767   //   X, 1, 2, 3, 4, 5, 6, 7
9768   //   0, X, 2, 3, 4, 5, 6, 7
9769   //   0, 1, X, 3, 4, 5, 6, 7
9770   //   0, 1, 2, X, 4, 5, 6, 7
9771   //   0, 1, 2, 3, X, 5, 6, 7
9772   //   0, 1, 2, 3, 4, X, 6, 7
9773   //   0, 1, 2, 3, 4, 5, X, 7
9774   //   0, 1, 2, 3, 4, 5, 6, X
9775   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9776 
9777   bool FoundCandidate = false;
9778   // Go through the mask of half-words to find an element that's being moved
9779   // from one vector to the other.
9780   for (unsigned i = 0; i < NumHalfWords; ++i) {
9781     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9782     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9783     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9784     uint32_t TargetOrder = 0x0;
9785 
9786     // If both vector operands for the shuffle are the same vector, the mask
9787     // will contain only elements from the first one and the second one will be
9788     // undef.
9789     if (V2.isUndef()) {
9790       ShiftElts = 0;
9791       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9792       TargetOrder = OriginalOrderLow;
9793       Swap = false;
9794       // Skip if not the correct element or mask of other elements don't equal
9795       // to our expected order.
9796       if (MaskOneElt == VINSERTHSrcElem &&
9797           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9798         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9799         FoundCandidate = true;
9800         break;
9801       }
9802     } else { // If both operands are defined.
9803       // Target order is [8,15] if the current mask is between [0,7].
9804       TargetOrder =
9805           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9806       // Skip if mask of other elements don't equal our expected order.
9807       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9808         // We only need the last 3 bits for the number of shifts.
9809         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9810                          : BigEndianShifts[MaskOneElt & 0x7];
9811         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9812         Swap = MaskOneElt < NumHalfWords;
9813         FoundCandidate = true;
9814         break;
9815       }
9816     }
9817   }
9818 
9819   if (!FoundCandidate)
9820     return SDValue();
9821 
9822   // Candidate found, construct the proper SDAG sequence with VINSERTH,
9823   // optionally with VECSHL if shift is required.
9824   if (Swap)
9825     std::swap(V1, V2);
9826   if (V2.isUndef())
9827     V2 = V1;
9828   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9829   if (ShiftElts) {
9830     // Double ShiftElts because we're left shifting on v16i8 type.
9831     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9832                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9833     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9834     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9835                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9836     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9837   }
9838   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9839   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9840                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
9841   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9842 }
9843 
9844 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9845 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9846 /// return the default SDValue.
9847 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9848                                               SelectionDAG &DAG) const {
9849   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9850   // to v16i8. Peek through the bitcasts to get the actual operands.
9851   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9852   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9853 
9854   auto ShuffleMask = SVN->getMask();
9855   SDValue VecShuffle(SVN, 0);
9856   SDLoc DL(SVN);
9857 
9858   // Check that we have a four byte shuffle.
9859   if (!isNByteElemShuffleMask(SVN, 4, 1))
9860     return SDValue();
9861 
9862   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9863   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9864     std::swap(LHS, RHS);
9865     VecShuffle = peekThroughBitcasts(DAG.getCommutedVectorShuffle(*SVN));
9866     ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
9867     if (!CommutedSV)
9868       return SDValue();
9869     ShuffleMask = CommutedSV->getMask();
9870   }
9871 
9872   // Ensure that the RHS is a vector of constants.
9873   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9874   if (!BVN)
9875     return SDValue();
9876 
9877   // Check if RHS is a splat of 4-bytes (or smaller).
9878   APInt APSplatValue, APSplatUndef;
9879   unsigned SplatBitSize;
9880   bool HasAnyUndefs;
9881   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9882                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9883       SplatBitSize > 32)
9884     return SDValue();
9885 
9886   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9887   // The instruction splats a constant C into two words of the source vector
9888   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9889   // Thus we check that the shuffle mask is the equivalent  of
9890   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9891   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9892   // within each word are consecutive, so we only need to check the first byte.
9893   SDValue Index;
9894   bool IsLE = Subtarget.isLittleEndian();
9895   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9896       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9897        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9898     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9899   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9900            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9901             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9902     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9903   else
9904     return SDValue();
9905 
9906   // If the splat is narrower than 32-bits, we need to get the 32-bit value
9907   // for XXSPLTI32DX.
9908   unsigned SplatVal = APSplatValue.getZExtValue();
9909   for (; SplatBitSize < 32; SplatBitSize <<= 1)
9910     SplatVal |= (SplatVal << SplatBitSize);
9911 
9912   SDValue SplatNode = DAG.getNode(
9913       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9914       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9915   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9916 }
9917 
9918 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9919 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9920 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9921 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9922 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9923   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9924   assert(Op.getValueType() == MVT::v1i128 &&
9925          "Only set v1i128 as custom, other type shouldn't reach here!");
9926   SDLoc dl(Op);
9927   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9928   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9929   unsigned SHLAmt = N1.getConstantOperandVal(0);
9930   if (SHLAmt % 8 == 0) {
9931     std::array<int, 16> Mask;
9932     std::iota(Mask.begin(), Mask.end(), 0);
9933     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9934     if (SDValue Shuffle =
9935             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9936                                  DAG.getUNDEF(MVT::v16i8), Mask))
9937       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9938   }
9939   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9940   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9941                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9942   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9943                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9944   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9945   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9946 }
9947 
9948 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9949 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9950 /// return the code it can be lowered into.  Worst case, it can always be
9951 /// lowered into a vperm.
9952 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9953                                                SelectionDAG &DAG) const {
9954   SDLoc dl(Op);
9955   SDValue V1 = Op.getOperand(0);
9956   SDValue V2 = Op.getOperand(1);
9957   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9958 
9959   // Any nodes that were combined in the target-independent combiner prior
9960   // to vector legalization will not be sent to the target combine. Try to
9961   // combine it here.
9962   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9963     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9964       return NewShuffle;
9965     Op = NewShuffle;
9966     SVOp = cast<ShuffleVectorSDNode>(Op);
9967     V1 = Op.getOperand(0);
9968     V2 = Op.getOperand(1);
9969   }
9970   EVT VT = Op.getValueType();
9971   bool isLittleEndian = Subtarget.isLittleEndian();
9972 
9973   unsigned ShiftElts, InsertAtByte;
9974   bool Swap = false;
9975 
9976   // If this is a load-and-splat, we can do that with a single instruction
9977   // in some cases. However if the load has multiple uses, we don't want to
9978   // combine it because that will just produce multiple loads.
9979   bool IsPermutedLoad = false;
9980   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9981   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9982       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9983       InputLoad->hasOneUse()) {
9984     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9985     int SplatIdx =
9986       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9987 
9988     // The splat index for permuted loads will be in the left half of the vector
9989     // which is strictly wider than the loaded value by 8 bytes. So we need to
9990     // adjust the splat index to point to the correct address in memory.
9991     if (IsPermutedLoad) {
9992       assert((isLittleEndian || IsFourByte) &&
9993              "Unexpected size for permuted load on big endian target");
9994       SplatIdx += IsFourByte ? 2 : 1;
9995       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9996              "Splat of a value outside of the loaded memory");
9997     }
9998 
9999     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10000     // For 4-byte load-and-splat, we need Power9.
10001     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10002       uint64_t Offset = 0;
10003       if (IsFourByte)
10004         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10005       else
10006         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10007 
10008       // If the width of the load is the same as the width of the splat,
10009       // loading with an offset would load the wrong memory.
10010       if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10011         Offset = 0;
10012 
10013       SDValue BasePtr = LD->getBasePtr();
10014       if (Offset != 0)
10015         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10016                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
10017       SDValue Ops[] = {
10018         LD->getChain(),    // Chain
10019         BasePtr,           // BasePtr
10020         DAG.getValueType(Op.getValueType()) // VT
10021       };
10022       SDVTList VTL =
10023         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10024       SDValue LdSplt =
10025         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10026                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
10027       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10028       if (LdSplt.getValueType() != SVOp->getValueType(0))
10029         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10030       return LdSplt;
10031     }
10032   }
10033 
10034   // All v2i64 and v2f64 shuffles are legal
10035   if (VT == MVT::v2i64 || VT == MVT::v2f64)
10036     return Op;
10037 
10038   if (Subtarget.hasP9Vector() &&
10039       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10040                            isLittleEndian)) {
10041     if (Swap)
10042       std::swap(V1, V2);
10043     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10044     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10045     if (ShiftElts) {
10046       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10047                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
10048       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10049                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10050       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10051     }
10052     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10053                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
10054     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10055   }
10056 
10057   if (Subtarget.hasPrefixInstrs()) {
10058     SDValue SplatInsertNode;
10059     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10060       return SplatInsertNode;
10061   }
10062 
10063   if (Subtarget.hasP9Altivec()) {
10064     SDValue NewISDNode;
10065     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10066       return NewISDNode;
10067 
10068     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10069       return NewISDNode;
10070   }
10071 
10072   if (Subtarget.hasVSX() &&
10073       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10074     if (Swap)
10075       std::swap(V1, V2);
10076     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10077     SDValue Conv2 =
10078         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10079 
10080     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10081                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10082     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10083   }
10084 
10085   if (Subtarget.hasVSX() &&
10086     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10087     if (Swap)
10088       std::swap(V1, V2);
10089     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10090     SDValue Conv2 =
10091         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10092 
10093     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10094                               DAG.getConstant(ShiftElts, dl, MVT::i32));
10095     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10096   }
10097 
10098   if (Subtarget.hasP9Vector()) {
10099      if (PPC::isXXBRHShuffleMask(SVOp)) {
10100       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10101       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10102       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10103     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10104       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10105       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10106       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10107     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10108       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10109       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10110       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10111     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10112       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10113       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10114       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10115     }
10116   }
10117 
10118   if (Subtarget.hasVSX()) {
10119     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10120       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10121 
10122       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10123       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10124                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
10125       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10126     }
10127 
10128     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10129     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10130       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10131       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10132       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10133     }
10134   }
10135 
10136   // Cases that are handled by instructions that take permute immediates
10137   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10138   // selected by the instruction selector.
10139   if (V2.isUndef()) {
10140     if (PPC::isSplatShuffleMask(SVOp, 1) ||
10141         PPC::isSplatShuffleMask(SVOp, 2) ||
10142         PPC::isSplatShuffleMask(SVOp, 4) ||
10143         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10144         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10145         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10146         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10147         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10148         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10149         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10150         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10151         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10152         (Subtarget.hasP8Altivec() && (
10153          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10154          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10155          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10156       return Op;
10157     }
10158   }
10159 
10160   // Altivec has a variety of "shuffle immediates" that take two vector inputs
10161   // and produce a fixed permutation.  If any of these match, do not lower to
10162   // VPERM.
10163   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10164   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10165       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10166       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10167       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10168       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10169       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10170       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10171       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10172       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10173       (Subtarget.hasP8Altivec() && (
10174        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10175        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10176        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10177     return Op;
10178 
10179   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
10180   // perfect shuffle table to emit an optimal matching sequence.
10181   ArrayRef<int> PermMask = SVOp->getMask();
10182 
10183   if (!DisablePerfectShuffle && !isLittleEndian) {
10184     unsigned PFIndexes[4];
10185     bool isFourElementShuffle = true;
10186     for (unsigned i = 0; i != 4 && isFourElementShuffle;
10187          ++i) {                           // Element number
10188       unsigned EltNo = 8;                 // Start out undef.
10189       for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10190         if (PermMask[i * 4 + j] < 0)
10191           continue; // Undef, ignore it.
10192 
10193         unsigned ByteSource = PermMask[i * 4 + j];
10194         if ((ByteSource & 3) != j) {
10195           isFourElementShuffle = false;
10196           break;
10197         }
10198 
10199         if (EltNo == 8) {
10200           EltNo = ByteSource / 4;
10201         } else if (EltNo != ByteSource / 4) {
10202           isFourElementShuffle = false;
10203           break;
10204         }
10205       }
10206       PFIndexes[i] = EltNo;
10207     }
10208 
10209     // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10210     // perfect shuffle vector to determine if it is cost effective to do this as
10211     // discrete instructions, or whether we should use a vperm.
10212     // For now, we skip this for little endian until such time as we have a
10213     // little-endian perfect shuffle table.
10214     if (isFourElementShuffle) {
10215       // Compute the index in the perfect shuffle table.
10216       unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10217                               PFIndexes[2] * 9 + PFIndexes[3];
10218 
10219       unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10220       unsigned Cost = (PFEntry >> 30);
10221 
10222       // Determining when to avoid vperm is tricky.  Many things affect the cost
10223       // of vperm, particularly how many times the perm mask needs to be
10224       // computed. For example, if the perm mask can be hoisted out of a loop or
10225       // is already used (perhaps because there are multiple permutes with the
10226       // same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
10227       // permute mask out of the loop requires an extra register.
10228       //
10229       // As a compromise, we only emit discrete instructions if the shuffle can
10230       // be generated in 3 or fewer operations.  When we have loop information
10231       // available, if this block is within a loop, we should avoid using vperm
10232       // for 3-operation perms and use a constant pool load instead.
10233       if (Cost < 3)
10234         return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10235     }
10236   }
10237 
10238   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10239   // vector that will get spilled to the constant pool.
10240   if (V2.isUndef()) V2 = V1;
10241 
10242   return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10243 }
10244 
10245 SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10246                                       ArrayRef<int> PermMask, EVT VT,
10247                                       SDValue V1, SDValue V2) const {
10248   unsigned Opcode = PPCISD::VPERM;
10249   EVT ValType = V1.getValueType();
10250   SDLoc dl(Op);
10251   bool NeedSwap = false;
10252   bool isLittleEndian = Subtarget.isLittleEndian();
10253   bool isPPC64 = Subtarget.isPPC64();
10254 
10255   // Only need to place items backwards in LE,
10256   // the mask will be properly calculated.
10257   if (isLittleEndian)
10258     std::swap(V1, V2);
10259 
10260   if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10261       (V1->hasOneUse() || V2->hasOneUse())) {
10262     LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10263                          "XXPERM instead\n");
10264     Opcode = PPCISD::XXPERM;
10265 
10266     // The second input to XXPERM is also an output so if the second input has
10267     // multiple uses then copying is necessary, as a result we want the
10268     // single-use operand to be used as the second input to prevent copying.
10269     if (!V2->hasOneUse() && V1->hasOneUse()) {
10270       std::swap(V1, V2);
10271       NeedSwap = !NeedSwap;
10272     }
10273   }
10274 
10275   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10276   // that it is in input element units, not in bytes.  Convert now.
10277 
10278   // For little endian, the order of the input vectors is reversed, and
10279   // the permutation mask is complemented with respect to 31.  This is
10280   // necessary to produce proper semantics with the big-endian-based vperm
10281   // instruction.
10282   EVT EltVT = V1.getValueType().getVectorElementType();
10283   unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10284 
10285   bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10286   bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10287 
10288   /*
10289   Vectors will be appended like so: [ V1 | v2 ]
10290   XXSWAPD on V1:
10291   [   A   |   B   |   C   |   D   ] -> [   C   |   D   |   A   |   B   ]
10292      0-3     4-7     8-11   12-15         0-3     4-7     8-11   12-15
10293   i.e.  index of A, B += 8, and index of C, D -= 8.
10294   XXSWAPD on V2:
10295   [   E   |   F   |   G   |   H   ] -> [   G   |   H   |   E   |   F   ]
10296     16-19   20-23   24-27   28-31        16-19   20-23   24-27   28-31
10297   i.e.  index of E, F += 8, index of G, H -= 8
10298   Swap V1 and V2:
10299   [   V1   |   V2  ] -> [   V2   |   V1   ]
10300      0-15     16-31        0-15     16-31
10301   i.e.  index of V1 += 16, index of V2 -= 16
10302   */
10303 
10304   SmallVector<SDValue, 16> ResultMask;
10305   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10306     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10307 
10308     if (Opcode == PPCISD::XXPERM) {
10309       if (V1HasXXSWAPD) {
10310         if (SrcElt < 8)
10311           SrcElt += 8;
10312         else if (SrcElt < 16)
10313           SrcElt -= 8;
10314       }
10315       if (V2HasXXSWAPD) {
10316         if (SrcElt > 23)
10317           SrcElt -= 8;
10318         else if (SrcElt > 15)
10319           SrcElt += 8;
10320       }
10321       if (NeedSwap) {
10322         if (SrcElt < 16)
10323           SrcElt += 16;
10324         else
10325           SrcElt -= 16;
10326       }
10327     }
10328 
10329     for (unsigned j = 0; j != BytesPerElement; ++j)
10330       if (isLittleEndian)
10331         ResultMask.push_back(
10332             DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10333       else
10334         ResultMask.push_back(
10335             DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10336   }
10337 
10338   if (Opcode == PPCISD::XXPERM && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10339     if (V1HasXXSWAPD) {
10340       dl = SDLoc(V1->getOperand(0));
10341       V1 = V1->getOperand(0)->getOperand(1);
10342     }
10343     if (V2HasXXSWAPD) {
10344       dl = SDLoc(V2->getOperand(0));
10345       V2 = V2->getOperand(0)->getOperand(1);
10346     }
10347     if (isPPC64 && ValType != MVT::v2f64)
10348       V1 = DAG.getBitcast(MVT::v2f64, V1);
10349     if (isPPC64 && V2.getValueType() != MVT::v2f64)
10350       V2 = DAG.getBitcast(MVT::v2f64, V2);
10351   }
10352 
10353   ShufflesHandledWithVPERM++;
10354   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10355   LLVM_DEBUG({
10356     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10357     if (Opcode == PPCISD::XXPERM) {
10358       dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10359     } else {
10360       dbgs() << "Emitting a VPERM for the following shuffle:\n";
10361     }
10362     SVOp->dump();
10363     dbgs() << "With the following permute control vector:\n";
10364     VPermMask.dump();
10365   });
10366 
10367   if (Opcode == PPCISD::XXPERM)
10368     VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10369 
10370   SDValue VPERMNode =
10371       DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10372 
10373   VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10374   return VPERMNode;
10375 }
10376 
10377 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10378 /// vector comparison.  If it is, return true and fill in Opc/isDot with
10379 /// information about the intrinsic.
10380 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10381                                  bool &isDot, const PPCSubtarget &Subtarget) {
10382   unsigned IntrinsicID =
10383       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10384   CompareOpc = -1;
10385   isDot = false;
10386   switch (IntrinsicID) {
10387   default:
10388     return false;
10389   // Comparison predicates.
10390   case Intrinsic::ppc_altivec_vcmpbfp_p:
10391     CompareOpc = 966;
10392     isDot = true;
10393     break;
10394   case Intrinsic::ppc_altivec_vcmpeqfp_p:
10395     CompareOpc = 198;
10396     isDot = true;
10397     break;
10398   case Intrinsic::ppc_altivec_vcmpequb_p:
10399     CompareOpc = 6;
10400     isDot = true;
10401     break;
10402   case Intrinsic::ppc_altivec_vcmpequh_p:
10403     CompareOpc = 70;
10404     isDot = true;
10405     break;
10406   case Intrinsic::ppc_altivec_vcmpequw_p:
10407     CompareOpc = 134;
10408     isDot = true;
10409     break;
10410   case Intrinsic::ppc_altivec_vcmpequd_p:
10411     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10412       CompareOpc = 199;
10413       isDot = true;
10414     } else
10415       return false;
10416     break;
10417   case Intrinsic::ppc_altivec_vcmpneb_p:
10418   case Intrinsic::ppc_altivec_vcmpneh_p:
10419   case Intrinsic::ppc_altivec_vcmpnew_p:
10420   case Intrinsic::ppc_altivec_vcmpnezb_p:
10421   case Intrinsic::ppc_altivec_vcmpnezh_p:
10422   case Intrinsic::ppc_altivec_vcmpnezw_p:
10423     if (Subtarget.hasP9Altivec()) {
10424       switch (IntrinsicID) {
10425       default:
10426         llvm_unreachable("Unknown comparison intrinsic.");
10427       case Intrinsic::ppc_altivec_vcmpneb_p:
10428         CompareOpc = 7;
10429         break;
10430       case Intrinsic::ppc_altivec_vcmpneh_p:
10431         CompareOpc = 71;
10432         break;
10433       case Intrinsic::ppc_altivec_vcmpnew_p:
10434         CompareOpc = 135;
10435         break;
10436       case Intrinsic::ppc_altivec_vcmpnezb_p:
10437         CompareOpc = 263;
10438         break;
10439       case Intrinsic::ppc_altivec_vcmpnezh_p:
10440         CompareOpc = 327;
10441         break;
10442       case Intrinsic::ppc_altivec_vcmpnezw_p:
10443         CompareOpc = 391;
10444         break;
10445       }
10446       isDot = true;
10447     } else
10448       return false;
10449     break;
10450   case Intrinsic::ppc_altivec_vcmpgefp_p:
10451     CompareOpc = 454;
10452     isDot = true;
10453     break;
10454   case Intrinsic::ppc_altivec_vcmpgtfp_p:
10455     CompareOpc = 710;
10456     isDot = true;
10457     break;
10458   case Intrinsic::ppc_altivec_vcmpgtsb_p:
10459     CompareOpc = 774;
10460     isDot = true;
10461     break;
10462   case Intrinsic::ppc_altivec_vcmpgtsh_p:
10463     CompareOpc = 838;
10464     isDot = true;
10465     break;
10466   case Intrinsic::ppc_altivec_vcmpgtsw_p:
10467     CompareOpc = 902;
10468     isDot = true;
10469     break;
10470   case Intrinsic::ppc_altivec_vcmpgtsd_p:
10471     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10472       CompareOpc = 967;
10473       isDot = true;
10474     } else
10475       return false;
10476     break;
10477   case Intrinsic::ppc_altivec_vcmpgtub_p:
10478     CompareOpc = 518;
10479     isDot = true;
10480     break;
10481   case Intrinsic::ppc_altivec_vcmpgtuh_p:
10482     CompareOpc = 582;
10483     isDot = true;
10484     break;
10485   case Intrinsic::ppc_altivec_vcmpgtuw_p:
10486     CompareOpc = 646;
10487     isDot = true;
10488     break;
10489   case Intrinsic::ppc_altivec_vcmpgtud_p:
10490     if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10491       CompareOpc = 711;
10492       isDot = true;
10493     } else
10494       return false;
10495     break;
10496 
10497   case Intrinsic::ppc_altivec_vcmpequq:
10498   case Intrinsic::ppc_altivec_vcmpgtsq:
10499   case Intrinsic::ppc_altivec_vcmpgtuq:
10500     if (!Subtarget.isISA3_1())
10501       return false;
10502     switch (IntrinsicID) {
10503     default:
10504       llvm_unreachable("Unknown comparison intrinsic.");
10505     case Intrinsic::ppc_altivec_vcmpequq:
10506       CompareOpc = 455;
10507       break;
10508     case Intrinsic::ppc_altivec_vcmpgtsq:
10509       CompareOpc = 903;
10510       break;
10511     case Intrinsic::ppc_altivec_vcmpgtuq:
10512       CompareOpc = 647;
10513       break;
10514     }
10515     break;
10516 
10517   // VSX predicate comparisons use the same infrastructure
10518   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10519   case Intrinsic::ppc_vsx_xvcmpgedp_p:
10520   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10521   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10522   case Intrinsic::ppc_vsx_xvcmpgesp_p:
10523   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10524     if (Subtarget.hasVSX()) {
10525       switch (IntrinsicID) {
10526       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10527         CompareOpc = 99;
10528         break;
10529       case Intrinsic::ppc_vsx_xvcmpgedp_p:
10530         CompareOpc = 115;
10531         break;
10532       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10533         CompareOpc = 107;
10534         break;
10535       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10536         CompareOpc = 67;
10537         break;
10538       case Intrinsic::ppc_vsx_xvcmpgesp_p:
10539         CompareOpc = 83;
10540         break;
10541       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10542         CompareOpc = 75;
10543         break;
10544       }
10545       isDot = true;
10546     } else
10547       return false;
10548     break;
10549 
10550   // Normal Comparisons.
10551   case Intrinsic::ppc_altivec_vcmpbfp:
10552     CompareOpc = 966;
10553     break;
10554   case Intrinsic::ppc_altivec_vcmpeqfp:
10555     CompareOpc = 198;
10556     break;
10557   case Intrinsic::ppc_altivec_vcmpequb:
10558     CompareOpc = 6;
10559     break;
10560   case Intrinsic::ppc_altivec_vcmpequh:
10561     CompareOpc = 70;
10562     break;
10563   case Intrinsic::ppc_altivec_vcmpequw:
10564     CompareOpc = 134;
10565     break;
10566   case Intrinsic::ppc_altivec_vcmpequd:
10567     if (Subtarget.hasP8Altivec())
10568       CompareOpc = 199;
10569     else
10570       return false;
10571     break;
10572   case Intrinsic::ppc_altivec_vcmpneb:
10573   case Intrinsic::ppc_altivec_vcmpneh:
10574   case Intrinsic::ppc_altivec_vcmpnew:
10575   case Intrinsic::ppc_altivec_vcmpnezb:
10576   case Intrinsic::ppc_altivec_vcmpnezh:
10577   case Intrinsic::ppc_altivec_vcmpnezw:
10578     if (Subtarget.hasP9Altivec())
10579       switch (IntrinsicID) {
10580       default:
10581         llvm_unreachable("Unknown comparison intrinsic.");
10582       case Intrinsic::ppc_altivec_vcmpneb:
10583         CompareOpc = 7;
10584         break;
10585       case Intrinsic::ppc_altivec_vcmpneh:
10586         CompareOpc = 71;
10587         break;
10588       case Intrinsic::ppc_altivec_vcmpnew:
10589         CompareOpc = 135;
10590         break;
10591       case Intrinsic::ppc_altivec_vcmpnezb:
10592         CompareOpc = 263;
10593         break;
10594       case Intrinsic::ppc_altivec_vcmpnezh:
10595         CompareOpc = 327;
10596         break;
10597       case Intrinsic::ppc_altivec_vcmpnezw:
10598         CompareOpc = 391;
10599         break;
10600       }
10601     else
10602       return false;
10603     break;
10604   case Intrinsic::ppc_altivec_vcmpgefp:
10605     CompareOpc = 454;
10606     break;
10607   case Intrinsic::ppc_altivec_vcmpgtfp:
10608     CompareOpc = 710;
10609     break;
10610   case Intrinsic::ppc_altivec_vcmpgtsb:
10611     CompareOpc = 774;
10612     break;
10613   case Intrinsic::ppc_altivec_vcmpgtsh:
10614     CompareOpc = 838;
10615     break;
10616   case Intrinsic::ppc_altivec_vcmpgtsw:
10617     CompareOpc = 902;
10618     break;
10619   case Intrinsic::ppc_altivec_vcmpgtsd:
10620     if (Subtarget.hasP8Altivec())
10621       CompareOpc = 967;
10622     else
10623       return false;
10624     break;
10625   case Intrinsic::ppc_altivec_vcmpgtub:
10626     CompareOpc = 518;
10627     break;
10628   case Intrinsic::ppc_altivec_vcmpgtuh:
10629     CompareOpc = 582;
10630     break;
10631   case Intrinsic::ppc_altivec_vcmpgtuw:
10632     CompareOpc = 646;
10633     break;
10634   case Intrinsic::ppc_altivec_vcmpgtud:
10635     if (Subtarget.hasP8Altivec())
10636       CompareOpc = 711;
10637     else
10638       return false;
10639     break;
10640   case Intrinsic::ppc_altivec_vcmpequq_p:
10641   case Intrinsic::ppc_altivec_vcmpgtsq_p:
10642   case Intrinsic::ppc_altivec_vcmpgtuq_p:
10643     if (!Subtarget.isISA3_1())
10644       return false;
10645     switch (IntrinsicID) {
10646     default:
10647       llvm_unreachable("Unknown comparison intrinsic.");
10648     case Intrinsic::ppc_altivec_vcmpequq_p:
10649       CompareOpc = 455;
10650       break;
10651     case Intrinsic::ppc_altivec_vcmpgtsq_p:
10652       CompareOpc = 903;
10653       break;
10654     case Intrinsic::ppc_altivec_vcmpgtuq_p:
10655       CompareOpc = 647;
10656       break;
10657     }
10658     isDot = true;
10659     break;
10660   }
10661   return true;
10662 }
10663 
10664 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10665 /// lower, do it, otherwise return null.
10666 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10667                                                    SelectionDAG &DAG) const {
10668   unsigned IntrinsicID =
10669     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
10670 
10671   SDLoc dl(Op);
10672 
10673   switch (IntrinsicID) {
10674   case Intrinsic::thread_pointer:
10675     // Reads the thread pointer register, used for __builtin_thread_pointer.
10676     if (Subtarget.isPPC64())
10677       return DAG.getRegister(PPC::X13, MVT::i64);
10678     return DAG.getRegister(PPC::R2, MVT::i32);
10679 
10680   case Intrinsic::ppc_mma_disassemble_acc: {
10681     if (Subtarget.isISAFuture()) {
10682       EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10683       SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10684                                                    ArrayRef(ReturnTypes, 2),
10685                                                    Op.getOperand(1)),
10686                                 0);
10687       SmallVector<SDValue, 4> RetOps;
10688       SDValue Value = SDValue(WideVec.getNode(), 0);
10689       SDValue Value2 = SDValue(WideVec.getNode(), 1);
10690 
10691       SDValue Extract;
10692       Extract = DAG.getNode(
10693           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10694           Subtarget.isLittleEndian() ? Value2 : Value,
10695           DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10696                           dl, getPointerTy(DAG.getDataLayout())));
10697       RetOps.push_back(Extract);
10698       Extract = DAG.getNode(
10699           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10700           Subtarget.isLittleEndian() ? Value2 : Value,
10701           DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10702                           dl, getPointerTy(DAG.getDataLayout())));
10703       RetOps.push_back(Extract);
10704       Extract = DAG.getNode(
10705           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10706           Subtarget.isLittleEndian() ? Value : Value2,
10707           DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10708                           dl, getPointerTy(DAG.getDataLayout())));
10709       RetOps.push_back(Extract);
10710       Extract = DAG.getNode(
10711           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10712           Subtarget.isLittleEndian() ? Value : Value2,
10713           DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10714                           dl, getPointerTy(DAG.getDataLayout())));
10715       RetOps.push_back(Extract);
10716       return DAG.getMergeValues(RetOps, dl);
10717     }
10718     [[fallthrough]];
10719   }
10720   case Intrinsic::ppc_vsx_disassemble_pair: {
10721     int NumVecs = 2;
10722     SDValue WideVec = Op.getOperand(1);
10723     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10724       NumVecs = 4;
10725       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10726     }
10727     SmallVector<SDValue, 4> RetOps;
10728     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10729       SDValue Extract = DAG.getNode(
10730           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10731           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10732                                                      : VecNo,
10733                           dl, getPointerTy(DAG.getDataLayout())));
10734       RetOps.push_back(Extract);
10735     }
10736     return DAG.getMergeValues(RetOps, dl);
10737   }
10738 
10739   case Intrinsic::ppc_mma_xxmfacc:
10740   case Intrinsic::ppc_mma_xxmtacc: {
10741     // Allow pre-isa-future subtargets to lower as normal.
10742     if (!Subtarget.isISAFuture())
10743       return SDValue();
10744     // The intrinsics for xxmtacc and xxmfacc take one argument of
10745     // type v512i1, for future cpu the corresponding wacc instruction
10746     // dmxx[inst|extf]dmr512 is always generated for type v512i1, negating
10747     // the need to produce the xxm[t|f]acc.
10748     SDValue WideVec = Op.getOperand(1);
10749     DAG.ReplaceAllUsesWith(Op, WideVec);
10750     return SDValue();
10751   }
10752 
10753   case Intrinsic::ppc_unpack_longdouble: {
10754     auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10755     assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10756            "Argument of long double unpack must be 0 or 1!");
10757     return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10758                        DAG.getConstant(!!(Idx->getSExtValue()), dl,
10759                                        Idx->getValueType(0)));
10760   }
10761 
10762   case Intrinsic::ppc_compare_exp_lt:
10763   case Intrinsic::ppc_compare_exp_gt:
10764   case Intrinsic::ppc_compare_exp_eq:
10765   case Intrinsic::ppc_compare_exp_uo: {
10766     unsigned Pred;
10767     switch (IntrinsicID) {
10768     case Intrinsic::ppc_compare_exp_lt:
10769       Pred = PPC::PRED_LT;
10770       break;
10771     case Intrinsic::ppc_compare_exp_gt:
10772       Pred = PPC::PRED_GT;
10773       break;
10774     case Intrinsic::ppc_compare_exp_eq:
10775       Pred = PPC::PRED_EQ;
10776       break;
10777     case Intrinsic::ppc_compare_exp_uo:
10778       Pred = PPC::PRED_UN;
10779       break;
10780     }
10781     return SDValue(
10782         DAG.getMachineNode(
10783             PPC::SELECT_CC_I4, dl, MVT::i32,
10784             {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10785                                         Op.getOperand(1), Op.getOperand(2)),
10786                      0),
10787              DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10788              DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10789         0);
10790   }
10791   case Intrinsic::ppc_test_data_class: {
10792     EVT OpVT = Op.getOperand(1).getValueType();
10793     unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
10794                                          : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
10795                                                              : PPC::XSTSTDCSP);
10796     return SDValue(
10797         DAG.getMachineNode(
10798             PPC::SELECT_CC_I4, dl, MVT::i32,
10799             {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10800                                         Op.getOperand(1)),
10801                      0),
10802              DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10803              DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10804         0);
10805   }
10806   case Intrinsic::ppc_fnmsub: {
10807     EVT VT = Op.getOperand(1).getValueType();
10808     if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10809       return DAG.getNode(
10810           ISD::FNEG, dl, VT,
10811           DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10812                       DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
10813     return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
10814                        Op.getOperand(2), Op.getOperand(3));
10815   }
10816   case Intrinsic::ppc_convert_f128_to_ppcf128:
10817   case Intrinsic::ppc_convert_ppcf128_to_f128: {
10818     RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
10819                             ? RTLIB::CONVERT_PPCF128_F128
10820                             : RTLIB::CONVERT_F128_PPCF128;
10821     MakeLibCallOptions CallOptions;
10822     std::pair<SDValue, SDValue> Result =
10823         makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
10824                     dl, SDValue());
10825     return Result.first;
10826   }
10827   case Intrinsic::ppc_maxfe:
10828   case Intrinsic::ppc_maxfl:
10829   case Intrinsic::ppc_maxfs:
10830   case Intrinsic::ppc_minfe:
10831   case Intrinsic::ppc_minfl:
10832   case Intrinsic::ppc_minfs: {
10833     EVT VT = Op.getValueType();
10834     assert(
10835         all_of(Op->ops().drop_front(4),
10836                [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
10837         "ppc_[max|min]f[e|l|s] must have uniform type arguments");
10838     (void)VT;
10839     ISD::CondCode CC = ISD::SETGT;
10840     if (IntrinsicID == Intrinsic::ppc_minfe ||
10841         IntrinsicID == Intrinsic::ppc_minfl ||
10842         IntrinsicID == Intrinsic::ppc_minfs)
10843       CC = ISD::SETLT;
10844     unsigned I = Op.getNumOperands() - 2, Cnt = I;
10845     SDValue Res = Op.getOperand(I);
10846     for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
10847       Res =
10848           DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
10849     }
10850     return Res;
10851   }
10852   }
10853 
10854   // If this is a lowered altivec predicate compare, CompareOpc is set to the
10855   // opcode number of the comparison.
10856   int CompareOpc;
10857   bool isDot;
10858   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
10859     return SDValue();    // Don't custom lower most intrinsics.
10860 
10861   // If this is a non-dot comparison, make the VCMP node and we are done.
10862   if (!isDot) {
10863     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
10864                               Op.getOperand(1), Op.getOperand(2),
10865                               DAG.getConstant(CompareOpc, dl, MVT::i32));
10866     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
10867   }
10868 
10869   // Create the PPCISD altivec 'dot' comparison node.
10870   SDValue Ops[] = {
10871     Op.getOperand(2),  // LHS
10872     Op.getOperand(3),  // RHS
10873     DAG.getConstant(CompareOpc, dl, MVT::i32)
10874   };
10875   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
10876   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
10877 
10878   // Now that we have the comparison, emit a copy from the CR to a GPR.
10879   // This is flagged to the above dot comparison.
10880   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
10881                                 DAG.getRegister(PPC::CR6, MVT::i32),
10882                                 CompNode.getValue(1));
10883 
10884   // Unpack the result based on how the target uses it.
10885   unsigned BitNo;   // Bit # of CR6.
10886   bool InvertBit;   // Invert result?
10887   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
10888   default:  // Can't happen, don't crash on invalid number though.
10889   case 0:   // Return the value of the EQ bit of CR6.
10890     BitNo = 0; InvertBit = false;
10891     break;
10892   case 1:   // Return the inverted value of the EQ bit of CR6.
10893     BitNo = 0; InvertBit = true;
10894     break;
10895   case 2:   // Return the value of the LT bit of CR6.
10896     BitNo = 2; InvertBit = false;
10897     break;
10898   case 3:   // Return the inverted value of the LT bit of CR6.
10899     BitNo = 2; InvertBit = true;
10900     break;
10901   }
10902 
10903   // Shift the bit into the low position.
10904   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
10905                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
10906   // Isolate the bit.
10907   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
10908                       DAG.getConstant(1, dl, MVT::i32));
10909 
10910   // If we are supposed to, toggle the bit.
10911   if (InvertBit)
10912     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
10913                         DAG.getConstant(1, dl, MVT::i32));
10914   return Flags;
10915 }
10916 
10917 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10918                                                SelectionDAG &DAG) const {
10919   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10920   // the beginning of the argument list.
10921   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10922   SDLoc DL(Op);
10923   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10924   case Intrinsic::ppc_cfence: {
10925     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10926     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10927     SDValue Val = Op.getOperand(ArgStart + 1);
10928     EVT Ty = Val.getValueType();
10929     if (Ty == MVT::i128) {
10930       // FIXME: Testing one of two paired registers is sufficient to guarantee
10931       // ordering?
10932       Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10933     }
10934     return SDValue(
10935         DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10936                            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10937                            Op.getOperand(0)),
10938         0);
10939   }
10940   default:
10941     break;
10942   }
10943   return SDValue();
10944 }
10945 
10946 // Lower scalar BSWAP64 to xxbrd.
10947 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10948   SDLoc dl(Op);
10949   if (!Subtarget.isPPC64())
10950     return Op;
10951   // MTVSRDD
10952   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10953                    Op.getOperand(0));
10954   // XXBRD
10955   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10956   // MFVSRD
10957   int VectorIndex = 0;
10958   if (Subtarget.isLittleEndian())
10959     VectorIndex = 1;
10960   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10961                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10962   return Op;
10963 }
10964 
10965 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10966 // compared to a value that is atomically loaded (atomic loads zero-extend).
10967 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10968                                                 SelectionDAG &DAG) const {
10969   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10970          "Expecting an atomic compare-and-swap here.");
10971   SDLoc dl(Op);
10972   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10973   EVT MemVT = AtomicNode->getMemoryVT();
10974   if (MemVT.getSizeInBits() >= 32)
10975     return Op;
10976 
10977   SDValue CmpOp = Op.getOperand(2);
10978   // If this is already correctly zero-extended, leave it alone.
10979   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10980   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10981     return Op;
10982 
10983   // Clear the high bits of the compare operand.
10984   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10985   SDValue NewCmpOp =
10986     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10987                 DAG.getConstant(MaskVal, dl, MVT::i32));
10988 
10989   // Replace the existing compare operand with the properly zero-extended one.
10990   SmallVector<SDValue, 4> Ops;
10991   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10992     Ops.push_back(AtomicNode->getOperand(i));
10993   Ops[2] = NewCmpOp;
10994   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10995   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10996   auto NodeTy =
10997     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10998   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10999 }
11000 
11001 SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11002                                                   SelectionDAG &DAG) const {
11003   AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11004   EVT MemVT = N->getMemoryVT();
11005   assert(MemVT.getSimpleVT() == MVT::i128 &&
11006          "Expect quadword atomic operations");
11007   SDLoc dl(N);
11008   unsigned Opc = N->getOpcode();
11009   switch (Opc) {
11010   case ISD::ATOMIC_LOAD: {
11011     // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11012     // lowered to ppc instructions by pattern matching instruction selector.
11013     SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11014     SmallVector<SDValue, 4> Ops{
11015         N->getOperand(0),
11016         DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11017     for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11018       Ops.push_back(N->getOperand(I));
11019     SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11020                                                 Ops, MemVT, N->getMemOperand());
11021     SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11022     SDValue ValHi =
11023         DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11024     ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11025                         DAG.getConstant(64, dl, MVT::i32));
11026     SDValue Val =
11027         DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11028     return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11029                        {Val, LoadedVal.getValue(2)});
11030   }
11031   case ISD::ATOMIC_STORE: {
11032     // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11033     // lowered to ppc instructions by pattern matching instruction selector.
11034     SDVTList Tys = DAG.getVTList(MVT::Other);
11035     SmallVector<SDValue, 4> Ops{
11036         N->getOperand(0),
11037         DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11038     SDValue Val = N->getOperand(2);
11039     SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11040     SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11041                                 DAG.getConstant(64, dl, MVT::i32));
11042     ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11043     Ops.push_back(ValLo);
11044     Ops.push_back(ValHi);
11045     Ops.push_back(N->getOperand(1));
11046     return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11047                                    N->getMemOperand());
11048   }
11049   default:
11050     llvm_unreachable("Unexpected atomic opcode");
11051   }
11052 }
11053 
11054 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11055                                 SelectionDAG &DAG,
11056                                 const PPCSubtarget &Subtarget) {
11057   assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11058 
11059   enum DataClassMask {
11060     DC_NAN = 1 << 6,
11061     DC_NEG_INF = 1 << 4,
11062     DC_POS_INF = 1 << 5,
11063     DC_NEG_ZERO = 1 << 2,
11064     DC_POS_ZERO = 1 << 3,
11065     DC_NEG_SUBNORM = 1,
11066     DC_POS_SUBNORM = 1 << 1,
11067   };
11068 
11069   EVT VT = Op.getValueType();
11070 
11071   unsigned TestOp = VT == MVT::f128  ? PPC::XSTSTDCQP
11072                     : VT == MVT::f64 ? PPC::XSTSTDCDP
11073                                      : PPC::XSTSTDCSP;
11074 
11075   if (Mask == fcAllFlags)
11076     return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11077   if (Mask == 0)
11078     return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11079 
11080   // When it's cheaper or necessary to test reverse flags.
11081   if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11082     SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11083     return DAG.getNOT(Dl, Rev, MVT::i1);
11084   }
11085 
11086   // Power doesn't support testing whether a value is 'normal'. Test the rest
11087   // first, and test if it's 'not not-normal' with expected sign.
11088   if (Mask & fcNormal) {
11089     SDValue Rev(DAG.getMachineNode(
11090                     TestOp, Dl, MVT::i32,
11091                     DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11092                                               DC_NEG_ZERO | DC_POS_ZERO |
11093                                               DC_NEG_SUBNORM | DC_POS_SUBNORM,
11094                                           Dl, MVT::i32),
11095                     Op),
11096                 0);
11097     // Sign are stored in CR bit 0, result are in CR bit 2.
11098     SDValue Sign(
11099         DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11100                            DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11101         0);
11102     SDValue Normal(DAG.getNOT(
11103         Dl,
11104         SDValue(DAG.getMachineNode(
11105                     TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11106                     DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11107                 0),
11108         MVT::i1));
11109     if (Mask & fcPosNormal)
11110       Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11111     SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11112     if (Mask == fcPosNormal || Mask == fcNegNormal)
11113       return Result;
11114 
11115     return DAG.getNode(
11116         ISD::OR, Dl, MVT::i1,
11117         getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11118   }
11119 
11120   // The instruction doesn't differentiate between signaling or quiet NaN. Test
11121   // the rest first, and test if it 'is NaN and is signaling/quiet'.
11122   if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11123     bool IsQuiet = Mask & fcQNan;
11124     SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11125 
11126     // Quietness is determined by the first bit in fraction field.
11127     uint64_t QuietMask = 0;
11128     SDValue HighWord;
11129     if (VT == MVT::f128) {
11130       HighWord = DAG.getNode(
11131           ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11132           DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11133       QuietMask = 0x8000;
11134     } else if (VT == MVT::f64) {
11135       if (Subtarget.isPPC64()) {
11136         HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11137                                DAG.getBitcast(MVT::i64, Op),
11138                                DAG.getConstant(1, Dl, MVT::i32));
11139       } else {
11140         SDValue Vec = DAG.getBitcast(
11141             MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11142         HighWord = DAG.getNode(
11143             ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11144             DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11145       }
11146       QuietMask = 0x80000;
11147     } else if (VT == MVT::f32) {
11148       HighWord = DAG.getBitcast(MVT::i32, Op);
11149       QuietMask = 0x400000;
11150     }
11151     SDValue NanRes = DAG.getSetCC(
11152         Dl, MVT::i1,
11153         DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11154                     DAG.getConstant(QuietMask, Dl, MVT::i32)),
11155         DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11156     NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11157     if (Mask == fcQNan || Mask == fcSNan)
11158       return NanRes;
11159 
11160     return DAG.getNode(ISD::OR, Dl, MVT::i1,
11161                        getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11162                        NanRes);
11163   }
11164 
11165   unsigned NativeMask = 0;
11166   if ((Mask & fcNan) == fcNan)
11167     NativeMask |= DC_NAN;
11168   if (Mask & fcNegInf)
11169     NativeMask |= DC_NEG_INF;
11170   if (Mask & fcPosInf)
11171     NativeMask |= DC_POS_INF;
11172   if (Mask & fcNegZero)
11173     NativeMask |= DC_NEG_ZERO;
11174   if (Mask & fcPosZero)
11175     NativeMask |= DC_POS_ZERO;
11176   if (Mask & fcNegSubnormal)
11177     NativeMask |= DC_NEG_SUBNORM;
11178   if (Mask & fcPosSubnormal)
11179     NativeMask |= DC_POS_SUBNORM;
11180   return SDValue(
11181       DAG.getMachineNode(
11182           TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11183           SDValue(DAG.getMachineNode(
11184                       TestOp, Dl, MVT::i32,
11185                       DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11186                   0),
11187           DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11188       0);
11189 }
11190 
11191 SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11192                                            SelectionDAG &DAG) const {
11193   assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11194   SDValue LHS = Op.getOperand(0);
11195   const auto *RHS = cast<ConstantSDNode>(Op.getOperand(1));
11196   SDLoc Dl(Op);
11197   FPClassTest Category = static_cast<FPClassTest>(RHS->getZExtValue());
11198   return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11199 }
11200 
11201 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11202                                                  SelectionDAG &DAG) const {
11203   SDLoc dl(Op);
11204   // Create a stack slot that is 16-byte aligned.
11205   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11206   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11207   EVT PtrVT = getPointerTy(DAG.getDataLayout());
11208   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11209 
11210   // Store the input value into Value#0 of the stack slot.
11211   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11212                                MachinePointerInfo());
11213   // Load it out.
11214   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11215 }
11216 
11217 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11218                                                   SelectionDAG &DAG) const {
11219   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11220          "Should only be called for ISD::INSERT_VECTOR_ELT");
11221 
11222   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11223 
11224   EVT VT = Op.getValueType();
11225   SDLoc dl(Op);
11226   SDValue V1 = Op.getOperand(0);
11227   SDValue V2 = Op.getOperand(1);
11228 
11229   if (VT == MVT::v2f64 && C)
11230     return Op;
11231 
11232   if (Subtarget.hasP9Vector()) {
11233     // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11234     // because on P10, it allows this specific insert_vector_elt load pattern to
11235     // utilize the refactored load and store infrastructure in order to exploit
11236     // prefixed loads.
11237     // On targets with inexpensive direct moves (Power9 and up), a
11238     // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11239     // load since a single precision load will involve conversion to double
11240     // precision on the load followed by another conversion to single precision.
11241     if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11242         (isa<LoadSDNode>(V2))) {
11243       SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11244       SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11245       SDValue InsVecElt =
11246           DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11247                       BitcastLoad, Op.getOperand(2));
11248       return DAG.getBitcast(MVT::v4f32, InsVecElt);
11249     }
11250   }
11251 
11252   if (Subtarget.isISA3_1()) {
11253     if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11254       return SDValue();
11255     // On P10, we have legal lowering for constant and variable indices for
11256     // all vectors.
11257     if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11258         VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11259       return Op;
11260   }
11261 
11262   // Before P10, we have legal lowering for constant indices but not for
11263   // variable ones.
11264   if (!C)
11265     return SDValue();
11266 
11267   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11268   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11269     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11270     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11271     unsigned InsertAtElement = C->getZExtValue();
11272     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11273     if (Subtarget.isLittleEndian()) {
11274       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11275     }
11276     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11277                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
11278   }
11279   return Op;
11280 }
11281 
11282 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11283                                            SelectionDAG &DAG) const {
11284   SDLoc dl(Op);
11285   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11286   SDValue LoadChain = LN->getChain();
11287   SDValue BasePtr = LN->getBasePtr();
11288   EVT VT = Op.getValueType();
11289 
11290   if (VT != MVT::v256i1 && VT != MVT::v512i1)
11291     return Op;
11292 
11293   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11294   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11295   // 2 or 4 vsx registers.
11296   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11297          "Type unsupported without MMA");
11298   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11299          "Type unsupported without paired vector support");
11300   Align Alignment = LN->getAlign();
11301   SmallVector<SDValue, 4> Loads;
11302   SmallVector<SDValue, 4> LoadChains;
11303   unsigned NumVecs = VT.getSizeInBits() / 128;
11304   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11305     SDValue Load =
11306         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11307                     LN->getPointerInfo().getWithOffset(Idx * 16),
11308                     commonAlignment(Alignment, Idx * 16),
11309                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
11310     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11311                           DAG.getConstant(16, dl, BasePtr.getValueType()));
11312     Loads.push_back(Load);
11313     LoadChains.push_back(Load.getValue(1));
11314   }
11315   if (Subtarget.isLittleEndian()) {
11316     std::reverse(Loads.begin(), Loads.end());
11317     std::reverse(LoadChains.begin(), LoadChains.end());
11318   }
11319   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11320   SDValue Value =
11321       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11322                   dl, VT, Loads);
11323   SDValue RetOps[] = {Value, TF};
11324   return DAG.getMergeValues(RetOps, dl);
11325 }
11326 
11327 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11328                                             SelectionDAG &DAG) const {
11329   SDLoc dl(Op);
11330   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11331   SDValue StoreChain = SN->getChain();
11332   SDValue BasePtr = SN->getBasePtr();
11333   SDValue Value = SN->getValue();
11334   SDValue Value2 = SN->getValue();
11335   EVT StoreVT = Value.getValueType();
11336 
11337   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11338     return Op;
11339 
11340   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11341   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11342   // underlying registers individually.
11343   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11344          "Type unsupported without MMA");
11345   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11346          "Type unsupported without paired vector support");
11347   Align Alignment = SN->getAlign();
11348   SmallVector<SDValue, 4> Stores;
11349   unsigned NumVecs = 2;
11350   if (StoreVT == MVT::v512i1) {
11351     if (Subtarget.isISAFuture()) {
11352       EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11353       MachineSDNode *ExtNode = DAG.getMachineNode(
11354           PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11355 
11356       Value = SDValue(ExtNode, 0);
11357       Value2 = SDValue(ExtNode, 1);
11358     } else
11359       Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11360     NumVecs = 4;
11361   }
11362   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11363     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11364     SDValue Elt;
11365     if (Subtarget.isISAFuture()) {
11366       VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11367       Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11368                         Idx > 1 ? Value2 : Value,
11369                         DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11370     } else
11371       Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11372                         DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11373 
11374     SDValue Store =
11375         DAG.getStore(StoreChain, dl, Elt, BasePtr,
11376                      SN->getPointerInfo().getWithOffset(Idx * 16),
11377                      commonAlignment(Alignment, Idx * 16),
11378                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
11379     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11380                           DAG.getConstant(16, dl, BasePtr.getValueType()));
11381     Stores.push_back(Store);
11382   }
11383   SDValue TF = DAG.getTokenFactor(dl, Stores);
11384   return TF;
11385 }
11386 
11387 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11388   SDLoc dl(Op);
11389   if (Op.getValueType() == MVT::v4i32) {
11390     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11391 
11392     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11393     // +16 as shift amt.
11394     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11395     SDValue RHSSwap =   // = vrlw RHS, 16
11396       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11397 
11398     // Shrinkify inputs to v8i16.
11399     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11400     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11401     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11402 
11403     // Low parts multiplied together, generating 32-bit results (we ignore the
11404     // top parts).
11405     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11406                                         LHS, RHS, DAG, dl, MVT::v4i32);
11407 
11408     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11409                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11410     // Shift the high parts up 16 bits.
11411     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11412                               Neg16, DAG, dl);
11413     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11414   } else if (Op.getValueType() == MVT::v16i8) {
11415     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11416     bool isLittleEndian = Subtarget.isLittleEndian();
11417 
11418     // Multiply the even 8-bit parts, producing 16-bit sums.
11419     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11420                                            LHS, RHS, DAG, dl, MVT::v8i16);
11421     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11422 
11423     // Multiply the odd 8-bit parts, producing 16-bit sums.
11424     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11425                                           LHS, RHS, DAG, dl, MVT::v8i16);
11426     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11427 
11428     // Merge the results together.  Because vmuleub and vmuloub are
11429     // instructions with a big-endian bias, we must reverse the
11430     // element numbering and reverse the meaning of "odd" and "even"
11431     // when generating little endian code.
11432     int Ops[16];
11433     for (unsigned i = 0; i != 8; ++i) {
11434       if (isLittleEndian) {
11435         Ops[i*2  ] = 2*i;
11436         Ops[i*2+1] = 2*i+16;
11437       } else {
11438         Ops[i*2  ] = 2*i+1;
11439         Ops[i*2+1] = 2*i+1+16;
11440       }
11441     }
11442     if (isLittleEndian)
11443       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11444     else
11445       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11446   } else {
11447     llvm_unreachable("Unknown mul to lower!");
11448   }
11449 }
11450 
11451 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11452   bool IsStrict = Op->isStrictFPOpcode();
11453   if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11454       !Subtarget.hasP9Vector())
11455     return SDValue();
11456 
11457   return Op;
11458 }
11459 
11460 // Custom lowering for fpext vf32 to v2f64
11461 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11462 
11463   assert(Op.getOpcode() == ISD::FP_EXTEND &&
11464          "Should only be called for ISD::FP_EXTEND");
11465 
11466   // FIXME: handle extends from half precision float vectors on P9.
11467   // We only want to custom lower an extend from v2f32 to v2f64.
11468   if (Op.getValueType() != MVT::v2f64 ||
11469       Op.getOperand(0).getValueType() != MVT::v2f32)
11470     return SDValue();
11471 
11472   SDLoc dl(Op);
11473   SDValue Op0 = Op.getOperand(0);
11474 
11475   switch (Op0.getOpcode()) {
11476   default:
11477     return SDValue();
11478   case ISD::EXTRACT_SUBVECTOR: {
11479     assert(Op0.getNumOperands() == 2 &&
11480            isa<ConstantSDNode>(Op0->getOperand(1)) &&
11481            "Node should have 2 operands with second one being a constant!");
11482 
11483     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11484       return SDValue();
11485 
11486     // Custom lower is only done for high or low doubleword.
11487     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11488     if (Idx % 2 != 0)
11489       return SDValue();
11490 
11491     // Since input is v4f32, at this point Idx is either 0 or 2.
11492     // Shift to get the doubleword position we want.
11493     int DWord = Idx >> 1;
11494 
11495     // High and low word positions are different on little endian.
11496     if (Subtarget.isLittleEndian())
11497       DWord ^= 0x1;
11498 
11499     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11500                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11501   }
11502   case ISD::FADD:
11503   case ISD::FMUL:
11504   case ISD::FSUB: {
11505     SDValue NewLoad[2];
11506     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11507       // Ensure both input are loads.
11508       SDValue LdOp = Op0.getOperand(i);
11509       if (LdOp.getOpcode() != ISD::LOAD)
11510         return SDValue();
11511       // Generate new load node.
11512       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11513       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11514       NewLoad[i] = DAG.getMemIntrinsicNode(
11515           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11516           LD->getMemoryVT(), LD->getMemOperand());
11517     }
11518     SDValue NewOp =
11519         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11520                     NewLoad[1], Op0.getNode()->getFlags());
11521     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11522                        DAG.getConstant(0, dl, MVT::i32));
11523   }
11524   case ISD::LOAD: {
11525     LoadSDNode *LD = cast<LoadSDNode>(Op0);
11526     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11527     SDValue NewLd = DAG.getMemIntrinsicNode(
11528         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11529         LD->getMemoryVT(), LD->getMemOperand());
11530     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11531                        DAG.getConstant(0, dl, MVT::i32));
11532   }
11533   }
11534   llvm_unreachable("ERROR:Should return for all cases within swtich.");
11535 }
11536 
11537 /// LowerOperation - Provide custom lowering hooks for some operations.
11538 ///
11539 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11540   switch (Op.getOpcode()) {
11541   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11542   case ISD::FPOW:               return lowerPow(Op, DAG);
11543   case ISD::FSIN:               return lowerSin(Op, DAG);
11544   case ISD::FCOS:               return lowerCos(Op, DAG);
11545   case ISD::FLOG:               return lowerLog(Op, DAG);
11546   case ISD::FLOG10:             return lowerLog10(Op, DAG);
11547   case ISD::FEXP:               return lowerExp(Op, DAG);
11548   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
11549   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
11550   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
11551   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
11552   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
11553   case ISD::STRICT_FSETCC:
11554   case ISD::STRICT_FSETCCS:
11555   case ISD::SETCC:              return LowerSETCC(Op, DAG);
11556   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
11557   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
11558 
11559   case ISD::INLINEASM:
11560   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
11561   // Variable argument lowering.
11562   case ISD::VASTART:            return LowerVASTART(Op, DAG);
11563   case ISD::VAARG:              return LowerVAARG(Op, DAG);
11564   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
11565 
11566   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
11567   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11568   case ISD::GET_DYNAMIC_AREA_OFFSET:
11569     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11570 
11571   // Exception handling lowering.
11572   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
11573   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
11574   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
11575 
11576   case ISD::LOAD:               return LowerLOAD(Op, DAG);
11577   case ISD::STORE:              return LowerSTORE(Op, DAG);
11578   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
11579   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
11580   case ISD::STRICT_FP_TO_UINT:
11581   case ISD::STRICT_FP_TO_SINT:
11582   case ISD::FP_TO_UINT:
11583   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11584   case ISD::STRICT_UINT_TO_FP:
11585   case ISD::STRICT_SINT_TO_FP:
11586   case ISD::UINT_TO_FP:
11587   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
11588   case ISD::GET_ROUNDING:       return LowerGET_ROUNDING(Op, DAG);
11589 
11590   // Lower 64-bit shifts.
11591   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
11592   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
11593   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
11594 
11595   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
11596   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
11597 
11598   // Vector-related lowering.
11599   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
11600   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
11601   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11602   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
11603   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
11604   case ISD::MUL:                return LowerMUL(Op, DAG);
11605   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
11606   case ISD::STRICT_FP_ROUND:
11607   case ISD::FP_ROUND:
11608     return LowerFP_ROUND(Op, DAG);
11609   case ISD::ROTL:               return LowerROTL(Op, DAG);
11610 
11611   // For counter-based loop handling.
11612   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
11613 
11614   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
11615 
11616   // Frame & Return address.
11617   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
11618   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
11619 
11620   case ISD::INTRINSIC_VOID:
11621     return LowerINTRINSIC_VOID(Op, DAG);
11622   case ISD::BSWAP:
11623     return LowerBSWAP(Op, DAG);
11624   case ISD::ATOMIC_CMP_SWAP:
11625     return LowerATOMIC_CMP_SWAP(Op, DAG);
11626   case ISD::ATOMIC_STORE:
11627     return LowerATOMIC_LOAD_STORE(Op, DAG);
11628   case ISD::IS_FPCLASS:
11629     return LowerIS_FPCLASS(Op, DAG);
11630   }
11631 }
11632 
11633 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11634                                            SmallVectorImpl<SDValue>&Results,
11635                                            SelectionDAG &DAG) const {
11636   SDLoc dl(N);
11637   switch (N->getOpcode()) {
11638   default:
11639     llvm_unreachable("Do not know how to custom type legalize this operation!");
11640   case ISD::ATOMIC_LOAD: {
11641     SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11642     Results.push_back(Res);
11643     Results.push_back(Res.getValue(1));
11644     break;
11645   }
11646   case ISD::READCYCLECOUNTER: {
11647     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11648     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11649 
11650     Results.push_back(
11651         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11652     Results.push_back(RTB.getValue(2));
11653     break;
11654   }
11655   case ISD::INTRINSIC_W_CHAIN: {
11656     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11657         Intrinsic::loop_decrement)
11658       break;
11659 
11660     assert(N->getValueType(0) == MVT::i1 &&
11661            "Unexpected result type for CTR decrement intrinsic");
11662     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11663                                  N->getValueType(0));
11664     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11665     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11666                                  N->getOperand(1));
11667 
11668     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11669     Results.push_back(NewInt.getValue(1));
11670     break;
11671   }
11672   case ISD::INTRINSIC_WO_CHAIN: {
11673     switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
11674     case Intrinsic::ppc_pack_longdouble:
11675       Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11676                                     N->getOperand(2), N->getOperand(1)));
11677       break;
11678     case Intrinsic::ppc_maxfe:
11679     case Intrinsic::ppc_minfe:
11680     case Intrinsic::ppc_fnmsub:
11681     case Intrinsic::ppc_convert_f128_to_ppcf128:
11682       Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11683       break;
11684     }
11685     break;
11686   }
11687   case ISD::VAARG: {
11688     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11689       return;
11690 
11691     EVT VT = N->getValueType(0);
11692 
11693     if (VT == MVT::i64) {
11694       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11695 
11696       Results.push_back(NewNode);
11697       Results.push_back(NewNode.getValue(1));
11698     }
11699     return;
11700   }
11701   case ISD::STRICT_FP_TO_SINT:
11702   case ISD::STRICT_FP_TO_UINT:
11703   case ISD::FP_TO_SINT:
11704   case ISD::FP_TO_UINT: {
11705     // LowerFP_TO_INT() can only handle f32 and f64.
11706     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11707         MVT::ppcf128)
11708       return;
11709     SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11710     Results.push_back(LoweredValue);
11711     if (N->isStrictFPOpcode())
11712       Results.push_back(LoweredValue.getValue(1));
11713     return;
11714   }
11715   case ISD::TRUNCATE: {
11716     if (!N->getValueType(0).isVector())
11717       return;
11718     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11719     if (Lowered)
11720       Results.push_back(Lowered);
11721     return;
11722   }
11723   case ISD::FSHL:
11724   case ISD::FSHR:
11725     // Don't handle funnel shifts here.
11726     return;
11727   case ISD::BITCAST:
11728     // Don't handle bitcast here.
11729     return;
11730   case ISD::FP_EXTEND:
11731     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11732     if (Lowered)
11733       Results.push_back(Lowered);
11734     return;
11735   }
11736 }
11737 
11738 //===----------------------------------------------------------------------===//
11739 //  Other Lowering Code
11740 //===----------------------------------------------------------------------===//
11741 
11742 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11743   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11744   Function *Func = Intrinsic::getDeclaration(M, Id);
11745   return Builder.CreateCall(Func, {});
11746 }
11747 
11748 // The mappings for emitLeading/TrailingFence is taken from
11749 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11750 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11751                                                  Instruction *Inst,
11752                                                  AtomicOrdering Ord) const {
11753   if (Ord == AtomicOrdering::SequentiallyConsistent)
11754     return callIntrinsic(Builder, Intrinsic::ppc_sync);
11755   if (isReleaseOrStronger(Ord))
11756     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11757   return nullptr;
11758 }
11759 
11760 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11761                                                   Instruction *Inst,
11762                                                   AtomicOrdering Ord) const {
11763   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11764     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11765     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11766     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11767     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11768       return Builder.CreateCall(
11769           Intrinsic::getDeclaration(
11770               Builder.GetInsertBlock()->getParent()->getParent(),
11771               Intrinsic::ppc_cfence, {Inst->getType()}),
11772           {Inst});
11773     // FIXME: Can use isync for rmw operation.
11774     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11775   }
11776   return nullptr;
11777 }
11778 
11779 MachineBasicBlock *
11780 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
11781                                     unsigned AtomicSize,
11782                                     unsigned BinOpcode,
11783                                     unsigned CmpOpcode,
11784                                     unsigned CmpPred) const {
11785   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11786   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11787 
11788   auto LoadMnemonic = PPC::LDARX;
11789   auto StoreMnemonic = PPC::STDCX;
11790   switch (AtomicSize) {
11791   default:
11792     llvm_unreachable("Unexpected size of atomic entity");
11793   case 1:
11794     LoadMnemonic = PPC::LBARX;
11795     StoreMnemonic = PPC::STBCX;
11796     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11797     break;
11798   case 2:
11799     LoadMnemonic = PPC::LHARX;
11800     StoreMnemonic = PPC::STHCX;
11801     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11802     break;
11803   case 4:
11804     LoadMnemonic = PPC::LWARX;
11805     StoreMnemonic = PPC::STWCX;
11806     break;
11807   case 8:
11808     LoadMnemonic = PPC::LDARX;
11809     StoreMnemonic = PPC::STDCX;
11810     break;
11811   }
11812 
11813   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11814   MachineFunction *F = BB->getParent();
11815   MachineFunction::iterator It = ++BB->getIterator();
11816 
11817   Register dest = MI.getOperand(0).getReg();
11818   Register ptrA = MI.getOperand(1).getReg();
11819   Register ptrB = MI.getOperand(2).getReg();
11820   Register incr = MI.getOperand(3).getReg();
11821   DebugLoc dl = MI.getDebugLoc();
11822 
11823   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11824   MachineBasicBlock *loop2MBB =
11825     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11826   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11827   F->insert(It, loopMBB);
11828   if (CmpOpcode)
11829     F->insert(It, loop2MBB);
11830   F->insert(It, exitMBB);
11831   exitMBB->splice(exitMBB->begin(), BB,
11832                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
11833   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11834 
11835   MachineRegisterInfo &RegInfo = F->getRegInfo();
11836   Register TmpReg = (!BinOpcode) ? incr :
11837     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11838                                            : &PPC::GPRCRegClass);
11839 
11840   //  thisMBB:
11841   //   ...
11842   //   fallthrough --> loopMBB
11843   BB->addSuccessor(loopMBB);
11844 
11845   //  loopMBB:
11846   //   l[wd]arx dest, ptr
11847   //   add r0, dest, incr
11848   //   st[wd]cx. r0, ptr
11849   //   bne- loopMBB
11850   //   fallthrough --> exitMBB
11851 
11852   // For max/min...
11853   //  loopMBB:
11854   //   l[wd]arx dest, ptr
11855   //   cmpl?[wd] dest, incr
11856   //   bgt exitMBB
11857   //  loop2MBB:
11858   //   st[wd]cx. dest, ptr
11859   //   bne- loopMBB
11860   //   fallthrough --> exitMBB
11861 
11862   BB = loopMBB;
11863   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11864     .addReg(ptrA).addReg(ptrB);
11865   if (BinOpcode)
11866     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11867   if (CmpOpcode) {
11868     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11869     // Signed comparisons of byte or halfword values must be sign-extended.
11870     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11871       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11872       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11873               ExtReg).addReg(dest);
11874       BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
11875     } else
11876       BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
11877 
11878     BuildMI(BB, dl, TII->get(PPC::BCC))
11879         .addImm(CmpPred)
11880         .addReg(CrReg)
11881         .addMBB(exitMBB);
11882     BB->addSuccessor(loop2MBB);
11883     BB->addSuccessor(exitMBB);
11884     BB = loop2MBB;
11885   }
11886   BuildMI(BB, dl, TII->get(StoreMnemonic))
11887     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11888   BuildMI(BB, dl, TII->get(PPC::BCC))
11889     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11890   BB->addSuccessor(loopMBB);
11891   BB->addSuccessor(exitMBB);
11892 
11893   //  exitMBB:
11894   //   ...
11895   BB = exitMBB;
11896   return BB;
11897 }
11898 
11899 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
11900   switch(MI.getOpcode()) {
11901   default:
11902     return false;
11903   case PPC::COPY:
11904     return TII->isSignExtended(MI.getOperand(1).getReg(),
11905                                &MI.getMF()->getRegInfo());
11906   case PPC::LHA:
11907   case PPC::LHA8:
11908   case PPC::LHAU:
11909   case PPC::LHAU8:
11910   case PPC::LHAUX:
11911   case PPC::LHAUX8:
11912   case PPC::LHAX:
11913   case PPC::LHAX8:
11914   case PPC::LWA:
11915   case PPC::LWAUX:
11916   case PPC::LWAX:
11917   case PPC::LWAX_32:
11918   case PPC::LWA_32:
11919   case PPC::PLHA:
11920   case PPC::PLHA8:
11921   case PPC::PLHA8pc:
11922   case PPC::PLHApc:
11923   case PPC::PLWA:
11924   case PPC::PLWA8:
11925   case PPC::PLWA8pc:
11926   case PPC::PLWApc:
11927   case PPC::EXTSB:
11928   case PPC::EXTSB8:
11929   case PPC::EXTSB8_32_64:
11930   case PPC::EXTSB8_rec:
11931   case PPC::EXTSB_rec:
11932   case PPC::EXTSH:
11933   case PPC::EXTSH8:
11934   case PPC::EXTSH8_32_64:
11935   case PPC::EXTSH8_rec:
11936   case PPC::EXTSH_rec:
11937   case PPC::EXTSW:
11938   case PPC::EXTSWSLI:
11939   case PPC::EXTSWSLI_32_64:
11940   case PPC::EXTSWSLI_32_64_rec:
11941   case PPC::EXTSWSLI_rec:
11942   case PPC::EXTSW_32:
11943   case PPC::EXTSW_32_64:
11944   case PPC::EXTSW_32_64_rec:
11945   case PPC::EXTSW_rec:
11946   case PPC::SRAW:
11947   case PPC::SRAWI:
11948   case PPC::SRAWI_rec:
11949   case PPC::SRAW_rec:
11950     return true;
11951   }
11952   return false;
11953 }
11954 
11955 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
11956     MachineInstr &MI, MachineBasicBlock *BB,
11957     bool is8bit, // operation
11958     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11959   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11960   const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11961 
11962   // If this is a signed comparison and the value being compared is not known
11963   // to be sign extended, sign extend it here.
11964   DebugLoc dl = MI.getDebugLoc();
11965   MachineFunction *F = BB->getParent();
11966   MachineRegisterInfo &RegInfo = F->getRegInfo();
11967   Register incr = MI.getOperand(3).getReg();
11968   bool IsSignExtended =
11969       incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
11970 
11971   if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11972     Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11973     BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11974         .addReg(MI.getOperand(3).getReg());
11975     MI.getOperand(3).setReg(ValueReg);
11976     incr = ValueReg;
11977   }
11978   // If we support part-word atomic mnemonics, just use them
11979   if (Subtarget.hasPartwordAtomics())
11980     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11981                             CmpPred);
11982 
11983   // In 64 bit mode we have to use 64 bits for addresses, even though the
11984   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
11985   // registers without caring whether they're 32 or 64, but here we're
11986   // doing actual arithmetic on the addresses.
11987   bool is64bit = Subtarget.isPPC64();
11988   bool isLittleEndian = Subtarget.isLittleEndian();
11989   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11990 
11991   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11992   MachineFunction::iterator It = ++BB->getIterator();
11993 
11994   Register dest = MI.getOperand(0).getReg();
11995   Register ptrA = MI.getOperand(1).getReg();
11996   Register ptrB = MI.getOperand(2).getReg();
11997 
11998   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11999   MachineBasicBlock *loop2MBB =
12000       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12001   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12002   F->insert(It, loopMBB);
12003   if (CmpOpcode)
12004     F->insert(It, loop2MBB);
12005   F->insert(It, exitMBB);
12006   exitMBB->splice(exitMBB->begin(), BB,
12007                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
12008   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12009 
12010   const TargetRegisterClass *RC =
12011       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12012   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12013 
12014   Register PtrReg = RegInfo.createVirtualRegister(RC);
12015   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12016   Register ShiftReg =
12017       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12018   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12019   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12020   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12021   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12022   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12023   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12024   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12025   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12026   Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12027   Register Ptr1Reg;
12028   Register TmpReg =
12029       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12030 
12031   //  thisMBB:
12032   //   ...
12033   //   fallthrough --> loopMBB
12034   BB->addSuccessor(loopMBB);
12035 
12036   // The 4-byte load must be aligned, while a char or short may be
12037   // anywhere in the word.  Hence all this nasty bookkeeping code.
12038   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
12039   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12040   //   xori shift, shift1, 24 [16]
12041   //   rlwinm ptr, ptr1, 0, 0, 29
12042   //   slw incr2, incr, shift
12043   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12044   //   slw mask, mask2, shift
12045   //  loopMBB:
12046   //   lwarx tmpDest, ptr
12047   //   add tmp, tmpDest, incr2
12048   //   andc tmp2, tmpDest, mask
12049   //   and tmp3, tmp, mask
12050   //   or tmp4, tmp3, tmp2
12051   //   stwcx. tmp4, ptr
12052   //   bne- loopMBB
12053   //   fallthrough --> exitMBB
12054   //   srw SrwDest, tmpDest, shift
12055   //   rlwinm SrwDest, SrwDest, 0, 24 [16], 31
12056   if (ptrA != ZeroReg) {
12057     Ptr1Reg = RegInfo.createVirtualRegister(RC);
12058     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12059         .addReg(ptrA)
12060         .addReg(ptrB);
12061   } else {
12062     Ptr1Reg = ptrB;
12063   }
12064   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12065   // mode.
12066   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12067       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12068       .addImm(3)
12069       .addImm(27)
12070       .addImm(is8bit ? 28 : 27);
12071   if (!isLittleEndian)
12072     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12073         .addReg(Shift1Reg)
12074         .addImm(is8bit ? 24 : 16);
12075   if (is64bit)
12076     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12077         .addReg(Ptr1Reg)
12078         .addImm(0)
12079         .addImm(61);
12080   else
12081     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12082         .addReg(Ptr1Reg)
12083         .addImm(0)
12084         .addImm(0)
12085         .addImm(29);
12086   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12087   if (is8bit)
12088     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12089   else {
12090     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12091     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12092         .addReg(Mask3Reg)
12093         .addImm(65535);
12094   }
12095   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12096       .addReg(Mask2Reg)
12097       .addReg(ShiftReg);
12098 
12099   BB = loopMBB;
12100   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12101       .addReg(ZeroReg)
12102       .addReg(PtrReg);
12103   if (BinOpcode)
12104     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12105         .addReg(Incr2Reg)
12106         .addReg(TmpDestReg);
12107   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12108       .addReg(TmpDestReg)
12109       .addReg(MaskReg);
12110   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12111   if (CmpOpcode) {
12112     // For unsigned comparisons, we can directly compare the shifted values.
12113     // For signed comparisons we shift and sign extend.
12114     Register SReg = RegInfo.createVirtualRegister(GPRC);
12115     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12116     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12117         .addReg(TmpDestReg)
12118         .addReg(MaskReg);
12119     unsigned ValueReg = SReg;
12120     unsigned CmpReg = Incr2Reg;
12121     if (CmpOpcode == PPC::CMPW) {
12122       ValueReg = RegInfo.createVirtualRegister(GPRC);
12123       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12124           .addReg(SReg)
12125           .addReg(ShiftReg);
12126       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12127       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12128           .addReg(ValueReg);
12129       ValueReg = ValueSReg;
12130       CmpReg = incr;
12131     }
12132     BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12133     BuildMI(BB, dl, TII->get(PPC::BCC))
12134         .addImm(CmpPred)
12135         .addReg(CrReg)
12136         .addMBB(exitMBB);
12137     BB->addSuccessor(loop2MBB);
12138     BB->addSuccessor(exitMBB);
12139     BB = loop2MBB;
12140   }
12141   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12142   BuildMI(BB, dl, TII->get(PPC::STWCX))
12143       .addReg(Tmp4Reg)
12144       .addReg(ZeroReg)
12145       .addReg(PtrReg);
12146   BuildMI(BB, dl, TII->get(PPC::BCC))
12147       .addImm(PPC::PRED_NE)
12148       .addReg(PPC::CR0)
12149       .addMBB(loopMBB);
12150   BB->addSuccessor(loopMBB);
12151   BB->addSuccessor(exitMBB);
12152 
12153   //  exitMBB:
12154   //   ...
12155   BB = exitMBB;
12156   // Since the shift amount is not a constant, we need to clear
12157   // the upper bits with a separate RLWINM.
12158   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12159       .addReg(SrwDestReg)
12160       .addImm(0)
12161       .addImm(is8bit ? 24 : 16)
12162       .addImm(31);
12163   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12164       .addReg(TmpDestReg)
12165       .addReg(ShiftReg);
12166   return BB;
12167 }
12168 
12169 llvm::MachineBasicBlock *
12170 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12171                                     MachineBasicBlock *MBB) const {
12172   DebugLoc DL = MI.getDebugLoc();
12173   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12174   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12175 
12176   MachineFunction *MF = MBB->getParent();
12177   MachineRegisterInfo &MRI = MF->getRegInfo();
12178 
12179   const BasicBlock *BB = MBB->getBasicBlock();
12180   MachineFunction::iterator I = ++MBB->getIterator();
12181 
12182   Register DstReg = MI.getOperand(0).getReg();
12183   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12184   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12185   Register mainDstReg = MRI.createVirtualRegister(RC);
12186   Register restoreDstReg = MRI.createVirtualRegister(RC);
12187 
12188   MVT PVT = getPointerTy(MF->getDataLayout());
12189   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12190          "Invalid Pointer Size!");
12191   // For v = setjmp(buf), we generate
12192   //
12193   // thisMBB:
12194   //  SjLjSetup mainMBB
12195   //  bl mainMBB
12196   //  v_restore = 1
12197   //  b sinkMBB
12198   //
12199   // mainMBB:
12200   //  buf[LabelOffset] = LR
12201   //  v_main = 0
12202   //
12203   // sinkMBB:
12204   //  v = phi(main, restore)
12205   //
12206 
12207   MachineBasicBlock *thisMBB = MBB;
12208   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12209   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12210   MF->insert(I, mainMBB);
12211   MF->insert(I, sinkMBB);
12212 
12213   MachineInstrBuilder MIB;
12214 
12215   // Transfer the remainder of BB and its successor edges to sinkMBB.
12216   sinkMBB->splice(sinkMBB->begin(), MBB,
12217                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12218   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12219 
12220   // Note that the structure of the jmp_buf used here is not compatible
12221   // with that used by libc, and is not designed to be. Specifically, it
12222   // stores only those 'reserved' registers that LLVM does not otherwise
12223   // understand how to spill. Also, by convention, by the time this
12224   // intrinsic is called, Clang has already stored the frame address in the
12225   // first slot of the buffer and stack address in the third. Following the
12226   // X86 target code, we'll store the jump address in the second slot. We also
12227   // need to save the TOC pointer (R2) to handle jumps between shared
12228   // libraries, and that will be stored in the fourth slot. The thread
12229   // identifier (R13) is not affected.
12230 
12231   // thisMBB:
12232   const int64_t LabelOffset = 1 * PVT.getStoreSize();
12233   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
12234   const int64_t BPOffset    = 4 * PVT.getStoreSize();
12235 
12236   // Prepare IP either in reg.
12237   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12238   Register LabelReg = MRI.createVirtualRegister(PtrRC);
12239   Register BufReg = MI.getOperand(1).getReg();
12240 
12241   if (Subtarget.is64BitELFABI()) {
12242     setUsesTOCBasePtr(*MBB->getParent());
12243     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12244               .addReg(PPC::X2)
12245               .addImm(TOCOffset)
12246               .addReg(BufReg)
12247               .cloneMemRefs(MI);
12248   }
12249 
12250   // Naked functions never have a base pointer, and so we use r1. For all
12251   // other functions, this decision must be delayed until during PEI.
12252   unsigned BaseReg;
12253   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12254     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12255   else
12256     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12257 
12258   MIB = BuildMI(*thisMBB, MI, DL,
12259                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12260             .addReg(BaseReg)
12261             .addImm(BPOffset)
12262             .addReg(BufReg)
12263             .cloneMemRefs(MI);
12264 
12265   // Setup
12266   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12267   MIB.addRegMask(TRI->getNoPreservedMask());
12268 
12269   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12270 
12271   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12272           .addMBB(mainMBB);
12273   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12274 
12275   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12276   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12277 
12278   // mainMBB:
12279   //  mainDstReg = 0
12280   MIB =
12281       BuildMI(mainMBB, DL,
12282               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12283 
12284   // Store IP
12285   if (Subtarget.isPPC64()) {
12286     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12287             .addReg(LabelReg)
12288             .addImm(LabelOffset)
12289             .addReg(BufReg);
12290   } else {
12291     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12292             .addReg(LabelReg)
12293             .addImm(LabelOffset)
12294             .addReg(BufReg);
12295   }
12296   MIB.cloneMemRefs(MI);
12297 
12298   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12299   mainMBB->addSuccessor(sinkMBB);
12300 
12301   // sinkMBB:
12302   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12303           TII->get(PPC::PHI), DstReg)
12304     .addReg(mainDstReg).addMBB(mainMBB)
12305     .addReg(restoreDstReg).addMBB(thisMBB);
12306 
12307   MI.eraseFromParent();
12308   return sinkMBB;
12309 }
12310 
12311 MachineBasicBlock *
12312 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12313                                      MachineBasicBlock *MBB) const {
12314   DebugLoc DL = MI.getDebugLoc();
12315   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12316 
12317   MachineFunction *MF = MBB->getParent();
12318   MachineRegisterInfo &MRI = MF->getRegInfo();
12319 
12320   MVT PVT = getPointerTy(MF->getDataLayout());
12321   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12322          "Invalid Pointer Size!");
12323 
12324   const TargetRegisterClass *RC =
12325     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12326   Register Tmp = MRI.createVirtualRegister(RC);
12327   // Since FP is only updated here but NOT referenced, it's treated as GPR.
12328   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12329   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12330   unsigned BP =
12331       (PVT == MVT::i64)
12332           ? PPC::X30
12333           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12334                                                               : PPC::R30);
12335 
12336   MachineInstrBuilder MIB;
12337 
12338   const int64_t LabelOffset = 1 * PVT.getStoreSize();
12339   const int64_t SPOffset    = 2 * PVT.getStoreSize();
12340   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
12341   const int64_t BPOffset    = 4 * PVT.getStoreSize();
12342 
12343   Register BufReg = MI.getOperand(0).getReg();
12344 
12345   // Reload FP (the jumped-to function may not have had a
12346   // frame pointer, and if so, then its r31 will be restored
12347   // as necessary).
12348   if (PVT == MVT::i64) {
12349     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12350             .addImm(0)
12351             .addReg(BufReg);
12352   } else {
12353     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12354             .addImm(0)
12355             .addReg(BufReg);
12356   }
12357   MIB.cloneMemRefs(MI);
12358 
12359   // Reload IP
12360   if (PVT == MVT::i64) {
12361     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12362             .addImm(LabelOffset)
12363             .addReg(BufReg);
12364   } else {
12365     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12366             .addImm(LabelOffset)
12367             .addReg(BufReg);
12368   }
12369   MIB.cloneMemRefs(MI);
12370 
12371   // Reload SP
12372   if (PVT == MVT::i64) {
12373     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12374             .addImm(SPOffset)
12375             .addReg(BufReg);
12376   } else {
12377     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12378             .addImm(SPOffset)
12379             .addReg(BufReg);
12380   }
12381   MIB.cloneMemRefs(MI);
12382 
12383   // Reload BP
12384   if (PVT == MVT::i64) {
12385     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12386             .addImm(BPOffset)
12387             .addReg(BufReg);
12388   } else {
12389     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12390             .addImm(BPOffset)
12391             .addReg(BufReg);
12392   }
12393   MIB.cloneMemRefs(MI);
12394 
12395   // Reload TOC
12396   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12397     setUsesTOCBasePtr(*MBB->getParent());
12398     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12399               .addImm(TOCOffset)
12400               .addReg(BufReg)
12401               .cloneMemRefs(MI);
12402   }
12403 
12404   // Jump
12405   BuildMI(*MBB, MI, DL,
12406           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12407   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12408 
12409   MI.eraseFromParent();
12410   return MBB;
12411 }
12412 
12413 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12414   // If the function specifically requests inline stack probes, emit them.
12415   if (MF.getFunction().hasFnAttribute("probe-stack"))
12416     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12417            "inline-asm";
12418   return false;
12419 }
12420 
12421 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12422   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12423   unsigned StackAlign = TFI->getStackAlignment();
12424   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12425          "Unexpected stack alignment");
12426   // The default stack probe size is 4096 if the function has no
12427   // stack-probe-size attribute.
12428   const Function &Fn = MF.getFunction();
12429   unsigned StackProbeSize =
12430       Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12431   // Round down to the stack alignment.
12432   StackProbeSize &= ~(StackAlign - 1);
12433   return StackProbeSize ? StackProbeSize : StackAlign;
12434 }
12435 
12436 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
12437 // into three phases. In the first phase, it uses pseudo instruction
12438 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
12439 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
12440 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
12441 // MaxCallFrameSize so that it can calculate correct data area pointer.
12442 MachineBasicBlock *
12443 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
12444                                     MachineBasicBlock *MBB) const {
12445   const bool isPPC64 = Subtarget.isPPC64();
12446   MachineFunction *MF = MBB->getParent();
12447   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12448   DebugLoc DL = MI.getDebugLoc();
12449   const unsigned ProbeSize = getStackProbeSize(*MF);
12450   const BasicBlock *ProbedBB = MBB->getBasicBlock();
12451   MachineRegisterInfo &MRI = MF->getRegInfo();
12452   // The CFG of probing stack looks as
12453   //         +-----+
12454   //         | MBB |
12455   //         +--+--+
12456   //            |
12457   //       +----v----+
12458   //  +--->+ TestMBB +---+
12459   //  |    +----+----+   |
12460   //  |         |        |
12461   //  |   +-----v----+   |
12462   //  +---+ BlockMBB |   |
12463   //      +----------+   |
12464   //                     |
12465   //       +---------+   |
12466   //       | TailMBB +<--+
12467   //       +---------+
12468   // In MBB, calculate previous frame pointer and final stack pointer.
12469   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12470   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12471   // TailMBB is spliced via \p MI.
12472   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12473   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12474   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12475 
12476   MachineFunction::iterator MBBIter = ++MBB->getIterator();
12477   MF->insert(MBBIter, TestMBB);
12478   MF->insert(MBBIter, BlockMBB);
12479   MF->insert(MBBIter, TailMBB);
12480 
12481   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12482   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12483 
12484   Register DstReg = MI.getOperand(0).getReg();
12485   Register NegSizeReg = MI.getOperand(1).getReg();
12486   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12487   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12488   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12489   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12490 
12491   // Since value of NegSizeReg might be realigned in prologepilog, insert a
12492   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
12493   // NegSize.
12494   unsigned ProbeOpc;
12495   if (!MRI.hasOneNonDBGUse(NegSizeReg))
12496     ProbeOpc =
12497         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12498   else
12499     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12500     // and NegSizeReg will be allocated in the same phyreg to avoid
12501     // redundant copy when NegSizeReg has only one use which is current MI and
12502     // will be replaced by PREPARE_PROBED_ALLOCA then.
12503     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12504                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12505   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12506       .addDef(ActualNegSizeReg)
12507       .addReg(NegSizeReg)
12508       .add(MI.getOperand(2))
12509       .add(MI.getOperand(3));
12510 
12511   // Calculate final stack pointer, which equals to SP + ActualNegSize.
12512   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12513           FinalStackPtr)
12514       .addReg(SPReg)
12515       .addReg(ActualNegSizeReg);
12516 
12517   // Materialize a scratch register for update.
12518   int64_t NegProbeSize = -(int64_t)ProbeSize;
12519   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12520   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12521   if (!isInt<16>(NegProbeSize)) {
12522     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12523     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12524         .addImm(NegProbeSize >> 16);
12525     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12526             ScratchReg)
12527         .addReg(TempReg)
12528         .addImm(NegProbeSize & 0xFFFF);
12529   } else
12530     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12531         .addImm(NegProbeSize);
12532 
12533   {
12534     // Probing leading residual part.
12535     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12536     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12537         .addReg(ActualNegSizeReg)
12538         .addReg(ScratchReg);
12539     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12540     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12541         .addReg(Div)
12542         .addReg(ScratchReg);
12543     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12544     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12545         .addReg(Mul)
12546         .addReg(ActualNegSizeReg);
12547     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12548         .addReg(FramePointer)
12549         .addReg(SPReg)
12550         .addReg(NegMod);
12551   }
12552 
12553   {
12554     // Remaining part should be multiple of ProbeSize.
12555     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12556     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12557         .addReg(SPReg)
12558         .addReg(FinalStackPtr);
12559     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12560         .addImm(PPC::PRED_EQ)
12561         .addReg(CmpResult)
12562         .addMBB(TailMBB);
12563     TestMBB->addSuccessor(BlockMBB);
12564     TestMBB->addSuccessor(TailMBB);
12565   }
12566 
12567   {
12568     // Touch the block.
12569     // |P...|P...|P...
12570     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12571         .addReg(FramePointer)
12572         .addReg(SPReg)
12573         .addReg(ScratchReg);
12574     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12575     BlockMBB->addSuccessor(TestMBB);
12576   }
12577 
12578   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
12579   // DYNAREAOFFSET pseudo instruction to get the future result.
12580   Register MaxCallFrameSizeReg =
12581       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12582   BuildMI(TailMBB, DL,
12583           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12584           MaxCallFrameSizeReg)
12585       .add(MI.getOperand(2))
12586       .add(MI.getOperand(3));
12587   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12588       .addReg(SPReg)
12589       .addReg(MaxCallFrameSizeReg);
12590 
12591   // Splice instructions after MI to TailMBB.
12592   TailMBB->splice(TailMBB->end(), MBB,
12593                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12594   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12595   MBB->addSuccessor(TestMBB);
12596 
12597   // Delete the pseudo instruction.
12598   MI.eraseFromParent();
12599 
12600   ++NumDynamicAllocaProbed;
12601   return TailMBB;
12602 }
12603 
12604 MachineBasicBlock *
12605 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12606                                                MachineBasicBlock *BB) const {
12607   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12608       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12609     if (Subtarget.is64BitELFABI() &&
12610         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12611         !Subtarget.isUsingPCRelativeCalls()) {
12612       // Call lowering should have added an r2 operand to indicate a dependence
12613       // on the TOC base pointer value. It can't however, because there is no
12614       // way to mark the dependence as implicit there, and so the stackmap code
12615       // will confuse it with a regular operand. Instead, add the dependence
12616       // here.
12617       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12618     }
12619 
12620     return emitPatchPoint(MI, BB);
12621   }
12622 
12623   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12624       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12625     return emitEHSjLjSetJmp(MI, BB);
12626   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12627              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12628     return emitEHSjLjLongJmp(MI, BB);
12629   }
12630 
12631   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12632 
12633   // To "insert" these instructions we actually have to insert their
12634   // control-flow patterns.
12635   const BasicBlock *LLVM_BB = BB->getBasicBlock();
12636   MachineFunction::iterator It = ++BB->getIterator();
12637 
12638   MachineFunction *F = BB->getParent();
12639   MachineRegisterInfo &MRI = F->getRegInfo();
12640 
12641   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12642       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12643       MI.getOpcode() == PPC::SELECT_I8) {
12644     SmallVector<MachineOperand, 2> Cond;
12645     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12646         MI.getOpcode() == PPC::SELECT_CC_I8)
12647       Cond.push_back(MI.getOperand(4));
12648     else
12649       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12650     Cond.push_back(MI.getOperand(1));
12651 
12652     DebugLoc dl = MI.getDebugLoc();
12653     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12654                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12655   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12656              MI.getOpcode() == PPC::SELECT_CC_F8 ||
12657              MI.getOpcode() == PPC::SELECT_CC_F16 ||
12658              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12659              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12660              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12661              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12662              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12663              MI.getOpcode() == PPC::SELECT_CC_SPE ||
12664              MI.getOpcode() == PPC::SELECT_F4 ||
12665              MI.getOpcode() == PPC::SELECT_F8 ||
12666              MI.getOpcode() == PPC::SELECT_F16 ||
12667              MI.getOpcode() == PPC::SELECT_SPE ||
12668              MI.getOpcode() == PPC::SELECT_SPE4 ||
12669              MI.getOpcode() == PPC::SELECT_VRRC ||
12670              MI.getOpcode() == PPC::SELECT_VSFRC ||
12671              MI.getOpcode() == PPC::SELECT_VSSRC ||
12672              MI.getOpcode() == PPC::SELECT_VSRC) {
12673     // The incoming instruction knows the destination vreg to set, the
12674     // condition code register to branch on, the true/false values to
12675     // select between, and a branch opcode to use.
12676 
12677     //  thisMBB:
12678     //  ...
12679     //   TrueVal = ...
12680     //   cmpTY ccX, r1, r2
12681     //   bCC copy1MBB
12682     //   fallthrough --> copy0MBB
12683     MachineBasicBlock *thisMBB = BB;
12684     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12685     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12686     DebugLoc dl = MI.getDebugLoc();
12687     F->insert(It, copy0MBB);
12688     F->insert(It, sinkMBB);
12689 
12690     // Transfer the remainder of BB and its successor edges to sinkMBB.
12691     sinkMBB->splice(sinkMBB->begin(), BB,
12692                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12693     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12694 
12695     // Next, add the true and fallthrough blocks as its successors.
12696     BB->addSuccessor(copy0MBB);
12697     BB->addSuccessor(sinkMBB);
12698 
12699     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12700         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12701         MI.getOpcode() == PPC::SELECT_F16 ||
12702         MI.getOpcode() == PPC::SELECT_SPE4 ||
12703         MI.getOpcode() == PPC::SELECT_SPE ||
12704         MI.getOpcode() == PPC::SELECT_VRRC ||
12705         MI.getOpcode() == PPC::SELECT_VSFRC ||
12706         MI.getOpcode() == PPC::SELECT_VSSRC ||
12707         MI.getOpcode() == PPC::SELECT_VSRC) {
12708       BuildMI(BB, dl, TII->get(PPC::BC))
12709           .addReg(MI.getOperand(1).getReg())
12710           .addMBB(sinkMBB);
12711     } else {
12712       unsigned SelectPred = MI.getOperand(4).getImm();
12713       BuildMI(BB, dl, TII->get(PPC::BCC))
12714           .addImm(SelectPred)
12715           .addReg(MI.getOperand(1).getReg())
12716           .addMBB(sinkMBB);
12717     }
12718 
12719     //  copy0MBB:
12720     //   %FalseValue = ...
12721     //   # fallthrough to sinkMBB
12722     BB = copy0MBB;
12723 
12724     // Update machine-CFG edges
12725     BB->addSuccessor(sinkMBB);
12726 
12727     //  sinkMBB:
12728     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12729     //  ...
12730     BB = sinkMBB;
12731     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12732         .addReg(MI.getOperand(3).getReg())
12733         .addMBB(copy0MBB)
12734         .addReg(MI.getOperand(2).getReg())
12735         .addMBB(thisMBB);
12736   } else if (MI.getOpcode() == PPC::ReadTB) {
12737     // To read the 64-bit time-base register on a 32-bit target, we read the
12738     // two halves. Should the counter have wrapped while it was being read, we
12739     // need to try again.
12740     // ...
12741     // readLoop:
12742     // mfspr Rx,TBU # load from TBU
12743     // mfspr Ry,TB  # load from TB
12744     // mfspr Rz,TBU # load from TBU
12745     // cmpw crX,Rx,Rz # check if 'old'='new'
12746     // bne readLoop   # branch if they're not equal
12747     // ...
12748 
12749     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12750     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12751     DebugLoc dl = MI.getDebugLoc();
12752     F->insert(It, readMBB);
12753     F->insert(It, sinkMBB);
12754 
12755     // Transfer the remainder of BB and its successor edges to sinkMBB.
12756     sinkMBB->splice(sinkMBB->begin(), BB,
12757                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12758     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12759 
12760     BB->addSuccessor(readMBB);
12761     BB = readMBB;
12762 
12763     MachineRegisterInfo &RegInfo = F->getRegInfo();
12764     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12765     Register LoReg = MI.getOperand(0).getReg();
12766     Register HiReg = MI.getOperand(1).getReg();
12767 
12768     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12769     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12770     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12771 
12772     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12773 
12774     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12775         .addReg(HiReg)
12776         .addReg(ReadAgainReg);
12777     BuildMI(BB, dl, TII->get(PPC::BCC))
12778         .addImm(PPC::PRED_NE)
12779         .addReg(CmpReg)
12780         .addMBB(readMBB);
12781 
12782     BB->addSuccessor(readMBB);
12783     BB->addSuccessor(sinkMBB);
12784   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12785     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12786   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12787     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12788   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12789     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12790   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12791     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12792 
12793   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12794     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
12795   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12796     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
12797   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12798     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12799   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12800     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12801 
12802   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12803     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
12804   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12805     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12806   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12807     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12808   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12809     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12810 
12811   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12812     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
12813   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12814     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
12815   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12816     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12817   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12818     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12819 
12820   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12821     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12822   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12823     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12824   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12825     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12826   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12827     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12828 
12829   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12830     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12831   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12832     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12833   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12834     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12835   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12836     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12837 
12838   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12839     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
12840   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12841     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
12842   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12843     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
12844   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12845     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
12846 
12847   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12848     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
12849   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12850     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
12851   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12852     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
12853   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12854     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
12855 
12856   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12857     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
12858   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12859     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
12860   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12861     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
12862   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12863     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
12864 
12865   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12866     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
12867   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12868     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
12869   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12870     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
12871   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12872     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
12873 
12874   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12875     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12876   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12877     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12878   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12879     BB = EmitAtomicBinary(MI, BB, 4, 0);
12880   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12881     BB = EmitAtomicBinary(MI, BB, 8, 0);
12882   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12883            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12884            (Subtarget.hasPartwordAtomics() &&
12885             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12886            (Subtarget.hasPartwordAtomics() &&
12887             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12888     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12889 
12890     auto LoadMnemonic = PPC::LDARX;
12891     auto StoreMnemonic = PPC::STDCX;
12892     switch (MI.getOpcode()) {
12893     default:
12894       llvm_unreachable("Compare and swap of unknown size");
12895     case PPC::ATOMIC_CMP_SWAP_I8:
12896       LoadMnemonic = PPC::LBARX;
12897       StoreMnemonic = PPC::STBCX;
12898       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12899       break;
12900     case PPC::ATOMIC_CMP_SWAP_I16:
12901       LoadMnemonic = PPC::LHARX;
12902       StoreMnemonic = PPC::STHCX;
12903       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12904       break;
12905     case PPC::ATOMIC_CMP_SWAP_I32:
12906       LoadMnemonic = PPC::LWARX;
12907       StoreMnemonic = PPC::STWCX;
12908       break;
12909     case PPC::ATOMIC_CMP_SWAP_I64:
12910       LoadMnemonic = PPC::LDARX;
12911       StoreMnemonic = PPC::STDCX;
12912       break;
12913     }
12914     MachineRegisterInfo &RegInfo = F->getRegInfo();
12915     Register dest = MI.getOperand(0).getReg();
12916     Register ptrA = MI.getOperand(1).getReg();
12917     Register ptrB = MI.getOperand(2).getReg();
12918     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12919     Register oldval = MI.getOperand(3).getReg();
12920     Register newval = MI.getOperand(4).getReg();
12921     DebugLoc dl = MI.getDebugLoc();
12922 
12923     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12924     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12925     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12926     F->insert(It, loop1MBB);
12927     F->insert(It, loop2MBB);
12928     F->insert(It, exitMBB);
12929     exitMBB->splice(exitMBB->begin(), BB,
12930                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12931     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12932 
12933     //  thisMBB:
12934     //   ...
12935     //   fallthrough --> loopMBB
12936     BB->addSuccessor(loop1MBB);
12937 
12938     // loop1MBB:
12939     //   l[bhwd]arx dest, ptr
12940     //   cmp[wd] dest, oldval
12941     //   bne- exitBB
12942     // loop2MBB:
12943     //   st[bhwd]cx. newval, ptr
12944     //   bne- loopMBB
12945     //   b exitBB
12946     // exitBB:
12947     BB = loop1MBB;
12948     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12949     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
12950         .addReg(dest)
12951         .addReg(oldval);
12952     BuildMI(BB, dl, TII->get(PPC::BCC))
12953         .addImm(PPC::PRED_NE)
12954         .addReg(CrReg)
12955         .addMBB(exitMBB);
12956     BB->addSuccessor(loop2MBB);
12957     BB->addSuccessor(exitMBB);
12958 
12959     BB = loop2MBB;
12960     BuildMI(BB, dl, TII->get(StoreMnemonic))
12961         .addReg(newval)
12962         .addReg(ptrA)
12963         .addReg(ptrB);
12964     BuildMI(BB, dl, TII->get(PPC::BCC))
12965         .addImm(PPC::PRED_NE)
12966         .addReg(PPC::CR0)
12967         .addMBB(loop1MBB);
12968     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12969     BB->addSuccessor(loop1MBB);
12970     BB->addSuccessor(exitMBB);
12971 
12972     //  exitMBB:
12973     //   ...
12974     BB = exitMBB;
12975   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12976              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12977     // We must use 64-bit registers for addresses when targeting 64-bit,
12978     // since we're actually doing arithmetic on them.  Other registers
12979     // can be 32-bit.
12980     bool is64bit = Subtarget.isPPC64();
12981     bool isLittleEndian = Subtarget.isLittleEndian();
12982     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12983 
12984     Register dest = MI.getOperand(0).getReg();
12985     Register ptrA = MI.getOperand(1).getReg();
12986     Register ptrB = MI.getOperand(2).getReg();
12987     Register oldval = MI.getOperand(3).getReg();
12988     Register newval = MI.getOperand(4).getReg();
12989     DebugLoc dl = MI.getDebugLoc();
12990 
12991     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12992     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12993     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12994     F->insert(It, loop1MBB);
12995     F->insert(It, loop2MBB);
12996     F->insert(It, exitMBB);
12997     exitMBB->splice(exitMBB->begin(), BB,
12998                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
12999     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13000 
13001     MachineRegisterInfo &RegInfo = F->getRegInfo();
13002     const TargetRegisterClass *RC =
13003         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13004     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13005 
13006     Register PtrReg = RegInfo.createVirtualRegister(RC);
13007     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13008     Register ShiftReg =
13009         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13010     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13011     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13012     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13013     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13014     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13015     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13016     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13017     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13018     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13019     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13020     Register Ptr1Reg;
13021     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13022     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13023     Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13024     //  thisMBB:
13025     //   ...
13026     //   fallthrough --> loopMBB
13027     BB->addSuccessor(loop1MBB);
13028 
13029     // The 4-byte load must be aligned, while a char or short may be
13030     // anywhere in the word.  Hence all this nasty bookkeeping code.
13031     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
13032     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13033     //   xori shift, shift1, 24 [16]
13034     //   rlwinm ptr, ptr1, 0, 0, 29
13035     //   slw newval2, newval, shift
13036     //   slw oldval2, oldval,shift
13037     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13038     //   slw mask, mask2, shift
13039     //   and newval3, newval2, mask
13040     //   and oldval3, oldval2, mask
13041     // loop1MBB:
13042     //   lwarx tmpDest, ptr
13043     //   and tmp, tmpDest, mask
13044     //   cmpw tmp, oldval3
13045     //   bne- exitBB
13046     // loop2MBB:
13047     //   andc tmp2, tmpDest, mask
13048     //   or tmp4, tmp2, newval3
13049     //   stwcx. tmp4, ptr
13050     //   bne- loop1MBB
13051     //   b exitBB
13052     // exitBB:
13053     //   srw dest, tmpDest, shift
13054     if (ptrA != ZeroReg) {
13055       Ptr1Reg = RegInfo.createVirtualRegister(RC);
13056       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13057           .addReg(ptrA)
13058           .addReg(ptrB);
13059     } else {
13060       Ptr1Reg = ptrB;
13061     }
13062 
13063     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
13064     // mode.
13065     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13066         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13067         .addImm(3)
13068         .addImm(27)
13069         .addImm(is8bit ? 28 : 27);
13070     if (!isLittleEndian)
13071       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13072           .addReg(Shift1Reg)
13073           .addImm(is8bit ? 24 : 16);
13074     if (is64bit)
13075       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13076           .addReg(Ptr1Reg)
13077           .addImm(0)
13078           .addImm(61);
13079     else
13080       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13081           .addReg(Ptr1Reg)
13082           .addImm(0)
13083           .addImm(0)
13084           .addImm(29);
13085     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13086         .addReg(newval)
13087         .addReg(ShiftReg);
13088     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13089         .addReg(oldval)
13090         .addReg(ShiftReg);
13091     if (is8bit)
13092       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13093     else {
13094       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13095       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13096           .addReg(Mask3Reg)
13097           .addImm(65535);
13098     }
13099     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13100         .addReg(Mask2Reg)
13101         .addReg(ShiftReg);
13102     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13103         .addReg(NewVal2Reg)
13104         .addReg(MaskReg);
13105     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13106         .addReg(OldVal2Reg)
13107         .addReg(MaskReg);
13108 
13109     BB = loop1MBB;
13110     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13111         .addReg(ZeroReg)
13112         .addReg(PtrReg);
13113     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13114         .addReg(TmpDestReg)
13115         .addReg(MaskReg);
13116     BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13117         .addReg(TmpReg)
13118         .addReg(OldVal3Reg);
13119     BuildMI(BB, dl, TII->get(PPC::BCC))
13120         .addImm(PPC::PRED_NE)
13121         .addReg(CrReg)
13122         .addMBB(exitMBB);
13123     BB->addSuccessor(loop2MBB);
13124     BB->addSuccessor(exitMBB);
13125 
13126     BB = loop2MBB;
13127     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13128         .addReg(TmpDestReg)
13129         .addReg(MaskReg);
13130     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13131         .addReg(Tmp2Reg)
13132         .addReg(NewVal3Reg);
13133     BuildMI(BB, dl, TII->get(PPC::STWCX))
13134         .addReg(Tmp4Reg)
13135         .addReg(ZeroReg)
13136         .addReg(PtrReg);
13137     BuildMI(BB, dl, TII->get(PPC::BCC))
13138         .addImm(PPC::PRED_NE)
13139         .addReg(PPC::CR0)
13140         .addMBB(loop1MBB);
13141     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13142     BB->addSuccessor(loop1MBB);
13143     BB->addSuccessor(exitMBB);
13144 
13145     //  exitMBB:
13146     //   ...
13147     BB = exitMBB;
13148     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13149         .addReg(TmpReg)
13150         .addReg(ShiftReg);
13151   } else if (MI.getOpcode() == PPC::FADDrtz) {
13152     // This pseudo performs an FADD with rounding mode temporarily forced
13153     // to round-to-zero.  We emit this via custom inserter since the FPSCR
13154     // is not modeled at the SelectionDAG level.
13155     Register Dest = MI.getOperand(0).getReg();
13156     Register Src1 = MI.getOperand(1).getReg();
13157     Register Src2 = MI.getOperand(2).getReg();
13158     DebugLoc dl = MI.getDebugLoc();
13159 
13160     MachineRegisterInfo &RegInfo = F->getRegInfo();
13161     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13162 
13163     // Save FPSCR value.
13164     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13165 
13166     // Set rounding mode to round-to-zero.
13167     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13168         .addImm(31)
13169         .addReg(PPC::RM, RegState::ImplicitDefine);
13170 
13171     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13172         .addImm(30)
13173         .addReg(PPC::RM, RegState::ImplicitDefine);
13174 
13175     // Perform addition.
13176     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13177                    .addReg(Src1)
13178                    .addReg(Src2);
13179     if (MI.getFlag(MachineInstr::NoFPExcept))
13180       MIB.setMIFlag(MachineInstr::NoFPExcept);
13181 
13182     // Restore FPSCR value.
13183     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
13184   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13185              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13186              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13187              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13188     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13189                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13190                           ? PPC::ANDI8_rec
13191                           : PPC::ANDI_rec;
13192     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13193                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13194 
13195     MachineRegisterInfo &RegInfo = F->getRegInfo();
13196     Register Dest = RegInfo.createVirtualRegister(
13197         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13198 
13199     DebugLoc Dl = MI.getDebugLoc();
13200     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13201         .addReg(MI.getOperand(1).getReg())
13202         .addImm(1);
13203     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13204             MI.getOperand(0).getReg())
13205         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13206   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13207     DebugLoc Dl = MI.getDebugLoc();
13208     MachineRegisterInfo &RegInfo = F->getRegInfo();
13209     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13210     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13211     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13212             MI.getOperand(0).getReg())
13213         .addReg(CRReg);
13214   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13215     DebugLoc Dl = MI.getDebugLoc();
13216     unsigned Imm = MI.getOperand(1).getImm();
13217     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13218     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13219             MI.getOperand(0).getReg())
13220         .addReg(PPC::CR0EQ);
13221   } else if (MI.getOpcode() == PPC::SETRNDi) {
13222     DebugLoc dl = MI.getDebugLoc();
13223     Register OldFPSCRReg = MI.getOperand(0).getReg();
13224 
13225     // Save FPSCR value.
13226     if (MRI.use_empty(OldFPSCRReg))
13227       BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13228     else
13229       BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13230 
13231     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
13232     // the following settings:
13233     //   00 Round to nearest
13234     //   01 Round to 0
13235     //   10 Round to +inf
13236     //   11 Round to -inf
13237 
13238     // When the operand is immediate, using the two least significant bits of
13239     // the immediate to set the bits 62:63 of FPSCR.
13240     unsigned Mode = MI.getOperand(1).getImm();
13241     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13242         .addImm(31)
13243         .addReg(PPC::RM, RegState::ImplicitDefine);
13244 
13245     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13246         .addImm(30)
13247         .addReg(PPC::RM, RegState::ImplicitDefine);
13248   } else if (MI.getOpcode() == PPC::SETRND) {
13249     DebugLoc dl = MI.getDebugLoc();
13250 
13251     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13252     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13253     // If the target doesn't have DirectMove, we should use stack to do the
13254     // conversion, because the target doesn't have the instructions like mtvsrd
13255     // or mfvsrd to do this conversion directly.
13256     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13257       if (Subtarget.hasDirectMove()) {
13258         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13259           .addReg(SrcReg);
13260       } else {
13261         // Use stack to do the register copy.
13262         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13263         MachineRegisterInfo &RegInfo = F->getRegInfo();
13264         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13265         if (RC == &PPC::F8RCRegClass) {
13266           // Copy register from F8RCRegClass to G8RCRegclass.
13267           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13268                  "Unsupported RegClass.");
13269 
13270           StoreOp = PPC::STFD;
13271           LoadOp = PPC::LD;
13272         } else {
13273           // Copy register from G8RCRegClass to F8RCRegclass.
13274           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13275                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13276                  "Unsupported RegClass.");
13277         }
13278 
13279         MachineFrameInfo &MFI = F->getFrameInfo();
13280         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13281 
13282         MachineMemOperand *MMOStore = F->getMachineMemOperand(
13283             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13284             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13285             MFI.getObjectAlign(FrameIdx));
13286 
13287         // Store the SrcReg into the stack.
13288         BuildMI(*BB, MI, dl, TII->get(StoreOp))
13289           .addReg(SrcReg)
13290           .addImm(0)
13291           .addFrameIndex(FrameIdx)
13292           .addMemOperand(MMOStore);
13293 
13294         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13295             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13296             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13297             MFI.getObjectAlign(FrameIdx));
13298 
13299         // Load from the stack where SrcReg is stored, and save to DestReg,
13300         // so we have done the RegClass conversion from RegClass::SrcReg to
13301         // RegClass::DestReg.
13302         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13303           .addImm(0)
13304           .addFrameIndex(FrameIdx)
13305           .addMemOperand(MMOLoad);
13306       }
13307     };
13308 
13309     Register OldFPSCRReg = MI.getOperand(0).getReg();
13310 
13311     // Save FPSCR value.
13312     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13313 
13314     // When the operand is gprc register, use two least significant bits of the
13315     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
13316     //
13317     // copy OldFPSCRTmpReg, OldFPSCRReg
13318     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13319     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13320     // copy NewFPSCRReg, NewFPSCRTmpReg
13321     // mtfsf 255, NewFPSCRReg
13322     MachineOperand SrcOp = MI.getOperand(1);
13323     MachineRegisterInfo &RegInfo = F->getRegInfo();
13324     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13325 
13326     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13327 
13328     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13329     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13330 
13331     // The first operand of INSERT_SUBREG should be a register which has
13332     // subregisters, we only care about its RegClass, so we should use an
13333     // IMPLICIT_DEF register.
13334     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13335     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13336       .addReg(ImDefReg)
13337       .add(SrcOp)
13338       .addImm(1);
13339 
13340     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13341     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13342       .addReg(OldFPSCRTmpReg)
13343       .addReg(ExtSrcReg)
13344       .addImm(0)
13345       .addImm(62);
13346 
13347     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13348     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13349 
13350     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
13351     // bits of FPSCR.
13352     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13353       .addImm(255)
13354       .addReg(NewFPSCRReg)
13355       .addImm(0)
13356       .addImm(0);
13357   } else if (MI.getOpcode() == PPC::SETFLM) {
13358     DebugLoc Dl = MI.getDebugLoc();
13359 
13360     // Result of setflm is previous FPSCR content, so we need to save it first.
13361     Register OldFPSCRReg = MI.getOperand(0).getReg();
13362     if (MRI.use_empty(OldFPSCRReg))
13363       BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13364     else
13365       BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13366 
13367     // Put bits in 32:63 to FPSCR.
13368     Register NewFPSCRReg = MI.getOperand(1).getReg();
13369     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13370         .addImm(255)
13371         .addReg(NewFPSCRReg)
13372         .addImm(0)
13373         .addImm(0);
13374   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13375              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13376     return emitProbedAlloca(MI, BB);
13377   } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13378     DebugLoc DL = MI.getDebugLoc();
13379     Register Src = MI.getOperand(2).getReg();
13380     Register Lo = MI.getOperand(0).getReg();
13381     Register Hi = MI.getOperand(1).getReg();
13382     BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13383         .addDef(Lo)
13384         .addUse(Src, 0, PPC::sub_gp8_x1);
13385     BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13386         .addDef(Hi)
13387         .addUse(Src, 0, PPC::sub_gp8_x0);
13388   } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13389              MI.getOpcode() == PPC::STQX_PSEUDO) {
13390     DebugLoc DL = MI.getDebugLoc();
13391     // Ptr is used as the ptr_rc_no_r0 part
13392     // of LQ/STQ's memory operand and adding result of RA and RB,
13393     // so it has to be g8rc_and_g8rc_nox0.
13394     Register Ptr =
13395         F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13396     Register Val = MI.getOperand(0).getReg();
13397     Register RA = MI.getOperand(1).getReg();
13398     Register RB = MI.getOperand(2).getReg();
13399     BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13400     BuildMI(*BB, MI, DL,
13401             MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13402                                               : TII->get(PPC::STQ))
13403         .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13404         .addImm(0)
13405         .addReg(Ptr);
13406   } else {
13407     llvm_unreachable("Unexpected instr type to insert");
13408   }
13409 
13410   MI.eraseFromParent(); // The pseudo instruction is gone now.
13411   return BB;
13412 }
13413 
13414 //===----------------------------------------------------------------------===//
13415 // Target Optimization Hooks
13416 //===----------------------------------------------------------------------===//
13417 
13418 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13419   // For the estimates, convergence is quadratic, so we essentially double the
13420   // number of digits correct after every iteration. For both FRE and FRSQRTE,
13421   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13422   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13423   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13424   if (VT.getScalarType() == MVT::f64)
13425     RefinementSteps++;
13426   return RefinementSteps;
13427 }
13428 
13429 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13430                                             const DenormalMode &Mode) const {
13431   // We only have VSX Vector Test for software Square Root.
13432   EVT VT = Op.getValueType();
13433   if (!isTypeLegal(MVT::i1) ||
13434       (VT != MVT::f64 &&
13435        ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13436     return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13437 
13438   SDLoc DL(Op);
13439   // The output register of FTSQRT is CR field.
13440   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13441   // ftsqrt BF,FRB
13442   // Let e_b be the unbiased exponent of the double-precision
13443   // floating-point operand in register FRB.
13444   // fe_flag is set to 1 if either of the following conditions occurs.
13445   //   - The double-precision floating-point operand in register FRB is a zero,
13446   //     a NaN, or an infinity, or a negative value.
13447   //   - e_b is less than or equal to -970.
13448   // Otherwise fe_flag is set to 0.
13449   // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13450   // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13451   // exponent is less than -970)
13452   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13453   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13454                                     FTSQRT, SRIdxVal),
13455                  0);
13456 }
13457 
13458 SDValue
13459 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13460                                                SelectionDAG &DAG) const {
13461   // We only have VSX Vector Square Root.
13462   EVT VT = Op.getValueType();
13463   if (VT != MVT::f64 &&
13464       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13465     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13466 
13467   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13468 }
13469 
13470 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13471                                            int Enabled, int &RefinementSteps,
13472                                            bool &UseOneConstNR,
13473                                            bool Reciprocal) const {
13474   EVT VT = Operand.getValueType();
13475   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13476       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13477       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13478       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13479     if (RefinementSteps == ReciprocalEstimate::Unspecified)
13480       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13481 
13482     // The Newton-Raphson computation with a single constant does not provide
13483     // enough accuracy on some CPUs.
13484     UseOneConstNR = !Subtarget.needsTwoConstNR();
13485     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13486   }
13487   return SDValue();
13488 }
13489 
13490 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13491                                             int Enabled,
13492                                             int &RefinementSteps) const {
13493   EVT VT = Operand.getValueType();
13494   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13495       (VT == MVT::f64 && Subtarget.hasFRE()) ||
13496       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13497       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13498     if (RefinementSteps == ReciprocalEstimate::Unspecified)
13499       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13500     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13501   }
13502   return SDValue();
13503 }
13504 
13505 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13506   // Note: This functionality is used only when unsafe-fp-math is enabled, and
13507   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13508   // enabled for division), this functionality is redundant with the default
13509   // combiner logic (once the division -> reciprocal/multiply transformation
13510   // has taken place). As a result, this matters more for older cores than for
13511   // newer ones.
13512 
13513   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13514   // reciprocal if there are two or more FDIVs (for embedded cores with only
13515   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13516   switch (Subtarget.getCPUDirective()) {
13517   default:
13518     return 3;
13519   case PPC::DIR_440:
13520   case PPC::DIR_A2:
13521   case PPC::DIR_E500:
13522   case PPC::DIR_E500mc:
13523   case PPC::DIR_E5500:
13524     return 2;
13525   }
13526 }
13527 
13528 // isConsecutiveLSLoc needs to work even if all adds have not yet been
13529 // collapsed, and so we need to look through chains of them.
13530 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13531                                      int64_t& Offset, SelectionDAG &DAG) {
13532   if (DAG.isBaseWithConstantOffset(Loc)) {
13533     Base = Loc.getOperand(0);
13534     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13535 
13536     // The base might itself be a base plus an offset, and if so, accumulate
13537     // that as well.
13538     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13539   }
13540 }
13541 
13542 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13543                             unsigned Bytes, int Dist,
13544                             SelectionDAG &DAG) {
13545   if (VT.getSizeInBits() / 8 != Bytes)
13546     return false;
13547 
13548   SDValue BaseLoc = Base->getBasePtr();
13549   if (Loc.getOpcode() == ISD::FrameIndex) {
13550     if (BaseLoc.getOpcode() != ISD::FrameIndex)
13551       return false;
13552     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13553     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
13554     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13555     int FS  = MFI.getObjectSize(FI);
13556     int BFS = MFI.getObjectSize(BFI);
13557     if (FS != BFS || FS != (int)Bytes) return false;
13558     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13559   }
13560 
13561   SDValue Base1 = Loc, Base2 = BaseLoc;
13562   int64_t Offset1 = 0, Offset2 = 0;
13563   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13564   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13565   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13566     return true;
13567 
13568   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13569   const GlobalValue *GV1 = nullptr;
13570   const GlobalValue *GV2 = nullptr;
13571   Offset1 = 0;
13572   Offset2 = 0;
13573   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13574   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13575   if (isGA1 && isGA2 && GV1 == GV2)
13576     return Offset1 == (Offset2 + Dist*Bytes);
13577   return false;
13578 }
13579 
13580 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13581 // not enforce equality of the chain operands.
13582 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13583                             unsigned Bytes, int Dist,
13584                             SelectionDAG &DAG) {
13585   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13586     EVT VT = LS->getMemoryVT();
13587     SDValue Loc = LS->getBasePtr();
13588     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13589   }
13590 
13591   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13592     EVT VT;
13593     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13594     default: return false;
13595     case Intrinsic::ppc_altivec_lvx:
13596     case Intrinsic::ppc_altivec_lvxl:
13597     case Intrinsic::ppc_vsx_lxvw4x:
13598     case Intrinsic::ppc_vsx_lxvw4x_be:
13599       VT = MVT::v4i32;
13600       break;
13601     case Intrinsic::ppc_vsx_lxvd2x:
13602     case Intrinsic::ppc_vsx_lxvd2x_be:
13603       VT = MVT::v2f64;
13604       break;
13605     case Intrinsic::ppc_altivec_lvebx:
13606       VT = MVT::i8;
13607       break;
13608     case Intrinsic::ppc_altivec_lvehx:
13609       VT = MVT::i16;
13610       break;
13611     case Intrinsic::ppc_altivec_lvewx:
13612       VT = MVT::i32;
13613       break;
13614     }
13615 
13616     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13617   }
13618 
13619   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13620     EVT VT;
13621     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13622     default: return false;
13623     case Intrinsic::ppc_altivec_stvx:
13624     case Intrinsic::ppc_altivec_stvxl:
13625     case Intrinsic::ppc_vsx_stxvw4x:
13626       VT = MVT::v4i32;
13627       break;
13628     case Intrinsic::ppc_vsx_stxvd2x:
13629       VT = MVT::v2f64;
13630       break;
13631     case Intrinsic::ppc_vsx_stxvw4x_be:
13632       VT = MVT::v4i32;
13633       break;
13634     case Intrinsic::ppc_vsx_stxvd2x_be:
13635       VT = MVT::v2f64;
13636       break;
13637     case Intrinsic::ppc_altivec_stvebx:
13638       VT = MVT::i8;
13639       break;
13640     case Intrinsic::ppc_altivec_stvehx:
13641       VT = MVT::i16;
13642       break;
13643     case Intrinsic::ppc_altivec_stvewx:
13644       VT = MVT::i32;
13645       break;
13646     }
13647 
13648     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13649   }
13650 
13651   return false;
13652 }
13653 
13654 // Return true is there is a nearyby consecutive load to the one provided
13655 // (regardless of alignment). We search up and down the chain, looking though
13656 // token factors and other loads (but nothing else). As a result, a true result
13657 // indicates that it is safe to create a new consecutive load adjacent to the
13658 // load provided.
13659 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13660   SDValue Chain = LD->getChain();
13661   EVT VT = LD->getMemoryVT();
13662 
13663   SmallSet<SDNode *, 16> LoadRoots;
13664   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13665   SmallSet<SDNode *, 16> Visited;
13666 
13667   // First, search up the chain, branching to follow all token-factor operands.
13668   // If we find a consecutive load, then we're done, otherwise, record all
13669   // nodes just above the top-level loads and token factors.
13670   while (!Queue.empty()) {
13671     SDNode *ChainNext = Queue.pop_back_val();
13672     if (!Visited.insert(ChainNext).second)
13673       continue;
13674 
13675     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13676       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13677         return true;
13678 
13679       if (!Visited.count(ChainLD->getChain().getNode()))
13680         Queue.push_back(ChainLD->getChain().getNode());
13681     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13682       for (const SDUse &O : ChainNext->ops())
13683         if (!Visited.count(O.getNode()))
13684           Queue.push_back(O.getNode());
13685     } else
13686       LoadRoots.insert(ChainNext);
13687   }
13688 
13689   // Second, search down the chain, starting from the top-level nodes recorded
13690   // in the first phase. These top-level nodes are the nodes just above all
13691   // loads and token factors. Starting with their uses, recursively look though
13692   // all loads (just the chain uses) and token factors to find a consecutive
13693   // load.
13694   Visited.clear();
13695   Queue.clear();
13696 
13697   for (SDNode *I : LoadRoots) {
13698     Queue.push_back(I);
13699 
13700     while (!Queue.empty()) {
13701       SDNode *LoadRoot = Queue.pop_back_val();
13702       if (!Visited.insert(LoadRoot).second)
13703         continue;
13704 
13705       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13706         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13707           return true;
13708 
13709       for (SDNode *U : LoadRoot->uses())
13710         if (((isa<MemSDNode>(U) &&
13711               cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13712              U->getOpcode() == ISD::TokenFactor) &&
13713             !Visited.count(U))
13714           Queue.push_back(U);
13715     }
13716   }
13717 
13718   return false;
13719 }
13720 
13721 /// This function is called when we have proved that a SETCC node can be replaced
13722 /// by subtraction (and other supporting instructions) so that the result of
13723 /// comparison is kept in a GPR instead of CR. This function is purely for
13724 /// codegen purposes and has some flags to guide the codegen process.
13725 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13726                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13727   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13728 
13729   // Zero extend the operands to the largest legal integer. Originally, they
13730   // must be of a strictly smaller size.
13731   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13732                          DAG.getConstant(Size, DL, MVT::i32));
13733   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13734                          DAG.getConstant(Size, DL, MVT::i32));
13735 
13736   // Swap if needed. Depends on the condition code.
13737   if (Swap)
13738     std::swap(Op0, Op1);
13739 
13740   // Subtract extended integers.
13741   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13742 
13743   // Move the sign bit to the least significant position and zero out the rest.
13744   // Now the least significant bit carries the result of original comparison.
13745   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13746                              DAG.getConstant(Size - 1, DL, MVT::i32));
13747   auto Final = Shifted;
13748 
13749   // Complement the result if needed. Based on the condition code.
13750   if (Complement)
13751     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13752                         DAG.getConstant(1, DL, MVT::i64));
13753 
13754   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13755 }
13756 
13757 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13758                                                   DAGCombinerInfo &DCI) const {
13759   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13760 
13761   SelectionDAG &DAG = DCI.DAG;
13762   SDLoc DL(N);
13763 
13764   // Size of integers being compared has a critical role in the following
13765   // analysis, so we prefer to do this when all types are legal.
13766   if (!DCI.isAfterLegalizeDAG())
13767     return SDValue();
13768 
13769   // If all users of SETCC extend its value to a legal integer type
13770   // then we replace SETCC with a subtraction
13771   for (const SDNode *U : N->uses())
13772     if (U->getOpcode() != ISD::ZERO_EXTEND)
13773       return SDValue();
13774 
13775   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13776   auto OpSize = N->getOperand(0).getValueSizeInBits();
13777 
13778   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
13779 
13780   if (OpSize < Size) {
13781     switch (CC) {
13782     default: break;
13783     case ISD::SETULT:
13784       return generateEquivalentSub(N, Size, false, false, DL, DAG);
13785     case ISD::SETULE:
13786       return generateEquivalentSub(N, Size, true, true, DL, DAG);
13787     case ISD::SETUGT:
13788       return generateEquivalentSub(N, Size, false, true, DL, DAG);
13789     case ISD::SETUGE:
13790       return generateEquivalentSub(N, Size, true, false, DL, DAG);
13791     }
13792   }
13793 
13794   return SDValue();
13795 }
13796 
13797 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
13798                                                   DAGCombinerInfo &DCI) const {
13799   SelectionDAG &DAG = DCI.DAG;
13800   SDLoc dl(N);
13801 
13802   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
13803   // If we're tracking CR bits, we need to be careful that we don't have:
13804   //   trunc(binary-ops(zext(x), zext(y)))
13805   // or
13806   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
13807   // such that we're unnecessarily moving things into GPRs when it would be
13808   // better to keep them in CR bits.
13809 
13810   // Note that trunc here can be an actual i1 trunc, or can be the effective
13811   // truncation that comes from a setcc or select_cc.
13812   if (N->getOpcode() == ISD::TRUNCATE &&
13813       N->getValueType(0) != MVT::i1)
13814     return SDValue();
13815 
13816   if (N->getOperand(0).getValueType() != MVT::i32 &&
13817       N->getOperand(0).getValueType() != MVT::i64)
13818     return SDValue();
13819 
13820   if (N->getOpcode() == ISD::SETCC ||
13821       N->getOpcode() == ISD::SELECT_CC) {
13822     // If we're looking at a comparison, then we need to make sure that the
13823     // high bits (all except for the first) don't matter the result.
13824     ISD::CondCode CC =
13825       cast<CondCodeSDNode>(N->getOperand(
13826         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
13827     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
13828 
13829     if (ISD::isSignedIntSetCC(CC)) {
13830       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
13831           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
13832         return SDValue();
13833     } else if (ISD::isUnsignedIntSetCC(CC)) {
13834       if (!DAG.MaskedValueIsZero(N->getOperand(0),
13835                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
13836           !DAG.MaskedValueIsZero(N->getOperand(1),
13837                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
13838         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
13839                                              : SDValue());
13840     } else {
13841       // This is neither a signed nor an unsigned comparison, just make sure
13842       // that the high bits are equal.
13843       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
13844       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
13845 
13846       // We don't really care about what is known about the first bit (if
13847       // anything), so pretend that it is known zero for both to ensure they can
13848       // be compared as constants.
13849       Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
13850       Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
13851 
13852       if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
13853           Op1Known.getConstant() != Op2Known.getConstant())
13854         return SDValue();
13855     }
13856   }
13857 
13858   // We now know that the higher-order bits are irrelevant, we just need to
13859   // make sure that all of the intermediate operations are bit operations, and
13860   // all inputs are extensions.
13861   if (N->getOperand(0).getOpcode() != ISD::AND &&
13862       N->getOperand(0).getOpcode() != ISD::OR  &&
13863       N->getOperand(0).getOpcode() != ISD::XOR &&
13864       N->getOperand(0).getOpcode() != ISD::SELECT &&
13865       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
13866       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
13867       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
13868       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
13869       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
13870     return SDValue();
13871 
13872   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
13873       N->getOperand(1).getOpcode() != ISD::AND &&
13874       N->getOperand(1).getOpcode() != ISD::OR  &&
13875       N->getOperand(1).getOpcode() != ISD::XOR &&
13876       N->getOperand(1).getOpcode() != ISD::SELECT &&
13877       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
13878       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
13879       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
13880       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
13881       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
13882     return SDValue();
13883 
13884   SmallVector<SDValue, 4> Inputs;
13885   SmallVector<SDValue, 8> BinOps, PromOps;
13886   SmallPtrSet<SDNode *, 16> Visited;
13887 
13888   for (unsigned i = 0; i < 2; ++i) {
13889     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13890           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13891           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13892           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13893         isa<ConstantSDNode>(N->getOperand(i)))
13894       Inputs.push_back(N->getOperand(i));
13895     else
13896       BinOps.push_back(N->getOperand(i));
13897 
13898     if (N->getOpcode() == ISD::TRUNCATE)
13899       break;
13900   }
13901 
13902   // Visit all inputs, collect all binary operations (and, or, xor and
13903   // select) that are all fed by extensions.
13904   while (!BinOps.empty()) {
13905     SDValue BinOp = BinOps.pop_back_val();
13906 
13907     if (!Visited.insert(BinOp.getNode()).second)
13908       continue;
13909 
13910     PromOps.push_back(BinOp);
13911 
13912     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13913       // The condition of the select is not promoted.
13914       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13915         continue;
13916       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13917         continue;
13918 
13919       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13920             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13921             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
13922            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
13923           isa<ConstantSDNode>(BinOp.getOperand(i))) {
13924         Inputs.push_back(BinOp.getOperand(i));
13925       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13926                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
13927                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13928                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13929                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
13930                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13931                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
13932                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
13933                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
13934         BinOps.push_back(BinOp.getOperand(i));
13935       } else {
13936         // We have an input that is not an extension or another binary
13937         // operation; we'll abort this transformation.
13938         return SDValue();
13939       }
13940     }
13941   }
13942 
13943   // Make sure that this is a self-contained cluster of operations (which
13944   // is not quite the same thing as saying that everything has only one
13945   // use).
13946   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13947     if (isa<ConstantSDNode>(Inputs[i]))
13948       continue;
13949 
13950     for (const SDNode *User : Inputs[i].getNode()->uses()) {
13951       if (User != N && !Visited.count(User))
13952         return SDValue();
13953 
13954       // Make sure that we're not going to promote the non-output-value
13955       // operand(s) or SELECT or SELECT_CC.
13956       // FIXME: Although we could sometimes handle this, and it does occur in
13957       // practice that one of the condition inputs to the select is also one of
13958       // the outputs, we currently can't deal with this.
13959       if (User->getOpcode() == ISD::SELECT) {
13960         if (User->getOperand(0) == Inputs[i])
13961           return SDValue();
13962       } else if (User->getOpcode() == ISD::SELECT_CC) {
13963         if (User->getOperand(0) == Inputs[i] ||
13964             User->getOperand(1) == Inputs[i])
13965           return SDValue();
13966       }
13967     }
13968   }
13969 
13970   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13971     for (const SDNode *User : PromOps[i].getNode()->uses()) {
13972       if (User != N && !Visited.count(User))
13973         return SDValue();
13974 
13975       // Make sure that we're not going to promote the non-output-value
13976       // operand(s) or SELECT or SELECT_CC.
13977       // FIXME: Although we could sometimes handle this, and it does occur in
13978       // practice that one of the condition inputs to the select is also one of
13979       // the outputs, we currently can't deal with this.
13980       if (User->getOpcode() == ISD::SELECT) {
13981         if (User->getOperand(0) == PromOps[i])
13982           return SDValue();
13983       } else if (User->getOpcode() == ISD::SELECT_CC) {
13984         if (User->getOperand(0) == PromOps[i] ||
13985             User->getOperand(1) == PromOps[i])
13986           return SDValue();
13987       }
13988     }
13989   }
13990 
13991   // Replace all inputs with the extension operand.
13992   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13993     // Constants may have users outside the cluster of to-be-promoted nodes,
13994     // and so we need to replace those as we do the promotions.
13995     if (isa<ConstantSDNode>(Inputs[i]))
13996       continue;
13997     else
13998       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
13999   }
14000 
14001   std::list<HandleSDNode> PromOpHandles;
14002   for (auto &PromOp : PromOps)
14003     PromOpHandles.emplace_back(PromOp);
14004 
14005   // Replace all operations (these are all the same, but have a different
14006   // (i1) return type). DAG.getNode will validate that the types of
14007   // a binary operator match, so go through the list in reverse so that
14008   // we've likely promoted both operands first. Any intermediate truncations or
14009   // extensions disappear.
14010   while (!PromOpHandles.empty()) {
14011     SDValue PromOp = PromOpHandles.back().getValue();
14012     PromOpHandles.pop_back();
14013 
14014     if (PromOp.getOpcode() == ISD::TRUNCATE ||
14015         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14016         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14017         PromOp.getOpcode() == ISD::ANY_EXTEND) {
14018       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14019           PromOp.getOperand(0).getValueType() != MVT::i1) {
14020         // The operand is not yet ready (see comment below).
14021         PromOpHandles.emplace_front(PromOp);
14022         continue;
14023       }
14024 
14025       SDValue RepValue = PromOp.getOperand(0);
14026       if (isa<ConstantSDNode>(RepValue))
14027         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14028 
14029       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14030       continue;
14031     }
14032 
14033     unsigned C;
14034     switch (PromOp.getOpcode()) {
14035     default:             C = 0; break;
14036     case ISD::SELECT:    C = 1; break;
14037     case ISD::SELECT_CC: C = 2; break;
14038     }
14039 
14040     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14041          PromOp.getOperand(C).getValueType() != MVT::i1) ||
14042         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14043          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14044       // The to-be-promoted operands of this node have not yet been
14045       // promoted (this should be rare because we're going through the
14046       // list backward, but if one of the operands has several users in
14047       // this cluster of to-be-promoted nodes, it is possible).
14048       PromOpHandles.emplace_front(PromOp);
14049       continue;
14050     }
14051 
14052     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14053                                 PromOp.getNode()->op_end());
14054 
14055     // If there are any constant inputs, make sure they're replaced now.
14056     for (unsigned i = 0; i < 2; ++i)
14057       if (isa<ConstantSDNode>(Ops[C+i]))
14058         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14059 
14060     DAG.ReplaceAllUsesOfValueWith(PromOp,
14061       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14062   }
14063 
14064   // Now we're left with the initial truncation itself.
14065   if (N->getOpcode() == ISD::TRUNCATE)
14066     return N->getOperand(0);
14067 
14068   // Otherwise, this is a comparison. The operands to be compared have just
14069   // changed type (to i1), but everything else is the same.
14070   return SDValue(N, 0);
14071 }
14072 
14073 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14074                                                   DAGCombinerInfo &DCI) const {
14075   SelectionDAG &DAG = DCI.DAG;
14076   SDLoc dl(N);
14077 
14078   // If we're tracking CR bits, we need to be careful that we don't have:
14079   //   zext(binary-ops(trunc(x), trunc(y)))
14080   // or
14081   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
14082   // such that we're unnecessarily moving things into CR bits that can more
14083   // efficiently stay in GPRs. Note that if we're not certain that the high
14084   // bits are set as required by the final extension, we still may need to do
14085   // some masking to get the proper behavior.
14086 
14087   // This same functionality is important on PPC64 when dealing with
14088   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14089   // the return values of functions. Because it is so similar, it is handled
14090   // here as well.
14091 
14092   if (N->getValueType(0) != MVT::i32 &&
14093       N->getValueType(0) != MVT::i64)
14094     return SDValue();
14095 
14096   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14097         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14098     return SDValue();
14099 
14100   if (N->getOperand(0).getOpcode() != ISD::AND &&
14101       N->getOperand(0).getOpcode() != ISD::OR  &&
14102       N->getOperand(0).getOpcode() != ISD::XOR &&
14103       N->getOperand(0).getOpcode() != ISD::SELECT &&
14104       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14105     return SDValue();
14106 
14107   SmallVector<SDValue, 4> Inputs;
14108   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14109   SmallPtrSet<SDNode *, 16> Visited;
14110 
14111   // Visit all inputs, collect all binary operations (and, or, xor and
14112   // select) that are all fed by truncations.
14113   while (!BinOps.empty()) {
14114     SDValue BinOp = BinOps.pop_back_val();
14115 
14116     if (!Visited.insert(BinOp.getNode()).second)
14117       continue;
14118 
14119     PromOps.push_back(BinOp);
14120 
14121     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14122       // The condition of the select is not promoted.
14123       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14124         continue;
14125       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14126         continue;
14127 
14128       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14129           isa<ConstantSDNode>(BinOp.getOperand(i))) {
14130         Inputs.push_back(BinOp.getOperand(i));
14131       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14132                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
14133                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14134                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14135                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14136         BinOps.push_back(BinOp.getOperand(i));
14137       } else {
14138         // We have an input that is not a truncation or another binary
14139         // operation; we'll abort this transformation.
14140         return SDValue();
14141       }
14142     }
14143   }
14144 
14145   // The operands of a select that must be truncated when the select is
14146   // promoted because the operand is actually part of the to-be-promoted set.
14147   DenseMap<SDNode *, EVT> SelectTruncOp[2];
14148 
14149   // Make sure that this is a self-contained cluster of operations (which
14150   // is not quite the same thing as saying that everything has only one
14151   // use).
14152   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14153     if (isa<ConstantSDNode>(Inputs[i]))
14154       continue;
14155 
14156     for (SDNode *User : Inputs[i].getNode()->uses()) {
14157       if (User != N && !Visited.count(User))
14158         return SDValue();
14159 
14160       // If we're going to promote the non-output-value operand(s) or SELECT or
14161       // SELECT_CC, record them for truncation.
14162       if (User->getOpcode() == ISD::SELECT) {
14163         if (User->getOperand(0) == Inputs[i])
14164           SelectTruncOp[0].insert(std::make_pair(User,
14165                                     User->getOperand(0).getValueType()));
14166       } else if (User->getOpcode() == ISD::SELECT_CC) {
14167         if (User->getOperand(0) == Inputs[i])
14168           SelectTruncOp[0].insert(std::make_pair(User,
14169                                     User->getOperand(0).getValueType()));
14170         if (User->getOperand(1) == Inputs[i])
14171           SelectTruncOp[1].insert(std::make_pair(User,
14172                                     User->getOperand(1).getValueType()));
14173       }
14174     }
14175   }
14176 
14177   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14178     for (SDNode *User : PromOps[i].getNode()->uses()) {
14179       if (User != N && !Visited.count(User))
14180         return SDValue();
14181 
14182       // If we're going to promote the non-output-value operand(s) or SELECT or
14183       // SELECT_CC, record them for truncation.
14184       if (User->getOpcode() == ISD::SELECT) {
14185         if (User->getOperand(0) == PromOps[i])
14186           SelectTruncOp[0].insert(std::make_pair(User,
14187                                     User->getOperand(0).getValueType()));
14188       } else if (User->getOpcode() == ISD::SELECT_CC) {
14189         if (User->getOperand(0) == PromOps[i])
14190           SelectTruncOp[0].insert(std::make_pair(User,
14191                                     User->getOperand(0).getValueType()));
14192         if (User->getOperand(1) == PromOps[i])
14193           SelectTruncOp[1].insert(std::make_pair(User,
14194                                     User->getOperand(1).getValueType()));
14195       }
14196     }
14197   }
14198 
14199   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14200   bool ReallyNeedsExt = false;
14201   if (N->getOpcode() != ISD::ANY_EXTEND) {
14202     // If all of the inputs are not already sign/zero extended, then
14203     // we'll still need to do that at the end.
14204     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14205       if (isa<ConstantSDNode>(Inputs[i]))
14206         continue;
14207 
14208       unsigned OpBits =
14209         Inputs[i].getOperand(0).getValueSizeInBits();
14210       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14211 
14212       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14213            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14214                                   APInt::getHighBitsSet(OpBits,
14215                                                         OpBits-PromBits))) ||
14216           (N->getOpcode() == ISD::SIGN_EXTEND &&
14217            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14218              (OpBits-(PromBits-1)))) {
14219         ReallyNeedsExt = true;
14220         break;
14221       }
14222     }
14223   }
14224 
14225   // Replace all inputs, either with the truncation operand, or a
14226   // truncation or extension to the final output type.
14227   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14228     // Constant inputs need to be replaced with the to-be-promoted nodes that
14229     // use them because they might have users outside of the cluster of
14230     // promoted nodes.
14231     if (isa<ConstantSDNode>(Inputs[i]))
14232       continue;
14233 
14234     SDValue InSrc = Inputs[i].getOperand(0);
14235     if (Inputs[i].getValueType() == N->getValueType(0))
14236       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14237     else if (N->getOpcode() == ISD::SIGN_EXTEND)
14238       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14239         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14240     else if (N->getOpcode() == ISD::ZERO_EXTEND)
14241       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14242         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14243     else
14244       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14245         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14246   }
14247 
14248   std::list<HandleSDNode> PromOpHandles;
14249   for (auto &PromOp : PromOps)
14250     PromOpHandles.emplace_back(PromOp);
14251 
14252   // Replace all operations (these are all the same, but have a different
14253   // (promoted) return type). DAG.getNode will validate that the types of
14254   // a binary operator match, so go through the list in reverse so that
14255   // we've likely promoted both operands first.
14256   while (!PromOpHandles.empty()) {
14257     SDValue PromOp = PromOpHandles.back().getValue();
14258     PromOpHandles.pop_back();
14259 
14260     unsigned C;
14261     switch (PromOp.getOpcode()) {
14262     default:             C = 0; break;
14263     case ISD::SELECT:    C = 1; break;
14264     case ISD::SELECT_CC: C = 2; break;
14265     }
14266 
14267     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14268          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14269         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14270          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14271       // The to-be-promoted operands of this node have not yet been
14272       // promoted (this should be rare because we're going through the
14273       // list backward, but if one of the operands has several users in
14274       // this cluster of to-be-promoted nodes, it is possible).
14275       PromOpHandles.emplace_front(PromOp);
14276       continue;
14277     }
14278 
14279     // For SELECT and SELECT_CC nodes, we do a similar check for any
14280     // to-be-promoted comparison inputs.
14281     if (PromOp.getOpcode() == ISD::SELECT ||
14282         PromOp.getOpcode() == ISD::SELECT_CC) {
14283       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14284            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14285           (SelectTruncOp[1].count(PromOp.getNode()) &&
14286            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14287         PromOpHandles.emplace_front(PromOp);
14288         continue;
14289       }
14290     }
14291 
14292     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14293                                 PromOp.getNode()->op_end());
14294 
14295     // If this node has constant inputs, then they'll need to be promoted here.
14296     for (unsigned i = 0; i < 2; ++i) {
14297       if (!isa<ConstantSDNode>(Ops[C+i]))
14298         continue;
14299       if (Ops[C+i].getValueType() == N->getValueType(0))
14300         continue;
14301 
14302       if (N->getOpcode() == ISD::SIGN_EXTEND)
14303         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14304       else if (N->getOpcode() == ISD::ZERO_EXTEND)
14305         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14306       else
14307         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14308     }
14309 
14310     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14311     // truncate them again to the original value type.
14312     if (PromOp.getOpcode() == ISD::SELECT ||
14313         PromOp.getOpcode() == ISD::SELECT_CC) {
14314       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14315       if (SI0 != SelectTruncOp[0].end())
14316         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14317       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14318       if (SI1 != SelectTruncOp[1].end())
14319         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14320     }
14321 
14322     DAG.ReplaceAllUsesOfValueWith(PromOp,
14323       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14324   }
14325 
14326   // Now we're left with the initial extension itself.
14327   if (!ReallyNeedsExt)
14328     return N->getOperand(0);
14329 
14330   // To zero extend, just mask off everything except for the first bit (in the
14331   // i1 case).
14332   if (N->getOpcode() == ISD::ZERO_EXTEND)
14333     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14334                        DAG.getConstant(APInt::getLowBitsSet(
14335                                          N->getValueSizeInBits(0), PromBits),
14336                                        dl, N->getValueType(0)));
14337 
14338   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14339          "Invalid extension type");
14340   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14341   SDValue ShiftCst =
14342       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14343   return DAG.getNode(
14344       ISD::SRA, dl, N->getValueType(0),
14345       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14346       ShiftCst);
14347 }
14348 
14349 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14350                                         DAGCombinerInfo &DCI) const {
14351   assert(N->getOpcode() == ISD::SETCC &&
14352          "Should be called with a SETCC node");
14353 
14354   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14355   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14356     SDValue LHS = N->getOperand(0);
14357     SDValue RHS = N->getOperand(1);
14358 
14359     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14360     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14361         LHS.hasOneUse())
14362       std::swap(LHS, RHS);
14363 
14364     // x == 0-y --> x+y == 0
14365     // x != 0-y --> x+y != 0
14366     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14367         RHS.hasOneUse()) {
14368       SDLoc DL(N);
14369       SelectionDAG &DAG = DCI.DAG;
14370       EVT VT = N->getValueType(0);
14371       EVT OpVT = LHS.getValueType();
14372       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14373       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14374     }
14375   }
14376 
14377   return DAGCombineTruncBoolExt(N, DCI);
14378 }
14379 
14380 // Is this an extending load from an f32 to an f64?
14381 static bool isFPExtLoad(SDValue Op) {
14382   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14383     return LD->getExtensionType() == ISD::EXTLOAD &&
14384       Op.getValueType() == MVT::f64;
14385   return false;
14386 }
14387 
14388 /// Reduces the number of fp-to-int conversion when building a vector.
14389 ///
14390 /// If this vector is built out of floating to integer conversions,
14391 /// transform it to a vector built out of floating point values followed by a
14392 /// single floating to integer conversion of the vector.
14393 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
14394 /// becomes (fptosi (build_vector ($A, $B, ...)))
14395 SDValue PPCTargetLowering::
14396 combineElementTruncationToVectorTruncation(SDNode *N,
14397                                            DAGCombinerInfo &DCI) const {
14398   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14399          "Should be called with a BUILD_VECTOR node");
14400 
14401   SelectionDAG &DAG = DCI.DAG;
14402   SDLoc dl(N);
14403 
14404   SDValue FirstInput = N->getOperand(0);
14405   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14406          "The input operand must be an fp-to-int conversion.");
14407 
14408   // This combine happens after legalization so the fp_to_[su]i nodes are
14409   // already converted to PPCSISD nodes.
14410   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14411   if (FirstConversion == PPCISD::FCTIDZ ||
14412       FirstConversion == PPCISD::FCTIDUZ ||
14413       FirstConversion == PPCISD::FCTIWZ ||
14414       FirstConversion == PPCISD::FCTIWUZ) {
14415     bool IsSplat = true;
14416     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14417       FirstConversion == PPCISD::FCTIWUZ;
14418     EVT SrcVT = FirstInput.getOperand(0).getValueType();
14419     SmallVector<SDValue, 4> Ops;
14420     EVT TargetVT = N->getValueType(0);
14421     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14422       SDValue NextOp = N->getOperand(i);
14423       if (NextOp.getOpcode() != PPCISD::MFVSR)
14424         return SDValue();
14425       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14426       if (NextConversion != FirstConversion)
14427         return SDValue();
14428       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14429       // This is not valid if the input was originally double precision. It is
14430       // also not profitable to do unless this is an extending load in which
14431       // case doing this combine will allow us to combine consecutive loads.
14432       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14433         return SDValue();
14434       if (N->getOperand(i) != FirstInput)
14435         IsSplat = false;
14436     }
14437 
14438     // If this is a splat, we leave it as-is since there will be only a single
14439     // fp-to-int conversion followed by a splat of the integer. This is better
14440     // for 32-bit and smaller ints and neutral for 64-bit ints.
14441     if (IsSplat)
14442       return SDValue();
14443 
14444     // Now that we know we have the right type of node, get its operands
14445     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14446       SDValue In = N->getOperand(i).getOperand(0);
14447       if (Is32Bit) {
14448         // For 32-bit values, we need to add an FP_ROUND node (if we made it
14449         // here, we know that all inputs are extending loads so this is safe).
14450         if (In.isUndef())
14451           Ops.push_back(DAG.getUNDEF(SrcVT));
14452         else {
14453           SDValue Trunc =
14454               DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14455                           DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14456           Ops.push_back(Trunc);
14457         }
14458       } else
14459         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14460     }
14461 
14462     unsigned Opcode;
14463     if (FirstConversion == PPCISD::FCTIDZ ||
14464         FirstConversion == PPCISD::FCTIWZ)
14465       Opcode = ISD::FP_TO_SINT;
14466     else
14467       Opcode = ISD::FP_TO_UINT;
14468 
14469     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14470     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14471     return DAG.getNode(Opcode, dl, TargetVT, BV);
14472   }
14473   return SDValue();
14474 }
14475 
14476 /// Reduce the number of loads when building a vector.
14477 ///
14478 /// Building a vector out of multiple loads can be converted to a load
14479 /// of the vector type if the loads are consecutive. If the loads are
14480 /// consecutive but in descending order, a shuffle is added at the end
14481 /// to reorder the vector.
14482 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14483   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14484          "Should be called with a BUILD_VECTOR node");
14485 
14486   SDLoc dl(N);
14487 
14488   // Return early for non byte-sized type, as they can't be consecutive.
14489   if (!N->getValueType(0).getVectorElementType().isByteSized())
14490     return SDValue();
14491 
14492   bool InputsAreConsecutiveLoads = true;
14493   bool InputsAreReverseConsecutive = true;
14494   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14495   SDValue FirstInput = N->getOperand(0);
14496   bool IsRoundOfExtLoad = false;
14497   LoadSDNode *FirstLoad = nullptr;
14498 
14499   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14500       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14501     FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14502     IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14503   }
14504   // Not a build vector of (possibly fp_rounded) loads.
14505   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14506       N->getNumOperands() == 1)
14507     return SDValue();
14508 
14509   if (!IsRoundOfExtLoad)
14510     FirstLoad = cast<LoadSDNode>(FirstInput);
14511 
14512   SmallVector<LoadSDNode *, 4> InputLoads;
14513   InputLoads.push_back(FirstLoad);
14514   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14515     // If any inputs are fp_round(extload), they all must be.
14516     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14517       return SDValue();
14518 
14519     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14520       N->getOperand(i);
14521     if (NextInput.getOpcode() != ISD::LOAD)
14522       return SDValue();
14523 
14524     SDValue PreviousInput =
14525       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14526     LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14527     LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14528 
14529     // If any inputs are fp_round(extload), they all must be.
14530     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14531       return SDValue();
14532 
14533     // We only care about regular loads. The PPC-specific load intrinsics
14534     // will not lead to a merge opportunity.
14535     if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14536       InputsAreConsecutiveLoads = false;
14537     if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14538       InputsAreReverseConsecutive = false;
14539 
14540     // Exit early if the loads are neither consecutive nor reverse consecutive.
14541     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14542       return SDValue();
14543     InputLoads.push_back(LD2);
14544   }
14545 
14546   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14547          "The loads cannot be both consecutive and reverse consecutive.");
14548 
14549   SDValue WideLoad;
14550   SDValue ReturnSDVal;
14551   if (InputsAreConsecutiveLoads) {
14552     assert(FirstLoad && "Input needs to be a LoadSDNode.");
14553     WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14554                            FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14555                            FirstLoad->getAlign());
14556     ReturnSDVal = WideLoad;
14557   } else if (InputsAreReverseConsecutive) {
14558     LoadSDNode *LastLoad = InputLoads.back();
14559     assert(LastLoad && "Input needs to be a LoadSDNode.");
14560     WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14561                            LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14562                            LastLoad->getAlign());
14563     SmallVector<int, 16> Ops;
14564     for (int i = N->getNumOperands() - 1; i >= 0; i--)
14565       Ops.push_back(i);
14566 
14567     ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14568                                        DAG.getUNDEF(N->getValueType(0)), Ops);
14569   } else
14570     return SDValue();
14571 
14572   for (auto *LD : InputLoads)
14573     DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14574   return ReturnSDVal;
14575 }
14576 
14577 // This function adds the required vector_shuffle needed to get
14578 // the elements of the vector extract in the correct position
14579 // as specified by the CorrectElems encoding.
14580 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14581                                       SDValue Input, uint64_t Elems,
14582                                       uint64_t CorrectElems) {
14583   SDLoc dl(N);
14584 
14585   unsigned NumElems = Input.getValueType().getVectorNumElements();
14586   SmallVector<int, 16> ShuffleMask(NumElems, -1);
14587 
14588   // Knowing the element indices being extracted from the original
14589   // vector and the order in which they're being inserted, just put
14590   // them at element indices required for the instruction.
14591   for (unsigned i = 0; i < N->getNumOperands(); i++) {
14592     if (DAG.getDataLayout().isLittleEndian())
14593       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14594     else
14595       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14596     CorrectElems = CorrectElems >> 8;
14597     Elems = Elems >> 8;
14598   }
14599 
14600   SDValue Shuffle =
14601       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14602                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14603 
14604   EVT VT = N->getValueType(0);
14605   SDValue Conv = DAG.getBitcast(VT, Shuffle);
14606 
14607   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14608                                Input.getValueType().getVectorElementType(),
14609                                VT.getVectorNumElements());
14610   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14611                      DAG.getValueType(ExtVT));
14612 }
14613 
14614 // Look for build vector patterns where input operands come from sign
14615 // extended vector_extract elements of specific indices. If the correct indices
14616 // aren't used, add a vector shuffle to fix up the indices and create
14617 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14618 // during instruction selection.
14619 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14620   // This array encodes the indices that the vector sign extend instructions
14621   // extract from when extending from one type to another for both BE and LE.
14622   // The right nibble of each byte corresponds to the LE incides.
14623   // and the left nibble of each byte corresponds to the BE incides.
14624   // For example: 0x3074B8FC  byte->word
14625   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14626   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14627   // For example: 0x000070F8  byte->double word
14628   // For LE: the allowed indices are: 0x0,0x8
14629   // For BE: the allowed indices are: 0x7,0xF
14630   uint64_t TargetElems[] = {
14631       0x3074B8FC, // b->w
14632       0x000070F8, // b->d
14633       0x10325476, // h->w
14634       0x00003074, // h->d
14635       0x00001032, // w->d
14636   };
14637 
14638   uint64_t Elems = 0;
14639   int Index;
14640   SDValue Input;
14641 
14642   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14643     if (!Op)
14644       return false;
14645     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14646         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14647       return false;
14648 
14649     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14650     // of the right width.
14651     SDValue Extract = Op.getOperand(0);
14652     if (Extract.getOpcode() == ISD::ANY_EXTEND)
14653       Extract = Extract.getOperand(0);
14654     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14655       return false;
14656 
14657     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14658     if (!ExtOp)
14659       return false;
14660 
14661     Index = ExtOp->getZExtValue();
14662     if (Input && Input != Extract.getOperand(0))
14663       return false;
14664 
14665     if (!Input)
14666       Input = Extract.getOperand(0);
14667 
14668     Elems = Elems << 8;
14669     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14670     Elems |= Index;
14671 
14672     return true;
14673   };
14674 
14675   // If the build vector operands aren't sign extended vector extracts,
14676   // of the same input vector, then return.
14677   for (unsigned i = 0; i < N->getNumOperands(); i++) {
14678     if (!isSExtOfVecExtract(N->getOperand(i))) {
14679       return SDValue();
14680     }
14681   }
14682 
14683   // If the vector extract indicies are not correct, add the appropriate
14684   // vector_shuffle.
14685   int TgtElemArrayIdx;
14686   int InputSize = Input.getValueType().getScalarSizeInBits();
14687   int OutputSize = N->getValueType(0).getScalarSizeInBits();
14688   if (InputSize + OutputSize == 40)
14689     TgtElemArrayIdx = 0;
14690   else if (InputSize + OutputSize == 72)
14691     TgtElemArrayIdx = 1;
14692   else if (InputSize + OutputSize == 48)
14693     TgtElemArrayIdx = 2;
14694   else if (InputSize + OutputSize == 80)
14695     TgtElemArrayIdx = 3;
14696   else if (InputSize + OutputSize == 96)
14697     TgtElemArrayIdx = 4;
14698   else
14699     return SDValue();
14700 
14701   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14702   CorrectElems = DAG.getDataLayout().isLittleEndian()
14703                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14704                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14705   if (Elems != CorrectElems) {
14706     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14707   }
14708 
14709   // Regular lowering will catch cases where a shuffle is not needed.
14710   return SDValue();
14711 }
14712 
14713 // Look for the pattern of a load from a narrow width to i128, feeding
14714 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14715 // (LXVRZX). This node represents a zero extending load that will be matched
14716 // to the Load VSX Vector Rightmost instructions.
14717 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14718   SDLoc DL(N);
14719 
14720   // This combine is only eligible for a BUILD_VECTOR of v1i128.
14721   if (N->getValueType(0) != MVT::v1i128)
14722     return SDValue();
14723 
14724   SDValue Operand = N->getOperand(0);
14725   // Proceed with the transformation if the operand to the BUILD_VECTOR
14726   // is a load instruction.
14727   if (Operand.getOpcode() != ISD::LOAD)
14728     return SDValue();
14729 
14730   auto *LD = cast<LoadSDNode>(Operand);
14731   EVT MemoryType = LD->getMemoryVT();
14732 
14733   // This transformation is only valid if the we are loading either a byte,
14734   // halfword, word, or doubleword.
14735   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14736                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
14737 
14738   // Ensure that the load from the narrow width is being zero extended to i128.
14739   if (!ValidLDType ||
14740       (LD->getExtensionType() != ISD::ZEXTLOAD &&
14741        LD->getExtensionType() != ISD::EXTLOAD))
14742     return SDValue();
14743 
14744   SDValue LoadOps[] = {
14745       LD->getChain(), LD->getBasePtr(),
14746       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14747 
14748   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14749                                  DAG.getVTList(MVT::v1i128, MVT::Other),
14750                                  LoadOps, MemoryType, LD->getMemOperand());
14751 }
14752 
14753 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14754                                                  DAGCombinerInfo &DCI) const {
14755   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14756          "Should be called with a BUILD_VECTOR node");
14757 
14758   SelectionDAG &DAG = DCI.DAG;
14759   SDLoc dl(N);
14760 
14761   if (!Subtarget.hasVSX())
14762     return SDValue();
14763 
14764   // The target independent DAG combiner will leave a build_vector of
14765   // float-to-int conversions intact. We can generate MUCH better code for
14766   // a float-to-int conversion of a vector of floats.
14767   SDValue FirstInput = N->getOperand(0);
14768   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14769     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14770     if (Reduced)
14771       return Reduced;
14772   }
14773 
14774   // If we're building a vector out of consecutive loads, just load that
14775   // vector type.
14776   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14777   if (Reduced)
14778     return Reduced;
14779 
14780   // If we're building a vector out of extended elements from another vector
14781   // we have P9 vector integer extend instructions. The code assumes legal
14782   // input types (i.e. it can't handle things like v4i16) so do not run before
14783   // legalization.
14784   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14785     Reduced = combineBVOfVecSExt(N, DAG);
14786     if (Reduced)
14787       return Reduced;
14788   }
14789 
14790   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14791   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14792   // is a load from <valid narrow width> to i128.
14793   if (Subtarget.isISA3_1()) {
14794     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14795     if (BVOfZLoad)
14796       return BVOfZLoad;
14797   }
14798 
14799   if (N->getValueType(0) != MVT::v2f64)
14800     return SDValue();
14801 
14802   // Looking for:
14803   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14804   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14805       FirstInput.getOpcode() != ISD::UINT_TO_FP)
14806     return SDValue();
14807   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14808       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14809     return SDValue();
14810   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14811     return SDValue();
14812 
14813   SDValue Ext1 = FirstInput.getOperand(0);
14814   SDValue Ext2 = N->getOperand(1).getOperand(0);
14815   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14816      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14817     return SDValue();
14818 
14819   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14820   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14821   if (!Ext1Op || !Ext2Op)
14822     return SDValue();
14823   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14824       Ext1.getOperand(0) != Ext2.getOperand(0))
14825     return SDValue();
14826 
14827   int FirstElem = Ext1Op->getZExtValue();
14828   int SecondElem = Ext2Op->getZExtValue();
14829   int SubvecIdx;
14830   if (FirstElem == 0 && SecondElem == 1)
14831     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14832   else if (FirstElem == 2 && SecondElem == 3)
14833     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14834   else
14835     return SDValue();
14836 
14837   SDValue SrcVec = Ext1.getOperand(0);
14838   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14839     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
14840   return DAG.getNode(NodeType, dl, MVT::v2f64,
14841                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14842 }
14843 
14844 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14845                                               DAGCombinerInfo &DCI) const {
14846   assert((N->getOpcode() == ISD::SINT_TO_FP ||
14847           N->getOpcode() == ISD::UINT_TO_FP) &&
14848          "Need an int -> FP conversion node here");
14849 
14850   if (useSoftFloat() || !Subtarget.has64BitSupport())
14851     return SDValue();
14852 
14853   SelectionDAG &DAG = DCI.DAG;
14854   SDLoc dl(N);
14855   SDValue Op(N, 0);
14856 
14857   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14858   // from the hardware.
14859   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14860     return SDValue();
14861   if (!Op.getOperand(0).getValueType().isSimple())
14862     return SDValue();
14863   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14864       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14865     return SDValue();
14866 
14867   SDValue FirstOperand(Op.getOperand(0));
14868   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14869     (FirstOperand.getValueType() == MVT::i8 ||
14870      FirstOperand.getValueType() == MVT::i16);
14871   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14872     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14873     bool DstDouble = Op.getValueType() == MVT::f64;
14874     unsigned ConvOp = Signed ?
14875       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
14876       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14877     SDValue WidthConst =
14878       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14879                             dl, false);
14880     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14881     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14882     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
14883                                          DAG.getVTList(MVT::f64, MVT::Other),
14884                                          Ops, MVT::i8, LDN->getMemOperand());
14885 
14886     // For signed conversion, we need to sign-extend the value in the VSR
14887     if (Signed) {
14888       SDValue ExtOps[] = { Ld, WidthConst };
14889       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14890       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14891     } else
14892       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14893   }
14894 
14895 
14896   // For i32 intermediate values, unfortunately, the conversion functions
14897   // leave the upper 32 bits of the value are undefined. Within the set of
14898   // scalar instructions, we have no method for zero- or sign-extending the
14899   // value. Thus, we cannot handle i32 intermediate values here.
14900   if (Op.getOperand(0).getValueType() == MVT::i32)
14901     return SDValue();
14902 
14903   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14904          "UINT_TO_FP is supported only with FPCVT");
14905 
14906   // If we have FCFIDS, then use it when converting to single-precision.
14907   // Otherwise, convert to double-precision and then round.
14908   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14909                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14910                                                             : PPCISD::FCFIDS)
14911                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14912                                                             : PPCISD::FCFID);
14913   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14914                   ? MVT::f32
14915                   : MVT::f64;
14916 
14917   // If we're converting from a float, to an int, and back to a float again,
14918   // then we don't need the store/load pair at all.
14919   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14920        Subtarget.hasFPCVT()) ||
14921       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14922     SDValue Src = Op.getOperand(0).getOperand(0);
14923     if (Src.getValueType() == MVT::f32) {
14924       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14925       DCI.AddToWorklist(Src.getNode());
14926     } else if (Src.getValueType() != MVT::f64) {
14927       // Make sure that we don't pick up a ppc_fp128 source value.
14928       return SDValue();
14929     }
14930 
14931     unsigned FCTOp =
14932       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14933                                                         PPCISD::FCTIDUZ;
14934 
14935     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14936     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14937 
14938     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14939       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
14940                        DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
14941       DCI.AddToWorklist(FP.getNode());
14942     }
14943 
14944     return FP;
14945   }
14946 
14947   return SDValue();
14948 }
14949 
14950 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14951 // builtins) into loads with swaps.
14952 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
14953                                               DAGCombinerInfo &DCI) const {
14954   // Delay VSX load for LE combine until after LegalizeOps to prioritize other
14955   // load combines.
14956   if (DCI.isBeforeLegalizeOps())
14957     return SDValue();
14958 
14959   SelectionDAG &DAG = DCI.DAG;
14960   SDLoc dl(N);
14961   SDValue Chain;
14962   SDValue Base;
14963   MachineMemOperand *MMO;
14964 
14965   switch (N->getOpcode()) {
14966   default:
14967     llvm_unreachable("Unexpected opcode for little endian VSX load");
14968   case ISD::LOAD: {
14969     LoadSDNode *LD = cast<LoadSDNode>(N);
14970     Chain = LD->getChain();
14971     Base = LD->getBasePtr();
14972     MMO = LD->getMemOperand();
14973     // If the MMO suggests this isn't a load of a full vector, leave
14974     // things alone.  For a built-in, we have to make the change for
14975     // correctness, so if there is a size problem that will be a bug.
14976     if (MMO->getSize() < 16)
14977       return SDValue();
14978     break;
14979   }
14980   case ISD::INTRINSIC_W_CHAIN: {
14981     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14982     Chain = Intrin->getChain();
14983     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14984     // us what we want. Get operand 2 instead.
14985     Base = Intrin->getOperand(2);
14986     MMO = Intrin->getMemOperand();
14987     break;
14988   }
14989   }
14990 
14991   MVT VecTy = N->getValueType(0).getSimpleVT();
14992 
14993   SDValue LoadOps[] = { Chain, Base };
14994   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
14995                                          DAG.getVTList(MVT::v2f64, MVT::Other),
14996                                          LoadOps, MVT::v2f64, MMO);
14997 
14998   DCI.AddToWorklist(Load.getNode());
14999   Chain = Load.getValue(1);
15000   SDValue Swap = DAG.getNode(
15001       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15002   DCI.AddToWorklist(Swap.getNode());
15003 
15004   // Add a bitcast if the resulting load type doesn't match v2f64.
15005   if (VecTy != MVT::v2f64) {
15006     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15007     DCI.AddToWorklist(N.getNode());
15008     // Package {bitcast value, swap's chain} to match Load's shape.
15009     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15010                        N, Swap.getValue(1));
15011   }
15012 
15013   return Swap;
15014 }
15015 
15016 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15017 // builtins) into stores with swaps.
15018 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15019                                                DAGCombinerInfo &DCI) const {
15020   // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15021   // store combines.
15022   if (DCI.isBeforeLegalizeOps())
15023     return SDValue();
15024 
15025   SelectionDAG &DAG = DCI.DAG;
15026   SDLoc dl(N);
15027   SDValue Chain;
15028   SDValue Base;
15029   unsigned SrcOpnd;
15030   MachineMemOperand *MMO;
15031 
15032   switch (N->getOpcode()) {
15033   default:
15034     llvm_unreachable("Unexpected opcode for little endian VSX store");
15035   case ISD::STORE: {
15036     StoreSDNode *ST = cast<StoreSDNode>(N);
15037     Chain = ST->getChain();
15038     Base = ST->getBasePtr();
15039     MMO = ST->getMemOperand();
15040     SrcOpnd = 1;
15041     // If the MMO suggests this isn't a store of a full vector, leave
15042     // things alone.  For a built-in, we have to make the change for
15043     // correctness, so if there is a size problem that will be a bug.
15044     if (MMO->getSize() < 16)
15045       return SDValue();
15046     break;
15047   }
15048   case ISD::INTRINSIC_VOID: {
15049     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15050     Chain = Intrin->getChain();
15051     // Intrin->getBasePtr() oddly does not get what we want.
15052     Base = Intrin->getOperand(3);
15053     MMO = Intrin->getMemOperand();
15054     SrcOpnd = 2;
15055     break;
15056   }
15057   }
15058 
15059   SDValue Src = N->getOperand(SrcOpnd);
15060   MVT VecTy = Src.getValueType().getSimpleVT();
15061 
15062   // All stores are done as v2f64 and possible bit cast.
15063   if (VecTy != MVT::v2f64) {
15064     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15065     DCI.AddToWorklist(Src.getNode());
15066   }
15067 
15068   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15069                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15070   DCI.AddToWorklist(Swap.getNode());
15071   Chain = Swap.getValue(1);
15072   SDValue StoreOps[] = { Chain, Swap, Base };
15073   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15074                                           DAG.getVTList(MVT::Other),
15075                                           StoreOps, VecTy, MMO);
15076   DCI.AddToWorklist(Store.getNode());
15077   return Store;
15078 }
15079 
15080 // Handle DAG combine for STORE (FP_TO_INT F).
15081 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15082                                                DAGCombinerInfo &DCI) const {
15083   SelectionDAG &DAG = DCI.DAG;
15084   SDLoc dl(N);
15085   unsigned Opcode = N->getOperand(1).getOpcode();
15086   (void)Opcode;
15087   bool Strict = N->getOperand(1)->isStrictFPOpcode();
15088 
15089   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15090           Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15091          && "Not a FP_TO_INT Instruction!");
15092 
15093   SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15094   EVT Op1VT = N->getOperand(1).getValueType();
15095   EVT ResVT = Val.getValueType();
15096 
15097   if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15098     return SDValue();
15099 
15100   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
15101   bool ValidTypeForStoreFltAsInt =
15102         (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15103          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15104 
15105   // TODO: Lower conversion from f128 on all VSX targets
15106   if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15107     return SDValue();
15108 
15109   if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15110       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15111     return SDValue();
15112 
15113   Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15114 
15115   // Set number of bytes being converted.
15116   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15117   SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15118                    DAG.getIntPtrConstant(ByteSize, dl, false),
15119                    DAG.getValueType(Op1VT)};
15120 
15121   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15122           DAG.getVTList(MVT::Other), Ops,
15123           cast<StoreSDNode>(N)->getMemoryVT(),
15124           cast<StoreSDNode>(N)->getMemOperand());
15125 
15126   return Val;
15127 }
15128 
15129 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15130   // Check that the source of the element keeps flipping
15131   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
15132   bool PrevElemFromFirstVec = Mask[0] < NumElts;
15133   for (int i = 1, e = Mask.size(); i < e; i++) {
15134     if (PrevElemFromFirstVec && Mask[i] < NumElts)
15135       return false;
15136     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15137       return false;
15138     PrevElemFromFirstVec = !PrevElemFromFirstVec;
15139   }
15140   return true;
15141 }
15142 
15143 static bool isSplatBV(SDValue Op) {
15144   if (Op.getOpcode() != ISD::BUILD_VECTOR)
15145     return false;
15146   SDValue FirstOp;
15147 
15148   // Find first non-undef input.
15149   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15150     FirstOp = Op.getOperand(i);
15151     if (!FirstOp.isUndef())
15152       break;
15153   }
15154 
15155   // All inputs are undef or the same as the first non-undef input.
15156   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15157     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15158       return false;
15159   return true;
15160 }
15161 
15162 static SDValue isScalarToVec(SDValue Op) {
15163   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15164     return Op;
15165   if (Op.getOpcode() != ISD::BITCAST)
15166     return SDValue();
15167   Op = Op.getOperand(0);
15168   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15169     return Op;
15170   return SDValue();
15171 }
15172 
15173 // Fix up the shuffle mask to account for the fact that the result of
15174 // scalar_to_vector is not in lane zero. This just takes all values in
15175 // the ranges specified by the min/max indices and adds the number of
15176 // elements required to ensure each element comes from the respective
15177 // position in the valid lane.
15178 // On little endian, that's just the corresponding element in the other
15179 // half of the vector. On big endian, it is in the same half but right
15180 // justified rather than left justified in that half.
15181 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15182                                             int LHSMaxIdx, int RHSMinIdx,
15183                                             int RHSMaxIdx, int HalfVec,
15184                                             unsigned ValidLaneWidth,
15185                                             const PPCSubtarget &Subtarget) {
15186   for (int i = 0, e = ShuffV.size(); i < e; i++) {
15187     int Idx = ShuffV[i];
15188     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15189       ShuffV[i] +=
15190           Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15191   }
15192 }
15193 
15194 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15195 // the original is:
15196 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15197 // In such a case, just change the shuffle mask to extract the element
15198 // from the permuted index.
15199 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15200                                const PPCSubtarget &Subtarget) {
15201   SDLoc dl(OrigSToV);
15202   EVT VT = OrigSToV.getValueType();
15203   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15204          "Expecting a SCALAR_TO_VECTOR here");
15205   SDValue Input = OrigSToV.getOperand(0);
15206 
15207   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15208     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15209     SDValue OrigVector = Input.getOperand(0);
15210 
15211     // Can't handle non-const element indices or different vector types
15212     // for the input to the extract and the output of the scalar_to_vector.
15213     if (Idx && VT == OrigVector.getValueType()) {
15214       unsigned NumElts = VT.getVectorNumElements();
15215       assert(
15216           NumElts > 1 &&
15217           "Cannot produce a permuted scalar_to_vector for one element vector");
15218       SmallVector<int, 16> NewMask(NumElts, -1);
15219       unsigned ResultInElt = NumElts / 2;
15220       ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15221       NewMask[ResultInElt] = Idx->getZExtValue();
15222       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15223     }
15224   }
15225   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15226                      OrigSToV.getOperand(0));
15227 }
15228 
15229 // On little endian subtargets, combine shuffles such as:
15230 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15231 // into:
15232 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15233 // because the latter can be matched to a single instruction merge.
15234 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15235 // to put the value into element zero. Adjust the shuffle mask so that the
15236 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
15237 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
15238 // nodes with elements smaller than doubleword because all the ways
15239 // of getting scalar data into a vector register put the value in the
15240 // rightmost element of the left half of the vector.
15241 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15242                                                 SelectionDAG &DAG) const {
15243   SDValue LHS = SVN->getOperand(0);
15244   SDValue RHS = SVN->getOperand(1);
15245   auto Mask = SVN->getMask();
15246   int NumElts = LHS.getValueType().getVectorNumElements();
15247   SDValue Res(SVN, 0);
15248   SDLoc dl(SVN);
15249   bool IsLittleEndian = Subtarget.isLittleEndian();
15250 
15251   // On big endian targets this is only useful for subtargets with direct moves.
15252   // On little endian targets it would be useful for all subtargets with VSX.
15253   // However adding special handling for LE subtargets without direct moves
15254   // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15255   // which includes direct moves.
15256   if (!Subtarget.hasDirectMove())
15257     return Res;
15258 
15259   // If this is not a shuffle of a shuffle and the first element comes from
15260   // the second vector, canonicalize to the commuted form. This will make it
15261   // more likely to match one of the single instruction patterns.
15262   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15263       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15264     std::swap(LHS, RHS);
15265     Res = DAG.getCommutedVectorShuffle(*SVN);
15266     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15267   }
15268 
15269   // Adjust the shuffle mask if either input vector comes from a
15270   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15271   // form (to prevent the need for a swap).
15272   SmallVector<int, 16> ShuffV(Mask);
15273   SDValue SToVLHS = isScalarToVec(LHS);
15274   SDValue SToVRHS = isScalarToVec(RHS);
15275   if (SToVLHS || SToVRHS) {
15276     // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15277     // same type and have differing element sizes, then do not perform
15278     // the following transformation. The current transformation for
15279     // SCALAR_TO_VECTOR assumes that both input vectors have the same
15280     // element size. This will be updated in the future to account for
15281     // differing sizes of the LHS and RHS.
15282     if (SToVLHS && SToVRHS &&
15283         (SToVLHS.getValueType().getScalarSizeInBits() !=
15284          SToVRHS.getValueType().getScalarSizeInBits()))
15285       return Res;
15286 
15287     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15288                             : SToVRHS.getValueType().getVectorNumElements();
15289     int NumEltsOut = ShuffV.size();
15290     // The width of the "valid lane" (i.e. the lane that contains the value that
15291     // is vectorized) needs to be expressed in terms of the number of elements
15292     // of the shuffle. It is thereby the ratio of the values before and after
15293     // any bitcast.
15294     unsigned ValidLaneWidth =
15295         SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15296                       LHS.getValueType().getScalarSizeInBits()
15297                 : SToVRHS.getValueType().getScalarSizeInBits() /
15298                       RHS.getValueType().getScalarSizeInBits();
15299 
15300     // Initially assume that neither input is permuted. These will be adjusted
15301     // accordingly if either input is.
15302     int LHSMaxIdx = -1;
15303     int RHSMinIdx = -1;
15304     int RHSMaxIdx = -1;
15305     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15306 
15307     // Get the permuted scalar to vector nodes for the source(s) that come from
15308     // ISD::SCALAR_TO_VECTOR.
15309     // On big endian systems, this only makes sense for element sizes smaller
15310     // than 64 bits since for 64-bit elements, all instructions already put
15311     // the value into element zero. Since scalar size of LHS and RHS may differ
15312     // after isScalarToVec, this should be checked using their own sizes.
15313     if (SToVLHS) {
15314       if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15315         return Res;
15316       // Set up the values for the shuffle vector fixup.
15317       LHSMaxIdx = NumEltsOut / NumEltsIn;
15318       SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15319       if (SToVLHS.getValueType() != LHS.getValueType())
15320         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15321       LHS = SToVLHS;
15322     }
15323     if (SToVRHS) {
15324       if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15325         return Res;
15326       RHSMinIdx = NumEltsOut;
15327       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15328       SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15329       if (SToVRHS.getValueType() != RHS.getValueType())
15330         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15331       RHS = SToVRHS;
15332     }
15333 
15334     // Fix up the shuffle mask to reflect where the desired element actually is.
15335     // The minimum and maximum indices that correspond to element zero for both
15336     // the LHS and RHS are computed and will control which shuffle mask entries
15337     // are to be changed. For example, if the RHS is permuted, any shuffle mask
15338     // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15339     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15340                                     HalfVec, ValidLaneWidth, Subtarget);
15341     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15342 
15343     // We may have simplified away the shuffle. We won't be able to do anything
15344     // further with it here.
15345     if (!isa<ShuffleVectorSDNode>(Res))
15346       return Res;
15347     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15348   }
15349 
15350   SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15351   // The common case after we commuted the shuffle is that the RHS is a splat
15352   // and we have elements coming in from the splat at indices that are not
15353   // conducive to using a merge.
15354   // Example:
15355   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15356   if (!isSplatBV(TheSplat))
15357     return Res;
15358 
15359   // We are looking for a mask such that all even elements are from
15360   // one vector and all odd elements from the other.
15361   if (!isAlternatingShuffMask(Mask, NumElts))
15362     return Res;
15363 
15364   // Adjust the mask so we are pulling in the same index from the splat
15365   // as the index from the interesting vector in consecutive elements.
15366   if (IsLittleEndian) {
15367     // Example (even elements from first vector):
15368     // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15369     if (Mask[0] < NumElts)
15370       for (int i = 1, e = Mask.size(); i < e; i += 2) {
15371         if (ShuffV[i] < 0)
15372           continue;
15373         ShuffV[i] = (ShuffV[i - 1] + NumElts);
15374       }
15375     // Example (odd elements from first vector):
15376     // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15377     else
15378       for (int i = 0, e = Mask.size(); i < e; i += 2) {
15379         if (ShuffV[i] < 0)
15380           continue;
15381         ShuffV[i] = (ShuffV[i + 1] + NumElts);
15382       }
15383   } else {
15384     // Example (even elements from first vector):
15385     // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15386     if (Mask[0] < NumElts)
15387       for (int i = 0, e = Mask.size(); i < e; i += 2) {
15388         if (ShuffV[i] < 0)
15389           continue;
15390         ShuffV[i] = ShuffV[i + 1] - NumElts;
15391       }
15392     // Example (odd elements from first vector):
15393     // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15394     else
15395       for (int i = 1, e = Mask.size(); i < e; i += 2) {
15396         if (ShuffV[i] < 0)
15397           continue;
15398         ShuffV[i] = ShuffV[i - 1] - NumElts;
15399       }
15400   }
15401 
15402   // If the RHS has undefs, we need to remove them since we may have created
15403   // a shuffle that adds those instead of the splat value.
15404   SDValue SplatVal =
15405       cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15406   TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15407 
15408   if (IsLittleEndian)
15409     RHS = TheSplat;
15410   else
15411     LHS = TheSplat;
15412   return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15413 }
15414 
15415 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15416                                                 LSBaseSDNode *LSBase,
15417                                                 DAGCombinerInfo &DCI) const {
15418   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15419         "Not a reverse memop pattern!");
15420 
15421   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15422     auto Mask = SVN->getMask();
15423     int i = 0;
15424     auto I = Mask.rbegin();
15425     auto E = Mask.rend();
15426 
15427     for (; I != E; ++I) {
15428       if (*I != i)
15429         return false;
15430       i++;
15431     }
15432     return true;
15433   };
15434 
15435   SelectionDAG &DAG = DCI.DAG;
15436   EVT VT = SVN->getValueType(0);
15437 
15438   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15439     return SDValue();
15440 
15441   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
15442   // See comment in PPCVSXSwapRemoval.cpp.
15443   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
15444   if (!Subtarget.hasP9Vector())
15445     return SDValue();
15446 
15447   if(!IsElementReverse(SVN))
15448     return SDValue();
15449 
15450   if (LSBase->getOpcode() == ISD::LOAD) {
15451     // If the load return value 0 has more than one user except the
15452     // shufflevector instruction, it is not profitable to replace the
15453     // shufflevector with a reverse load.
15454     for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15455          UI != UE; ++UI)
15456       if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15457         return SDValue();
15458 
15459     SDLoc dl(LSBase);
15460     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15461     return DAG.getMemIntrinsicNode(
15462         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15463         LSBase->getMemoryVT(), LSBase->getMemOperand());
15464   }
15465 
15466   if (LSBase->getOpcode() == ISD::STORE) {
15467     // If there are other uses of the shuffle, the swap cannot be avoided.
15468     // Forcing the use of an X-Form (since swapped stores only have
15469     // X-Forms) without removing the swap is unprofitable.
15470     if (!SVN->hasOneUse())
15471       return SDValue();
15472 
15473     SDLoc dl(LSBase);
15474     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15475                           LSBase->getBasePtr()};
15476     return DAG.getMemIntrinsicNode(
15477         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15478         LSBase->getMemoryVT(), LSBase->getMemOperand());
15479   }
15480 
15481   llvm_unreachable("Expected a load or store node here");
15482 }
15483 
15484 static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15485   unsigned IntrinsicID =
15486       cast<ConstantSDNode>(Intrin.getOperand(1))->getZExtValue();
15487   if (IntrinsicID == Intrinsic::ppc_stdcx)
15488     StoreWidth = 8;
15489   else if (IntrinsicID == Intrinsic::ppc_stwcx)
15490     StoreWidth = 4;
15491   else if (IntrinsicID == Intrinsic::ppc_sthcx)
15492     StoreWidth = 2;
15493   else if (IntrinsicID == Intrinsic::ppc_stbcx)
15494     StoreWidth = 1;
15495   else
15496     return false;
15497   return true;
15498 }
15499 
15500 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15501                                              DAGCombinerInfo &DCI) const {
15502   SelectionDAG &DAG = DCI.DAG;
15503   SDLoc dl(N);
15504   switch (N->getOpcode()) {
15505   default: break;
15506   case ISD::ADD:
15507     return combineADD(N, DCI);
15508   case ISD::AND: {
15509     // We don't want (and (zext (shift...)), C) if C fits in the width of the
15510     // original input as that will prevent us from selecting optimal rotates.
15511     // This only matters if the input to the extend is i32 widened to i64.
15512     SDValue Op1 = N->getOperand(0);
15513     SDValue Op2 = N->getOperand(1);
15514     if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15515          Op1.getOpcode() != ISD::ANY_EXTEND) ||
15516         !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15517         Op1.getOperand(0).getValueType() != MVT::i32)
15518       break;
15519     SDValue NarrowOp = Op1.getOperand(0);
15520     if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15521         NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15522       break;
15523 
15524     uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
15525     // Make sure that the constant is narrow enough to fit in the narrow type.
15526     if (!isUInt<32>(Imm))
15527       break;
15528     SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15529     SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15530     return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15531   }
15532   case ISD::SHL:
15533     return combineSHL(N, DCI);
15534   case ISD::SRA:
15535     return combineSRA(N, DCI);
15536   case ISD::SRL:
15537     return combineSRL(N, DCI);
15538   case ISD::MUL:
15539     return combineMUL(N, DCI);
15540   case ISD::FMA:
15541   case PPCISD::FNMSUB:
15542     return combineFMALike(N, DCI);
15543   case PPCISD::SHL:
15544     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15545         return N->getOperand(0);
15546     break;
15547   case PPCISD::SRL:
15548     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15549         return N->getOperand(0);
15550     break;
15551   case PPCISD::SRA:
15552     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15553       if (C->isZero() ||  //  0 >>s V -> 0.
15554           C->isAllOnes()) // -1 >>s V -> -1.
15555         return N->getOperand(0);
15556     }
15557     break;
15558   case ISD::SIGN_EXTEND:
15559   case ISD::ZERO_EXTEND:
15560   case ISD::ANY_EXTEND:
15561     return DAGCombineExtBoolTrunc(N, DCI);
15562   case ISD::TRUNCATE:
15563     return combineTRUNCATE(N, DCI);
15564   case ISD::SETCC:
15565     if (SDValue CSCC = combineSetCC(N, DCI))
15566       return CSCC;
15567     [[fallthrough]];
15568   case ISD::SELECT_CC:
15569     return DAGCombineTruncBoolExt(N, DCI);
15570   case ISD::SINT_TO_FP:
15571   case ISD::UINT_TO_FP:
15572     return combineFPToIntToFP(N, DCI);
15573   case ISD::VECTOR_SHUFFLE:
15574     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15575       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15576       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15577     }
15578     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15579   case ISD::STORE: {
15580 
15581     EVT Op1VT = N->getOperand(1).getValueType();
15582     unsigned Opcode = N->getOperand(1).getOpcode();
15583 
15584     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15585         Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15586       SDValue Val = combineStoreFPToInt(N, DCI);
15587       if (Val)
15588         return Val;
15589     }
15590 
15591     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15592       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15593       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15594       if (Val)
15595         return Val;
15596     }
15597 
15598     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15599     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15600         N->getOperand(1).getNode()->hasOneUse() &&
15601         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15602          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15603 
15604       // STBRX can only handle simple types and it makes no sense to store less
15605       // two bytes in byte-reversed order.
15606       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15607       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15608         break;
15609 
15610       SDValue BSwapOp = N->getOperand(1).getOperand(0);
15611       // Do an any-extend to 32-bits if this is a half-word input.
15612       if (BSwapOp.getValueType() == MVT::i16)
15613         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15614 
15615       // If the type of BSWAP operand is wider than stored memory width
15616       // it need to be shifted to the right side before STBRX.
15617       if (Op1VT.bitsGT(mVT)) {
15618         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15619         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15620                               DAG.getConstant(Shift, dl, MVT::i32));
15621         // Need to truncate if this is a bswap of i64 stored as i32/i16.
15622         if (Op1VT == MVT::i64)
15623           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15624       }
15625 
15626       SDValue Ops[] = {
15627         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15628       };
15629       return
15630         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15631                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15632                                 cast<StoreSDNode>(N)->getMemOperand());
15633     }
15634 
15635     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
15636     // So it can increase the chance of CSE constant construction.
15637     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15638         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15639       // Need to sign-extended to 64-bits to handle negative values.
15640       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15641       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15642                                     MemVT.getSizeInBits());
15643       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15644 
15645       // DAG.getTruncStore() can't be used here because it doesn't accept
15646       // the general (base + offset) addressing mode.
15647       // So we use UpdateNodeOperands and setTruncatingStore instead.
15648       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15649                              N->getOperand(3));
15650       cast<StoreSDNode>(N)->setTruncatingStore(true);
15651       return SDValue(N, 0);
15652     }
15653 
15654     // For little endian, VSX stores require generating xxswapd/lxvd2x.
15655     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15656     if (Op1VT.isSimple()) {
15657       MVT StoreVT = Op1VT.getSimpleVT();
15658       if (Subtarget.needsSwapsForVSXMemOps() &&
15659           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15660            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15661         return expandVSXStoreForLE(N, DCI);
15662     }
15663     break;
15664   }
15665   case ISD::LOAD: {
15666     LoadSDNode *LD = cast<LoadSDNode>(N);
15667     EVT VT = LD->getValueType(0);
15668 
15669     // For little endian, VSX loads require generating lxvd2x/xxswapd.
15670     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15671     if (VT.isSimple()) {
15672       MVT LoadVT = VT.getSimpleVT();
15673       if (Subtarget.needsSwapsForVSXMemOps() &&
15674           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15675            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15676         return expandVSXLoadForLE(N, DCI);
15677     }
15678 
15679     // We sometimes end up with a 64-bit integer load, from which we extract
15680     // two single-precision floating-point numbers. This happens with
15681     // std::complex<float>, and other similar structures, because of the way we
15682     // canonicalize structure copies. However, if we lack direct moves,
15683     // then the final bitcasts from the extracted integer values to the
15684     // floating-point numbers turn into store/load pairs. Even with direct moves,
15685     // just loading the two floating-point numbers is likely better.
15686     auto ReplaceTwoFloatLoad = [&]() {
15687       if (VT != MVT::i64)
15688         return false;
15689 
15690       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15691           LD->isVolatile())
15692         return false;
15693 
15694       //  We're looking for a sequence like this:
15695       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15696       //      t16: i64 = srl t13, Constant:i32<32>
15697       //    t17: i32 = truncate t16
15698       //  t18: f32 = bitcast t17
15699       //    t19: i32 = truncate t13
15700       //  t20: f32 = bitcast t19
15701 
15702       if (!LD->hasNUsesOfValue(2, 0))
15703         return false;
15704 
15705       auto UI = LD->use_begin();
15706       while (UI.getUse().getResNo() != 0) ++UI;
15707       SDNode *Trunc = *UI++;
15708       while (UI.getUse().getResNo() != 0) ++UI;
15709       SDNode *RightShift = *UI;
15710       if (Trunc->getOpcode() != ISD::TRUNCATE)
15711         std::swap(Trunc, RightShift);
15712 
15713       if (Trunc->getOpcode() != ISD::TRUNCATE ||
15714           Trunc->getValueType(0) != MVT::i32 ||
15715           !Trunc->hasOneUse())
15716         return false;
15717       if (RightShift->getOpcode() != ISD::SRL ||
15718           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15719           RightShift->getConstantOperandVal(1) != 32 ||
15720           !RightShift->hasOneUse())
15721         return false;
15722 
15723       SDNode *Trunc2 = *RightShift->use_begin();
15724       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15725           Trunc2->getValueType(0) != MVT::i32 ||
15726           !Trunc2->hasOneUse())
15727         return false;
15728 
15729       SDNode *Bitcast = *Trunc->use_begin();
15730       SDNode *Bitcast2 = *Trunc2->use_begin();
15731 
15732       if (Bitcast->getOpcode() != ISD::BITCAST ||
15733           Bitcast->getValueType(0) != MVT::f32)
15734         return false;
15735       if (Bitcast2->getOpcode() != ISD::BITCAST ||
15736           Bitcast2->getValueType(0) != MVT::f32)
15737         return false;
15738 
15739       if (Subtarget.isLittleEndian())
15740         std::swap(Bitcast, Bitcast2);
15741 
15742       // Bitcast has the second float (in memory-layout order) and Bitcast2
15743       // has the first one.
15744 
15745       SDValue BasePtr = LD->getBasePtr();
15746       if (LD->isIndexed()) {
15747         assert(LD->getAddressingMode() == ISD::PRE_INC &&
15748                "Non-pre-inc AM on PPC?");
15749         BasePtr =
15750           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15751                       LD->getOffset());
15752       }
15753 
15754       auto MMOFlags =
15755           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15756       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15757                                       LD->getPointerInfo(), LD->getAlign(),
15758                                       MMOFlags, LD->getAAInfo());
15759       SDValue AddPtr =
15760         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15761                     BasePtr, DAG.getIntPtrConstant(4, dl));
15762       SDValue FloatLoad2 = DAG.getLoad(
15763           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15764           LD->getPointerInfo().getWithOffset(4),
15765           commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15766 
15767       if (LD->isIndexed()) {
15768         // Note that DAGCombine should re-form any pre-increment load(s) from
15769         // what is produced here if that makes sense.
15770         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15771       }
15772 
15773       DCI.CombineTo(Bitcast2, FloatLoad);
15774       DCI.CombineTo(Bitcast, FloatLoad2);
15775 
15776       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15777                                     SDValue(FloatLoad2.getNode(), 1));
15778       return true;
15779     };
15780 
15781     if (ReplaceTwoFloatLoad())
15782       return SDValue(N, 0);
15783 
15784     EVT MemVT = LD->getMemoryVT();
15785     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15786     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15787     if (LD->isUnindexed() && VT.isVector() &&
15788         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15789           // P8 and later hardware should just use LOAD.
15790           !Subtarget.hasP8Vector() &&
15791           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15792            VT == MVT::v4f32))) &&
15793         LD->getAlign() < ABIAlignment) {
15794       // This is a type-legal unaligned Altivec load.
15795       SDValue Chain = LD->getChain();
15796       SDValue Ptr = LD->getBasePtr();
15797       bool isLittleEndian = Subtarget.isLittleEndian();
15798 
15799       // This implements the loading of unaligned vectors as described in
15800       // the venerable Apple Velocity Engine overview. Specifically:
15801       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15802       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15803       //
15804       // The general idea is to expand a sequence of one or more unaligned
15805       // loads into an alignment-based permutation-control instruction (lvsl
15806       // or lvsr), a series of regular vector loads (which always truncate
15807       // their input address to an aligned address), and a series of
15808       // permutations.  The results of these permutations are the requested
15809       // loaded values.  The trick is that the last "extra" load is not taken
15810       // from the address you might suspect (sizeof(vector) bytes after the
15811       // last requested load), but rather sizeof(vector) - 1 bytes after the
15812       // last requested vector. The point of this is to avoid a page fault if
15813       // the base address happened to be aligned. This works because if the
15814       // base address is aligned, then adding less than a full vector length
15815       // will cause the last vector in the sequence to be (re)loaded.
15816       // Otherwise, the next vector will be fetched as you might suspect was
15817       // necessary.
15818 
15819       // We might be able to reuse the permutation generation from
15820       // a different base address offset from this one by an aligned amount.
15821       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15822       // optimization later.
15823       Intrinsic::ID Intr, IntrLD, IntrPerm;
15824       MVT PermCntlTy, PermTy, LDTy;
15825       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15826                             : Intrinsic::ppc_altivec_lvsl;
15827       IntrLD = Intrinsic::ppc_altivec_lvx;
15828       IntrPerm = Intrinsic::ppc_altivec_vperm;
15829       PermCntlTy = MVT::v16i8;
15830       PermTy = MVT::v4i32;
15831       LDTy = MVT::v4i32;
15832 
15833       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15834 
15835       // Create the new MMO for the new base load. It is like the original MMO,
15836       // but represents an area in memory almost twice the vector size centered
15837       // on the original address. If the address is unaligned, we might start
15838       // reading up to (sizeof(vector)-1) bytes below the address of the
15839       // original unaligned load.
15840       MachineFunction &MF = DAG.getMachineFunction();
15841       MachineMemOperand *BaseMMO =
15842         MF.getMachineMemOperand(LD->getMemOperand(),
15843                                 -(int64_t)MemVT.getStoreSize()+1,
15844                                 2*MemVT.getStoreSize()-1);
15845 
15846       // Create the new base load.
15847       SDValue LDXIntID =
15848           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15849       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15850       SDValue BaseLoad =
15851         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15852                                 DAG.getVTList(PermTy, MVT::Other),
15853                                 BaseLoadOps, LDTy, BaseMMO);
15854 
15855       // Note that the value of IncOffset (which is provided to the next
15856       // load's pointer info offset value, and thus used to calculate the
15857       // alignment), and the value of IncValue (which is actually used to
15858       // increment the pointer value) are different! This is because we
15859       // require the next load to appear to be aligned, even though it
15860       // is actually offset from the base pointer by a lesser amount.
15861       int IncOffset = VT.getSizeInBits() / 8;
15862       int IncValue = IncOffset;
15863 
15864       // Walk (both up and down) the chain looking for another load at the real
15865       // (aligned) offset (the alignment of the other load does not matter in
15866       // this case). If found, then do not use the offset reduction trick, as
15867       // that will prevent the loads from being later combined (as they would
15868       // otherwise be duplicates).
15869       if (!findConsecutiveLoad(LD, DAG))
15870         --IncValue;
15871 
15872       SDValue Increment =
15873           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15874       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15875 
15876       MachineMemOperand *ExtraMMO =
15877         MF.getMachineMemOperand(LD->getMemOperand(),
15878                                 1, 2*MemVT.getStoreSize()-1);
15879       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15880       SDValue ExtraLoad =
15881         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
15882                                 DAG.getVTList(PermTy, MVT::Other),
15883                                 ExtraLoadOps, LDTy, ExtraMMO);
15884 
15885       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
15886         BaseLoad.getValue(1), ExtraLoad.getValue(1));
15887 
15888       // Because vperm has a big-endian bias, we must reverse the order
15889       // of the input vectors and complement the permute control vector
15890       // when generating little endian code.  We have already handled the
15891       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15892       // and ExtraLoad here.
15893       SDValue Perm;
15894       if (isLittleEndian)
15895         Perm = BuildIntrinsicOp(IntrPerm,
15896                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15897       else
15898         Perm = BuildIntrinsicOp(IntrPerm,
15899                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15900 
15901       if (VT != PermTy)
15902         Perm = Subtarget.hasAltivec()
15903                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15904                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15905                                  DAG.getTargetConstant(1, dl, MVT::i64));
15906                                // second argument is 1 because this rounding
15907                                // is always exact.
15908 
15909       // The output of the permutation is our loaded result, the TokenFactor is
15910       // our new chain.
15911       DCI.CombineTo(N, Perm, TF);
15912       return SDValue(N, 0);
15913     }
15914     }
15915     break;
15916     case ISD::INTRINSIC_WO_CHAIN: {
15917       bool isLittleEndian = Subtarget.isLittleEndian();
15918       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15919       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15920                                            : Intrinsic::ppc_altivec_lvsl);
15921       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15922         SDValue Add = N->getOperand(1);
15923 
15924         int Bits = 4 /* 16 byte alignment */;
15925 
15926         if (DAG.MaskedValueIsZero(Add->getOperand(1),
15927                                   APInt::getAllOnes(Bits /* alignment */)
15928                                       .zext(Add.getScalarValueSizeInBits()))) {
15929           SDNode *BasePtr = Add->getOperand(0).getNode();
15930           for (SDNode *U : BasePtr->uses()) {
15931             if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15932                 cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15933               // We've found another LVSL/LVSR, and this address is an aligned
15934               // multiple of that one. The results will be the same, so use the
15935               // one we've just found instead.
15936 
15937               return SDValue(U, 0);
15938             }
15939           }
15940         }
15941 
15942         if (isa<ConstantSDNode>(Add->getOperand(1))) {
15943           SDNode *BasePtr = Add->getOperand(0).getNode();
15944           for (SDNode *U : BasePtr->uses()) {
15945             if (U->getOpcode() == ISD::ADD &&
15946                 isa<ConstantSDNode>(U->getOperand(1)) &&
15947                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15948                  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15949                         (1ULL << Bits) ==
15950                     0) {
15951               SDNode *OtherAdd = U;
15952               for (SDNode *V : OtherAdd->uses()) {
15953                 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15954                     cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15955                         IID) {
15956                   return SDValue(V, 0);
15957                 }
15958               }
15959             }
15960           }
15961         }
15962       }
15963 
15964       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15965       // Expose the vabsduw/h/b opportunity for down stream
15966       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15967           (IID == Intrinsic::ppc_altivec_vmaxsw ||
15968            IID == Intrinsic::ppc_altivec_vmaxsh ||
15969            IID == Intrinsic::ppc_altivec_vmaxsb)) {
15970         SDValue V1 = N->getOperand(1);
15971         SDValue V2 = N->getOperand(2);
15972         if ((V1.getSimpleValueType() == MVT::v4i32 ||
15973              V1.getSimpleValueType() == MVT::v8i16 ||
15974              V1.getSimpleValueType() == MVT::v16i8) &&
15975             V1.getSimpleValueType() == V2.getSimpleValueType()) {
15976           // (0-a, a)
15977           if (V1.getOpcode() == ISD::SUB &&
15978               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
15979               V1.getOperand(1) == V2) {
15980             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15981           }
15982           // (a, 0-a)
15983           if (V2.getOpcode() == ISD::SUB &&
15984               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15985               V2.getOperand(1) == V1) {
15986             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15987           }
15988           // (x-y, y-x)
15989           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15990               V1.getOperand(0) == V2.getOperand(1) &&
15991               V1.getOperand(1) == V2.getOperand(0)) {
15992             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15993           }
15994         }
15995       }
15996     }
15997 
15998     break;
15999   case ISD::INTRINSIC_W_CHAIN:
16000     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16001     default:
16002       break;
16003     case Intrinsic::ppc_altivec_vsum4sbs:
16004     case Intrinsic::ppc_altivec_vsum4shs:
16005     case Intrinsic::ppc_altivec_vsum4ubs: {
16006       // These sum-across intrinsics only have a chain due to the side effect
16007       // that they may set the SAT bit. If we know the SAT bit will not be set
16008       // for some inputs, we can replace any uses of their chain with the input
16009       // chain.
16010       if (BuildVectorSDNode *BVN =
16011               dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16012         APInt APSplatBits, APSplatUndef;
16013         unsigned SplatBitSize;
16014         bool HasAnyUndefs;
16015         bool BVNIsConstantSplat = BVN->isConstantSplat(
16016             APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16017             !Subtarget.isLittleEndian());
16018         // If the constant splat vector is 0, the SAT bit will not be set.
16019         if (BVNIsConstantSplat && APSplatBits == 0)
16020           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16021       }
16022       return SDValue();
16023     }
16024     case Intrinsic::ppc_vsx_lxvw4x:
16025     case Intrinsic::ppc_vsx_lxvd2x:
16026       // For little endian, VSX loads require generating lxvd2x/xxswapd.
16027       // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16028       if (Subtarget.needsSwapsForVSXMemOps())
16029         return expandVSXLoadForLE(N, DCI);
16030       break;
16031     }
16032     break;
16033   case ISD::INTRINSIC_VOID:
16034     // For little endian, VSX stores require generating xxswapd/stxvd2x.
16035     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16036     if (Subtarget.needsSwapsForVSXMemOps()) {
16037       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16038       default:
16039         break;
16040       case Intrinsic::ppc_vsx_stxvw4x:
16041       case Intrinsic::ppc_vsx_stxvd2x:
16042         return expandVSXStoreForLE(N, DCI);
16043       }
16044     }
16045     break;
16046   case ISD::BSWAP: {
16047     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16048     // For subtargets without LDBRX, we can still do better than the default
16049     // expansion even for 64-bit BSWAP (LOAD).
16050     bool Is64BitBswapOn64BitTgt =
16051         Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16052     bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16053                                N->getOperand(0).hasOneUse();
16054     if (IsSingleUseNormalLd &&
16055         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16056          (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16057       SDValue Load = N->getOperand(0);
16058       LoadSDNode *LD = cast<LoadSDNode>(Load);
16059       // Create the byte-swapping load.
16060       SDValue Ops[] = {
16061         LD->getChain(),    // Chain
16062         LD->getBasePtr(),  // Ptr
16063         DAG.getValueType(N->getValueType(0)) // VT
16064       };
16065       SDValue BSLoad =
16066         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16067                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16068                                               MVT::i64 : MVT::i32, MVT::Other),
16069                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
16070 
16071       // If this is an i16 load, insert the truncate.
16072       SDValue ResVal = BSLoad;
16073       if (N->getValueType(0) == MVT::i16)
16074         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16075 
16076       // First, combine the bswap away.  This makes the value produced by the
16077       // load dead.
16078       DCI.CombineTo(N, ResVal);
16079 
16080       // Next, combine the load away, we give it a bogus result value but a real
16081       // chain result.  The result value is dead because the bswap is dead.
16082       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16083 
16084       // Return N so it doesn't get rechecked!
16085       return SDValue(N, 0);
16086     }
16087     // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16088     // before legalization so that the BUILD_PAIR is handled correctly.
16089     if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16090         !IsSingleUseNormalLd)
16091       return SDValue();
16092     LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16093 
16094     // Can't split volatile or atomic loads.
16095     if (!LD->isSimple())
16096       return SDValue();
16097     SDValue BasePtr = LD->getBasePtr();
16098     SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16099                              LD->getPointerInfo(), LD->getAlign());
16100     Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16101     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16102                           DAG.getIntPtrConstant(4, dl));
16103     MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16104         LD->getMemOperand(), 4, 4);
16105     SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16106     Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16107     SDValue Res;
16108     if (Subtarget.isLittleEndian())
16109       Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16110     else
16111       Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16112     SDValue TF =
16113         DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16114                     Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16115     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16116     return Res;
16117   }
16118   case PPCISD::VCMP:
16119     // If a VCMP_rec node already exists with exactly the same operands as this
16120     // node, use its result instead of this node (VCMP_rec computes both a CR6
16121     // and a normal output).
16122     //
16123     if (!N->getOperand(0).hasOneUse() &&
16124         !N->getOperand(1).hasOneUse() &&
16125         !N->getOperand(2).hasOneUse()) {
16126 
16127       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16128       SDNode *VCMPrecNode = nullptr;
16129 
16130       SDNode *LHSN = N->getOperand(0).getNode();
16131       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16132            UI != E; ++UI)
16133         if (UI->getOpcode() == PPCISD::VCMP_rec &&
16134             UI->getOperand(1) == N->getOperand(1) &&
16135             UI->getOperand(2) == N->getOperand(2) &&
16136             UI->getOperand(0) == N->getOperand(0)) {
16137           VCMPrecNode = *UI;
16138           break;
16139         }
16140 
16141       // If there is no VCMP_rec node, or if the flag value has a single use,
16142       // don't transform this.
16143       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16144         break;
16145 
16146       // Look at the (necessarily single) use of the flag value.  If it has a
16147       // chain, this transformation is more complex.  Note that multiple things
16148       // could use the value result, which we should ignore.
16149       SDNode *FlagUser = nullptr;
16150       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16151            FlagUser == nullptr; ++UI) {
16152         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16153         SDNode *User = *UI;
16154         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16155           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16156             FlagUser = User;
16157             break;
16158           }
16159         }
16160       }
16161 
16162       // If the user is a MFOCRF instruction, we know this is safe.
16163       // Otherwise we give up for right now.
16164       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16165         return SDValue(VCMPrecNode, 0);
16166     }
16167     break;
16168   case ISD::BR_CC: {
16169     // If this is a branch on an altivec predicate comparison, lower this so
16170     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
16171     // lowering is done pre-legalize, because the legalizer lowers the predicate
16172     // compare down to code that is difficult to reassemble.
16173     // This code also handles branches that depend on the result of a store
16174     // conditional.
16175     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16176     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16177 
16178     int CompareOpc;
16179     bool isDot;
16180 
16181     if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16182       break;
16183 
16184     // Since we are doing this pre-legalize, the RHS can be a constant of
16185     // arbitrary bitwidth which may cause issues when trying to get the value
16186     // from the underlying APInt.
16187     auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue();
16188     if (!RHSAPInt.isIntN(64))
16189       break;
16190 
16191     unsigned Val = RHSAPInt.getZExtValue();
16192     auto isImpossibleCompare = [&]() {
16193       // If this is a comparison against something other than 0/1, then we know
16194       // that the condition is never/always true.
16195       if (Val != 0 && Val != 1) {
16196         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
16197           return N->getOperand(0);
16198         // Always !=, turn it into an unconditional branch.
16199         return DAG.getNode(ISD::BR, dl, MVT::Other,
16200                            N->getOperand(0), N->getOperand(4));
16201       }
16202       return SDValue();
16203     };
16204     // Combine branches fed by store conditional instructions (st[bhwd]cx).
16205     unsigned StoreWidth = 0;
16206     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16207         isStoreConditional(LHS, StoreWidth)) {
16208       if (SDValue Impossible = isImpossibleCompare())
16209         return Impossible;
16210       PPC::Predicate CompOpc;
16211       // eq 0 => ne
16212       // ne 0 => eq
16213       // eq 1 => eq
16214       // ne 1 => ne
16215       if (Val == 0)
16216         CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16217       else
16218         CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16219 
16220       SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16221                        DAG.getConstant(StoreWidth, dl, MVT::i32)};
16222       auto *MemNode = cast<MemSDNode>(LHS);
16223       SDValue ConstSt = DAG.getMemIntrinsicNode(
16224           PPCISD::STORE_COND, dl,
16225           DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16226           MemNode->getMemoryVT(), MemNode->getMemOperand());
16227 
16228       SDValue InChain;
16229       // Unchain the branch from the original store conditional.
16230       if (N->getOperand(0) == LHS.getValue(1))
16231         InChain = LHS.getOperand(0);
16232       else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16233         SmallVector<SDValue, 4> InChains;
16234         SDValue InTF = N->getOperand(0);
16235         for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16236           if (InTF.getOperand(i) != LHS.getValue(1))
16237             InChains.push_back(InTF.getOperand(i));
16238         InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16239       }
16240 
16241       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16242                          DAG.getConstant(CompOpc, dl, MVT::i32),
16243                          DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16244                          ConstSt.getValue(2));
16245     }
16246 
16247     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16248         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16249       assert(isDot && "Can't compare against a vector result!");
16250 
16251       if (SDValue Impossible = isImpossibleCompare())
16252         return Impossible;
16253 
16254       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16255       // Create the PPCISD altivec 'dot' comparison node.
16256       SDValue Ops[] = {
16257         LHS.getOperand(2),  // LHS of compare
16258         LHS.getOperand(3),  // RHS of compare
16259         DAG.getConstant(CompareOpc, dl, MVT::i32)
16260       };
16261       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16262       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16263 
16264       // Unpack the result based on how the target uses it.
16265       PPC::Predicate CompOpc;
16266       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
16267       default:  // Can't happen, don't crash on invalid number though.
16268       case 0:   // Branch on the value of the EQ bit of CR6.
16269         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16270         break;
16271       case 1:   // Branch on the inverted value of the EQ bit of CR6.
16272         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16273         break;
16274       case 2:   // Branch on the value of the LT bit of CR6.
16275         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16276         break;
16277       case 3:   // Branch on the inverted value of the LT bit of CR6.
16278         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16279         break;
16280       }
16281 
16282       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16283                          DAG.getConstant(CompOpc, dl, MVT::i32),
16284                          DAG.getRegister(PPC::CR6, MVT::i32),
16285                          N->getOperand(4), CompNode.getValue(1));
16286     }
16287     break;
16288   }
16289   case ISD::BUILD_VECTOR:
16290     return DAGCombineBuildVector(N, DCI);
16291   }
16292 
16293   return SDValue();
16294 }
16295 
16296 SDValue
16297 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16298                                  SelectionDAG &DAG,
16299                                  SmallVectorImpl<SDNode *> &Created) const {
16300   // fold (sdiv X, pow2)
16301   EVT VT = N->getValueType(0);
16302   if (VT == MVT::i64 && !Subtarget.isPPC64())
16303     return SDValue();
16304   if ((VT != MVT::i32 && VT != MVT::i64) ||
16305       !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16306     return SDValue();
16307 
16308   SDLoc DL(N);
16309   SDValue N0 = N->getOperand(0);
16310 
16311   bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16312   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16313   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
16314 
16315   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16316   Created.push_back(Op.getNode());
16317 
16318   if (IsNegPow2) {
16319     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16320     Created.push_back(Op.getNode());
16321   }
16322 
16323   return Op;
16324 }
16325 
16326 //===----------------------------------------------------------------------===//
16327 // Inline Assembly Support
16328 //===----------------------------------------------------------------------===//
16329 
16330 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16331                                                       KnownBits &Known,
16332                                                       const APInt &DemandedElts,
16333                                                       const SelectionDAG &DAG,
16334                                                       unsigned Depth) const {
16335   Known.resetAll();
16336   switch (Op.getOpcode()) {
16337   default: break;
16338   case PPCISD::LBRX: {
16339     // lhbrx is known to have the top bits cleared out.
16340     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16341       Known.Zero = 0xFFFF0000;
16342     break;
16343   }
16344   case ISD::INTRINSIC_WO_CHAIN: {
16345     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
16346     default: break;
16347     case Intrinsic::ppc_altivec_vcmpbfp_p:
16348     case Intrinsic::ppc_altivec_vcmpeqfp_p:
16349     case Intrinsic::ppc_altivec_vcmpequb_p:
16350     case Intrinsic::ppc_altivec_vcmpequh_p:
16351     case Intrinsic::ppc_altivec_vcmpequw_p:
16352     case Intrinsic::ppc_altivec_vcmpequd_p:
16353     case Intrinsic::ppc_altivec_vcmpequq_p:
16354     case Intrinsic::ppc_altivec_vcmpgefp_p:
16355     case Intrinsic::ppc_altivec_vcmpgtfp_p:
16356     case Intrinsic::ppc_altivec_vcmpgtsb_p:
16357     case Intrinsic::ppc_altivec_vcmpgtsh_p:
16358     case Intrinsic::ppc_altivec_vcmpgtsw_p:
16359     case Intrinsic::ppc_altivec_vcmpgtsd_p:
16360     case Intrinsic::ppc_altivec_vcmpgtsq_p:
16361     case Intrinsic::ppc_altivec_vcmpgtub_p:
16362     case Intrinsic::ppc_altivec_vcmpgtuh_p:
16363     case Intrinsic::ppc_altivec_vcmpgtuw_p:
16364     case Intrinsic::ppc_altivec_vcmpgtud_p:
16365     case Intrinsic::ppc_altivec_vcmpgtuq_p:
16366       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
16367       break;
16368     }
16369     break;
16370   }
16371   case ISD::INTRINSIC_W_CHAIN: {
16372     switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
16373     default:
16374       break;
16375     case Intrinsic::ppc_load2r:
16376       // Top bits are cleared for load2r (which is the same as lhbrx).
16377       Known.Zero = 0xFFFF0000;
16378       break;
16379     }
16380     break;
16381   }
16382   }
16383 }
16384 
16385 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16386   switch (Subtarget.getCPUDirective()) {
16387   default: break;
16388   case PPC::DIR_970:
16389   case PPC::DIR_PWR4:
16390   case PPC::DIR_PWR5:
16391   case PPC::DIR_PWR5X:
16392   case PPC::DIR_PWR6:
16393   case PPC::DIR_PWR6X:
16394   case PPC::DIR_PWR7:
16395   case PPC::DIR_PWR8:
16396   case PPC::DIR_PWR9:
16397   case PPC::DIR_PWR10:
16398   case PPC::DIR_PWR_FUTURE: {
16399     if (!ML)
16400       break;
16401 
16402     if (!DisableInnermostLoopAlign32) {
16403       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
16404       // so that we can decrease cache misses and branch-prediction misses.
16405       // Actual alignment of the loop will depend on the hotness check and other
16406       // logic in alignBlocks.
16407       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16408         return Align(32);
16409     }
16410 
16411     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16412 
16413     // For small loops (between 5 and 8 instructions), align to a 32-byte
16414     // boundary so that the entire loop fits in one instruction-cache line.
16415     uint64_t LoopSize = 0;
16416     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16417       for (const MachineInstr &J : **I) {
16418         LoopSize += TII->getInstSizeInBytes(J);
16419         if (LoopSize > 32)
16420           break;
16421       }
16422 
16423     if (LoopSize > 16 && LoopSize <= 32)
16424       return Align(32);
16425 
16426     break;
16427   }
16428   }
16429 
16430   return TargetLowering::getPrefLoopAlignment(ML);
16431 }
16432 
16433 /// getConstraintType - Given a constraint, return the type of
16434 /// constraint it is for this target.
16435 PPCTargetLowering::ConstraintType
16436 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16437   if (Constraint.size() == 1) {
16438     switch (Constraint[0]) {
16439     default: break;
16440     case 'b':
16441     case 'r':
16442     case 'f':
16443     case 'd':
16444     case 'v':
16445     case 'y':
16446       return C_RegisterClass;
16447     case 'Z':
16448       // FIXME: While Z does indicate a memory constraint, it specifically
16449       // indicates an r+r address (used in conjunction with the 'y' modifier
16450       // in the replacement string). Currently, we're forcing the base
16451       // register to be r0 in the asm printer (which is interpreted as zero)
16452       // and forming the complete address in the second register. This is
16453       // suboptimal.
16454       return C_Memory;
16455     }
16456   } else if (Constraint == "wc") { // individual CR bits.
16457     return C_RegisterClass;
16458   } else if (Constraint == "wa" || Constraint == "wd" ||
16459              Constraint == "wf" || Constraint == "ws" ||
16460              Constraint == "wi" || Constraint == "ww") {
16461     return C_RegisterClass; // VSX registers.
16462   }
16463   return TargetLowering::getConstraintType(Constraint);
16464 }
16465 
16466 /// Examine constraint type and operand type and determine a weight value.
16467 /// This object must already have been set up with the operand type
16468 /// and the current alternative constraint selected.
16469 TargetLowering::ConstraintWeight
16470 PPCTargetLowering::getSingleConstraintMatchWeight(
16471     AsmOperandInfo &info, const char *constraint) const {
16472   ConstraintWeight weight = CW_Invalid;
16473   Value *CallOperandVal = info.CallOperandVal;
16474     // If we don't have a value, we can't do a match,
16475     // but allow it at the lowest weight.
16476   if (!CallOperandVal)
16477     return CW_Default;
16478   Type *type = CallOperandVal->getType();
16479 
16480   // Look at the constraint type.
16481   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16482     return CW_Register; // an individual CR bit.
16483   else if ((StringRef(constraint) == "wa" ||
16484             StringRef(constraint) == "wd" ||
16485             StringRef(constraint) == "wf") &&
16486            type->isVectorTy())
16487     return CW_Register;
16488   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16489     return CW_Register; // just hold 64-bit integers data.
16490   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16491     return CW_Register;
16492   else if (StringRef(constraint) == "ww" && type->isFloatTy())
16493     return CW_Register;
16494 
16495   switch (*constraint) {
16496   default:
16497     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16498     break;
16499   case 'b':
16500     if (type->isIntegerTy())
16501       weight = CW_Register;
16502     break;
16503   case 'f':
16504     if (type->isFloatTy())
16505       weight = CW_Register;
16506     break;
16507   case 'd':
16508     if (type->isDoubleTy())
16509       weight = CW_Register;
16510     break;
16511   case 'v':
16512     if (type->isVectorTy())
16513       weight = CW_Register;
16514     break;
16515   case 'y':
16516     weight = CW_Register;
16517     break;
16518   case 'Z':
16519     weight = CW_Memory;
16520     break;
16521   }
16522   return weight;
16523 }
16524 
16525 std::pair<unsigned, const TargetRegisterClass *>
16526 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16527                                                 StringRef Constraint,
16528                                                 MVT VT) const {
16529   if (Constraint.size() == 1) {
16530     // GCC RS6000 Constraint Letters
16531     switch (Constraint[0]) {
16532     case 'b':   // R1-R31
16533       if (VT == MVT::i64 && Subtarget.isPPC64())
16534         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16535       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16536     case 'r':   // R0-R31
16537       if (VT == MVT::i64 && Subtarget.isPPC64())
16538         return std::make_pair(0U, &PPC::G8RCRegClass);
16539       return std::make_pair(0U, &PPC::GPRCRegClass);
16540     // 'd' and 'f' constraints are both defined to be "the floating point
16541     // registers", where one is for 32-bit and the other for 64-bit. We don't
16542     // really care overly much here so just give them all the same reg classes.
16543     case 'd':
16544     case 'f':
16545       if (Subtarget.hasSPE()) {
16546         if (VT == MVT::f32 || VT == MVT::i32)
16547           return std::make_pair(0U, &PPC::GPRCRegClass);
16548         if (VT == MVT::f64 || VT == MVT::i64)
16549           return std::make_pair(0U, &PPC::SPERCRegClass);
16550       } else {
16551         if (VT == MVT::f32 || VT == MVT::i32)
16552           return std::make_pair(0U, &PPC::F4RCRegClass);
16553         if (VT == MVT::f64 || VT == MVT::i64)
16554           return std::make_pair(0U, &PPC::F8RCRegClass);
16555       }
16556       break;
16557     case 'v':
16558       if (Subtarget.hasAltivec() && VT.isVector())
16559         return std::make_pair(0U, &PPC::VRRCRegClass);
16560       else if (Subtarget.hasVSX())
16561         // Scalars in Altivec registers only make sense with VSX.
16562         return std::make_pair(0U, &PPC::VFRCRegClass);
16563       break;
16564     case 'y':   // crrc
16565       return std::make_pair(0U, &PPC::CRRCRegClass);
16566     }
16567   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16568     // An individual CR bit.
16569     return std::make_pair(0U, &PPC::CRBITRCRegClass);
16570   } else if ((Constraint == "wa" || Constraint == "wd" ||
16571              Constraint == "wf" || Constraint == "wi") &&
16572              Subtarget.hasVSX()) {
16573     // A VSX register for either a scalar (FP) or vector. There is no
16574     // support for single precision scalars on subtargets prior to Power8.
16575     if (VT.isVector())
16576       return std::make_pair(0U, &PPC::VSRCRegClass);
16577     if (VT == MVT::f32 && Subtarget.hasP8Vector())
16578       return std::make_pair(0U, &PPC::VSSRCRegClass);
16579     return std::make_pair(0U, &PPC::VSFRCRegClass);
16580   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16581     if (VT == MVT::f32 && Subtarget.hasP8Vector())
16582       return std::make_pair(0U, &PPC::VSSRCRegClass);
16583     else
16584       return std::make_pair(0U, &PPC::VSFRCRegClass);
16585   } else if (Constraint == "lr") {
16586     if (VT == MVT::i64)
16587       return std::make_pair(0U, &PPC::LR8RCRegClass);
16588     else
16589       return std::make_pair(0U, &PPC::LRRCRegClass);
16590   }
16591 
16592   // Handle special cases of physical registers that are not properly handled
16593   // by the base class.
16594   if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16595     // If we name a VSX register, we can't defer to the base class because it
16596     // will not recognize the correct register (their names will be VSL{0-31}
16597     // and V{0-31} so they won't match). So we match them here.
16598     if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16599       int VSNum = atoi(Constraint.data() + 3);
16600       assert(VSNum >= 0 && VSNum <= 63 &&
16601              "Attempted to access a vsr out of range");
16602       if (VSNum < 32)
16603         return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16604       return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16605     }
16606 
16607     // For float registers, we can't defer to the base class as it will match
16608     // the SPILLTOVSRRC class.
16609     if (Constraint.size() > 3 && Constraint[1] == 'f') {
16610       int RegNum = atoi(Constraint.data() + 2);
16611       if (RegNum > 31 || RegNum < 0)
16612         report_fatal_error("Invalid floating point register number");
16613       if (VT == MVT::f32 || VT == MVT::i32)
16614         return Subtarget.hasSPE()
16615                    ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16616                    : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16617       if (VT == MVT::f64 || VT == MVT::i64)
16618         return Subtarget.hasSPE()
16619                    ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16620                    : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16621     }
16622   }
16623 
16624   std::pair<unsigned, const TargetRegisterClass *> R =
16625       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16626 
16627   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16628   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16629   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16630   // register.
16631   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16632   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16633   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16634       PPC::GPRCRegClass.contains(R.first))
16635     return std::make_pair(TRI->getMatchingSuperReg(R.first,
16636                             PPC::sub_32, &PPC::G8RCRegClass),
16637                           &PPC::G8RCRegClass);
16638 
16639   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16640   if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16641     R.first = PPC::CR0;
16642     R.second = &PPC::CRRCRegClass;
16643   }
16644   // FIXME: This warning should ideally be emitted in the front end.
16645   const auto &TM = getTargetMachine();
16646   if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16647     if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16648          (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16649         (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16650       errs() << "warning: vector registers 20 to 32 are reserved in the "
16651                 "default AIX AltiVec ABI and cannot be used\n";
16652   }
16653 
16654   return R;
16655 }
16656 
16657 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16658 /// vector.  If it is invalid, don't add anything to Ops.
16659 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16660                                                      std::string &Constraint,
16661                                                      std::vector<SDValue>&Ops,
16662                                                      SelectionDAG &DAG) const {
16663   SDValue Result;
16664 
16665   // Only support length 1 constraints.
16666   if (Constraint.length() > 1) return;
16667 
16668   char Letter = Constraint[0];
16669   switch (Letter) {
16670   default: break;
16671   case 'I':
16672   case 'J':
16673   case 'K':
16674   case 'L':
16675   case 'M':
16676   case 'N':
16677   case 'O':
16678   case 'P': {
16679     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16680     if (!CST) return; // Must be an immediate to match.
16681     SDLoc dl(Op);
16682     int64_t Value = CST->getSExtValue();
16683     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16684                          // numbers are printed as such.
16685     switch (Letter) {
16686     default: llvm_unreachable("Unknown constraint letter!");
16687     case 'I':  // "I" is a signed 16-bit constant.
16688       if (isInt<16>(Value))
16689         Result = DAG.getTargetConstant(Value, dl, TCVT);
16690       break;
16691     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
16692       if (isShiftedUInt<16, 16>(Value))
16693         Result = DAG.getTargetConstant(Value, dl, TCVT);
16694       break;
16695     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
16696       if (isShiftedInt<16, 16>(Value))
16697         Result = DAG.getTargetConstant(Value, dl, TCVT);
16698       break;
16699     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
16700       if (isUInt<16>(Value))
16701         Result = DAG.getTargetConstant(Value, dl, TCVT);
16702       break;
16703     case 'M':  // "M" is a constant that is greater than 31.
16704       if (Value > 31)
16705         Result = DAG.getTargetConstant(Value, dl, TCVT);
16706       break;
16707     case 'N':  // "N" is a positive constant that is an exact power of two.
16708       if (Value > 0 && isPowerOf2_64(Value))
16709         Result = DAG.getTargetConstant(Value, dl, TCVT);
16710       break;
16711     case 'O':  // "O" is the constant zero.
16712       if (Value == 0)
16713         Result = DAG.getTargetConstant(Value, dl, TCVT);
16714       break;
16715     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
16716       if (isInt<16>(-Value))
16717         Result = DAG.getTargetConstant(Value, dl, TCVT);
16718       break;
16719     }
16720     break;
16721   }
16722   }
16723 
16724   if (Result.getNode()) {
16725     Ops.push_back(Result);
16726     return;
16727   }
16728 
16729   // Handle standard constraint letters.
16730   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16731 }
16732 
16733 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
16734                                               SmallVectorImpl<SDValue> &Ops,
16735                                               SelectionDAG &DAG) const {
16736   if (I.getNumOperands() <= 1)
16737     return;
16738   if (!isa<ConstantSDNode>(Ops[1].getNode()))
16739     return;
16740   auto IntrinsicID = cast<ConstantSDNode>(Ops[1].getNode())->getZExtValue();
16741   if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16742       IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16743     return;
16744 
16745   if (I.hasMetadata("annotation")) {
16746     MDNode *MDN = I.getMetadata("annotation");
16747     Ops.push_back(DAG.getMDNode(MDN));
16748   }
16749 }
16750 
16751 // isLegalAddressingMode - Return true if the addressing mode represented
16752 // by AM is legal for this target, for a load/store of the specified type.
16753 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16754                                               const AddrMode &AM, Type *Ty,
16755                                               unsigned AS,
16756                                               Instruction *I) const {
16757   // Vector type r+i form is supported since power9 as DQ form. We don't check
16758   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
16759   // imm form is preferred and the offset can be adjusted to use imm form later
16760   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
16761   // max offset to check legal addressing mode, we should be a little aggressive
16762   // to contain other offsets for that LSRUse.
16763   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16764     return false;
16765 
16766   // PPC allows a sign-extended 16-bit immediate field.
16767   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16768     return false;
16769 
16770   // No global is ever allowed as a base.
16771   if (AM.BaseGV)
16772     return false;
16773 
16774   // PPC only support r+r,
16775   switch (AM.Scale) {
16776   case 0:  // "r+i" or just "i", depending on HasBaseReg.
16777     break;
16778   case 1:
16779     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
16780       return false;
16781     // Otherwise we have r+r or r+i.
16782     break;
16783   case 2:
16784     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
16785       return false;
16786     // Allow 2*r as r+r.
16787     break;
16788   default:
16789     // No other scales are supported.
16790     return false;
16791   }
16792 
16793   return true;
16794 }
16795 
16796 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
16797                                            SelectionDAG &DAG) const {
16798   MachineFunction &MF = DAG.getMachineFunction();
16799   MachineFrameInfo &MFI = MF.getFrameInfo();
16800   MFI.setReturnAddressIsTaken(true);
16801 
16802   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
16803     return SDValue();
16804 
16805   SDLoc dl(Op);
16806   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16807 
16808   // Make sure the function does not optimize away the store of the RA to
16809   // the stack.
16810   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
16811   FuncInfo->setLRStoreRequired();
16812   bool isPPC64 = Subtarget.isPPC64();
16813   auto PtrVT = getPointerTy(MF.getDataLayout());
16814 
16815   if (Depth > 0) {
16816     // The link register (return address) is saved in the caller's frame
16817     // not the callee's stack frame. So we must get the caller's frame
16818     // address and load the return address at the LR offset from there.
16819     SDValue FrameAddr =
16820         DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16821                     LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
16822     SDValue Offset =
16823         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
16824                         isPPC64 ? MVT::i64 : MVT::i32);
16825     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
16826                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
16827                        MachinePointerInfo());
16828   }
16829 
16830   // Just load the return address off the stack.
16831   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
16832   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
16833                      MachinePointerInfo());
16834 }
16835 
16836 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16837                                           SelectionDAG &DAG) const {
16838   SDLoc dl(Op);
16839   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16840 
16841   MachineFunction &MF = DAG.getMachineFunction();
16842   MachineFrameInfo &MFI = MF.getFrameInfo();
16843   MFI.setFrameAddressIsTaken(true);
16844 
16845   EVT PtrVT = getPointerTy(MF.getDataLayout());
16846   bool isPPC64 = PtrVT == MVT::i64;
16847 
16848   // Naked functions never have a frame pointer, and so we use r1. For all
16849   // other functions, this decision must be delayed until during PEI.
16850   unsigned FrameReg;
16851   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16852     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16853   else
16854     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16855 
16856   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16857                                          PtrVT);
16858   while (Depth--)
16859     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16860                             FrameAddr, MachinePointerInfo());
16861   return FrameAddr;
16862 }
16863 
16864 // FIXME? Maybe this could be a TableGen attribute on some registers and
16865 // this table could be generated automatically from RegInfo.
16866 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
16867                                               const MachineFunction &MF) const {
16868   bool isPPC64 = Subtarget.isPPC64();
16869 
16870   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16871   if (!is64Bit && VT != LLT::scalar(32))
16872     report_fatal_error("Invalid register global variable type");
16873 
16874   Register Reg = StringSwitch<Register>(RegName)
16875                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16876                      .Case("r2", isPPC64 ? Register() : PPC::R2)
16877                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16878                      .Default(Register());
16879 
16880   if (Reg)
16881     return Reg;
16882   report_fatal_error("Invalid register name global variable");
16883 }
16884 
16885 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
16886   // 32-bit SVR4 ABI access everything as got-indirect.
16887   if (Subtarget.is32BitELFABI())
16888     return true;
16889 
16890   // AIX accesses everything indirectly through the TOC, which is similar to
16891   // the GOT.
16892   if (Subtarget.isAIXABI())
16893     return true;
16894 
16895   CodeModel::Model CModel = getTargetMachine().getCodeModel();
16896   // If it is small or large code model, module locals are accessed
16897   // indirectly by loading their address from .toc/.got.
16898   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16899     return true;
16900 
16901   // JumpTable and BlockAddress are accessed as got-indirect.
16902   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16903     return true;
16904 
16905   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16906     return Subtarget.isGVIndirectSymbol(G->getGlobal());
16907 
16908   return false;
16909 }
16910 
16911 bool
16912 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
16913   // The PowerPC target isn't yet aware of offsets.
16914   return false;
16915 }
16916 
16917 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
16918                                            const CallInst &I,
16919                                            MachineFunction &MF,
16920                                            unsigned Intrinsic) const {
16921   switch (Intrinsic) {
16922   case Intrinsic::ppc_atomicrmw_xchg_i128:
16923   case Intrinsic::ppc_atomicrmw_add_i128:
16924   case Intrinsic::ppc_atomicrmw_sub_i128:
16925   case Intrinsic::ppc_atomicrmw_nand_i128:
16926   case Intrinsic::ppc_atomicrmw_and_i128:
16927   case Intrinsic::ppc_atomicrmw_or_i128:
16928   case Intrinsic::ppc_atomicrmw_xor_i128:
16929   case Intrinsic::ppc_cmpxchg_i128:
16930     Info.opc = ISD::INTRINSIC_W_CHAIN;
16931     Info.memVT = MVT::i128;
16932     Info.ptrVal = I.getArgOperand(0);
16933     Info.offset = 0;
16934     Info.align = Align(16);
16935     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
16936                  MachineMemOperand::MOVolatile;
16937     return true;
16938   case Intrinsic::ppc_atomic_load_i128:
16939     Info.opc = ISD::INTRINSIC_W_CHAIN;
16940     Info.memVT = MVT::i128;
16941     Info.ptrVal = I.getArgOperand(0);
16942     Info.offset = 0;
16943     Info.align = Align(16);
16944     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
16945     return true;
16946   case Intrinsic::ppc_atomic_store_i128:
16947     Info.opc = ISD::INTRINSIC_VOID;
16948     Info.memVT = MVT::i128;
16949     Info.ptrVal = I.getArgOperand(2);
16950     Info.offset = 0;
16951     Info.align = Align(16);
16952     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
16953     return true;
16954   case Intrinsic::ppc_altivec_lvx:
16955   case Intrinsic::ppc_altivec_lvxl:
16956   case Intrinsic::ppc_altivec_lvebx:
16957   case Intrinsic::ppc_altivec_lvehx:
16958   case Intrinsic::ppc_altivec_lvewx:
16959   case Intrinsic::ppc_vsx_lxvd2x:
16960   case Intrinsic::ppc_vsx_lxvw4x:
16961   case Intrinsic::ppc_vsx_lxvd2x_be:
16962   case Intrinsic::ppc_vsx_lxvw4x_be:
16963   case Intrinsic::ppc_vsx_lxvl:
16964   case Intrinsic::ppc_vsx_lxvll: {
16965     EVT VT;
16966     switch (Intrinsic) {
16967     case Intrinsic::ppc_altivec_lvebx:
16968       VT = MVT::i8;
16969       break;
16970     case Intrinsic::ppc_altivec_lvehx:
16971       VT = MVT::i16;
16972       break;
16973     case Intrinsic::ppc_altivec_lvewx:
16974       VT = MVT::i32;
16975       break;
16976     case Intrinsic::ppc_vsx_lxvd2x:
16977     case Intrinsic::ppc_vsx_lxvd2x_be:
16978       VT = MVT::v2f64;
16979       break;
16980     default:
16981       VT = MVT::v4i32;
16982       break;
16983     }
16984 
16985     Info.opc = ISD::INTRINSIC_W_CHAIN;
16986     Info.memVT = VT;
16987     Info.ptrVal = I.getArgOperand(0);
16988     Info.offset = -VT.getStoreSize()+1;
16989     Info.size = 2*VT.getStoreSize()-1;
16990     Info.align = Align(1);
16991     Info.flags = MachineMemOperand::MOLoad;
16992     return true;
16993   }
16994   case Intrinsic::ppc_altivec_stvx:
16995   case Intrinsic::ppc_altivec_stvxl:
16996   case Intrinsic::ppc_altivec_stvebx:
16997   case Intrinsic::ppc_altivec_stvehx:
16998   case Intrinsic::ppc_altivec_stvewx:
16999   case Intrinsic::ppc_vsx_stxvd2x:
17000   case Intrinsic::ppc_vsx_stxvw4x:
17001   case Intrinsic::ppc_vsx_stxvd2x_be:
17002   case Intrinsic::ppc_vsx_stxvw4x_be:
17003   case Intrinsic::ppc_vsx_stxvl:
17004   case Intrinsic::ppc_vsx_stxvll: {
17005     EVT VT;
17006     switch (Intrinsic) {
17007     case Intrinsic::ppc_altivec_stvebx:
17008       VT = MVT::i8;
17009       break;
17010     case Intrinsic::ppc_altivec_stvehx:
17011       VT = MVT::i16;
17012       break;
17013     case Intrinsic::ppc_altivec_stvewx:
17014       VT = MVT::i32;
17015       break;
17016     case Intrinsic::ppc_vsx_stxvd2x:
17017     case Intrinsic::ppc_vsx_stxvd2x_be:
17018       VT = MVT::v2f64;
17019       break;
17020     default:
17021       VT = MVT::v4i32;
17022       break;
17023     }
17024 
17025     Info.opc = ISD::INTRINSIC_VOID;
17026     Info.memVT = VT;
17027     Info.ptrVal = I.getArgOperand(1);
17028     Info.offset = -VT.getStoreSize()+1;
17029     Info.size = 2*VT.getStoreSize()-1;
17030     Info.align = Align(1);
17031     Info.flags = MachineMemOperand::MOStore;
17032     return true;
17033   }
17034   case Intrinsic::ppc_stdcx:
17035   case Intrinsic::ppc_stwcx:
17036   case Intrinsic::ppc_sthcx:
17037   case Intrinsic::ppc_stbcx: {
17038     EVT VT;
17039     auto Alignment = Align(8);
17040     switch (Intrinsic) {
17041     case Intrinsic::ppc_stdcx:
17042       VT = MVT::i64;
17043       break;
17044     case Intrinsic::ppc_stwcx:
17045       VT = MVT::i32;
17046       Alignment = Align(4);
17047       break;
17048     case Intrinsic::ppc_sthcx:
17049       VT = MVT::i16;
17050       Alignment = Align(2);
17051       break;
17052     case Intrinsic::ppc_stbcx:
17053       VT = MVT::i8;
17054       Alignment = Align(1);
17055       break;
17056     }
17057     Info.opc = ISD::INTRINSIC_W_CHAIN;
17058     Info.memVT = VT;
17059     Info.ptrVal = I.getArgOperand(0);
17060     Info.offset = 0;
17061     Info.align = Alignment;
17062     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17063     return true;
17064   }
17065   default:
17066     break;
17067   }
17068 
17069   return false;
17070 }
17071 
17072 /// It returns EVT::Other if the type should be determined using generic
17073 /// target-independent logic.
17074 EVT PPCTargetLowering::getOptimalMemOpType(
17075     const MemOp &Op, const AttributeList &FuncAttributes) const {
17076   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
17077     // We should use Altivec/VSX loads and stores when available. For unaligned
17078     // addresses, unaligned VSX loads are only fast starting with the P8.
17079     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
17080         (Op.isAligned(Align(16)) ||
17081          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
17082       return MVT::v4i32;
17083   }
17084 
17085   if (Subtarget.isPPC64()) {
17086     return MVT::i64;
17087   }
17088 
17089   return MVT::i32;
17090 }
17091 
17092 /// Returns true if it is beneficial to convert a load of a constant
17093 /// to just the constant itself.
17094 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17095                                                           Type *Ty) const {
17096   assert(Ty->isIntegerTy());
17097 
17098   unsigned BitSize = Ty->getPrimitiveSizeInBits();
17099   return !(BitSize == 0 || BitSize > 64);
17100 }
17101 
17102 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17103   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17104     return false;
17105   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17106   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17107   return NumBits1 == 64 && NumBits2 == 32;
17108 }
17109 
17110 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17111   if (!VT1.isInteger() || !VT2.isInteger())
17112     return false;
17113   unsigned NumBits1 = VT1.getSizeInBits();
17114   unsigned NumBits2 = VT2.getSizeInBits();
17115   return NumBits1 == 64 && NumBits2 == 32;
17116 }
17117 
17118 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17119   // Generally speaking, zexts are not free, but they are free when they can be
17120   // folded with other operations.
17121   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17122     EVT MemVT = LD->getMemoryVT();
17123     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17124          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17125         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17126          LD->getExtensionType() == ISD::ZEXTLOAD))
17127       return true;
17128   }
17129 
17130   // FIXME: Add other cases...
17131   //  - 32-bit shifts with a zext to i64
17132   //  - zext after ctlz, bswap, etc.
17133   //  - zext after and by a constant mask
17134 
17135   return TargetLowering::isZExtFree(Val, VT2);
17136 }
17137 
17138 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17139   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17140          "invalid fpext types");
17141   // Extending to float128 is not free.
17142   if (DestVT == MVT::f128)
17143     return false;
17144   return true;
17145 }
17146 
17147 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17148   return isInt<16>(Imm) || isUInt<16>(Imm);
17149 }
17150 
17151 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17152   return isInt<16>(Imm) || isUInt<16>(Imm);
17153 }
17154 
17155 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17156                                                        MachineMemOperand::Flags,
17157                                                        unsigned *Fast) const {
17158   if (DisablePPCUnaligned)
17159     return false;
17160 
17161   // PowerPC supports unaligned memory access for simple non-vector types.
17162   // Although accessing unaligned addresses is not as efficient as accessing
17163   // aligned addresses, it is generally more efficient than manual expansion,
17164   // and generally only traps for software emulation when crossing page
17165   // boundaries.
17166 
17167   if (!VT.isSimple())
17168     return false;
17169 
17170   if (VT.isFloatingPoint() && !VT.isVector() &&
17171       !Subtarget.allowsUnalignedFPAccess())
17172     return false;
17173 
17174   if (VT.getSimpleVT().isVector()) {
17175     if (Subtarget.hasVSX()) {
17176       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17177           VT != MVT::v4f32 && VT != MVT::v4i32)
17178         return false;
17179     } else {
17180       return false;
17181     }
17182   }
17183 
17184   if (VT == MVT::ppcf128)
17185     return false;
17186 
17187   if (Fast)
17188     *Fast = 1;
17189 
17190   return true;
17191 }
17192 
17193 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17194                                                SDValue C) const {
17195   // Check integral scalar types.
17196   if (!VT.isScalarInteger())
17197     return false;
17198   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17199     if (!ConstNode->getAPIntValue().isSignedIntN(64))
17200       return false;
17201     // This transformation will generate >= 2 operations. But the following
17202     // cases will generate <= 2 instructions during ISEL. So exclude them.
17203     // 1. If the constant multiplier fits 16 bits, it can be handled by one
17204     // HW instruction, ie. MULLI
17205     // 2. If the multiplier after shifted fits 16 bits, an extra shift
17206     // instruction is needed than case 1, ie. MULLI and RLDICR
17207     int64_t Imm = ConstNode->getSExtValue();
17208     unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17209     Imm >>= Shift;
17210     if (isInt<16>(Imm))
17211       return false;
17212     uint64_t UImm = static_cast<uint64_t>(Imm);
17213     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17214         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17215       return true;
17216   }
17217   return false;
17218 }
17219 
17220 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
17221                                                    EVT VT) const {
17222   return isFMAFasterThanFMulAndFAdd(
17223       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
17224 }
17225 
17226 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17227                                                    Type *Ty) const {
17228   if (Subtarget.hasSPE())
17229     return false;
17230   switch (Ty->getScalarType()->getTypeID()) {
17231   case Type::FloatTyID:
17232   case Type::DoubleTyID:
17233     return true;
17234   case Type::FP128TyID:
17235     return Subtarget.hasP9Vector();
17236   default:
17237     return false;
17238   }
17239 }
17240 
17241 // FIXME: add more patterns which are not profitable to hoist.
17242 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17243   if (!I->hasOneUse())
17244     return true;
17245 
17246   Instruction *User = I->user_back();
17247   assert(User && "A single use instruction with no uses.");
17248 
17249   switch (I->getOpcode()) {
17250   case Instruction::FMul: {
17251     // Don't break FMA, PowerPC prefers FMA.
17252     if (User->getOpcode() != Instruction::FSub &&
17253         User->getOpcode() != Instruction::FAdd)
17254       return true;
17255 
17256     const TargetOptions &Options = getTargetMachine().Options;
17257     const Function *F = I->getFunction();
17258     const DataLayout &DL = F->getParent()->getDataLayout();
17259     Type *Ty = User->getOperand(0)->getType();
17260 
17261     return !(
17262         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17263         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17264         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17265   }
17266   case Instruction::Load: {
17267     // Don't break "store (load float*)" pattern, this pattern will be combined
17268     // to "store (load int32)" in later InstCombine pass. See function
17269     // combineLoadToOperationType. On PowerPC, loading a float point takes more
17270     // cycles than loading a 32 bit integer.
17271     LoadInst *LI = cast<LoadInst>(I);
17272     // For the loads that combineLoadToOperationType does nothing, like
17273     // ordered load, it should be profitable to hoist them.
17274     // For swifterror load, it can only be used for pointer to pointer type, so
17275     // later type check should get rid of this case.
17276     if (!LI->isUnordered())
17277       return true;
17278 
17279     if (User->getOpcode() != Instruction::Store)
17280       return true;
17281 
17282     if (I->getType()->getTypeID() != Type::FloatTyID)
17283       return true;
17284 
17285     return false;
17286   }
17287   default:
17288     return true;
17289   }
17290   return true;
17291 }
17292 
17293 const MCPhysReg *
17294 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17295   // LR is a callee-save register, but we must treat it as clobbered by any call
17296   // site. Hence we include LR in the scratch registers, which are in turn added
17297   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17298   // to CTR, which is used by any indirect call.
17299   static const MCPhysReg ScratchRegs[] = {
17300     PPC::X12, PPC::LR8, PPC::CTR8, 0
17301   };
17302 
17303   return ScratchRegs;
17304 }
17305 
17306 Register PPCTargetLowering::getExceptionPointerRegister(
17307     const Constant *PersonalityFn) const {
17308   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17309 }
17310 
17311 Register PPCTargetLowering::getExceptionSelectorRegister(
17312     const Constant *PersonalityFn) const {
17313   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17314 }
17315 
17316 bool
17317 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17318                      EVT VT , unsigned DefinedValues) const {
17319   if (VT == MVT::v2i64)
17320     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17321 
17322   if (Subtarget.hasVSX())
17323     return true;
17324 
17325   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17326 }
17327 
17328 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17329   if (DisableILPPref || Subtarget.enableMachineScheduler())
17330     return TargetLowering::getSchedulingPreference(N);
17331 
17332   return Sched::ILP;
17333 }
17334 
17335 // Create a fast isel object.
17336 FastISel *
17337 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17338                                   const TargetLibraryInfo *LibInfo) const {
17339   return PPC::createFastISel(FuncInfo, LibInfo);
17340 }
17341 
17342 // 'Inverted' means the FMA opcode after negating one multiplicand.
17343 // For example, (fma -a b c) = (fnmsub a b c)
17344 static unsigned invertFMAOpcode(unsigned Opc) {
17345   switch (Opc) {
17346   default:
17347     llvm_unreachable("Invalid FMA opcode for PowerPC!");
17348   case ISD::FMA:
17349     return PPCISD::FNMSUB;
17350   case PPCISD::FNMSUB:
17351     return ISD::FMA;
17352   }
17353 }
17354 
17355 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17356                                                 bool LegalOps, bool OptForSize,
17357                                                 NegatibleCost &Cost,
17358                                                 unsigned Depth) const {
17359   if (Depth > SelectionDAG::MaxRecursionDepth)
17360     return SDValue();
17361 
17362   unsigned Opc = Op.getOpcode();
17363   EVT VT = Op.getValueType();
17364   SDNodeFlags Flags = Op.getNode()->getFlags();
17365 
17366   switch (Opc) {
17367   case PPCISD::FNMSUB:
17368     if (!Op.hasOneUse() || !isTypeLegal(VT))
17369       break;
17370 
17371     const TargetOptions &Options = getTargetMachine().Options;
17372     SDValue N0 = Op.getOperand(0);
17373     SDValue N1 = Op.getOperand(1);
17374     SDValue N2 = Op.getOperand(2);
17375     SDLoc Loc(Op);
17376 
17377     NegatibleCost N2Cost = NegatibleCost::Expensive;
17378     SDValue NegN2 =
17379         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17380 
17381     if (!NegN2)
17382       return SDValue();
17383 
17384     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17385     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17386     // These transformations may change sign of zeroes. For example,
17387     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17388     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17389       // Try and choose the cheaper one to negate.
17390       NegatibleCost N0Cost = NegatibleCost::Expensive;
17391       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17392                                            N0Cost, Depth + 1);
17393 
17394       NegatibleCost N1Cost = NegatibleCost::Expensive;
17395       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17396                                            N1Cost, Depth + 1);
17397 
17398       if (NegN0 && N0Cost <= N1Cost) {
17399         Cost = std::min(N0Cost, N2Cost);
17400         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17401       } else if (NegN1) {
17402         Cost = std::min(N1Cost, N2Cost);
17403         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17404       }
17405     }
17406 
17407     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17408     if (isOperationLegal(ISD::FMA, VT)) {
17409       Cost = N2Cost;
17410       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17411     }
17412 
17413     break;
17414   }
17415 
17416   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17417                                               Cost, Depth);
17418 }
17419 
17420 // Override to enable LOAD_STACK_GUARD lowering on Linux.
17421 bool PPCTargetLowering::useLoadStackGuardNode() const {
17422   if (!Subtarget.isTargetLinux())
17423     return TargetLowering::useLoadStackGuardNode();
17424   return true;
17425 }
17426 
17427 // Override to disable global variable loading on Linux and insert AIX canary
17428 // word declaration.
17429 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17430   if (Subtarget.isAIXABI()) {
17431     M.getOrInsertGlobal(AIXSSPCanaryWordName,
17432                         Type::getInt8PtrTy(M.getContext()));
17433     return;
17434   }
17435   if (!Subtarget.isTargetLinux())
17436     return TargetLowering::insertSSPDeclarations(M);
17437 }
17438 
17439 Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17440   if (Subtarget.isAIXABI())
17441     return M.getGlobalVariable(AIXSSPCanaryWordName);
17442   return TargetLowering::getSDagStackGuard(M);
17443 }
17444 
17445 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17446                                      bool ForCodeSize) const {
17447   if (!VT.isSimple() || !Subtarget.hasVSX())
17448     return false;
17449 
17450   switch(VT.getSimpleVT().SimpleTy) {
17451   default:
17452     // For FP types that are currently not supported by PPC backend, return
17453     // false. Examples: f16, f80.
17454     return false;
17455   case MVT::f32:
17456   case MVT::f64: {
17457     if (Subtarget.hasPrefixInstrs()) {
17458       // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
17459       return true;
17460     }
17461     bool IsExact;
17462     APSInt IntResult(16, false);
17463     // The rounding mode doesn't really matter because we only care about floats
17464     // that can be converted to integers exactly.
17465     Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17466     // For exact values in the range [-16, 15] we can materialize the float.
17467     if (IsExact && IntResult <= 15 && IntResult >= -16)
17468       return true;
17469     return Imm.isZero();
17470   }
17471   case MVT::ppcf128:
17472     return Imm.isPosZero();
17473   }
17474 }
17475 
17476 // For vector shift operation op, fold
17477 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17478 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17479                                   SelectionDAG &DAG) {
17480   SDValue N0 = N->getOperand(0);
17481   SDValue N1 = N->getOperand(1);
17482   EVT VT = N0.getValueType();
17483   unsigned OpSizeInBits = VT.getScalarSizeInBits();
17484   unsigned Opcode = N->getOpcode();
17485   unsigned TargetOpcode;
17486 
17487   switch (Opcode) {
17488   default:
17489     llvm_unreachable("Unexpected shift operation");
17490   case ISD::SHL:
17491     TargetOpcode = PPCISD::SHL;
17492     break;
17493   case ISD::SRL:
17494     TargetOpcode = PPCISD::SRL;
17495     break;
17496   case ISD::SRA:
17497     TargetOpcode = PPCISD::SRA;
17498     break;
17499   }
17500 
17501   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17502       N1->getOpcode() == ISD::AND)
17503     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17504       if (Mask->getZExtValue() == OpSizeInBits - 1)
17505         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17506 
17507   return SDValue();
17508 }
17509 
17510 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17511   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17512     return Value;
17513 
17514   SDValue N0 = N->getOperand(0);
17515   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17516   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17517       N0.getOpcode() != ISD::SIGN_EXTEND ||
17518       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17519       N->getValueType(0) != MVT::i64)
17520     return SDValue();
17521 
17522   // We can't save an operation here if the value is already extended, and
17523   // the existing shift is easier to combine.
17524   SDValue ExtsSrc = N0.getOperand(0);
17525   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17526       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17527     return SDValue();
17528 
17529   SDLoc DL(N0);
17530   SDValue ShiftBy = SDValue(CN1, 0);
17531   // We want the shift amount to be i32 on the extswli, but the shift could
17532   // have an i64.
17533   if (ShiftBy.getValueType() == MVT::i64)
17534     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17535 
17536   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17537                          ShiftBy);
17538 }
17539 
17540 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17541   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17542     return Value;
17543 
17544   return SDValue();
17545 }
17546 
17547 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17548   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17549     return Value;
17550 
17551   return SDValue();
17552 }
17553 
17554 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17555 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17556 // When C is zero, the equation (addi Z, -C) can be simplified to Z
17557 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17558 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17559                                  const PPCSubtarget &Subtarget) {
17560   if (!Subtarget.isPPC64())
17561     return SDValue();
17562 
17563   SDValue LHS = N->getOperand(0);
17564   SDValue RHS = N->getOperand(1);
17565 
17566   auto isZextOfCompareWithConstant = [](SDValue Op) {
17567     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17568         Op.getValueType() != MVT::i64)
17569       return false;
17570 
17571     SDValue Cmp = Op.getOperand(0);
17572     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17573         Cmp.getOperand(0).getValueType() != MVT::i64)
17574       return false;
17575 
17576     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17577       int64_t NegConstant = 0 - Constant->getSExtValue();
17578       // Due to the limitations of the addi instruction,
17579       // -C is required to be [-32768, 32767].
17580       return isInt<16>(NegConstant);
17581     }
17582 
17583     return false;
17584   };
17585 
17586   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17587   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17588 
17589   // If there is a pattern, canonicalize a zext operand to the RHS.
17590   if (LHSHasPattern && !RHSHasPattern)
17591     std::swap(LHS, RHS);
17592   else if (!LHSHasPattern && !RHSHasPattern)
17593     return SDValue();
17594 
17595   SDLoc DL(N);
17596   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17597   SDValue Cmp = RHS.getOperand(0);
17598   SDValue Z = Cmp.getOperand(0);
17599   auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17600   int64_t NegConstant = 0 - Constant->getSExtValue();
17601 
17602   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17603   default: break;
17604   case ISD::SETNE: {
17605     //                                 when C == 0
17606     //                             --> addze X, (addic Z, -1).carry
17607     //                            /
17608     // add X, (zext(setne Z, C))--
17609     //                            \    when -32768 <= -C <= 32767 && C != 0
17610     //                             --> addze X, (addic (addi Z, -C), -1).carry
17611     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17612                               DAG.getConstant(NegConstant, DL, MVT::i64));
17613     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17614     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17615                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17616     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17617                        SDValue(Addc.getNode(), 1));
17618     }
17619   case ISD::SETEQ: {
17620     //                                 when C == 0
17621     //                             --> addze X, (subfic Z, 0).carry
17622     //                            /
17623     // add X, (zext(sete  Z, C))--
17624     //                            \    when -32768 <= -C <= 32767 && C != 0
17625     //                             --> addze X, (subfic (addi Z, -C), 0).carry
17626     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17627                               DAG.getConstant(NegConstant, DL, MVT::i64));
17628     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17629     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17630                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17631     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17632                        SDValue(Subc.getNode(), 1));
17633     }
17634   }
17635 
17636   return SDValue();
17637 }
17638 
17639 // Transform
17640 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17641 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17642 // In this case both C1 and C2 must be known constants.
17643 // C1+C2 must fit into a 34 bit signed integer.
17644 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17645                                           const PPCSubtarget &Subtarget) {
17646   if (!Subtarget.isUsingPCRelativeCalls())
17647     return SDValue();
17648 
17649   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17650   // If we find that node try to cast the Global Address and the Constant.
17651   SDValue LHS = N->getOperand(0);
17652   SDValue RHS = N->getOperand(1);
17653 
17654   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17655     std::swap(LHS, RHS);
17656 
17657   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17658     return SDValue();
17659 
17660   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17661   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17662   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17663 
17664   // Check that both casts succeeded.
17665   if (!GSDN || !ConstNode)
17666     return SDValue();
17667 
17668   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17669   SDLoc DL(GSDN);
17670 
17671   // The signed int offset needs to fit in 34 bits.
17672   if (!isInt<34>(NewOffset))
17673     return SDValue();
17674 
17675   // The new global address is a copy of the old global address except
17676   // that it has the updated Offset.
17677   SDValue GA =
17678       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17679                                  NewOffset, GSDN->getTargetFlags());
17680   SDValue MatPCRel =
17681       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17682   return MatPCRel;
17683 }
17684 
17685 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17686   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17687     return Value;
17688 
17689   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17690     return Value;
17691 
17692   return SDValue();
17693 }
17694 
17695 // Detect TRUNCATE operations on bitcasts of float128 values.
17696 // What we are looking for here is the situtation where we extract a subset
17697 // of bits from a 128 bit float.
17698 // This can be of two forms:
17699 // 1) BITCAST of f128 feeding TRUNCATE
17700 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17701 // The reason this is required is because we do not have a legal i128 type
17702 // and so we want to prevent having to store the f128 and then reload part
17703 // of it.
17704 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17705                                            DAGCombinerInfo &DCI) const {
17706   // If we are using CRBits then try that first.
17707   if (Subtarget.useCRBits()) {
17708     // Check if CRBits did anything and return that if it did.
17709     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17710       return CRTruncValue;
17711   }
17712 
17713   SDLoc dl(N);
17714   SDValue Op0 = N->getOperand(0);
17715 
17716   // Looking for a truncate of i128 to i64.
17717   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17718     return SDValue();
17719 
17720   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17721 
17722   // SRL feeding TRUNCATE.
17723   if (Op0.getOpcode() == ISD::SRL) {
17724     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17725     // The right shift has to be by 64 bits.
17726     if (!ConstNode || ConstNode->getZExtValue() != 64)
17727       return SDValue();
17728 
17729     // Switch the element number to extract.
17730     EltToExtract = EltToExtract ? 0 : 1;
17731     // Update Op0 past the SRL.
17732     Op0 = Op0.getOperand(0);
17733   }
17734 
17735   // BITCAST feeding a TRUNCATE possibly via SRL.
17736   if (Op0.getOpcode() == ISD::BITCAST &&
17737       Op0.getValueType() == MVT::i128 &&
17738       Op0.getOperand(0).getValueType() == MVT::f128) {
17739     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17740     return DCI.DAG.getNode(
17741         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17742         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17743   }
17744   return SDValue();
17745 }
17746 
17747 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17748   SelectionDAG &DAG = DCI.DAG;
17749 
17750   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17751   if (!ConstOpOrElement)
17752     return SDValue();
17753 
17754   // An imul is usually smaller than the alternative sequence for legal type.
17755   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17756       isOperationLegal(ISD::MUL, N->getValueType(0)))
17757     return SDValue();
17758 
17759   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17760     switch (this->Subtarget.getCPUDirective()) {
17761     default:
17762       // TODO: enhance the condition for subtarget before pwr8
17763       return false;
17764     case PPC::DIR_PWR8:
17765       //  type        mul     add    shl
17766       // scalar        4       1      1
17767       // vector        7       2      2
17768       return true;
17769     case PPC::DIR_PWR9:
17770     case PPC::DIR_PWR10:
17771     case PPC::DIR_PWR_FUTURE:
17772       //  type        mul     add    shl
17773       // scalar        5       2      2
17774       // vector        7       2      2
17775 
17776       // The cycle RATIO of related operations are showed as a table above.
17777       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
17778       // scalar and vector type. For 2 instrs patterns, add/sub + shl
17779       // are 4, it is always profitable; but for 3 instrs patterns
17780       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
17781       // So we should only do it for vector type.
17782       return IsAddOne && IsNeg ? VT.isVector() : true;
17783     }
17784   };
17785 
17786   EVT VT = N->getValueType(0);
17787   SDLoc DL(N);
17788 
17789   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17790   bool IsNeg = MulAmt.isNegative();
17791   APInt MulAmtAbs = MulAmt.abs();
17792 
17793   if ((MulAmtAbs - 1).isPowerOf2()) {
17794     // (mul x, 2^N + 1) => (add (shl x, N), x)
17795     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17796 
17797     if (!IsProfitable(IsNeg, true, VT))
17798       return SDValue();
17799 
17800     SDValue Op0 = N->getOperand(0);
17801     SDValue Op1 =
17802         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17803                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17804     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17805 
17806     if (!IsNeg)
17807       return Res;
17808 
17809     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17810   } else if ((MulAmtAbs + 1).isPowerOf2()) {
17811     // (mul x, 2^N - 1) => (sub (shl x, N), x)
17812     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17813 
17814     if (!IsProfitable(IsNeg, false, VT))
17815       return SDValue();
17816 
17817     SDValue Op0 = N->getOperand(0);
17818     SDValue Op1 =
17819         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17820                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17821 
17822     if (!IsNeg)
17823       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17824     else
17825       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17826 
17827   } else {
17828     return SDValue();
17829   }
17830 }
17831 
17832 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
17833 // in combiner since we need to check SD flags and other subtarget features.
17834 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
17835                                           DAGCombinerInfo &DCI) const {
17836   SDValue N0 = N->getOperand(0);
17837   SDValue N1 = N->getOperand(1);
17838   SDValue N2 = N->getOperand(2);
17839   SDNodeFlags Flags = N->getFlags();
17840   EVT VT = N->getValueType(0);
17841   SelectionDAG &DAG = DCI.DAG;
17842   const TargetOptions &Options = getTargetMachine().Options;
17843   unsigned Opc = N->getOpcode();
17844   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
17845   bool LegalOps = !DCI.isBeforeLegalizeOps();
17846   SDLoc Loc(N);
17847 
17848   if (!isOperationLegal(ISD::FMA, VT))
17849     return SDValue();
17850 
17851   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
17852   // since (fnmsub a b c)=-0 while c-ab=+0.
17853   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
17854     return SDValue();
17855 
17856   // (fma (fneg a) b c) => (fnmsub a b c)
17857   // (fnmsub (fneg a) b c) => (fma a b c)
17858   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
17859     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
17860 
17861   // (fma a (fneg b) c) => (fnmsub a b c)
17862   // (fnmsub a (fneg b) c) => (fma a b c)
17863   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
17864     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
17865 
17866   return SDValue();
17867 }
17868 
17869 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17870   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
17871   if (!Subtarget.is64BitELFABI())
17872     return false;
17873 
17874   // If not a tail call then no need to proceed.
17875   if (!CI->isTailCall())
17876     return false;
17877 
17878   // If sibling calls have been disabled and tail-calls aren't guaranteed
17879   // there is no reason to duplicate.
17880   auto &TM = getTargetMachine();
17881   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17882     return false;
17883 
17884   // Can't tail call a function called indirectly, or if it has variadic args.
17885   const Function *Callee = CI->getCalledFunction();
17886   if (!Callee || Callee->isVarArg())
17887     return false;
17888 
17889   // Make sure the callee and caller calling conventions are eligible for tco.
17890   const Function *Caller = CI->getParent()->getParent();
17891   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17892                                            CI->getCallingConv()))
17893       return false;
17894 
17895   // If the function is local then we have a good chance at tail-calling it
17896   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17897 }
17898 
17899 bool PPCTargetLowering::
17900 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17901   const Value *Mask = AndI.getOperand(1);
17902   // If the mask is suitable for andi. or andis. we should sink the and.
17903   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17904     // Can't handle constants wider than 64-bits.
17905     if (CI->getBitWidth() > 64)
17906       return false;
17907     int64_t ConstVal = CI->getZExtValue();
17908     return isUInt<16>(ConstVal) ||
17909       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17910   }
17911 
17912   // For non-constant masks, we can always use the record-form and.
17913   return true;
17914 }
17915 
17916 /// getAddrModeForFlags - Based on the set of address flags, select the most
17917 /// optimal instruction format to match by.
17918 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17919   // This is not a node we should be handling here.
17920   if (Flags == PPC::MOF_None)
17921     return PPC::AM_None;
17922   // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17923   for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17924     if ((Flags & FlagSet) == FlagSet)
17925       return PPC::AM_DForm;
17926   for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17927     if ((Flags & FlagSet) == FlagSet)
17928       return PPC::AM_DSForm;
17929   for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17930     if ((Flags & FlagSet) == FlagSet)
17931       return PPC::AM_DQForm;
17932   for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17933     if ((Flags & FlagSet) == FlagSet)
17934       return PPC::AM_PrefixDForm;
17935   // If no other forms are selected, return an X-Form as it is the most
17936   // general addressing mode.
17937   return PPC::AM_XForm;
17938 }
17939 
17940 /// Set alignment flags based on whether or not the Frame Index is aligned.
17941 /// Utilized when computing flags for address computation when selecting
17942 /// load and store instructions.
17943 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17944                                SelectionDAG &DAG) {
17945   bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17946   FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17947   if (!FI)
17948     return;
17949   const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17950   unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17951   // If this is (add $FI, $S16Imm), the alignment flags are already set
17952   // based on the immediate. We just need to clear the alignment flags
17953   // if the FI alignment is weaker.
17954   if ((FrameIndexAlign % 4) != 0)
17955     FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17956   if ((FrameIndexAlign % 16) != 0)
17957     FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17958   // If the address is a plain FrameIndex, set alignment flags based on
17959   // FI alignment.
17960   if (!IsAdd) {
17961     if ((FrameIndexAlign % 4) == 0)
17962       FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17963     if ((FrameIndexAlign % 16) == 0)
17964       FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17965   }
17966 }
17967 
17968 /// Given a node, compute flags that are used for address computation when
17969 /// selecting load and store instructions. The flags computed are stored in
17970 /// FlagSet. This function takes into account whether the node is a constant,
17971 /// an ADD, OR, or a constant, and computes the address flags accordingly.
17972 static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
17973                                               SelectionDAG &DAG) {
17974   // Set the alignment flags for the node depending on if the node is
17975   // 4-byte or 16-byte aligned.
17976   auto SetAlignFlagsForImm = [&](uint64_t Imm) {
17977     if ((Imm & 0x3) == 0)
17978       FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17979     if ((Imm & 0xf) == 0)
17980       FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17981   };
17982 
17983   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
17984     // All 32-bit constants can be computed as LIS + Disp.
17985     const APInt &ConstImm = CN->getAPIntValue();
17986     if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
17987       FlagSet |= PPC::MOF_AddrIsSImm32;
17988       SetAlignFlagsForImm(ConstImm.getZExtValue());
17989       setAlignFlagsForFI(N, FlagSet, DAG);
17990     }
17991     if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
17992       FlagSet |= PPC::MOF_RPlusSImm34;
17993     else // Let constant materialization handle large constants.
17994       FlagSet |= PPC::MOF_NotAddNorCst;
17995   } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
17996     // This address can be represented as an addition of:
17997     // - Register + Imm16 (possibly a multiple of 4/16)
17998     // - Register + Imm34
17999     // - Register + PPCISD::Lo
18000     // - Register + Register
18001     // In any case, we won't have to match this as Base + Zero.
18002     SDValue RHS = N.getOperand(1);
18003     if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18004       const APInt &ConstImm = CN->getAPIntValue();
18005       if (ConstImm.isSignedIntN(16)) {
18006         FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18007         SetAlignFlagsForImm(ConstImm.getZExtValue());
18008         setAlignFlagsForFI(N, FlagSet, DAG);
18009       }
18010       if (ConstImm.isSignedIntN(34))
18011         FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18012       else
18013         FlagSet |= PPC::MOF_RPlusR; // Register.
18014     } else if (RHS.getOpcode() == PPCISD::Lo &&
18015                !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
18016       FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18017     else
18018       FlagSet |= PPC::MOF_RPlusR;
18019   } else { // The address computation is not a constant or an addition.
18020     setAlignFlagsForFI(N, FlagSet, DAG);
18021     FlagSet |= PPC::MOF_NotAddNorCst;
18022   }
18023 }
18024 
18025 static bool isPCRelNode(SDValue N) {
18026   return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18027       isValidPCRelNode<ConstantPoolSDNode>(N) ||
18028       isValidPCRelNode<GlobalAddressSDNode>(N) ||
18029       isValidPCRelNode<JumpTableSDNode>(N) ||
18030       isValidPCRelNode<BlockAddressSDNode>(N));
18031 }
18032 
18033 /// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
18034 /// the address flags of the load/store instruction that is to be matched.
18035 unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18036                                            SelectionDAG &DAG) const {
18037   unsigned FlagSet = PPC::MOF_None;
18038 
18039   // Compute subtarget flags.
18040   if (!Subtarget.hasP9Vector())
18041     FlagSet |= PPC::MOF_SubtargetBeforeP9;
18042   else {
18043     FlagSet |= PPC::MOF_SubtargetP9;
18044     if (Subtarget.hasPrefixInstrs())
18045       FlagSet |= PPC::MOF_SubtargetP10;
18046   }
18047   if (Subtarget.hasSPE())
18048     FlagSet |= PPC::MOF_SubtargetSPE;
18049 
18050   // Check if we have a PCRel node and return early.
18051   if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18052     return FlagSet;
18053 
18054   // If the node is the paired load/store intrinsics, compute flags for
18055   // address computation and return early.
18056   unsigned ParentOp = Parent->getOpcode();
18057   if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18058                                (ParentOp == ISD::INTRINSIC_VOID))) {
18059     unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
18060     if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18061       SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18062                              ? Parent->getOperand(2)
18063                              : Parent->getOperand(3);
18064       computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18065       FlagSet |= PPC::MOF_Vector;
18066       return FlagSet;
18067     }
18068   }
18069 
18070   // Mark this as something we don't want to handle here if it is atomic
18071   // or pre-increment instruction.
18072   if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18073     if (LSB->isIndexed())
18074       return PPC::MOF_None;
18075 
18076   // Compute in-memory type flags. This is based on if there are scalars,
18077   // floats or vectors.
18078   const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18079   assert(MN && "Parent should be a MemSDNode!");
18080   EVT MemVT = MN->getMemoryVT();
18081   unsigned Size = MemVT.getSizeInBits();
18082   if (MemVT.isScalarInteger()) {
18083     assert(Size <= 128 &&
18084            "Not expecting scalar integers larger than 16 bytes!");
18085     if (Size < 32)
18086       FlagSet |= PPC::MOF_SubWordInt;
18087     else if (Size == 32)
18088       FlagSet |= PPC::MOF_WordInt;
18089     else
18090       FlagSet |= PPC::MOF_DoubleWordInt;
18091   } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18092     if (Size == 128)
18093       FlagSet |= PPC::MOF_Vector;
18094     else if (Size == 256) {
18095       assert(Subtarget.pairedVectorMemops() &&
18096              "256-bit vectors are only available when paired vector memops is "
18097              "enabled!");
18098       FlagSet |= PPC::MOF_Vector;
18099     } else
18100       llvm_unreachable("Not expecting illegal vectors!");
18101   } else { // Floating point type: can be scalar, f128 or vector types.
18102     if (Size == 32 || Size == 64)
18103       FlagSet |= PPC::MOF_ScalarFloat;
18104     else if (MemVT == MVT::f128 || MemVT.isVector())
18105       FlagSet |= PPC::MOF_Vector;
18106     else
18107       llvm_unreachable("Not expecting illegal scalar floats!");
18108   }
18109 
18110   // Compute flags for address computation.
18111   computeFlagsForAddressComputation(N, FlagSet, DAG);
18112 
18113   // Compute type extension flags.
18114   if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18115     switch (LN->getExtensionType()) {
18116     case ISD::SEXTLOAD:
18117       FlagSet |= PPC::MOF_SExt;
18118       break;
18119     case ISD::EXTLOAD:
18120     case ISD::ZEXTLOAD:
18121       FlagSet |= PPC::MOF_ZExt;
18122       break;
18123     case ISD::NON_EXTLOAD:
18124       FlagSet |= PPC::MOF_NoExt;
18125       break;
18126     }
18127   } else
18128     FlagSet |= PPC::MOF_NoExt;
18129 
18130   // For integers, no extension is the same as zero extension.
18131   // We set the extension mode to zero extension so we don't have
18132   // to add separate entries in AddrModesMap for loads and stores.
18133   if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18134     FlagSet |= PPC::MOF_ZExt;
18135     FlagSet &= ~PPC::MOF_NoExt;
18136   }
18137 
18138   // If we don't have prefixed instructions, 34-bit constants should be
18139   // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18140   bool IsNonP1034BitConst =
18141       ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
18142        FlagSet) == PPC::MOF_RPlusSImm34;
18143   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18144       IsNonP1034BitConst)
18145     FlagSet |= PPC::MOF_NotAddNorCst;
18146 
18147   return FlagSet;
18148 }
18149 
18150 /// SelectForceXFormMode - Given the specified address, force it to be
18151 /// represented as an indexed [r+r] operation (an XForm instruction).
18152 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18153                                                       SDValue &Base,
18154                                                       SelectionDAG &DAG) const {
18155 
18156   PPC::AddrMode Mode = PPC::AM_XForm;
18157   int16_t ForceXFormImm = 0;
18158   if (provablyDisjointOr(DAG, N) &&
18159       !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18160     Disp = N.getOperand(0);
18161     Base = N.getOperand(1);
18162     return Mode;
18163   }
18164 
18165   // If the address is the result of an add, we will utilize the fact that the
18166   // address calculation includes an implicit add.  However, we can reduce
18167   // register pressure if we do not materialize a constant just for use as the
18168   // index register.  We only get rid of the add if it is not an add of a
18169   // value and a 16-bit signed constant and both have a single use.
18170   if (N.getOpcode() == ISD::ADD &&
18171       (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18172        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18173     Disp = N.getOperand(0);
18174     Base = N.getOperand(1);
18175     return Mode;
18176   }
18177 
18178   // Otherwise, use R0 as the base register.
18179   Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18180                          N.getValueType());
18181   Base = N;
18182 
18183   return Mode;
18184 }
18185 
18186 bool PPCTargetLowering::splitValueIntoRegisterParts(
18187     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18188     unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18189   EVT ValVT = Val.getValueType();
18190   // If we are splitting a scalar integer into f64 parts (i.e. so they
18191   // can be placed into VFRC registers), we need to zero extend and
18192   // bitcast the values. This will ensure the value is placed into a
18193   // VSR using direct moves or stack operations as needed.
18194   if (PartVT == MVT::f64 &&
18195       (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18196     Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18197     Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18198     Parts[0] = Val;
18199     return true;
18200   }
18201   return false;
18202 }
18203 
18204 SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18205                                           SelectionDAG &DAG) const {
18206   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18207   TargetLowering::CallLoweringInfo CLI(DAG);
18208   EVT RetVT = Op.getValueType();
18209   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18210   SDValue Callee =
18211       DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18212   bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18213   TargetLowering::ArgListTy Args;
18214   TargetLowering::ArgListEntry Entry;
18215   for (const SDValue &N : Op->op_values()) {
18216     EVT ArgVT = N.getValueType();
18217     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18218     Entry.Node = N;
18219     Entry.Ty = ArgTy;
18220     Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18221     Entry.IsZExt = !Entry.IsSExt;
18222     Args.push_back(Entry);
18223   }
18224 
18225   SDValue InChain = DAG.getEntryNode();
18226   SDValue TCChain = InChain;
18227   const Function &F = DAG.getMachineFunction().getFunction();
18228   bool isTailCall =
18229       TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18230       (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18231   if (isTailCall)
18232     InChain = TCChain;
18233   CLI.setDebugLoc(SDLoc(Op))
18234       .setChain(InChain)
18235       .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18236       .setTailCall(isTailCall)
18237       .setSExtResult(SignExtend)
18238       .setZExtResult(!SignExtend)
18239       .setIsPostTypeLegalization(true);
18240   return TLI.LowerCallTo(CLI).first;
18241 }
18242 
18243 SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18244     const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18245     SelectionDAG &DAG) const {
18246   if (Op.getValueType() == MVT::f32)
18247     return lowerToLibCall(LibCallFloatName, Op, DAG);
18248 
18249   if (Op.getValueType() == MVT::f64)
18250     return lowerToLibCall(LibCallDoubleName, Op, DAG);
18251 
18252   return SDValue();
18253 }
18254 
18255 bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18256   SDNodeFlags Flags = Op.getNode()->getFlags();
18257   return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18258          Flags.hasNoNaNs() && Flags.hasNoInfs();
18259 }
18260 
18261 bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18262   return Op.getNode()->getFlags().hasApproximateFuncs();
18263 }
18264 
18265 bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18266   return getTargetMachine().Options.PPCGenScalarMASSEntries;
18267 }
18268 
18269 SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18270                                             const char *LibCallFloatName,
18271                                             const char *LibCallDoubleNameFinite,
18272                                             const char *LibCallFloatNameFinite,
18273                                             SDValue Op,
18274                                             SelectionDAG &DAG) const {
18275   if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18276     return SDValue();
18277 
18278   if (!isLowringToMASSFiniteSafe(Op))
18279     return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18280                                    DAG);
18281 
18282   return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18283                                  LibCallDoubleNameFinite, Op, DAG);
18284 }
18285 
18286 SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18287   return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18288                           "__xl_powf_finite", Op, DAG);
18289 }
18290 
18291 SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18292   return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18293                           "__xl_sinf_finite", Op, DAG);
18294 }
18295 
18296 SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18297   return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18298                           "__xl_cosf_finite", Op, DAG);
18299 }
18300 
18301 SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18302   return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18303                           "__xl_logf_finite", Op, DAG);
18304 }
18305 
18306 SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18307   return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18308                           "__xl_log10f_finite", Op, DAG);
18309 }
18310 
18311 SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18312   return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18313                           "__xl_expf_finite", Op, DAG);
18314 }
18315 
18316 // If we happen to match to an aligned D-Form, check if the Frame Index is
18317 // adequately aligned. If it is not, reset the mode to match to X-Form.
18318 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18319                                    PPC::AddrMode &Mode) {
18320   if (!isa<FrameIndexSDNode>(N))
18321     return;
18322   if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18323       (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18324     Mode = PPC::AM_XForm;
18325 }
18326 
18327 /// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
18328 /// compute the address flags of the node, get the optimal address mode based
18329 /// on the flags, and set the Base and Disp based on the address mode.
18330 PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18331                                                        SDValue N, SDValue &Disp,
18332                                                        SDValue &Base,
18333                                                        SelectionDAG &DAG,
18334                                                        MaybeAlign Align) const {
18335   SDLoc DL(Parent);
18336 
18337   // Compute the address flags.
18338   unsigned Flags = computeMOFlags(Parent, N, DAG);
18339 
18340   // Get the optimal address mode based on the Flags.
18341   PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18342 
18343   // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18344   // Select an X-Form load if it is not.
18345   setXFormForUnalignedFI(N, Flags, Mode);
18346 
18347   // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18348   if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18349     assert(Subtarget.isUsingPCRelativeCalls() &&
18350            "Must be using PC-Relative calls when a valid PC-Relative node is "
18351            "present!");
18352     Mode = PPC::AM_PCRel;
18353   }
18354 
18355   // Set Base and Disp accordingly depending on the address mode.
18356   switch (Mode) {
18357   case PPC::AM_DForm:
18358   case PPC::AM_DSForm:
18359   case PPC::AM_DQForm: {
18360     // This is a register plus a 16-bit immediate. The base will be the
18361     // register and the displacement will be the immediate unless it
18362     // isn't sufficiently aligned.
18363     if (Flags & PPC::MOF_RPlusSImm16) {
18364       SDValue Op0 = N.getOperand(0);
18365       SDValue Op1 = N.getOperand(1);
18366       int16_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
18367       if (!Align || isAligned(*Align, Imm)) {
18368         Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18369         Base = Op0;
18370         if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18371           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18372           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18373         }
18374         break;
18375       }
18376     }
18377     // This is a register plus the @lo relocation. The base is the register
18378     // and the displacement is the global address.
18379     else if (Flags & PPC::MOF_RPlusLo) {
18380       Disp = N.getOperand(1).getOperand(0); // The global address.
18381       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18382              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18383              Disp.getOpcode() == ISD::TargetConstantPool ||
18384              Disp.getOpcode() == ISD::TargetJumpTable);
18385       Base = N.getOperand(0);
18386       break;
18387     }
18388     // This is a constant address at most 32 bits. The base will be
18389     // zero or load-immediate-shifted and the displacement will be
18390     // the low 16 bits of the address.
18391     else if (Flags & PPC::MOF_AddrIsSImm32) {
18392       auto *CN = cast<ConstantSDNode>(N);
18393       EVT CNType = CN->getValueType(0);
18394       uint64_t CNImm = CN->getZExtValue();
18395       // If this address fits entirely in a 16-bit sext immediate field, codegen
18396       // this as "d, 0".
18397       int16_t Imm;
18398       if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18399         Disp = DAG.getTargetConstant(Imm, DL, CNType);
18400         Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18401                                CNType);
18402         break;
18403       }
18404       // Handle 32-bit sext immediate with LIS + Addr mode.
18405       if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18406           (!Align || isAligned(*Align, CNImm))) {
18407         int32_t Addr = (int32_t)CNImm;
18408         // Otherwise, break this down into LIS + Disp.
18409         Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18410         Base =
18411             DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18412         uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18413         Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18414         break;
18415       }
18416     }
18417     // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
18418     Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18419     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18420       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18421       fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18422     } else
18423       Base = N;
18424     break;
18425   }
18426   case PPC::AM_PrefixDForm: {
18427     int64_t Imm34 = 0;
18428     unsigned Opcode = N.getOpcode();
18429     if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18430         (isIntS34Immediate(N.getOperand(1), Imm34))) {
18431       // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
18432       Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18433       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18434         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18435       else
18436         Base = N.getOperand(0);
18437     } else if (isIntS34Immediate(N, Imm34)) {
18438       // The address is a 34-bit signed immediate.
18439       Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18440       Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18441     }
18442     break;
18443   }
18444   case PPC::AM_PCRel: {
18445     // When selecting PC-Relative instructions, "Base" is not utilized as
18446     // we select the address as [PC+imm].
18447     Disp = N;
18448     break;
18449   }
18450   case PPC::AM_None:
18451     break;
18452   default: { // By default, X-Form is always available to be selected.
18453     // When a frame index is not aligned, we also match by XForm.
18454     FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18455     Base = FI ? N : N.getOperand(1);
18456     Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18457                                 N.getValueType())
18458               : N.getOperand(0);
18459     break;
18460   }
18461   }
18462   return Mode;
18463 }
18464 
18465 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18466                                                  bool Return,
18467                                                  bool IsVarArg) const {
18468   switch (CC) {
18469   case CallingConv::Cold:
18470     return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18471   default:
18472     return CC_PPC64_ELF;
18473   }
18474 }
18475 
18476 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18477   return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18478 }
18479 
18480 TargetLowering::AtomicExpansionKind
18481 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18482   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18483   if (shouldInlineQuadwordAtomics() && Size == 128)
18484     return AtomicExpansionKind::MaskedIntrinsic;
18485 
18486   switch (AI->getOperation()) {
18487   case AtomicRMWInst::UIncWrap:
18488   case AtomicRMWInst::UDecWrap:
18489     return AtomicExpansionKind::CmpXChg;
18490   default:
18491     return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18492   }
18493 
18494   llvm_unreachable("unreachable atomicrmw operation");
18495 }
18496 
18497 TargetLowering::AtomicExpansionKind
18498 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18499   unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18500   if (shouldInlineQuadwordAtomics() && Size == 128)
18501     return AtomicExpansionKind::MaskedIntrinsic;
18502   return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18503 }
18504 
18505 static Intrinsic::ID
18506 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18507   switch (BinOp) {
18508   default:
18509     llvm_unreachable("Unexpected AtomicRMW BinOp");
18510   case AtomicRMWInst::Xchg:
18511     return Intrinsic::ppc_atomicrmw_xchg_i128;
18512   case AtomicRMWInst::Add:
18513     return Intrinsic::ppc_atomicrmw_add_i128;
18514   case AtomicRMWInst::Sub:
18515     return Intrinsic::ppc_atomicrmw_sub_i128;
18516   case AtomicRMWInst::And:
18517     return Intrinsic::ppc_atomicrmw_and_i128;
18518   case AtomicRMWInst::Or:
18519     return Intrinsic::ppc_atomicrmw_or_i128;
18520   case AtomicRMWInst::Xor:
18521     return Intrinsic::ppc_atomicrmw_xor_i128;
18522   case AtomicRMWInst::Nand:
18523     return Intrinsic::ppc_atomicrmw_nand_i128;
18524   }
18525 }
18526 
18527 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18528     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18529     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18530   assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18531   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18532   Type *ValTy = Incr->getType();
18533   assert(ValTy->getPrimitiveSizeInBits() == 128);
18534   Function *RMW = Intrinsic::getDeclaration(
18535       M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18536   Type *Int64Ty = Type::getInt64Ty(M->getContext());
18537   Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18538   Value *IncrHi =
18539       Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18540   Value *Addr =
18541       Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18542   Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
18543   Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18544   Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18545   Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18546   Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18547   return Builder.CreateOr(
18548       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18549 }
18550 
18551 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18552     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18553     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18554   assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18555   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18556   Type *ValTy = CmpVal->getType();
18557   assert(ValTy->getPrimitiveSizeInBits() == 128);
18558   Function *IntCmpXchg =
18559       Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18560   Type *Int64Ty = Type::getInt64Ty(M->getContext());
18561   Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18562   Value *CmpHi =
18563       Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18564   Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18565   Value *NewHi =
18566       Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18567   Value *Addr =
18568       Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
18569   emitLeadingFence(Builder, CI, Ord);
18570   Value *LoHi =
18571       Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
18572   emitTrailingFence(Builder, CI, Ord);
18573   Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18574   Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18575   Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18576   Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18577   return Builder.CreateOr(
18578       Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18579 }
18580