1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
56 #include "llvm/CodeGen/TargetRegisterInfo.h"
57 #include "llvm/CodeGen/ValueTypes.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
79 #include "llvm/Support/AtomicOrdering.h"
80 #include "llvm/Support/BranchProbability.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
83 #include "llvm/Support/CommandLine.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
86 #include "llvm/Support/ErrorHandling.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
89 #include "llvm/Support/MachineValueType.h"
90 #include "llvm/Support/MathExtras.h"
91 #include "llvm/Support/raw_ostream.h"
92 #include "llvm/Target/TargetMachine.h"
93 #include "llvm/Target/TargetOptions.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
124 // TODO - Remove this option if soft fp128 has been fully supported .
125 static cl::opt<bool>
126     EnableSoftFP128("enable-soft-fp128",
127                     cl::desc("temp option to enable soft fp128"), cl::Hidden);
128 
129 STATISTIC(NumTailCalls, "Number of tail calls");
130 STATISTIC(NumSiblingCalls, "Number of sibling calls");
131 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133 
134 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135 
136 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137 
138 // FIXME: Remove this once the bug has been fixed!
139 extern cl::opt<bool> ANDIGlueBug;
140 
141 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
142                                      const PPCSubtarget &STI)
143     : TargetLowering(TM), Subtarget(STI) {
144   // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
145   // arguments are at least 4/8 bytes aligned.
146   bool isPPC64 = Subtarget.isPPC64();
147   setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
148 
149   // Set up the register classes.
150   addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
151   if (!useSoftFloat()) {
152     if (hasSPE()) {
153       addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
154       // EFPU2 APU only supports f32
155       if (!Subtarget.hasEFPU2())
156         addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
157     } else {
158       addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
159       addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
160     }
161   }
162 
163   // Match BITREVERSE to customized fast code sequence in the td file.
164   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
165   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
166 
167   // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
168   setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
169 
170   // Custom lower inline assembly to check for special registers.
171   setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
172   setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
173 
174   // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
175   for (MVT VT : MVT::integer_valuetypes()) {
176     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
177     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
178   }
179 
180   if (Subtarget.isISA3_0()) {
181     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
182     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
183     setTruncStoreAction(MVT::f64, MVT::f16, Legal);
184     setTruncStoreAction(MVT::f32, MVT::f16, Legal);
185   } else {
186     // No extending loads from f16 or HW conversions back and forth.
187     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
188     setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
189     setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
190     setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
191     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
192     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
193     setTruncStoreAction(MVT::f64, MVT::f16, Expand);
194     setTruncStoreAction(MVT::f32, MVT::f16, Expand);
195   }
196 
197   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
198 
199   // PowerPC has pre-inc load and store's.
200   setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
201   setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
202   setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
203   setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
204   setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
205   setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
206   setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
207   setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
208   setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
209   setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
210   if (!Subtarget.hasSPE()) {
211     setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
212     setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
213     setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
214     setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
215   }
216 
217   // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
218   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
219   for (MVT VT : ScalarIntVTs) {
220     setOperationAction(ISD::ADDC, VT, Legal);
221     setOperationAction(ISD::ADDE, VT, Legal);
222     setOperationAction(ISD::SUBC, VT, Legal);
223     setOperationAction(ISD::SUBE, VT, Legal);
224   }
225 
226   if (Subtarget.useCRBits()) {
227     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
228 
229     if (isPPC64 || Subtarget.hasFPCVT()) {
230       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Promote);
231       AddPromotedToType(ISD::STRICT_SINT_TO_FP, MVT::i1,
232                         isPPC64 ? MVT::i64 : MVT::i32);
233       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Promote);
234       AddPromotedToType(ISD::STRICT_UINT_TO_FP, MVT::i1,
235                         isPPC64 ? MVT::i64 : MVT::i32);
236 
237       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
238       AddPromotedToType (ISD::SINT_TO_FP, MVT::i1,
239                          isPPC64 ? MVT::i64 : MVT::i32);
240       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
241       AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
242                         isPPC64 ? MVT::i64 : MVT::i32);
243 
244       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i1, Promote);
245       AddPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::i1,
246                         isPPC64 ? MVT::i64 : MVT::i32);
247       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i1, Promote);
248       AddPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::i1,
249                         isPPC64 ? MVT::i64 : MVT::i32);
250 
251       setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
252       AddPromotedToType(ISD::FP_TO_SINT, MVT::i1,
253                         isPPC64 ? MVT::i64 : MVT::i32);
254       setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
255       AddPromotedToType(ISD::FP_TO_UINT, MVT::i1,
256                         isPPC64 ? MVT::i64 : MVT::i32);
257     } else {
258       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i1, Custom);
259       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i1, Custom);
260       setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
261       setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
262     }
263 
264     // PowerPC does not support direct load/store of condition registers.
265     setOperationAction(ISD::LOAD, MVT::i1, Custom);
266     setOperationAction(ISD::STORE, MVT::i1, Custom);
267 
268     // FIXME: Remove this once the ANDI glue bug is fixed:
269     if (ANDIGlueBug)
270       setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
271 
272     for (MVT VT : MVT::integer_valuetypes()) {
273       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
274       setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
275       setTruncStoreAction(VT, MVT::i1, Expand);
276     }
277 
278     addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
279   }
280 
281   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
282   // PPC (the libcall is not available).
283   setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
284   setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
285   setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
286   setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
287 
288   // We do not currently implement these libm ops for PowerPC.
289   setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
290   setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
291   setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
292   setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
293   setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
294   setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
295 
296   // PowerPC has no SREM/UREM instructions unless we are on P9
297   // On P9 we may use a hardware instruction to compute the remainder.
298   // When the result of both the remainder and the division is required it is
299   // more efficient to compute the remainder from the result of the division
300   // rather than use the remainder instruction. The instructions are legalized
301   // directly because the DivRemPairsPass performs the transformation at the IR
302   // level.
303   if (Subtarget.isISA3_0()) {
304     setOperationAction(ISD::SREM, MVT::i32, Legal);
305     setOperationAction(ISD::UREM, MVT::i32, Legal);
306     setOperationAction(ISD::SREM, MVT::i64, Legal);
307     setOperationAction(ISD::UREM, MVT::i64, Legal);
308   } else {
309     setOperationAction(ISD::SREM, MVT::i32, Expand);
310     setOperationAction(ISD::UREM, MVT::i32, Expand);
311     setOperationAction(ISD::SREM, MVT::i64, Expand);
312     setOperationAction(ISD::UREM, MVT::i64, Expand);
313   }
314 
315   // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
316   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
317   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
318   setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
319   setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
320   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
321   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
322   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
323   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
324 
325   // Handle constrained floating-point operations of scalar.
326   // TODO: Handle SPE specific operation.
327   setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
328   setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
329   setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
330   setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
331   setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
332   setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
333 
334   setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
335   setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
336   setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
337   setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
338   setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
339   if (Subtarget.hasVSX()) {
340     setOperationAction(ISD::STRICT_FRINT, MVT::f32, Legal);
341     setOperationAction(ISD::STRICT_FRINT, MVT::f64, Legal);
342   }
343 
344   if (Subtarget.hasFSQRT()) {
345     setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
346     setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
347   }
348 
349   if (Subtarget.hasFPRND()) {
350     setOperationAction(ISD::STRICT_FFLOOR, MVT::f32, Legal);
351     setOperationAction(ISD::STRICT_FCEIL,  MVT::f32, Legal);
352     setOperationAction(ISD::STRICT_FTRUNC, MVT::f32, Legal);
353     setOperationAction(ISD::STRICT_FROUND, MVT::f32, Legal);
354 
355     setOperationAction(ISD::STRICT_FFLOOR, MVT::f64, Legal);
356     setOperationAction(ISD::STRICT_FCEIL,  MVT::f64, Legal);
357     setOperationAction(ISD::STRICT_FTRUNC, MVT::f64, Legal);
358     setOperationAction(ISD::STRICT_FROUND, MVT::f64, Legal);
359   }
360 
361   // We don't support sin/cos/sqrt/fmod/pow
362   setOperationAction(ISD::FSIN , MVT::f64, Expand);
363   setOperationAction(ISD::FCOS , MVT::f64, Expand);
364   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
365   setOperationAction(ISD::FREM , MVT::f64, Expand);
366   setOperationAction(ISD::FPOW , MVT::f64, Expand);
367   setOperationAction(ISD::FSIN , MVT::f32, Expand);
368   setOperationAction(ISD::FCOS , MVT::f32, Expand);
369   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
370   setOperationAction(ISD::FREM , MVT::f32, Expand);
371   setOperationAction(ISD::FPOW , MVT::f32, Expand);
372   if (Subtarget.hasSPE()) {
373     setOperationAction(ISD::FMA  , MVT::f64, Expand);
374     setOperationAction(ISD::FMA  , MVT::f32, Expand);
375   } else {
376     setOperationAction(ISD::FMA  , MVT::f64, Legal);
377     setOperationAction(ISD::FMA  , MVT::f32, Legal);
378   }
379 
380   if (Subtarget.hasSPE())
381     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
382 
383   setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
384 
385   // If we're enabling GP optimizations, use hardware square root
386   if (!Subtarget.hasFSQRT() &&
387       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
388         Subtarget.hasFRE()))
389     setOperationAction(ISD::FSQRT, MVT::f64, Expand);
390 
391   if (!Subtarget.hasFSQRT() &&
392       !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
393         Subtarget.hasFRES()))
394     setOperationAction(ISD::FSQRT, MVT::f32, Expand);
395 
396   if (Subtarget.hasFCPSGN()) {
397     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
398     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
399   } else {
400     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
401     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
402   }
403 
404   if (Subtarget.hasFPRND()) {
405     setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
406     setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
407     setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
408     setOperationAction(ISD::FROUND, MVT::f64, Legal);
409 
410     setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
411     setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
412     setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
413     setOperationAction(ISD::FROUND, MVT::f32, Legal);
414   }
415 
416   // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
417   // to speed up scalar BSWAP64.
418   // CTPOP or CTTZ were introduced in P8/P9 respectively
419   setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
420   if (Subtarget.hasP9Vector())
421     setOperationAction(ISD::BSWAP, MVT::i64  , Custom);
422   else
423     setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
424   if (Subtarget.isISA3_0()) {
425     setOperationAction(ISD::CTTZ , MVT::i32  , Legal);
426     setOperationAction(ISD::CTTZ , MVT::i64  , Legal);
427   } else {
428     setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
429     setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
430   }
431 
432   if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
433     setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
434     setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
435   } else {
436     setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
437     setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
438   }
439 
440   // PowerPC does not have ROTR
441   setOperationAction(ISD::ROTR, MVT::i32   , Expand);
442   setOperationAction(ISD::ROTR, MVT::i64   , Expand);
443 
444   if (!Subtarget.useCRBits()) {
445     // PowerPC does not have Select
446     setOperationAction(ISD::SELECT, MVT::i32, Expand);
447     setOperationAction(ISD::SELECT, MVT::i64, Expand);
448     setOperationAction(ISD::SELECT, MVT::f32, Expand);
449     setOperationAction(ISD::SELECT, MVT::f64, Expand);
450   }
451 
452   // PowerPC wants to turn select_cc of FP into fsel when possible.
453   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
454   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
455 
456   // PowerPC wants to optimize integer setcc a bit
457   if (!Subtarget.useCRBits())
458     setOperationAction(ISD::SETCC, MVT::i32, Custom);
459 
460   if (Subtarget.hasFPU()) {
461     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
462     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
463     setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Legal);
464 
465     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
466     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
467     setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Legal);
468   }
469 
470   // PowerPC does not have BRCOND which requires SetCC
471   if (!Subtarget.useCRBits())
472     setOperationAction(ISD::BRCOND, MVT::Other, Expand);
473 
474   setOperationAction(ISD::BR_JT,  MVT::Other, Expand);
475 
476   if (Subtarget.hasSPE()) {
477     // SPE has built-in conversions
478     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
479     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
480     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
481     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
482     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
483     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
484   } else {
485     // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
486     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
487     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
488 
489     // PowerPC does not have [U|S]INT_TO_FP
490     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Expand);
491     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Expand);
492     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
493     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
494   }
495 
496   if (Subtarget.hasDirectMove() && isPPC64) {
497     setOperationAction(ISD::BITCAST, MVT::f32, Legal);
498     setOperationAction(ISD::BITCAST, MVT::i32, Legal);
499     setOperationAction(ISD::BITCAST, MVT::i64, Legal);
500     setOperationAction(ISD::BITCAST, MVT::f64, Legal);
501     if (TM.Options.UnsafeFPMath) {
502       setOperationAction(ISD::LRINT, MVT::f64, Legal);
503       setOperationAction(ISD::LRINT, MVT::f32, Legal);
504       setOperationAction(ISD::LLRINT, MVT::f64, Legal);
505       setOperationAction(ISD::LLRINT, MVT::f32, Legal);
506       setOperationAction(ISD::LROUND, MVT::f64, Legal);
507       setOperationAction(ISD::LROUND, MVT::f32, Legal);
508       setOperationAction(ISD::LLROUND, MVT::f64, Legal);
509       setOperationAction(ISD::LLROUND, MVT::f32, Legal);
510     }
511   } else {
512     setOperationAction(ISD::BITCAST, MVT::f32, Expand);
513     setOperationAction(ISD::BITCAST, MVT::i32, Expand);
514     setOperationAction(ISD::BITCAST, MVT::i64, Expand);
515     setOperationAction(ISD::BITCAST, MVT::f64, Expand);
516   }
517 
518   // We cannot sextinreg(i1).  Expand to shifts.
519   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
520 
521   // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
522   // SjLj exception handling but a light-weight setjmp/longjmp replacement to
523   // support continuation, user-level threading, and etc.. As a result, no
524   // other SjLj exception interfaces are implemented and please don't build
525   // your own exception handling based on them.
526   // LLVM/Clang supports zero-cost DWARF exception handling.
527   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
528   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
529 
530   // We want to legalize GlobalAddress and ConstantPool nodes into the
531   // appropriate instructions to materialize the address.
532   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
533   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
534   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
535   setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
536   setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
537   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
538   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
539   setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
540   setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
541   setOperationAction(ISD::JumpTable,     MVT::i64, Custom);
542 
543   // TRAP is legal.
544   setOperationAction(ISD::TRAP, MVT::Other, Legal);
545 
546   // TRAMPOLINE is custom lowered.
547   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
548   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
549 
550   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
551   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
552 
553   if (Subtarget.is64BitELFABI()) {
554     // VAARG always uses double-word chunks, so promote anything smaller.
555     setOperationAction(ISD::VAARG, MVT::i1, Promote);
556     AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
557     setOperationAction(ISD::VAARG, MVT::i8, Promote);
558     AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
559     setOperationAction(ISD::VAARG, MVT::i16, Promote);
560     AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
561     setOperationAction(ISD::VAARG, MVT::i32, Promote);
562     AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
563     setOperationAction(ISD::VAARG, MVT::Other, Expand);
564   } else if (Subtarget.is32BitELFABI()) {
565     // VAARG is custom lowered with the 32-bit SVR4 ABI.
566     setOperationAction(ISD::VAARG, MVT::Other, Custom);
567     setOperationAction(ISD::VAARG, MVT::i64, Custom);
568   } else
569     setOperationAction(ISD::VAARG, MVT::Other, Expand);
570 
571   // VACOPY is custom lowered with the 32-bit SVR4 ABI.
572   if (Subtarget.is32BitELFABI())
573     setOperationAction(ISD::VACOPY            , MVT::Other, Custom);
574   else
575     setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
576 
577   // Use the default implementation.
578   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
579   setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
580   setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
581   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
582   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);
583   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
584   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
585   setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
586   setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
587 
588   // We want to custom lower some of our intrinsics.
589   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
590 
591   // To handle counter-based loop conditions.
592   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
593 
594   setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
595   setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
596   setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
597   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
598 
599   // Comparisons that require checking two conditions.
600   if (Subtarget.hasSPE()) {
601     setCondCodeAction(ISD::SETO, MVT::f32, Expand);
602     setCondCodeAction(ISD::SETO, MVT::f64, Expand);
603     setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
604     setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
605   }
606   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
607   setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
608   setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
609   setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
610   setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
611   setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
612   setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
613   setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
614   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
615   setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
616   setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
617   setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
618 
619   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
620   setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
621 
622   if (Subtarget.has64BitSupport()) {
623     // They also have instructions for converting between i64 and fp.
624     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
625     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand);
626     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
627     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
628     setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
629     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
630     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
631     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
632     // This is just the low 32 bits of a (signed) fp->i64 conversion.
633     // We cannot do this with Promote because i64 is not a legal type.
634     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
635     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
636 
637     if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
638       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
639       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
640     }
641   } else {
642     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
643     if (Subtarget.hasSPE()) {
644       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
645       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
646     } else {
647       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand);
648       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
649     }
650   }
651 
652   // With the instructions enabled under FPCVT, we can do everything.
653   if (Subtarget.hasFPCVT()) {
654     if (Subtarget.has64BitSupport()) {
655       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
656       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
657       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
658       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
659       setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
660       setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
661       setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
662       setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
663     }
664 
665     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
666     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
667     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
668     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
669     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
670     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
671     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
672     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
673   }
674 
675   if (Subtarget.use64BitRegs()) {
676     // 64-bit PowerPC implementations can support i64 types directly
677     addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
678     // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
679     setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
680     // 64-bit PowerPC wants to expand i128 shifts itself.
681     setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
682     setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
683     setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
684   } else {
685     // 32-bit PowerPC wants to expand i64 shifts itself.
686     setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
687     setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
688     setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
689   }
690 
691   // PowerPC has better expansions for funnel shifts than the generic
692   // TargetLowering::expandFunnelShift.
693   if (Subtarget.has64BitSupport()) {
694     setOperationAction(ISD::FSHL, MVT::i64, Custom);
695     setOperationAction(ISD::FSHR, MVT::i64, Custom);
696   }
697   setOperationAction(ISD::FSHL, MVT::i32, Custom);
698   setOperationAction(ISD::FSHR, MVT::i32, Custom);
699 
700   if (Subtarget.hasVSX()) {
701     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
702     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
703     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
704     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
705   }
706 
707   if (Subtarget.hasAltivec()) {
708     for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
709       setOperationAction(ISD::SADDSAT, VT, Legal);
710       setOperationAction(ISD::SSUBSAT, VT, Legal);
711       setOperationAction(ISD::UADDSAT, VT, Legal);
712       setOperationAction(ISD::USUBSAT, VT, Legal);
713     }
714     // First set operation action for all vector types to expand. Then we
715     // will selectively turn on ones that can be effectively codegen'd.
716     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
717       // add/sub are legal for all supported vector VT's.
718       setOperationAction(ISD::ADD, VT, Legal);
719       setOperationAction(ISD::SUB, VT, Legal);
720 
721       // For v2i64, these are only valid with P8Vector. This is corrected after
722       // the loop.
723       if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
724         setOperationAction(ISD::SMAX, VT, Legal);
725         setOperationAction(ISD::SMIN, VT, Legal);
726         setOperationAction(ISD::UMAX, VT, Legal);
727         setOperationAction(ISD::UMIN, VT, Legal);
728       }
729       else {
730         setOperationAction(ISD::SMAX, VT, Expand);
731         setOperationAction(ISD::SMIN, VT, Expand);
732         setOperationAction(ISD::UMAX, VT, Expand);
733         setOperationAction(ISD::UMIN, VT, Expand);
734       }
735 
736       if (Subtarget.hasVSX()) {
737         setOperationAction(ISD::FMAXNUM, VT, Legal);
738         setOperationAction(ISD::FMINNUM, VT, Legal);
739       }
740 
741       // Vector instructions introduced in P8
742       if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
743         setOperationAction(ISD::CTPOP, VT, Legal);
744         setOperationAction(ISD::CTLZ, VT, Legal);
745       }
746       else {
747         setOperationAction(ISD::CTPOP, VT, Expand);
748         setOperationAction(ISD::CTLZ, VT, Expand);
749       }
750 
751       // Vector instructions introduced in P9
752       if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
753         setOperationAction(ISD::CTTZ, VT, Legal);
754       else
755         setOperationAction(ISD::CTTZ, VT, Expand);
756 
757       // We promote all shuffles to v16i8.
758       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
759       AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
760 
761       // We promote all non-typed operations to v4i32.
762       setOperationAction(ISD::AND   , VT, Promote);
763       AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
764       setOperationAction(ISD::OR    , VT, Promote);
765       AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
766       setOperationAction(ISD::XOR   , VT, Promote);
767       AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
768       setOperationAction(ISD::LOAD  , VT, Promote);
769       AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
770       setOperationAction(ISD::SELECT, VT, Promote);
771       AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
772       setOperationAction(ISD::VSELECT, VT, Legal);
773       setOperationAction(ISD::SELECT_CC, VT, Promote);
774       AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
775       setOperationAction(ISD::STORE, VT, Promote);
776       AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
777 
778       // No other operations are legal.
779       setOperationAction(ISD::MUL , VT, Expand);
780       setOperationAction(ISD::SDIV, VT, Expand);
781       setOperationAction(ISD::SREM, VT, Expand);
782       setOperationAction(ISD::UDIV, VT, Expand);
783       setOperationAction(ISD::UREM, VT, Expand);
784       setOperationAction(ISD::FDIV, VT, Expand);
785       setOperationAction(ISD::FREM, VT, Expand);
786       setOperationAction(ISD::FNEG, VT, Expand);
787       setOperationAction(ISD::FSQRT, VT, Expand);
788       setOperationAction(ISD::FLOG, VT, Expand);
789       setOperationAction(ISD::FLOG10, VT, Expand);
790       setOperationAction(ISD::FLOG2, VT, Expand);
791       setOperationAction(ISD::FEXP, VT, Expand);
792       setOperationAction(ISD::FEXP2, VT, Expand);
793       setOperationAction(ISD::FSIN, VT, Expand);
794       setOperationAction(ISD::FCOS, VT, Expand);
795       setOperationAction(ISD::FABS, VT, Expand);
796       setOperationAction(ISD::FFLOOR, VT, Expand);
797       setOperationAction(ISD::FCEIL,  VT, Expand);
798       setOperationAction(ISD::FTRUNC, VT, Expand);
799       setOperationAction(ISD::FRINT,  VT, Expand);
800       setOperationAction(ISD::FNEARBYINT, VT, Expand);
801       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
802       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
803       setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
804       setOperationAction(ISD::MULHU, VT, Expand);
805       setOperationAction(ISD::MULHS, VT, Expand);
806       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
807       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
808       setOperationAction(ISD::UDIVREM, VT, Expand);
809       setOperationAction(ISD::SDIVREM, VT, Expand);
810       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
811       setOperationAction(ISD::FPOW, VT, Expand);
812       setOperationAction(ISD::BSWAP, VT, Expand);
813       setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
814       setOperationAction(ISD::ROTL, VT, Expand);
815       setOperationAction(ISD::ROTR, VT, Expand);
816 
817       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
818         setTruncStoreAction(VT, InnerVT, Expand);
819         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
820         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
821         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
822       }
823     }
824     setOperationAction(ISD::SELECT_CC, MVT::v4i32, Expand);
825     if (!Subtarget.hasP8Vector()) {
826       setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
827       setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
828       setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
829       setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
830     }
831 
832     // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
833     // with merges, splats, etc.
834     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);
835 
836     // Vector truncates to sub-word integer that fit in an Altivec/VSX register
837     // are cheap, so handle them before they get expanded to scalar.
838     setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
839     setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
840     setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
841     setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
842     setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
843 
844     setOperationAction(ISD::AND   , MVT::v4i32, Legal);
845     setOperationAction(ISD::OR    , MVT::v4i32, Legal);
846     setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
847     setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
848     setOperationAction(ISD::SELECT, MVT::v4i32,
849                        Subtarget.useCRBits() ? Legal : Expand);
850     setOperationAction(ISD::STORE , MVT::v4i32, Legal);
851     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
852     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
853     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
854     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
855     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
856     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
857     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
858     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
859     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
860     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
861     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
862     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
863 
864     // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
865     setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
866     // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
867     if (Subtarget.hasAltivec())
868       for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
869         setOperationAction(ISD::ROTL, VT, Legal);
870     // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
871     if (Subtarget.hasP8Altivec())
872       setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
873 
874     addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
875     addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
876     addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
877     addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
878 
879     setOperationAction(ISD::MUL, MVT::v4f32, Legal);
880     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
881 
882     if (Subtarget.hasVSX()) {
883       setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
884       setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
885     }
886 
887     if (Subtarget.hasP8Altivec())
888       setOperationAction(ISD::MUL, MVT::v4i32, Legal);
889     else
890       setOperationAction(ISD::MUL, MVT::v4i32, Custom);
891 
892     if (Subtarget.isISA3_1()) {
893       setOperationAction(ISD::MUL, MVT::v2i64, Legal);
894       setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
895       setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
896       setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
897       setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
898       setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
899       setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
900       setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
901       setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
902       setOperationAction(ISD::UREM, MVT::v2i64, Legal);
903       setOperationAction(ISD::SREM, MVT::v2i64, Legal);
904       setOperationAction(ISD::UREM, MVT::v4i32, Legal);
905       setOperationAction(ISD::SREM, MVT::v4i32, Legal);
906       setOperationAction(ISD::UREM, MVT::v1i128, Legal);
907       setOperationAction(ISD::SREM, MVT::v1i128, Legal);
908       setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
909       setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
910       setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
911     }
912 
913     setOperationAction(ISD::MUL, MVT::v8i16, Legal);
914     setOperationAction(ISD::MUL, MVT::v16i8, Custom);
915 
916     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
917     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
918 
919     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
920     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
921     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
922     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
923 
924     // Altivec does not contain unordered floating-point compare instructions
925     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
926     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
927     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
928     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
929 
930     if (Subtarget.hasVSX()) {
931       setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
932       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
933       if (Subtarget.hasP8Vector()) {
934         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
935         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
936       }
937       if (Subtarget.hasDirectMove() && isPPC64) {
938         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
939         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
940         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
941         setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
942         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
943         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
944         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
945         setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
946       }
947       setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
948 
949       // The nearbyint variants are not allowed to raise the inexact exception
950       // so we can only code-gen them with unsafe math.
951       if (TM.Options.UnsafeFPMath) {
952         setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
953         setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
954       }
955 
956       setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
957       setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
958       setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
959       setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
960       setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
961       setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
962       setOperationAction(ISD::FROUND, MVT::f64, Legal);
963       setOperationAction(ISD::FRINT, MVT::f64, Legal);
964 
965       setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
966       setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
967       setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
968       setOperationAction(ISD::FROUND, MVT::f32, Legal);
969       setOperationAction(ISD::FRINT, MVT::f32, Legal);
970 
971       setOperationAction(ISD::MUL, MVT::v2f64, Legal);
972       setOperationAction(ISD::FMA, MVT::v2f64, Legal);
973 
974       setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
975       setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
976 
977       // Share the Altivec comparison restrictions.
978       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
979       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
980       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
981       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
982 
983       setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
984       setOperationAction(ISD::STORE, MVT::v2f64, Legal);
985 
986       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);
987 
988       if (Subtarget.hasP8Vector())
989         addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
990 
991       addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
992 
993       addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
994       addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
995       addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
996 
997       if (Subtarget.hasP8Altivec()) {
998         setOperationAction(ISD::SHL, MVT::v2i64, Legal);
999         setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1000         setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1001 
1002         // 128 bit shifts can be accomplished via 3 instructions for SHL and
1003         // SRL, but not for SRA because of the instructions available:
1004         // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1005         // doing
1006         setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1007         setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1008         setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1009 
1010         setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1011       }
1012       else {
1013         setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1014         setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1015         setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1016 
1017         setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1018 
1019         // VSX v2i64 only supports non-arithmetic operations.
1020         setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1021         setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1022       }
1023 
1024       if (Subtarget.isISA3_1())
1025         setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1026       else
1027         setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1028 
1029       setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1030       AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1031       setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1032       AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1033 
1034       setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);
1035 
1036       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
1037       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
1038       setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
1039       setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
1040       setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1041       setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1042       setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1043       setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1044 
1045       // Custom handling for partial vectors of integers converted to
1046       // floating point. We already have optimal handling for v2i32 through
1047       // the DAG combine, so those aren't necessary.
1048       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i8, Custom);
1049       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i8, Custom);
1050       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i16, Custom);
1051       setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i16, Custom);
1052       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i8, Custom);
1053       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i8, Custom);
1054       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i16, Custom);
1055       setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i16, Custom);
1056       setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
1057       setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
1058       setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
1059       setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1060       setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
1061       setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
1062       setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
1063       setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1064 
1065       setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1066       setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1067       setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1068       setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1069       setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
1070       setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);
1071 
1072       if (Subtarget.hasDirectMove())
1073         setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
1074       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
1075 
1076       // Handle constrained floating-point operations of vector.
1077       // The predictor is `hasVSX` because altivec instruction has
1078       // no exception but VSX vector instruction has.
1079       setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
1080       setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
1081       setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
1082       setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
1083       setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
1084       setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
1085       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v4f32, Legal);
1086       setOperationAction(ISD::STRICT_FMINNUM, MVT::v4f32, Legal);
1087       setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
1088       setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
1089       setOperationAction(ISD::STRICT_FCEIL,  MVT::v4f32, Legal);
1090       setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
1091       setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
1092 
1093       setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1094       setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1095       setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1096       setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1097       setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
1098       setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1099       setOperationAction(ISD::STRICT_FMAXNUM, MVT::v2f64, Legal);
1100       setOperationAction(ISD::STRICT_FMINNUM, MVT::v2f64, Legal);
1101       setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
1102       setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
1103       setOperationAction(ISD::STRICT_FCEIL,  MVT::v2f64, Legal);
1104       setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
1105       setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
1106 
1107       addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1108     }
1109 
1110     if (Subtarget.hasP8Altivec()) {
1111       addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1112       addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1113     }
1114 
1115     if (Subtarget.hasP9Vector()) {
1116       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
1117       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
1118 
1119       // 128 bit shifts can be accomplished via 3 instructions for SHL and
1120       // SRL, but not for SRA because of the instructions available:
1121       // VS{RL} and VS{RL}O.
1122       setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1123       setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1124       setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1125 
1126       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1127       setOperationAction(ISD::FADD, MVT::f128, Legal);
1128       setOperationAction(ISD::FSUB, MVT::f128, Legal);
1129       setOperationAction(ISD::FDIV, MVT::f128, Legal);
1130       setOperationAction(ISD::FMUL, MVT::f128, Legal);
1131       setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1132       // No extending loads to f128 on PPC.
1133       for (MVT FPT : MVT::fp_valuetypes())
1134         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1135       setOperationAction(ISD::FMA, MVT::f128, Legal);
1136       setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
1137       setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
1138       setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
1139       setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
1140       setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
1141       setCondCodeAction(ISD::SETONE, MVT::f128, Expand);
1142 
1143       setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1144       setOperationAction(ISD::FRINT, MVT::f128, Legal);
1145       setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1146       setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1147       setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1148       setOperationAction(ISD::FROUND, MVT::f128, Legal);
1149 
1150       setOperationAction(ISD::SELECT, MVT::f128, Expand);
1151       setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
1152       setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
1153       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1154       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1155       setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1156       // No implementation for these ops for PowerPC.
1157       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1158       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1159       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1160       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1161       setOperationAction(ISD::FREM, MVT::f128, Expand);
1162 
1163       // Handle constrained floating-point operations of fp128
1164       setOperationAction(ISD::STRICT_FADD, MVT::f128, Legal);
1165       setOperationAction(ISD::STRICT_FSUB, MVT::f128, Legal);
1166       setOperationAction(ISD::STRICT_FMUL, MVT::f128, Legal);
1167       setOperationAction(ISD::STRICT_FDIV, MVT::f128, Legal);
1168       setOperationAction(ISD::STRICT_FMA, MVT::f128, Legal);
1169       setOperationAction(ISD::STRICT_FSQRT, MVT::f128, Legal);
1170       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Legal);
1171       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
1172       setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
1173       setOperationAction(ISD::STRICT_FRINT, MVT::f128, Legal);
1174       setOperationAction(ISD::STRICT_FNEARBYINT, MVT::f128, Legal);
1175       setOperationAction(ISD::STRICT_FFLOOR, MVT::f128, Legal);
1176       setOperationAction(ISD::STRICT_FCEIL, MVT::f128, Legal);
1177       setOperationAction(ISD::STRICT_FTRUNC, MVT::f128, Legal);
1178       setOperationAction(ISD::STRICT_FROUND, MVT::f128, Legal);
1179       setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1180       setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1181       setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1182       setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1183       setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1184     } else if (Subtarget.hasAltivec() && EnableSoftFP128) {
1185       addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1186 
1187       for (MVT FPT : MVT::fp_valuetypes())
1188         setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1189 
1190       setOperationAction(ISD::LOAD, MVT::f128, Promote);
1191       setOperationAction(ISD::STORE, MVT::f128, Promote);
1192 
1193       AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1194       AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1195 
1196       // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1197       // fp_to_uint and int_to_fp.
1198       setOperationAction(ISD::FADD, MVT::f128, LibCall);
1199       setOperationAction(ISD::FSUB, MVT::f128, LibCall);
1200 
1201       setOperationAction(ISD::FMUL, MVT::f128, Expand);
1202       setOperationAction(ISD::FDIV, MVT::f128, Expand);
1203       setOperationAction(ISD::FNEG, MVT::f128, Expand);
1204       setOperationAction(ISD::FABS, MVT::f128, Expand);
1205       setOperationAction(ISD::FSIN, MVT::f128, Expand);
1206       setOperationAction(ISD::FCOS, MVT::f128, Expand);
1207       setOperationAction(ISD::FPOW, MVT::f128, Expand);
1208       setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1209       setOperationAction(ISD::FREM, MVT::f128, Expand);
1210       setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1211       setOperationAction(ISD::FMA, MVT::f128, Expand);
1212       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
1213 
1214       setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1215       setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1216 
1217       // Expand the fp_extend if the target type is fp128.
1218       setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1219       setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Expand);
1220 
1221       // Expand the fp_round if the source type is fp128.
1222       for (MVT VT : {MVT::f32, MVT::f64}) {
1223         setOperationAction(ISD::FP_ROUND, VT, Custom);
1224         setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom);
1225       }
1226     }
1227 
1228     if (Subtarget.hasP9Altivec()) {
1229       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
1230       setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1231 
1232       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8,  Legal);
1233       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Legal);
1234       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Legal);
1235       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1236       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1237       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1238       setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
1239     }
1240   }
1241 
1242   if (Subtarget.pairedVectorMemops()) {
1243     addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1244     setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1245     setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1246   }
1247   if (Subtarget.hasMMA()) {
1248     addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1249     setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1250     setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1251     setOperationAction(ISD::BUILD_VECTOR, MVT::v512i1, Custom);
1252   }
1253 
1254   if (Subtarget.has64BitSupport())
1255     setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1256 
1257   if (Subtarget.isISA3_1())
1258     setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1259 
1260   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1261 
1262   if (!isPPC64) {
1263     setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
1264     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1265   }
1266 
1267   setBooleanContents(ZeroOrOneBooleanContent);
1268 
1269   if (Subtarget.hasAltivec()) {
1270     // Altivec instructions set fields to all zeros or all ones.
1271     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
1272   }
1273 
1274   if (!isPPC64) {
1275     // These libcalls are not available in 32-bit.
1276     setLibcallName(RTLIB::SHL_I128, nullptr);
1277     setLibcallName(RTLIB::SRL_I128, nullptr);
1278     setLibcallName(RTLIB::SRA_I128, nullptr);
1279   }
1280 
1281   if (!isPPC64)
1282     setMaxAtomicSizeInBitsSupported(32);
1283 
1284   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1285 
1286   // We have target-specific dag combine patterns for the following nodes:
1287   setTargetDAGCombine(ISD::ADD);
1288   setTargetDAGCombine(ISD::SHL);
1289   setTargetDAGCombine(ISD::SRA);
1290   setTargetDAGCombine(ISD::SRL);
1291   setTargetDAGCombine(ISD::MUL);
1292   setTargetDAGCombine(ISD::FMA);
1293   setTargetDAGCombine(ISD::SINT_TO_FP);
1294   setTargetDAGCombine(ISD::BUILD_VECTOR);
1295   if (Subtarget.hasFPCVT())
1296     setTargetDAGCombine(ISD::UINT_TO_FP);
1297   setTargetDAGCombine(ISD::LOAD);
1298   setTargetDAGCombine(ISD::STORE);
1299   setTargetDAGCombine(ISD::BR_CC);
1300   if (Subtarget.useCRBits())
1301     setTargetDAGCombine(ISD::BRCOND);
1302   setTargetDAGCombine(ISD::BSWAP);
1303   setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
1304   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
1305   setTargetDAGCombine(ISD::INTRINSIC_VOID);
1306 
1307   setTargetDAGCombine(ISD::SIGN_EXTEND);
1308   setTargetDAGCombine(ISD::ZERO_EXTEND);
1309   setTargetDAGCombine(ISD::ANY_EXTEND);
1310 
1311   setTargetDAGCombine(ISD::TRUNCATE);
1312   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1313 
1314 
1315   if (Subtarget.useCRBits()) {
1316     setTargetDAGCombine(ISD::TRUNCATE);
1317     setTargetDAGCombine(ISD::SETCC);
1318     setTargetDAGCombine(ISD::SELECT_CC);
1319   }
1320 
1321   if (Subtarget.hasP9Altivec()) {
1322     setTargetDAGCombine(ISD::ABS);
1323     setTargetDAGCombine(ISD::VSELECT);
1324   }
1325 
1326   setLibcallName(RTLIB::LOG_F128, "logf128");
1327   setLibcallName(RTLIB::LOG2_F128, "log2f128");
1328   setLibcallName(RTLIB::LOG10_F128, "log10f128");
1329   setLibcallName(RTLIB::EXP_F128, "expf128");
1330   setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1331   setLibcallName(RTLIB::SIN_F128, "sinf128");
1332   setLibcallName(RTLIB::COS_F128, "cosf128");
1333   setLibcallName(RTLIB::POW_F128, "powf128");
1334   setLibcallName(RTLIB::FMIN_F128, "fminf128");
1335   setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1336   setLibcallName(RTLIB::REM_F128, "fmodf128");
1337   setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1338   setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1339   setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1340   setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1341   setLibcallName(RTLIB::ROUND_F128, "roundf128");
1342   setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1343   setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1344   setLibcallName(RTLIB::RINT_F128, "rintf128");
1345   setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1346   setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1347   setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1348   setLibcallName(RTLIB::FMA_F128, "fmaf128");
1349 
1350   // With 32 condition bits, we don't need to sink (and duplicate) compares
1351   // aggressively in CodeGenPrep.
1352   if (Subtarget.useCRBits()) {
1353     setHasMultipleConditionRegisters();
1354     setJumpIsExpensive();
1355   }
1356 
1357   setMinFunctionAlignment(Align(4));
1358 
1359   switch (Subtarget.getCPUDirective()) {
1360   default: break;
1361   case PPC::DIR_970:
1362   case PPC::DIR_A2:
1363   case PPC::DIR_E500:
1364   case PPC::DIR_E500mc:
1365   case PPC::DIR_E5500:
1366   case PPC::DIR_PWR4:
1367   case PPC::DIR_PWR5:
1368   case PPC::DIR_PWR5X:
1369   case PPC::DIR_PWR6:
1370   case PPC::DIR_PWR6X:
1371   case PPC::DIR_PWR7:
1372   case PPC::DIR_PWR8:
1373   case PPC::DIR_PWR9:
1374   case PPC::DIR_PWR10:
1375   case PPC::DIR_PWR_FUTURE:
1376     setPrefLoopAlignment(Align(16));
1377     setPrefFunctionAlignment(Align(16));
1378     break;
1379   }
1380 
1381   if (Subtarget.enableMachineScheduler())
1382     setSchedulingPreference(Sched::Source);
1383   else
1384     setSchedulingPreference(Sched::Hybrid);
1385 
1386   computeRegisterProperties(STI.getRegisterInfo());
1387 
1388   // The Freescale cores do better with aggressive inlining of memcpy and
1389   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1390   if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1391       Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1392     MaxStoresPerMemset = 32;
1393     MaxStoresPerMemsetOptSize = 16;
1394     MaxStoresPerMemcpy = 32;
1395     MaxStoresPerMemcpyOptSize = 8;
1396     MaxStoresPerMemmove = 32;
1397     MaxStoresPerMemmoveOptSize = 8;
1398   } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1399     // The A2 also benefits from (very) aggressive inlining of memcpy and
1400     // friends. The overhead of a the function call, even when warm, can be
1401     // over one hundred cycles.
1402     MaxStoresPerMemset = 128;
1403     MaxStoresPerMemcpy = 128;
1404     MaxStoresPerMemmove = 128;
1405     MaxLoadsPerMemcmp = 128;
1406   } else {
1407     MaxLoadsPerMemcmp = 8;
1408     MaxLoadsPerMemcmpOptSize = 4;
1409   }
1410 
1411   IsStrictFPEnabled = true;
1412 
1413   // Let the subtarget (CPU) decide if a predictable select is more expensive
1414   // than the corresponding branch. This information is used in CGP to decide
1415   // when to convert selects into branches.
1416   PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1417 }
1418 
1419 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1420 /// the desired ByVal argument alignment.
1421 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1422   if (MaxAlign == MaxMaxAlign)
1423     return;
1424   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1425     if (MaxMaxAlign >= 32 &&
1426         VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1427       MaxAlign = Align(32);
1428     else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1429              MaxAlign < 16)
1430       MaxAlign = Align(16);
1431   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1432     Align EltAlign;
1433     getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1434     if (EltAlign > MaxAlign)
1435       MaxAlign = EltAlign;
1436   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1437     for (auto *EltTy : STy->elements()) {
1438       Align EltAlign;
1439       getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1440       if (EltAlign > MaxAlign)
1441         MaxAlign = EltAlign;
1442       if (MaxAlign == MaxMaxAlign)
1443         break;
1444     }
1445   }
1446 }
1447 
1448 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1449 /// function arguments in the caller parameter area.
1450 unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1451                                                   const DataLayout &DL) const {
1452   // 16byte and wider vectors are passed on 16byte boundary.
1453   // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1454   Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1455   if (Subtarget.hasAltivec())
1456     getMaxByValAlign(Ty, Alignment, Align(16));
1457   return Alignment.value();
1458 }
1459 
1460 bool PPCTargetLowering::useSoftFloat() const {
1461   return Subtarget.useSoftFloat();
1462 }
1463 
1464 bool PPCTargetLowering::hasSPE() const {
1465   return Subtarget.hasSPE();
1466 }
1467 
1468 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
1469   return VT.isScalarInteger();
1470 }
1471 
1472 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1473   switch ((PPCISD::NodeType)Opcode) {
1474   case PPCISD::FIRST_NUMBER:    break;
1475   case PPCISD::FSEL:            return "PPCISD::FSEL";
1476   case PPCISD::XSMAXCDP:        return "PPCISD::XSMAXCDP";
1477   case PPCISD::XSMINCDP:        return "PPCISD::XSMINCDP";
1478   case PPCISD::FCFID:           return "PPCISD::FCFID";
1479   case PPCISD::FCFIDU:          return "PPCISD::FCFIDU";
1480   case PPCISD::FCFIDS:          return "PPCISD::FCFIDS";
1481   case PPCISD::FCFIDUS:         return "PPCISD::FCFIDUS";
1482   case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
1483   case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
1484   case PPCISD::FCTIDUZ:         return "PPCISD::FCTIDUZ";
1485   case PPCISD::FCTIWUZ:         return "PPCISD::FCTIWUZ";
1486   case PPCISD::FP_TO_UINT_IN_VSR:
1487                                 return "PPCISD::FP_TO_UINT_IN_VSR,";
1488   case PPCISD::FP_TO_SINT_IN_VSR:
1489                                 return "PPCISD::FP_TO_SINT_IN_VSR";
1490   case PPCISD::FRE:             return "PPCISD::FRE";
1491   case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
1492   case PPCISD::FTSQRT:
1493     return "PPCISD::FTSQRT";
1494   case PPCISD::FSQRT:
1495     return "PPCISD::FSQRT";
1496   case PPCISD::STFIWX:          return "PPCISD::STFIWX";
1497   case PPCISD::VPERM:           return "PPCISD::VPERM";
1498   case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
1499   case PPCISD::XXSPLTI_SP_TO_DP:
1500     return "PPCISD::XXSPLTI_SP_TO_DP";
1501   case PPCISD::XXSPLTI32DX:
1502     return "PPCISD::XXSPLTI32DX";
1503   case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
1504   case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
1505   case PPCISD::VECSHL:          return "PPCISD::VECSHL";
1506   case PPCISD::CMPB:            return "PPCISD::CMPB";
1507   case PPCISD::Hi:              return "PPCISD::Hi";
1508   case PPCISD::Lo:              return "PPCISD::Lo";
1509   case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
1510   case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1511   case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1512   case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
1513   case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
1514   case PPCISD::PROBED_ALLOCA:   return "PPCISD::PROBED_ALLOCA";
1515   case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
1516   case PPCISD::SRL:             return "PPCISD::SRL";
1517   case PPCISD::SRA:             return "PPCISD::SRA";
1518   case PPCISD::SHL:             return "PPCISD::SHL";
1519   case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
1520   case PPCISD::CALL:            return "PPCISD::CALL";
1521   case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
1522   case PPCISD::CALL_NOTOC:      return "PPCISD::CALL_NOTOC";
1523   case PPCISD::MTCTR:           return "PPCISD::MTCTR";
1524   case PPCISD::BCTRL:           return "PPCISD::BCTRL";
1525   case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
1526   case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
1527   case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
1528   case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
1529   case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1530   case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
1531   case PPCISD::MFVSR:           return "PPCISD::MFVSR";
1532   case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
1533   case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
1534   case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
1535   case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
1536   case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1537     return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1538   case PPCISD::ANDI_rec_1_EQ_BIT:
1539     return "PPCISD::ANDI_rec_1_EQ_BIT";
1540   case PPCISD::ANDI_rec_1_GT_BIT:
1541     return "PPCISD::ANDI_rec_1_GT_BIT";
1542   case PPCISD::VCMP:            return "PPCISD::VCMP";
1543   case PPCISD::VCMP_rec:        return "PPCISD::VCMP_rec";
1544   case PPCISD::LBRX:            return "PPCISD::LBRX";
1545   case PPCISD::STBRX:           return "PPCISD::STBRX";
1546   case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
1547   case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
1548   case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
1549   case PPCISD::STXSIX:          return "PPCISD::STXSIX";
1550   case PPCISD::VEXTS:           return "PPCISD::VEXTS";
1551   case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
1552   case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
1553   case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
1554   case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
1555   case PPCISD::ST_VSR_SCAL_INT:
1556                                 return "PPCISD::ST_VSR_SCAL_INT";
1557   case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
1558   case PPCISD::BDNZ:            return "PPCISD::BDNZ";
1559   case PPCISD::BDZ:             return "PPCISD::BDZ";
1560   case PPCISD::MFFS:            return "PPCISD::MFFS";
1561   case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
1562   case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
1563   case PPCISD::CR6SET:          return "PPCISD::CR6SET";
1564   case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
1565   case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
1566   case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
1567   case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1568   case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
1569   case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
1570   case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
1571   case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
1572   case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
1573   case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1574   case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
1575   case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
1576   case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
1577   case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1578   case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1579   case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
1580   case PPCISD::PADDI_DTPREL:
1581     return "PPCISD::PADDI_DTPREL";
1582   case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
1583   case PPCISD::SC:              return "PPCISD::SC";
1584   case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
1585   case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
1586   case PPCISD::RFEBB:           return "PPCISD::RFEBB";
1587   case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
1588   case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
1589   case PPCISD::VABSD:           return "PPCISD::VABSD";
1590   case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
1591   case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
1592   case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
1593   case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
1594   case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
1595   case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
1596   case PPCISD::MAT_PCREL_ADDR:  return "PPCISD::MAT_PCREL_ADDR";
1597   case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1598     return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1599   case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1600     return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1601   case PPCISD::ACC_BUILD:       return "PPCISD::ACC_BUILD";
1602   case PPCISD::PAIR_BUILD:      return "PPCISD::PAIR_BUILD";
1603   case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1604   case PPCISD::XXMFACC:         return "PPCISD::XXMFACC";
1605   case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
1606   case PPCISD::FNMSUB:          return "PPCISD::FNMSUB";
1607   case PPCISD::STRICT_FADDRTZ:
1608     return "PPCISD::STRICT_FADDRTZ";
1609   case PPCISD::STRICT_FCTIDZ:
1610     return "PPCISD::STRICT_FCTIDZ";
1611   case PPCISD::STRICT_FCTIWZ:
1612     return "PPCISD::STRICT_FCTIWZ";
1613   case PPCISD::STRICT_FCTIDUZ:
1614     return "PPCISD::STRICT_FCTIDUZ";
1615   case PPCISD::STRICT_FCTIWUZ:
1616     return "PPCISD::STRICT_FCTIWUZ";
1617   case PPCISD::STRICT_FCFID:
1618     return "PPCISD::STRICT_FCFID";
1619   case PPCISD::STRICT_FCFIDU:
1620     return "PPCISD::STRICT_FCFIDU";
1621   case PPCISD::STRICT_FCFIDS:
1622     return "PPCISD::STRICT_FCFIDS";
1623   case PPCISD::STRICT_FCFIDUS:
1624     return "PPCISD::STRICT_FCFIDUS";
1625   case PPCISD::LXVRZX:          return "PPCISD::LXVRZX";
1626   }
1627   return nullptr;
1628 }
1629 
1630 EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1631                                           EVT VT) const {
1632   if (!VT.isVector())
1633     return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1634 
1635   return VT.changeVectorElementTypeToInteger();
1636 }
1637 
1638 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
1639   assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1640   return true;
1641 }
1642 
1643 //===----------------------------------------------------------------------===//
1644 // Node matching predicates, for use by the tblgen matching code.
1645 //===----------------------------------------------------------------------===//
1646 
1647 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1648 static bool isFloatingPointZero(SDValue Op) {
1649   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1650     return CFP->getValueAPF().isZero();
1651   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1652     // Maybe this has already been legalized into the constant pool?
1653     if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1654       if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1655         return CFP->getValueAPF().isZero();
1656   }
1657   return false;
1658 }
1659 
1660 /// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
1661 /// true if Op is undef or if it matches the specified value.
1662 static bool isConstantOrUndef(int Op, int Val) {
1663   return Op < 0 || Op == Val;
1664 }
1665 
1666 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1667 /// VPKUHUM instruction.
1668 /// The ShuffleKind distinguishes between big-endian operations with
1669 /// two different inputs (0), either-endian operations with two identical
1670 /// inputs (1), and little-endian operations with two different inputs (2).
1671 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1672 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1673                                SelectionDAG &DAG) {
1674   bool IsLE = DAG.getDataLayout().isLittleEndian();
1675   if (ShuffleKind == 0) {
1676     if (IsLE)
1677       return false;
1678     for (unsigned i = 0; i != 16; ++i)
1679       if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1680         return false;
1681   } else if (ShuffleKind == 2) {
1682     if (!IsLE)
1683       return false;
1684     for (unsigned i = 0; i != 16; ++i)
1685       if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1686         return false;
1687   } else if (ShuffleKind == 1) {
1688     unsigned j = IsLE ? 0 : 1;
1689     for (unsigned i = 0; i != 8; ++i)
1690       if (!isConstantOrUndef(N->getMaskElt(i),    i*2+j) ||
1691           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j))
1692         return false;
1693   }
1694   return true;
1695 }
1696 
1697 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1698 /// VPKUWUM instruction.
1699 /// The ShuffleKind distinguishes between big-endian operations with
1700 /// two different inputs (0), either-endian operations with two identical
1701 /// inputs (1), and little-endian operations with two different inputs (2).
1702 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1703 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1704                                SelectionDAG &DAG) {
1705   bool IsLE = DAG.getDataLayout().isLittleEndian();
1706   if (ShuffleKind == 0) {
1707     if (IsLE)
1708       return false;
1709     for (unsigned i = 0; i != 16; i += 2)
1710       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
1711           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
1712         return false;
1713   } else if (ShuffleKind == 2) {
1714     if (!IsLE)
1715       return false;
1716     for (unsigned i = 0; i != 16; i += 2)
1717       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1718           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1))
1719         return false;
1720   } else if (ShuffleKind == 1) {
1721     unsigned j = IsLE ? 0 : 2;
1722     for (unsigned i = 0; i != 8; i += 2)
1723       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1724           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1725           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1726           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1))
1727         return false;
1728   }
1729   return true;
1730 }
1731 
1732 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1733 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1734 /// current subtarget.
1735 ///
1736 /// The ShuffleKind distinguishes between big-endian operations with
1737 /// two different inputs (0), either-endian operations with two identical
1738 /// inputs (1), and little-endian operations with two different inputs (2).
1739 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1740 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1741                                SelectionDAG &DAG) {
1742   const PPCSubtarget& Subtarget =
1743       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
1744   if (!Subtarget.hasP8Vector())
1745     return false;
1746 
1747   bool IsLE = DAG.getDataLayout().isLittleEndian();
1748   if (ShuffleKind == 0) {
1749     if (IsLE)
1750       return false;
1751     for (unsigned i = 0; i != 16; i += 4)
1752       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+4) ||
1753           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+5) ||
1754           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+6) ||
1755           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+7))
1756         return false;
1757   } else if (ShuffleKind == 2) {
1758     if (!IsLE)
1759       return false;
1760     for (unsigned i = 0; i != 16; i += 4)
1761       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2) ||
1762           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+1) ||
1763           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+2) ||
1764           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+3))
1765         return false;
1766   } else if (ShuffleKind == 1) {
1767     unsigned j = IsLE ? 0 : 4;
1768     for (unsigned i = 0; i != 8; i += 4)
1769       if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
1770           !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
1771           !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
1772           !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
1773           !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
1774           !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
1775           !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1776           !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1777         return false;
1778   }
1779   return true;
1780 }
1781 
1782 /// isVMerge - Common function, used to match vmrg* shuffles.
1783 ///
1784 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1785                      unsigned LHSStart, unsigned RHSStart) {
1786   if (N->getValueType(0) != MVT::v16i8)
1787     return false;
1788   assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1789          "Unsupported merge size!");
1790 
1791   for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
1792     for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
1793       if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1794                              LHSStart+j+i*UnitSize) ||
1795           !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1796                              RHSStart+j+i*UnitSize))
1797         return false;
1798     }
1799   return true;
1800 }
1801 
1802 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1803 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1804 /// The ShuffleKind distinguishes between big-endian merges with two
1805 /// different inputs (0), either-endian merges with two identical inputs (1),
1806 /// and little-endian merges with two different inputs (2).  For the latter,
1807 /// the input operands are swapped (see PPCInstrAltivec.td).
1808 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1809                              unsigned ShuffleKind, SelectionDAG &DAG) {
1810   if (DAG.getDataLayout().isLittleEndian()) {
1811     if (ShuffleKind == 1) // unary
1812       return isVMerge(N, UnitSize, 0, 0);
1813     else if (ShuffleKind == 2) // swapped
1814       return isVMerge(N, UnitSize, 0, 16);
1815     else
1816       return false;
1817   } else {
1818     if (ShuffleKind == 1) // unary
1819       return isVMerge(N, UnitSize, 8, 8);
1820     else if (ShuffleKind == 0) // normal
1821       return isVMerge(N, UnitSize, 8, 24);
1822     else
1823       return false;
1824   }
1825 }
1826 
1827 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1828 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1829 /// The ShuffleKind distinguishes between big-endian merges with two
1830 /// different inputs (0), either-endian merges with two identical inputs (1),
1831 /// and little-endian merges with two different inputs (2).  For the latter,
1832 /// the input operands are swapped (see PPCInstrAltivec.td).
1833 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
1834                              unsigned ShuffleKind, SelectionDAG &DAG) {
1835   if (DAG.getDataLayout().isLittleEndian()) {
1836     if (ShuffleKind == 1) // unary
1837       return isVMerge(N, UnitSize, 8, 8);
1838     else if (ShuffleKind == 2) // swapped
1839       return isVMerge(N, UnitSize, 8, 24);
1840     else
1841       return false;
1842   } else {
1843     if (ShuffleKind == 1) // unary
1844       return isVMerge(N, UnitSize, 0, 0);
1845     else if (ShuffleKind == 0) // normal
1846       return isVMerge(N, UnitSize, 0, 16);
1847     else
1848       return false;
1849   }
1850 }
1851 
1852 /**
1853  * Common function used to match vmrgew and vmrgow shuffles
1854  *
1855  * The indexOffset determines whether to look for even or odd words in
1856  * the shuffle mask. This is based on the of the endianness of the target
1857  * machine.
1858  *   - Little Endian:
1859  *     - Use offset of 0 to check for odd elements
1860  *     - Use offset of 4 to check for even elements
1861  *   - Big Endian:
1862  *     - Use offset of 0 to check for even elements
1863  *     - Use offset of 4 to check for odd elements
1864  * A detailed description of the vector element ordering for little endian and
1865  * big endian can be found at
1866  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1867  * Targeting your applications - what little endian and big endian IBM XL C/C++
1868  * compiler differences mean to you
1869  *
1870  * The mask to the shuffle vector instruction specifies the indices of the
1871  * elements from the two input vectors to place in the result. The elements are
1872  * numbered in array-access order, starting with the first vector. These vectors
1873  * are always of type v16i8, thus each vector will contain 16 elements of size
1874  * 8. More info on the shuffle vector can be found in the
1875  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1876  * Language Reference.
1877  *
1878  * The RHSStartValue indicates whether the same input vectors are used (unary)
1879  * or two different input vectors are used, based on the following:
1880  *   - If the instruction uses the same vector for both inputs, the range of the
1881  *     indices will be 0 to 15. In this case, the RHSStart value passed should
1882  *     be 0.
1883  *   - If the instruction has two different vectors then the range of the
1884  *     indices will be 0 to 31. In this case, the RHSStart value passed should
1885  *     be 16 (indices 0-15 specify elements in the first vector while indices 16
1886  *     to 31 specify elements in the second vector).
1887  *
1888  * \param[in] N The shuffle vector SD Node to analyze
1889  * \param[in] IndexOffset Specifies whether to look for even or odd elements
1890  * \param[in] RHSStartValue Specifies the starting index for the righthand input
1891  * vector to the shuffle_vector instruction
1892  * \return true iff this shuffle vector represents an even or odd word merge
1893  */
1894 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1895                      unsigned RHSStartValue) {
1896   if (N->getValueType(0) != MVT::v16i8)
1897     return false;
1898 
1899   for (unsigned i = 0; i < 2; ++i)
1900     for (unsigned j = 0; j < 4; ++j)
1901       if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1902                              i*RHSStartValue+j+IndexOffset) ||
1903           !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1904                              i*RHSStartValue+j+IndexOffset+8))
1905         return false;
1906   return true;
1907 }
1908 
1909 /**
1910  * Determine if the specified shuffle mask is suitable for the vmrgew or
1911  * vmrgow instructions.
1912  *
1913  * \param[in] N The shuffle vector SD Node to analyze
1914  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1915  * \param[in] ShuffleKind Identify the type of merge:
1916  *   - 0 = big-endian merge with two different inputs;
1917  *   - 1 = either-endian merge with two identical inputs;
1918  *   - 2 = little-endian merge with two different inputs (inputs are swapped for
1919  *     little-endian merges).
1920  * \param[in] DAG The current SelectionDAG
1921  * \return true iff this shuffle mask
1922  */
1923 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
1924                               unsigned ShuffleKind, SelectionDAG &DAG) {
1925   if (DAG.getDataLayout().isLittleEndian()) {
1926     unsigned indexOffset = CheckEven ? 4 : 0;
1927     if (ShuffleKind == 1) // Unary
1928       return isVMerge(N, indexOffset, 0);
1929     else if (ShuffleKind == 2) // swapped
1930       return isVMerge(N, indexOffset, 16);
1931     else
1932       return false;
1933   }
1934   else {
1935     unsigned indexOffset = CheckEven ? 0 : 4;
1936     if (ShuffleKind == 1) // Unary
1937       return isVMerge(N, indexOffset, 0);
1938     else if (ShuffleKind == 0) // Normal
1939       return isVMerge(N, indexOffset, 16);
1940     else
1941       return false;
1942   }
1943   return false;
1944 }
1945 
1946 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1947 /// amount, otherwise return -1.
1948 /// The ShuffleKind distinguishes between big-endian operations with two
1949 /// different inputs (0), either-endian operations with two identical inputs
1950 /// (1), and little-endian operations with two different inputs (2).  For the
1951 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1952 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
1953                              SelectionDAG &DAG) {
1954   if (N->getValueType(0) != MVT::v16i8)
1955     return -1;
1956 
1957   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
1958 
1959   // Find the first non-undef value in the shuffle mask.
1960   unsigned i;
1961   for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
1962     /*search*/;
1963 
1964   if (i == 16) return -1;  // all undef.
1965 
1966   // Otherwise, check to see if the rest of the elements are consecutively
1967   // numbered from this value.
1968   unsigned ShiftAmt = SVOp->getMaskElt(i);
1969   if (ShiftAmt < i) return -1;
1970 
1971   ShiftAmt -= i;
1972   bool isLE = DAG.getDataLayout().isLittleEndian();
1973 
1974   if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
1975     // Check the rest of the elements to see if they are consecutive.
1976     for (++i; i != 16; ++i)
1977       if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
1978         return -1;
1979   } else if (ShuffleKind == 1) {
1980     // Check the rest of the elements to see if they are consecutive.
1981     for (++i; i != 16; ++i)
1982       if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
1983         return -1;
1984   } else
1985     return -1;
1986 
1987   if (isLE)
1988     ShiftAmt = 16 - ShiftAmt;
1989 
1990   return ShiftAmt;
1991 }
1992 
1993 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1994 /// specifies a splat of a single element that is suitable for input to
1995 /// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
1996 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
1997   assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
1998          EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
1999 
2000   // The consecutive indices need to specify an element, not part of two
2001   // different elements.  So abandon ship early if this isn't the case.
2002   if (N->getMaskElt(0) % EltSize != 0)
2003     return false;
2004 
2005   // This is a splat operation if each element of the permute is the same, and
2006   // if the value doesn't reference the second vector.
2007   unsigned ElementBase = N->getMaskElt(0);
2008 
2009   // FIXME: Handle UNDEF elements too!
2010   if (ElementBase >= 16)
2011     return false;
2012 
2013   // Check that the indices are consecutive, in the case of a multi-byte element
2014   // splatted with a v16i8 mask.
2015   for (unsigned i = 1; i != EltSize; ++i)
2016     if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2017       return false;
2018 
2019   for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2020     if (N->getMaskElt(i) < 0) continue;
2021     for (unsigned j = 0; j != EltSize; ++j)
2022       if (N->getMaskElt(i+j) != N->getMaskElt(j))
2023         return false;
2024   }
2025   return true;
2026 }
2027 
2028 /// Check that the mask is shuffling N byte elements. Within each N byte
2029 /// element of the mask, the indices could be either in increasing or
2030 /// decreasing order as long as they are consecutive.
2031 /// \param[in] N the shuffle vector SD Node to analyze
2032 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2033 /// Word/DoubleWord/QuadWord).
2034 /// \param[in] StepLen the delta indices number among the N byte element, if
2035 /// the mask is in increasing/decreasing order then it is 1/-1.
2036 /// \return true iff the mask is shuffling N byte elements.
2037 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2038                                    int StepLen) {
2039   assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2040          "Unexpected element width.");
2041   assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");
2042 
2043   unsigned NumOfElem = 16 / Width;
2044   unsigned MaskVal[16]; //  Width is never greater than 16
2045   for (unsigned i = 0; i < NumOfElem; ++i) {
2046     MaskVal[0] = N->getMaskElt(i * Width);
2047     if ((StepLen == 1) && (MaskVal[0] % Width)) {
2048       return false;
2049     } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2050       return false;
2051     }
2052 
2053     for (unsigned int j = 1; j < Width; ++j) {
2054       MaskVal[j] = N->getMaskElt(i * Width + j);
2055       if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2056         return false;
2057       }
2058     }
2059   }
2060 
2061   return true;
2062 }
2063 
2064 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2065                           unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2066   if (!isNByteElemShuffleMask(N, 4, 1))
2067     return false;
2068 
2069   // Now we look at mask elements 0,4,8,12
2070   unsigned M0 = N->getMaskElt(0) / 4;
2071   unsigned M1 = N->getMaskElt(4) / 4;
2072   unsigned M2 = N->getMaskElt(8) / 4;
2073   unsigned M3 = N->getMaskElt(12) / 4;
2074   unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2075   unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2076 
2077   // Below, let H and L be arbitrary elements of the shuffle mask
2078   // where H is in the range [4,7] and L is in the range [0,3].
2079   // H, 1, 2, 3 or L, 5, 6, 7
2080   if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2081       (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2082     ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2083     InsertAtByte = IsLE ? 12 : 0;
2084     Swap = M0 < 4;
2085     return true;
2086   }
2087   // 0, H, 2, 3 or 4, L, 6, 7
2088   if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2089       (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2090     ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2091     InsertAtByte = IsLE ? 8 : 4;
2092     Swap = M1 < 4;
2093     return true;
2094   }
2095   // 0, 1, H, 3 or 4, 5, L, 7
2096   if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2097       (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2098     ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2099     InsertAtByte = IsLE ? 4 : 8;
2100     Swap = M2 < 4;
2101     return true;
2102   }
2103   // 0, 1, 2, H or 4, 5, 6, L
2104   if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2105       (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2106     ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2107     InsertAtByte = IsLE ? 0 : 12;
2108     Swap = M3 < 4;
2109     return true;
2110   }
2111 
2112   // If both vector operands for the shuffle are the same vector, the mask will
2113   // contain only elements from the first one and the second one will be undef.
2114   if (N->getOperand(1).isUndef()) {
2115     ShiftElts = 0;
2116     Swap = true;
2117     unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2118     if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2119       InsertAtByte = IsLE ? 12 : 0;
2120       return true;
2121     }
2122     if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2123       InsertAtByte = IsLE ? 8 : 4;
2124       return true;
2125     }
2126     if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2127       InsertAtByte = IsLE ? 4 : 8;
2128       return true;
2129     }
2130     if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2131       InsertAtByte = IsLE ? 0 : 12;
2132       return true;
2133     }
2134   }
2135 
2136   return false;
2137 }
2138 
2139 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2140                                bool &Swap, bool IsLE) {
2141   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2142   // Ensure each byte index of the word is consecutive.
2143   if (!isNByteElemShuffleMask(N, 4, 1))
2144     return false;
2145 
2146   // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2147   unsigned M0 = N->getMaskElt(0) / 4;
2148   unsigned M1 = N->getMaskElt(4) / 4;
2149   unsigned M2 = N->getMaskElt(8) / 4;
2150   unsigned M3 = N->getMaskElt(12) / 4;
2151 
2152   // If both vector operands for the shuffle are the same vector, the mask will
2153   // contain only elements from the first one and the second one will be undef.
2154   if (N->getOperand(1).isUndef()) {
2155     assert(M0 < 4 && "Indexing into an undef vector?");
2156     if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2157       return false;
2158 
2159     ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2160     Swap = false;
2161     return true;
2162   }
2163 
2164   // Ensure each word index of the ShuffleVector Mask is consecutive.
2165   if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2166     return false;
2167 
2168   if (IsLE) {
2169     if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2170       // Input vectors don't need to be swapped if the leading element
2171       // of the result is one of the 3 left elements of the second vector
2172       // (or if there is no shift to be done at all).
2173       Swap = false;
2174       ShiftElts = (8 - M0) % 8;
2175     } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2176       // Input vectors need to be swapped if the leading element
2177       // of the result is one of the 3 left elements of the first vector
2178       // (or if we're shifting by 4 - thereby simply swapping the vectors).
2179       Swap = true;
2180       ShiftElts = (4 - M0) % 4;
2181     }
2182 
2183     return true;
2184   } else {                                          // BE
2185     if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2186       // Input vectors don't need to be swapped if the leading element
2187       // of the result is one of the 4 elements of the first vector.
2188       Swap = false;
2189       ShiftElts = M0;
2190     } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2191       // Input vectors need to be swapped if the leading element
2192       // of the result is one of the 4 elements of the right vector.
2193       Swap = true;
2194       ShiftElts = M0 - 4;
2195     }
2196 
2197     return true;
2198   }
2199 }
2200 
2201 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2202   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2203 
2204   if (!isNByteElemShuffleMask(N, Width, -1))
2205     return false;
2206 
2207   for (int i = 0; i < 16; i += Width)
2208     if (N->getMaskElt(i) != i + Width - 1)
2209       return false;
2210 
2211   return true;
2212 }
2213 
2214 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2215   return isXXBRShuffleMaskHelper(N, 2);
2216 }
2217 
2218 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2219   return isXXBRShuffleMaskHelper(N, 4);
2220 }
2221 
2222 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2223   return isXXBRShuffleMaskHelper(N, 8);
2224 }
2225 
2226 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2227   return isXXBRShuffleMaskHelper(N, 16);
2228 }
2229 
2230 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2231 /// if the inputs to the instruction should be swapped and set \p DM to the
2232 /// value for the immediate.
2233 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2234 /// AND element 0 of the result comes from the first input (LE) or second input
2235 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2236 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2237 /// mask.
2238 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2239                                bool &Swap, bool IsLE) {
2240   assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2241 
2242   // Ensure each byte index of the double word is consecutive.
2243   if (!isNByteElemShuffleMask(N, 8, 1))
2244     return false;
2245 
2246   unsigned M0 = N->getMaskElt(0) / 8;
2247   unsigned M1 = N->getMaskElt(8) / 8;
2248   assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2249 
2250   // If both vector operands for the shuffle are the same vector, the mask will
2251   // contain only elements from the first one and the second one will be undef.
2252   if (N->getOperand(1).isUndef()) {
2253     if ((M0 | M1) < 2) {
2254       DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2255       Swap = false;
2256       return true;
2257     } else
2258       return false;
2259   }
2260 
2261   if (IsLE) {
2262     if (M0 > 1 && M1 < 2) {
2263       Swap = false;
2264     } else if (M0 < 2 && M1 > 1) {
2265       M0 = (M0 + 2) % 4;
2266       M1 = (M1 + 2) % 4;
2267       Swap = true;
2268     } else
2269       return false;
2270 
2271     // Note: if control flow comes here that means Swap is already set above
2272     DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2273     return true;
2274   } else { // BE
2275     if (M0 < 2 && M1 > 1) {
2276       Swap = false;
2277     } else if (M0 > 1 && M1 < 2) {
2278       M0 = (M0 + 2) % 4;
2279       M1 = (M1 + 2) % 4;
2280       Swap = true;
2281     } else
2282       return false;
2283 
2284     // Note: if control flow comes here that means Swap is already set above
2285     DM = (M0 << 1) + (M1 & 1);
2286     return true;
2287   }
2288 }
2289 
2290 
2291 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2292 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2293 /// elements are counted from the left of the vector register).
2294 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2295                                          SelectionDAG &DAG) {
2296   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2297   assert(isSplatShuffleMask(SVOp, EltSize));
2298   if (DAG.getDataLayout().isLittleEndian())
2299     return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2300   else
2301     return SVOp->getMaskElt(0) / EltSize;
2302 }
2303 
2304 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2305 /// by using a vspltis[bhw] instruction of the specified element size, return
2306 /// the constant being splatted.  The ByteSize field indicates the number of
2307 /// bytes of each element [124] -> [bhw].
2308 SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2309   SDValue OpVal(nullptr, 0);
2310 
2311   // If ByteSize of the splat is bigger than the element size of the
2312   // build_vector, then we have a case where we are checking for a splat where
2313   // multiple elements of the buildvector are folded together into a single
2314   // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2315   unsigned EltSize = 16/N->getNumOperands();
2316   if (EltSize < ByteSize) {
2317     unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
2318     SDValue UniquedVals[4];
2319     assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2320 
2321     // See if all of the elements in the buildvector agree across.
2322     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2323       if (N->getOperand(i).isUndef()) continue;
2324       // If the element isn't a constant, bail fully out.
2325       if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2326 
2327       if (!UniquedVals[i&(Multiple-1)].getNode())
2328         UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2329       else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2330         return SDValue();  // no match.
2331     }
2332 
2333     // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2334     // either constant or undef values that are identical for each chunk.  See
2335     // if these chunks can form into a larger vspltis*.
2336 
2337     // Check to see if all of the leading entries are either 0 or -1.  If
2338     // neither, then this won't fit into the immediate field.
2339     bool LeadingZero = true;
2340     bool LeadingOnes = true;
2341     for (unsigned i = 0; i != Multiple-1; ++i) {
2342       if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.
2343 
2344       LeadingZero &= isNullConstant(UniquedVals[i]);
2345       LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2346     }
2347     // Finally, check the least significant entry.
2348     if (LeadingZero) {
2349       if (!UniquedVals[Multiple-1].getNode())
2350         return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
2351       int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
2352       if (Val < 16)                                   // 0,0,0,4 -> vspltisw(4)
2353         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2354     }
2355     if (LeadingOnes) {
2356       if (!UniquedVals[Multiple-1].getNode())
2357         return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2358       int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2359       if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
2360         return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2361     }
2362 
2363     return SDValue();
2364   }
2365 
2366   // Check to see if this buildvec has a single non-undef value in its elements.
2367   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2368     if (N->getOperand(i).isUndef()) continue;
2369     if (!OpVal.getNode())
2370       OpVal = N->getOperand(i);
2371     else if (OpVal != N->getOperand(i))
2372       return SDValue();
2373   }
2374 
2375   if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.
2376 
2377   unsigned ValSizeInBytes = EltSize;
2378   uint64_t Value = 0;
2379   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2380     Value = CN->getZExtValue();
2381   } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2382     assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2383     Value = FloatToBits(CN->getValueAPF().convertToFloat());
2384   }
2385 
2386   // If the splat value is larger than the element value, then we can never do
2387   // this splat.  The only case that we could fit the replicated bits into our
2388   // immediate field for would be zero, and we prefer to use vxor for it.
2389   if (ValSizeInBytes < ByteSize) return SDValue();
2390 
2391   // If the element value is larger than the splat value, check if it consists
2392   // of a repeated bit pattern of size ByteSize.
2393   if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2394     return SDValue();
2395 
2396   // Properly sign extend the value.
2397   int MaskVal = SignExtend32(Value, ByteSize * 8);
2398 
2399   // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2400   if (MaskVal == 0) return SDValue();
2401 
2402   // Finally, if this value fits in a 5 bit sext field, return it
2403   if (SignExtend32<5>(MaskVal) == MaskVal)
2404     return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2405   return SDValue();
2406 }
2407 
2408 //===----------------------------------------------------------------------===//
2409 //  Addressing Mode Selection
2410 //===----------------------------------------------------------------------===//
2411 
2412 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2413 /// or 64-bit immediate, and if the value can be accurately represented as a
2414 /// sign extension from a 16-bit value.  If so, this returns true and the
2415 /// immediate.
2416 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2417   if (!isa<ConstantSDNode>(N))
2418     return false;
2419 
2420   Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2421   if (N->getValueType(0) == MVT::i32)
2422     return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2423   else
2424     return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2425 }
2426 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2427   return isIntS16Immediate(Op.getNode(), Imm);
2428 }
2429 
2430 
2431 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2432 /// be represented as an indexed [r+r] operation.
2433 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2434                                                SDValue &Index,
2435                                                SelectionDAG &DAG) const {
2436   for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
2437       UI != E; ++UI) {
2438     if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
2439       if (Memop->getMemoryVT() == MVT::f64) {
2440           Base = N.getOperand(0);
2441           Index = N.getOperand(1);
2442           return true;
2443       }
2444     }
2445   }
2446   return false;
2447 }
2448 
2449 /// isIntS34Immediate - This method tests if value of node given can be
2450 /// accurately represented as a sign extension from a 34-bit value.  If so,
2451 /// this returns true and the immediate.
2452 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2453   if (!isa<ConstantSDNode>(N))
2454     return false;
2455 
2456   Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2457   return isInt<34>(Imm);
2458 }
2459 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2460   return isIntS34Immediate(Op.getNode(), Imm);
2461 }
2462 
2463 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2464 /// can be represented as an indexed [r+r] operation.  Returns false if it
2465 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2466 /// non-zero and N can be represented by a base register plus a signed 16-bit
2467 /// displacement, make a more precise judgement by checking (displacement % \p
2468 /// EncodingAlignment).
2469 bool PPCTargetLowering::SelectAddressRegReg(
2470     SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2471     MaybeAlign EncodingAlignment) const {
2472   // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2473   // a [pc+imm].
2474   if (SelectAddressPCRel(N, Base))
2475     return false;
2476 
2477   int16_t Imm = 0;
2478   if (N.getOpcode() == ISD::ADD) {
2479     // Is there any SPE load/store (f64), which can't handle 16bit offset?
2480     // SPE load/store can only handle 8-bit offsets.
2481     if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2482         return true;
2483     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2484         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2485       return false; // r+i
2486     if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2487       return false;    // r+i
2488 
2489     Base = N.getOperand(0);
2490     Index = N.getOperand(1);
2491     return true;
2492   } else if (N.getOpcode() == ISD::OR) {
2493     if (isIntS16Immediate(N.getOperand(1), Imm) &&
2494         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2495       return false; // r+i can fold it if we can.
2496 
2497     // If this is an or of disjoint bitfields, we can codegen this as an add
2498     // (for better address arithmetic) if the LHS and RHS of the OR are provably
2499     // disjoint.
2500     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2501 
2502     if (LHSKnown.Zero.getBoolValue()) {
2503       KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2504       // If all of the bits are known zero on the LHS or RHS, the add won't
2505       // carry.
2506       if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2507         Base = N.getOperand(0);
2508         Index = N.getOperand(1);
2509         return true;
2510       }
2511     }
2512   }
2513 
2514   return false;
2515 }
2516 
2517 // If we happen to be doing an i64 load or store into a stack slot that has
2518 // less than a 4-byte alignment, then the frame-index elimination may need to
2519 // use an indexed load or store instruction (because the offset may not be a
2520 // multiple of 4). The extra register needed to hold the offset comes from the
2521 // register scavenger, and it is possible that the scavenger will need to use
2522 // an emergency spill slot. As a result, we need to make sure that a spill slot
2523 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2524 // stack slot.
2525 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2526   // FIXME: This does not handle the LWA case.
2527   if (VT != MVT::i64)
2528     return;
2529 
2530   // NOTE: We'll exclude negative FIs here, which come from argument
2531   // lowering, because there are no known test cases triggering this problem
2532   // using packed structures (or similar). We can remove this exclusion if
2533   // we find such a test case. The reason why this is so test-case driven is
2534   // because this entire 'fixup' is only to prevent crashes (from the
2535   // register scavenger) on not-really-valid inputs. For example, if we have:
2536   //   %a = alloca i1
2537   //   %b = bitcast i1* %a to i64*
2538   //   store i64* a, i64 b
2539   // then the store should really be marked as 'align 1', but is not. If it
2540   // were marked as 'align 1' then the indexed form would have been
2541   // instruction-selected initially, and the problem this 'fixup' is preventing
2542   // won't happen regardless.
2543   if (FrameIdx < 0)
2544     return;
2545 
2546   MachineFunction &MF = DAG.getMachineFunction();
2547   MachineFrameInfo &MFI = MF.getFrameInfo();
2548 
2549   if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2550     return;
2551 
2552   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2553   FuncInfo->setHasNonRISpills();
2554 }
2555 
2556 /// Returns true if the address N can be represented by a base register plus
2557 /// a signed 16-bit displacement [r+imm], and if it is not better
2558 /// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
2559 /// displacements that are multiples of that value.
2560 bool PPCTargetLowering::SelectAddressRegImm(
2561     SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2562     MaybeAlign EncodingAlignment) const {
2563   // FIXME dl should come from parent load or store, not from address
2564   SDLoc dl(N);
2565 
2566   // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2567   // a [pc+imm].
2568   if (SelectAddressPCRel(N, Base))
2569     return false;
2570 
2571   // If this can be more profitably realized as r+r, fail.
2572   if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2573     return false;
2574 
2575   if (N.getOpcode() == ISD::ADD) {
2576     int16_t imm = 0;
2577     if (isIntS16Immediate(N.getOperand(1), imm) &&
2578         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2579       Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2580       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2581         Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2582         fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2583       } else {
2584         Base = N.getOperand(0);
2585       }
2586       return true; // [r+i]
2587     } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2588       // Match LOAD (ADD (X, Lo(G))).
2589       assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
2590              && "Cannot handle constant offsets yet!");
2591       Disp = N.getOperand(1).getOperand(0);  // The global address.
2592       assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2593              Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2594              Disp.getOpcode() == ISD::TargetConstantPool ||
2595              Disp.getOpcode() == ISD::TargetJumpTable);
2596       Base = N.getOperand(0);
2597       return true;  // [&g+r]
2598     }
2599   } else if (N.getOpcode() == ISD::OR) {
2600     int16_t imm = 0;
2601     if (isIntS16Immediate(N.getOperand(1), imm) &&
2602         (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2603       // If this is an or of disjoint bitfields, we can codegen this as an add
2604       // (for better address arithmetic) if the LHS and RHS of the OR are
2605       // provably disjoint.
2606       KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2607 
2608       if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2609         // If all of the bits are known zero on the LHS or RHS, the add won't
2610         // carry.
2611         if (FrameIndexSDNode *FI =
2612               dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2613           Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2614           fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2615         } else {
2616           Base = N.getOperand(0);
2617         }
2618         Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2619         return true;
2620       }
2621     }
2622   } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2623     // Loading from a constant address.
2624 
2625     // If this address fits entirely in a 16-bit sext immediate field, codegen
2626     // this as "d, 0"
2627     int16_t Imm;
2628     if (isIntS16Immediate(CN, Imm) &&
2629         (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2630       Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2631       Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2632                              CN->getValueType(0));
2633       return true;
2634     }
2635 
2636     // Handle 32-bit sext immediates with LIS + addr mode.
2637     if ((CN->getValueType(0) == MVT::i32 ||
2638          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2639         (!EncodingAlignment ||
2640          isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2641       int Addr = (int)CN->getZExtValue();
2642 
2643       // Otherwise, break this down into an LIS + disp.
2644       Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2645 
2646       Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2647                                    MVT::i32);
2648       unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2649       Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2650       return true;
2651     }
2652   }
2653 
2654   Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2655   if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2656     Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2657     fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2658   } else
2659     Base = N;
2660   return true;      // [r+0]
2661 }
2662 
2663 /// Similar to the 16-bit case but for instructions that take a 34-bit
2664 /// displacement field (prefixed loads/stores).
2665 bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2666                                               SDValue &Base,
2667                                               SelectionDAG &DAG) const {
2668   // Only on 64-bit targets.
2669   if (N.getValueType() != MVT::i64)
2670     return false;
2671 
2672   SDLoc dl(N);
2673   int64_t Imm = 0;
2674 
2675   if (N.getOpcode() == ISD::ADD) {
2676     if (!isIntS34Immediate(N.getOperand(1), Imm))
2677       return false;
2678     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2679     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2680       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2681     else
2682       Base = N.getOperand(0);
2683     return true;
2684   }
2685 
2686   if (N.getOpcode() == ISD::OR) {
2687     if (!isIntS34Immediate(N.getOperand(1), Imm))
2688       return false;
2689     // If this is an or of disjoint bitfields, we can codegen this as an add
2690     // (for better address arithmetic) if the LHS and RHS of the OR are
2691     // provably disjoint.
2692     KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2693     if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2694       return false;
2695     if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2696       Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2697     else
2698       Base = N.getOperand(0);
2699     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2700     return true;
2701   }
2702 
2703   if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2704     Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2705     Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2706     return true;
2707   }
2708 
2709   return false;
2710 }
2711 
2712 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2713 /// represented as an indexed [r+r] operation.
2714 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2715                                                 SDValue &Index,
2716                                                 SelectionDAG &DAG) const {
2717   // Check to see if we can easily represent this as an [r+r] address.  This
2718   // will fail if it thinks that the address is more profitably represented as
2719   // reg+imm, e.g. where imm = 0.
2720   if (SelectAddressRegReg(N, Base, Index, DAG))
2721     return true;
2722 
2723   // If the address is the result of an add, we will utilize the fact that the
2724   // address calculation includes an implicit add.  However, we can reduce
2725   // register pressure if we do not materialize a constant just for use as the
2726   // index register.  We only get rid of the add if it is not an add of a
2727   // value and a 16-bit signed constant and both have a single use.
2728   int16_t imm = 0;
2729   if (N.getOpcode() == ISD::ADD &&
2730       (!isIntS16Immediate(N.getOperand(1), imm) ||
2731        !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2732     Base = N.getOperand(0);
2733     Index = N.getOperand(1);
2734     return true;
2735   }
2736 
2737   // Otherwise, do it the hard way, using R0 as the base register.
2738   Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2739                          N.getValueType());
2740   Index = N;
2741   return true;
2742 }
2743 
2744 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2745   Ty *PCRelCand = dyn_cast<Ty>(N);
2746   return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2747 }
2748 
2749 /// Returns true if this address is a PC Relative address.
2750 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2751 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2752 bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2753   // This is a materialize PC Relative node. Always select this as PC Relative.
2754   Base = N;
2755   if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2756     return true;
2757   if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2758       isValidPCRelNode<GlobalAddressSDNode>(N) ||
2759       isValidPCRelNode<JumpTableSDNode>(N) ||
2760       isValidPCRelNode<BlockAddressSDNode>(N))
2761     return true;
2762   return false;
2763 }
2764 
2765 /// Returns true if we should use a direct load into vector instruction
2766 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2767 static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2768 
2769   // If there are any other uses other than scalar to vector, then we should
2770   // keep it as a scalar load -> direct move pattern to prevent multiple
2771   // loads.
2772   LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
2773   if (!LD)
2774     return false;
2775 
2776   EVT MemVT = LD->getMemoryVT();
2777   if (!MemVT.isSimple())
2778     return false;
2779   switch(MemVT.getSimpleVT().SimpleTy) {
2780   case MVT::i64:
2781     break;
2782   case MVT::i32:
2783     if (!ST.hasP8Vector())
2784       return false;
2785     break;
2786   case MVT::i16:
2787   case MVT::i8:
2788     if (!ST.hasP9Vector())
2789       return false;
2790     break;
2791   default:
2792     return false;
2793   }
2794 
2795   SDValue LoadedVal(N, 0);
2796   if (!LoadedVal.hasOneUse())
2797     return false;
2798 
2799   for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
2800        UI != UE; ++UI)
2801     if (UI.getUse().get().getResNo() == 0 &&
2802         UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2803         UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2804       return false;
2805 
2806   return true;
2807 }
2808 
2809 /// getPreIndexedAddressParts - returns true by value, base pointer and
2810 /// offset pointer and addressing mode by reference if the node's address
2811 /// can be legally represented as pre-indexed load / store address.
2812 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
2813                                                   SDValue &Offset,
2814                                                   ISD::MemIndexedMode &AM,
2815                                                   SelectionDAG &DAG) const {
2816   if (DisablePPCPreinc) return false;
2817 
2818   bool isLoad = true;
2819   SDValue Ptr;
2820   EVT VT;
2821   unsigned Alignment;
2822   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2823     Ptr = LD->getBasePtr();
2824     VT = LD->getMemoryVT();
2825     Alignment = LD->getAlignment();
2826   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2827     Ptr = ST->getBasePtr();
2828     VT  = ST->getMemoryVT();
2829     Alignment = ST->getAlignment();
2830     isLoad = false;
2831   } else
2832     return false;
2833 
2834   // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2835   // instructions because we can fold these into a more efficient instruction
2836   // instead, (such as LXSD).
2837   if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2838     return false;
2839   }
2840 
2841   // PowerPC doesn't have preinc load/store instructions for vectors
2842   if (VT.isVector())
2843     return false;
2844 
2845   if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2846     // Common code will reject creating a pre-inc form if the base pointer
2847     // is a frame index, or if N is a store and the base pointer is either
2848     // the same as or a predecessor of the value being stored.  Check for
2849     // those situations here, and try with swapped Base/Offset instead.
2850     bool Swap = false;
2851 
2852     if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
2853       Swap = true;
2854     else if (!isLoad) {
2855       SDValue Val = cast<StoreSDNode>(N)->getValue();
2856       if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2857         Swap = true;
2858     }
2859 
2860     if (Swap)
2861       std::swap(Base, Offset);
2862 
2863     AM = ISD::PRE_INC;
2864     return true;
2865   }
2866 
2867   // LDU/STU can only handle immediates that are a multiple of 4.
2868   if (VT != MVT::i64) {
2869     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
2870       return false;
2871   } else {
2872     // LDU/STU need an address with at least 4-byte alignment.
2873     if (Alignment < 4)
2874       return false;
2875 
2876     if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2877       return false;
2878   }
2879 
2880   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2881     // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2882     // sext i32 to i64 when addr mode is r+i.
2883     if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2884         LD->getExtensionType() == ISD::SEXTLOAD &&
2885         isa<ConstantSDNode>(Offset))
2886       return false;
2887   }
2888 
2889   AM = ISD::PRE_INC;
2890   return true;
2891 }
2892 
2893 //===----------------------------------------------------------------------===//
2894 //  LowerOperation implementation
2895 //===----------------------------------------------------------------------===//
2896 
2897 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2898 /// and LoOpFlags to the target MO flags.
2899 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2900                                unsigned &HiOpFlags, unsigned &LoOpFlags,
2901                                const GlobalValue *GV = nullptr) {
2902   HiOpFlags = PPCII::MO_HA;
2903   LoOpFlags = PPCII::MO_LO;
2904 
2905   // Don't use the pic base if not in PIC relocation model.
2906   if (IsPIC) {
2907     HiOpFlags |= PPCII::MO_PIC_FLAG;
2908     LoOpFlags |= PPCII::MO_PIC_FLAG;
2909   }
2910 }
2911 
2912 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
2913                              SelectionDAG &DAG) {
2914   SDLoc DL(HiPart);
2915   EVT PtrVT = HiPart.getValueType();
2916   SDValue Zero = DAG.getConstant(0, DL, PtrVT);
2917 
2918   SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
2919   SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
2920 
2921   // With PIC, the first instruction is actually "GR+hi(&G)".
2922   if (isPIC)
2923     Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
2924                      DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
2925 
2926   // Generate non-pic code that has direct accesses to the constant pool.
2927   // The address of the global is just (hi(&g)+lo(&g)).
2928   return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
2929 }
2930 
2931 static void setUsesTOCBasePtr(MachineFunction &MF) {
2932   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2933   FuncInfo->setUsesTOCBasePtr();
2934 }
2935 
2936 static void setUsesTOCBasePtr(SelectionDAG &DAG) {
2937   setUsesTOCBasePtr(DAG.getMachineFunction());
2938 }
2939 
2940 SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
2941                                        SDValue GA) const {
2942   const bool Is64Bit = Subtarget.isPPC64();
2943   EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
2944   SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
2945                         : Subtarget.isAIXABI()
2946                               ? DAG.getRegister(PPC::R2, VT)
2947                               : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
2948   SDValue Ops[] = { GA, Reg };
2949   return DAG.getMemIntrinsicNode(
2950       PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
2951       MachinePointerInfo::getGOT(DAG.getMachineFunction()), None,
2952       MachineMemOperand::MOLoad);
2953 }
2954 
2955 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
2956                                              SelectionDAG &DAG) const {
2957   EVT PtrVT = Op.getValueType();
2958   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2959   const Constant *C = CP->getConstVal();
2960 
2961   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
2962   // The actual address of the GlobalValue is stored in the TOC.
2963   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
2964     if (Subtarget.isUsingPCRelativeCalls()) {
2965       SDLoc DL(CP);
2966       EVT Ty = getPointerTy(DAG.getDataLayout());
2967       SDValue ConstPool = DAG.getTargetConstantPool(
2968           C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
2969       return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
2970     }
2971     setUsesTOCBasePtr(DAG);
2972     SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
2973     return getTOCEntry(DAG, SDLoc(CP), GA);
2974   }
2975 
2976   unsigned MOHiFlag, MOLoFlag;
2977   bool IsPIC = isPositionIndependent();
2978   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
2979 
2980   if (IsPIC && Subtarget.isSVR4ABI()) {
2981     SDValue GA =
2982         DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
2983     return getTOCEntry(DAG, SDLoc(CP), GA);
2984   }
2985 
2986   SDValue CPIHi =
2987       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
2988   SDValue CPILo =
2989       DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
2990   return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
2991 }
2992 
2993 // For 64-bit PowerPC, prefer the more compact relative encodings.
2994 // This trades 32 bits per jump table entry for one or two instructions
2995 // on the jump site.
2996 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2997   if (isJumpTableRelative())
2998     return MachineJumpTableInfo::EK_LabelDifference32;
2999 
3000   return TargetLowering::getJumpTableEncoding();
3001 }
3002 
3003 bool PPCTargetLowering::isJumpTableRelative() const {
3004   if (UseAbsoluteJumpTables)
3005     return false;
3006   if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3007     return true;
3008   return TargetLowering::isJumpTableRelative();
3009 }
3010 
3011 SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3012                                                     SelectionDAG &DAG) const {
3013   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3014     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3015 
3016   switch (getTargetMachine().getCodeModel()) {
3017   case CodeModel::Small:
3018   case CodeModel::Medium:
3019     return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3020   default:
3021     return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3022                        getPointerTy(DAG.getDataLayout()));
3023   }
3024 }
3025 
3026 const MCExpr *
3027 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3028                                                 unsigned JTI,
3029                                                 MCContext &Ctx) const {
3030   if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3031     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3032 
3033   switch (getTargetMachine().getCodeModel()) {
3034   case CodeModel::Small:
3035   case CodeModel::Medium:
3036     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3037   default:
3038     return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3039   }
3040 }
3041 
3042 SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3043   EVT PtrVT = Op.getValueType();
3044   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3045 
3046   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3047   if (Subtarget.isUsingPCRelativeCalls()) {
3048     SDLoc DL(JT);
3049     EVT Ty = getPointerTy(DAG.getDataLayout());
3050     SDValue GA =
3051         DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3052     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3053     return MatAddr;
3054   }
3055 
3056   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3057   // The actual address of the GlobalValue is stored in the TOC.
3058   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3059     setUsesTOCBasePtr(DAG);
3060     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3061     return getTOCEntry(DAG, SDLoc(JT), GA);
3062   }
3063 
3064   unsigned MOHiFlag, MOLoFlag;
3065   bool IsPIC = isPositionIndependent();
3066   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3067 
3068   if (IsPIC && Subtarget.isSVR4ABI()) {
3069     SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3070                                         PPCII::MO_PIC_FLAG);
3071     return getTOCEntry(DAG, SDLoc(GA), GA);
3072   }
3073 
3074   SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3075   SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3076   return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3077 }
3078 
3079 SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3080                                              SelectionDAG &DAG) const {
3081   EVT PtrVT = Op.getValueType();
3082   BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3083   const BlockAddress *BA = BASDN->getBlockAddress();
3084 
3085   // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3086   if (Subtarget.isUsingPCRelativeCalls()) {
3087     SDLoc DL(BASDN);
3088     EVT Ty = getPointerTy(DAG.getDataLayout());
3089     SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3090                                            PPCII::MO_PCREL_FLAG);
3091     SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3092     return MatAddr;
3093   }
3094 
3095   // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3096   // The actual BlockAddress is stored in the TOC.
3097   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3098     setUsesTOCBasePtr(DAG);
3099     SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3100     return getTOCEntry(DAG, SDLoc(BASDN), GA);
3101   }
3102 
3103   // 32-bit position-independent ELF stores the BlockAddress in the .got.
3104   if (Subtarget.is32BitELFABI() && isPositionIndependent())
3105     return getTOCEntry(
3106         DAG, SDLoc(BASDN),
3107         DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3108 
3109   unsigned MOHiFlag, MOLoFlag;
3110   bool IsPIC = isPositionIndependent();
3111   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3112   SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3113   SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3114   return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3115 }
3116 
3117 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3118                                               SelectionDAG &DAG) const {
3119   // FIXME: TLS addresses currently use medium model code sequences,
3120   // which is the most useful form.  Eventually support for small and
3121   // large models could be added if users need it, at the cost of
3122   // additional complexity.
3123   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3124   if (DAG.getTarget().useEmulatedTLS())
3125     return LowerToTLSEmulatedModel(GA, DAG);
3126 
3127   SDLoc dl(GA);
3128   const GlobalValue *GV = GA->getGlobal();
3129   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3130   bool is64bit = Subtarget.isPPC64();
3131   const Module *M = DAG.getMachineFunction().getFunction().getParent();
3132   PICLevel::Level picLevel = M->getPICLevel();
3133 
3134   const TargetMachine &TM = getTargetMachine();
3135   TLSModel::Model Model = TM.getTLSModel(GV);
3136 
3137   if (Model == TLSModel::LocalExec) {
3138     if (Subtarget.isUsingPCRelativeCalls()) {
3139       SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3140       SDValue TGA = DAG.getTargetGlobalAddress(
3141           GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3142       SDValue MatAddr =
3143           DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3144       return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3145     }
3146 
3147     SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3148                                                PPCII::MO_TPREL_HA);
3149     SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3150                                                PPCII::MO_TPREL_LO);
3151     SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3152                              : DAG.getRegister(PPC::R2, MVT::i32);
3153 
3154     SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3155     return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3156   }
3157 
3158   if (Model == TLSModel::InitialExec) {
3159     bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3160     SDValue TGA = DAG.getTargetGlobalAddress(
3161         GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3162     SDValue TGATLS = DAG.getTargetGlobalAddress(
3163         GV, dl, PtrVT, 0,
3164         IsPCRel ? (PPCII::MO_TLS | PPCII::MO_PCREL_FLAG) : PPCII::MO_TLS);
3165     SDValue TPOffset;
3166     if (IsPCRel) {
3167       SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3168       TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3169                              MachinePointerInfo());
3170     } else {
3171       SDValue GOTPtr;
3172       if (is64bit) {
3173         setUsesTOCBasePtr(DAG);
3174         SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3175         GOTPtr =
3176             DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3177       } else {
3178         if (!TM.isPositionIndependent())
3179           GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3180         else if (picLevel == PICLevel::SmallPIC)
3181           GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3182         else
3183           GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3184       }
3185       TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3186     }
3187     return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3188   }
3189 
3190   if (Model == TLSModel::GeneralDynamic) {
3191     if (Subtarget.isUsingPCRelativeCalls()) {
3192       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3193                                                PPCII::MO_GOT_TLSGD_PCREL_FLAG);
3194       return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3195     }
3196 
3197     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3198     SDValue GOTPtr;
3199     if (is64bit) {
3200       setUsesTOCBasePtr(DAG);
3201       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3202       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3203                                    GOTReg, TGA);
3204     } else {
3205       if (picLevel == PICLevel::SmallPIC)
3206         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3207       else
3208         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3209     }
3210     return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3211                        GOTPtr, TGA, TGA);
3212   }
3213 
3214   if (Model == TLSModel::LocalDynamic) {
3215     if (Subtarget.isUsingPCRelativeCalls()) {
3216       SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3217                                                PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3218       SDValue MatPCRel =
3219           DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3220       return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3221     }
3222 
3223     SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3224     SDValue GOTPtr;
3225     if (is64bit) {
3226       setUsesTOCBasePtr(DAG);
3227       SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3228       GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3229                            GOTReg, TGA);
3230     } else {
3231       if (picLevel == PICLevel::SmallPIC)
3232         GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3233       else
3234         GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3235     }
3236     SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3237                                   PtrVT, GOTPtr, TGA, TGA);
3238     SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3239                                       PtrVT, TLSAddr, TGA);
3240     return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3241   }
3242 
3243   llvm_unreachable("Unknown TLS model!");
3244 }
3245 
3246 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3247                                               SelectionDAG &DAG) const {
3248   EVT PtrVT = Op.getValueType();
3249   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3250   SDLoc DL(GSDN);
3251   const GlobalValue *GV = GSDN->getGlobal();
3252 
3253   // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3254   // The actual address of the GlobalValue is stored in the TOC.
3255   if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3256     if (Subtarget.isUsingPCRelativeCalls()) {
3257       EVT Ty = getPointerTy(DAG.getDataLayout());
3258       if (isAccessedAsGotIndirect(Op)) {
3259         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3260                                                 PPCII::MO_PCREL_FLAG |
3261                                                     PPCII::MO_GOT_FLAG);
3262         SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3263         SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3264                                    MachinePointerInfo());
3265         return Load;
3266       } else {
3267         SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3268                                                 PPCII::MO_PCREL_FLAG);
3269         return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3270       }
3271     }
3272     setUsesTOCBasePtr(DAG);
3273     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3274     return getTOCEntry(DAG, DL, GA);
3275   }
3276 
3277   unsigned MOHiFlag, MOLoFlag;
3278   bool IsPIC = isPositionIndependent();
3279   getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3280 
3281   if (IsPIC && Subtarget.isSVR4ABI()) {
3282     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3283                                             GSDN->getOffset(),
3284                                             PPCII::MO_PIC_FLAG);
3285     return getTOCEntry(DAG, DL, GA);
3286   }
3287 
3288   SDValue GAHi =
3289     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3290   SDValue GALo =
3291     DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3292 
3293   return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3294 }
3295 
3296 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3297   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3298   SDLoc dl(Op);
3299 
3300   if (Op.getValueType() == MVT::v2i64) {
3301     // When the operands themselves are v2i64 values, we need to do something
3302     // special because VSX has no underlying comparison operations for these.
3303     if (Op.getOperand(0).getValueType() == MVT::v2i64) {
3304       // Equality can be handled by casting to the legal type for Altivec
3305       // comparisons, everything else needs to be expanded.
3306       if (CC == ISD::SETEQ || CC == ISD::SETNE) {
3307         return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
3308                  DAG.getSetCC(dl, MVT::v4i32,
3309                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
3310                    DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
3311                    CC));
3312       }
3313 
3314       return SDValue();
3315     }
3316 
3317     // We handle most of these in the usual way.
3318     return Op;
3319   }
3320 
3321   // If we're comparing for equality to zero, expose the fact that this is
3322   // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3323   // fold the new nodes.
3324   if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3325     return V;
3326 
3327   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
3328     // Leave comparisons against 0 and -1 alone for now, since they're usually
3329     // optimized.  FIXME: revisit this when we can custom lower all setcc
3330     // optimizations.
3331     if (C->isAllOnesValue() || C->isNullValue())
3332       return SDValue();
3333   }
3334 
3335   // If we have an integer seteq/setne, turn it into a compare against zero
3336   // by xor'ing the rhs with the lhs, which is faster than setting a
3337   // condition register, reading it back out, and masking the correct bit.  The
3338   // normal approach here uses sub to do this instead of xor.  Using xor exposes
3339   // the result to other bit-twiddling opportunities.
3340   EVT LHSVT = Op.getOperand(0).getValueType();
3341   if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3342     EVT VT = Op.getValueType();
3343     SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
3344                                 Op.getOperand(1));
3345     return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3346   }
3347   return SDValue();
3348 }
3349 
3350 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3351   SDNode *Node = Op.getNode();
3352   EVT VT = Node->getValueType(0);
3353   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3354   SDValue InChain = Node->getOperand(0);
3355   SDValue VAListPtr = Node->getOperand(1);
3356   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3357   SDLoc dl(Node);
3358 
3359   assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3360 
3361   // gpr_index
3362   SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3363                                     VAListPtr, MachinePointerInfo(SV), MVT::i8);
3364   InChain = GprIndex.getValue(1);
3365 
3366   if (VT == MVT::i64) {
3367     // Check if GprIndex is even
3368     SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3369                                  DAG.getConstant(1, dl, MVT::i32));
3370     SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3371                                 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3372     SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3373                                           DAG.getConstant(1, dl, MVT::i32));
3374     // Align GprIndex to be even if it isn't
3375     GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3376                            GprIndex);
3377   }
3378 
3379   // fpr index is 1 byte after gpr
3380   SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3381                                DAG.getConstant(1, dl, MVT::i32));
3382 
3383   // fpr
3384   SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3385                                     FprPtr, MachinePointerInfo(SV), MVT::i8);
3386   InChain = FprIndex.getValue(1);
3387 
3388   SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3389                                        DAG.getConstant(8, dl, MVT::i32));
3390 
3391   SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3392                                         DAG.getConstant(4, dl, MVT::i32));
3393 
3394   // areas
3395   SDValue OverflowArea =
3396       DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3397   InChain = OverflowArea.getValue(1);
3398 
3399   SDValue RegSaveArea =
3400       DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3401   InChain = RegSaveArea.getValue(1);
3402 
3403   // select overflow_area if index > 8
3404   SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3405                             DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3406 
3407   // adjustment constant gpr_index * 4/8
3408   SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3409                                     VT.isInteger() ? GprIndex : FprIndex,
3410                                     DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3411                                                     MVT::i32));
3412 
3413   // OurReg = RegSaveArea + RegConstant
3414   SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3415                                RegConstant);
3416 
3417   // Floating types are 32 bytes into RegSaveArea
3418   if (VT.isFloatingPoint())
3419     OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3420                          DAG.getConstant(32, dl, MVT::i32));
3421 
3422   // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3423   SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3424                                    VT.isInteger() ? GprIndex : FprIndex,
3425                                    DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3426                                                    MVT::i32));
3427 
3428   InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3429                               VT.isInteger() ? VAListPtr : FprPtr,
3430                               MachinePointerInfo(SV), MVT::i8);
3431 
3432   // determine if we should load from reg_save_area or overflow_area
3433   SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3434 
3435   // increase overflow_area by 4/8 if gpr/fpr > 8
3436   SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3437                                           DAG.getConstant(VT.isInteger() ? 4 : 8,
3438                                           dl, MVT::i32));
3439 
3440   OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3441                              OverflowAreaPlusN);
3442 
3443   InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3444                               MachinePointerInfo(), MVT::i32);
3445 
3446   return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3447 }
3448 
3449 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3450   assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3451 
3452   // We have to copy the entire va_list struct:
3453   // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3454   return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3455                        DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3456                        false, true, false, MachinePointerInfo(),
3457                        MachinePointerInfo());
3458 }
3459 
3460 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3461                                                   SelectionDAG &DAG) const {
3462   if (Subtarget.isAIXABI())
3463     report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3464 
3465   return Op.getOperand(0);
3466 }
3467 
3468 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3469   MachineFunction &MF = DAG.getMachineFunction();
3470   PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3471 
3472   assert((Op.getOpcode() == ISD::INLINEASM ||
3473           Op.getOpcode() == ISD::INLINEASM_BR) &&
3474          "Expecting Inline ASM node.");
3475 
3476   // If an LR store is already known to be required then there is not point in
3477   // checking this ASM as well.
3478   if (MFI.isLRStoreRequired())
3479     return Op;
3480 
3481   // Inline ASM nodes have an optional last operand that is an incoming Flag of
3482   // type MVT::Glue. We want to ignore this last operand if that is the case.
3483   unsigned NumOps = Op.getNumOperands();
3484   if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3485     --NumOps;
3486 
3487   // Check all operands that may contain the LR.
3488   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3489     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3490     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3491     ++i; // Skip the ID value.
3492 
3493     switch (InlineAsm::getKind(Flags)) {
3494     default:
3495       llvm_unreachable("Bad flags!");
3496     case InlineAsm::Kind_RegUse:
3497     case InlineAsm::Kind_Imm:
3498     case InlineAsm::Kind_Mem:
3499       i += NumVals;
3500       break;
3501     case InlineAsm::Kind_Clobber:
3502     case InlineAsm::Kind_RegDef:
3503     case InlineAsm::Kind_RegDefEarlyClobber: {
3504       for (; NumVals; --NumVals, ++i) {
3505         Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3506         if (Reg != PPC::LR && Reg != PPC::LR8)
3507           continue;
3508         MFI.setLRStoreRequired();
3509         return Op;
3510       }
3511       break;
3512     }
3513     }
3514   }
3515 
3516   return Op;
3517 }
3518 
3519 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3520                                                 SelectionDAG &DAG) const {
3521   if (Subtarget.isAIXABI())
3522     report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3523 
3524   SDValue Chain = Op.getOperand(0);
3525   SDValue Trmp = Op.getOperand(1); // trampoline
3526   SDValue FPtr = Op.getOperand(2); // nested function
3527   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3528   SDLoc dl(Op);
3529 
3530   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3531   bool isPPC64 = (PtrVT == MVT::i64);
3532   Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3533 
3534   TargetLowering::ArgListTy Args;
3535   TargetLowering::ArgListEntry Entry;
3536 
3537   Entry.Ty = IntPtrTy;
3538   Entry.Node = Trmp; Args.push_back(Entry);
3539 
3540   // TrampSize == (isPPC64 ? 48 : 40);
3541   Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3542                                isPPC64 ? MVT::i64 : MVT::i32);
3543   Args.push_back(Entry);
3544 
3545   Entry.Node = FPtr; Args.push_back(Entry);
3546   Entry.Node = Nest; Args.push_back(Entry);
3547 
3548   // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3549   TargetLowering::CallLoweringInfo CLI(DAG);
3550   CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3551       CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3552       DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3553 
3554   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3555   return CallResult.second;
3556 }
3557 
3558 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3559   MachineFunction &MF = DAG.getMachineFunction();
3560   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3561   EVT PtrVT = getPointerTy(MF.getDataLayout());
3562 
3563   SDLoc dl(Op);
3564 
3565   if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3566     // vastart just stores the address of the VarArgsFrameIndex slot into the
3567     // memory location argument.
3568     SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3569     const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3570     return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3571                         MachinePointerInfo(SV));
3572   }
3573 
3574   // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3575   // We suppose the given va_list is already allocated.
3576   //
3577   // typedef struct {
3578   //  char gpr;     /* index into the array of 8 GPRs
3579   //                 * stored in the register save area
3580   //                 * gpr=0 corresponds to r3,
3581   //                 * gpr=1 to r4, etc.
3582   //                 */
3583   //  char fpr;     /* index into the array of 8 FPRs
3584   //                 * stored in the register save area
3585   //                 * fpr=0 corresponds to f1,
3586   //                 * fpr=1 to f2, etc.
3587   //                 */
3588   //  char *overflow_arg_area;
3589   //                /* location on stack that holds
3590   //                 * the next overflow argument
3591   //                 */
3592   //  char *reg_save_area;
3593   //               /* where r3:r10 and f1:f8 (if saved)
3594   //                * are stored
3595   //                */
3596   // } va_list[1];
3597 
3598   SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3599   SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3600   SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3601                                             PtrVT);
3602   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3603                                  PtrVT);
3604 
3605   uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3606   SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3607 
3608   uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3609   SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3610 
3611   uint64_t FPROffset = 1;
3612   SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3613 
3614   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3615 
3616   // Store first byte : number of int regs
3617   SDValue firstStore =
3618       DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3619                         MachinePointerInfo(SV), MVT::i8);
3620   uint64_t nextOffset = FPROffset;
3621   SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3622                                   ConstFPROffset);
3623 
3624   // Store second byte : number of float regs
3625   SDValue secondStore =
3626       DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3627                         MachinePointerInfo(SV, nextOffset), MVT::i8);
3628   nextOffset += StackOffset;
3629   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3630 
3631   // Store second word : arguments given on stack
3632   SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3633                                     MachinePointerInfo(SV, nextOffset));
3634   nextOffset += FrameOffset;
3635   nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3636 
3637   // Store third word : arguments given in registers
3638   return DAG.getStore(thirdStore, dl, FR, nextPtr,
3639                       MachinePointerInfo(SV, nextOffset));
3640 }
3641 
3642 /// FPR - The set of FP registers that should be allocated for arguments
3643 /// on Darwin and AIX.
3644 static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
3645                                 PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
3646                                 PPC::F11, PPC::F12, PPC::F13};
3647 
3648 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3649 /// the stack.
3650 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3651                                        unsigned PtrByteSize) {
3652   unsigned ArgSize = ArgVT.getStoreSize();
3653   if (Flags.isByVal())
3654     ArgSize = Flags.getByValSize();
3655 
3656   // Round up to multiples of the pointer size, except for array members,
3657   // which are always packed.
3658   if (!Flags.isInConsecutiveRegs())
3659     ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3660 
3661   return ArgSize;
3662 }
3663 
3664 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3665 /// on the stack.
3666 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
3667                                          ISD::ArgFlagsTy Flags,
3668                                          unsigned PtrByteSize) {
3669   Align Alignment(PtrByteSize);
3670 
3671   // Altivec parameters are padded to a 16 byte boundary.
3672   if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3673       ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3674       ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3675       ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3676     Alignment = Align(16);
3677 
3678   // ByVal parameters are aligned as requested.
3679   if (Flags.isByVal()) {
3680     auto BVAlign = Flags.getNonZeroByValAlign();
3681     if (BVAlign > PtrByteSize) {
3682       if (BVAlign.value() % PtrByteSize != 0)
3683         llvm_unreachable(
3684             "ByVal alignment is not a multiple of the pointer size");
3685 
3686       Alignment = BVAlign;
3687     }
3688   }
3689 
3690   // Array members are always packed to their original alignment.
3691   if (Flags.isInConsecutiveRegs()) {
3692     // If the array member was split into multiple registers, the first
3693     // needs to be aligned to the size of the full type.  (Except for
3694     // ppcf128, which is only aligned as its f64 components.)
3695     if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3696       Alignment = Align(OrigVT.getStoreSize());
3697     else
3698       Alignment = Align(ArgVT.getStoreSize());
3699   }
3700 
3701   return Alignment;
3702 }
3703 
3704 /// CalculateStackSlotUsed - Return whether this argument will use its
3705 /// stack slot (instead of being passed in registers).  ArgOffset,
3706 /// AvailableFPRs, and AvailableVRs must hold the current argument
3707 /// position, and will be updated to account for this argument.
3708 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3709                                    unsigned PtrByteSize, unsigned LinkageSize,
3710                                    unsigned ParamAreaSize, unsigned &ArgOffset,
3711                                    unsigned &AvailableFPRs,
3712                                    unsigned &AvailableVRs) {
3713   bool UseMemory = false;
3714 
3715   // Respect alignment of argument on the stack.
3716   Align Alignment =
3717       CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3718   ArgOffset = alignTo(ArgOffset, Alignment);
3719   // If there's no space left in the argument save area, we must
3720   // use memory (this check also catches zero-sized arguments).
3721   if (ArgOffset >= LinkageSize + ParamAreaSize)
3722     UseMemory = true;
3723 
3724   // Allocate argument on the stack.
3725   ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3726   if (Flags.isInConsecutiveRegsLast())
3727     ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3728   // If we overran the argument save area, we must use memory
3729   // (this check catches arguments passed partially in memory)
3730   if (ArgOffset > LinkageSize + ParamAreaSize)
3731     UseMemory = true;
3732 
3733   // However, if the argument is actually passed in an FPR or a VR,
3734   // we don't use memory after all.
3735   if (!Flags.isByVal()) {
3736     if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3737       if (AvailableFPRs > 0) {
3738         --AvailableFPRs;
3739         return false;
3740       }
3741     if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3742         ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3743         ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3744         ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3745       if (AvailableVRs > 0) {
3746         --AvailableVRs;
3747         return false;
3748       }
3749   }
3750 
3751   return UseMemory;
3752 }
3753 
3754 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3755 /// ensure minimum alignment required for target.
3756 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
3757                                      unsigned NumBytes) {
3758   return alignTo(NumBytes, Lowering->getStackAlign());
3759 }
3760 
3761 SDValue PPCTargetLowering::LowerFormalArguments(
3762     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3763     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3764     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3765   if (Subtarget.isAIXABI())
3766     return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3767                                     InVals);
3768   if (Subtarget.is64BitELFABI())
3769     return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3770                                        InVals);
3771   assert(Subtarget.is32BitELFABI());
3772   return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3773                                      InVals);
3774 }
3775 
3776 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3777     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3778     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3779     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3780 
3781   // 32-bit SVR4 ABI Stack Frame Layout:
3782   //              +-----------------------------------+
3783   //        +-->  |            Back chain             |
3784   //        |     +-----------------------------------+
3785   //        |     | Floating-point register save area |
3786   //        |     +-----------------------------------+
3787   //        |     |    General register save area     |
3788   //        |     +-----------------------------------+
3789   //        |     |          CR save word             |
3790   //        |     +-----------------------------------+
3791   //        |     |         VRSAVE save word          |
3792   //        |     +-----------------------------------+
3793   //        |     |         Alignment padding         |
3794   //        |     +-----------------------------------+
3795   //        |     |     Vector register save area     |
3796   //        |     +-----------------------------------+
3797   //        |     |       Local variable space        |
3798   //        |     +-----------------------------------+
3799   //        |     |        Parameter list area        |
3800   //        |     +-----------------------------------+
3801   //        |     |           LR save word            |
3802   //        |     +-----------------------------------+
3803   // SP-->  +---  |            Back chain             |
3804   //              +-----------------------------------+
3805   //
3806   // Specifications:
3807   //   System V Application Binary Interface PowerPC Processor Supplement
3808   //   AltiVec Technology Programming Interface Manual
3809 
3810   MachineFunction &MF = DAG.getMachineFunction();
3811   MachineFrameInfo &MFI = MF.getFrameInfo();
3812   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3813 
3814   EVT PtrVT = getPointerTy(MF.getDataLayout());
3815   // Potential tail calls could cause overwriting of argument stack slots.
3816   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
3817                        (CallConv == CallingConv::Fast));
3818   const Align PtrAlign(4);
3819 
3820   // Assign locations to all of the incoming arguments.
3821   SmallVector<CCValAssign, 16> ArgLocs;
3822   PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3823                  *DAG.getContext());
3824 
3825   // Reserve space for the linkage area on the stack.
3826   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
3827   CCInfo.AllocateStack(LinkageSize, PtrAlign);
3828   if (useSoftFloat())
3829     CCInfo.PreAnalyzeFormalArguments(Ins);
3830 
3831   CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
3832   CCInfo.clearWasPPCF128();
3833 
3834   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3835     CCValAssign &VA = ArgLocs[i];
3836 
3837     // Arguments stored in registers.
3838     if (VA.isRegLoc()) {
3839       const TargetRegisterClass *RC;
3840       EVT ValVT = VA.getValVT();
3841 
3842       switch (ValVT.getSimpleVT().SimpleTy) {
3843         default:
3844           llvm_unreachable("ValVT not supported by formal arguments Lowering");
3845         case MVT::i1:
3846         case MVT::i32:
3847           RC = &PPC::GPRCRegClass;
3848           break;
3849         case MVT::f32:
3850           if (Subtarget.hasP8Vector())
3851             RC = &PPC::VSSRCRegClass;
3852           else if (Subtarget.hasSPE())
3853             RC = &PPC::GPRCRegClass;
3854           else
3855             RC = &PPC::F4RCRegClass;
3856           break;
3857         case MVT::f64:
3858           if (Subtarget.hasVSX())
3859             RC = &PPC::VSFRCRegClass;
3860           else if (Subtarget.hasSPE())
3861             // SPE passes doubles in GPR pairs.
3862             RC = &PPC::GPRCRegClass;
3863           else
3864             RC = &PPC::F8RCRegClass;
3865           break;
3866         case MVT::v16i8:
3867         case MVT::v8i16:
3868         case MVT::v4i32:
3869           RC = &PPC::VRRCRegClass;
3870           break;
3871         case MVT::v4f32:
3872           RC = &PPC::VRRCRegClass;
3873           break;
3874         case MVT::v2f64:
3875         case MVT::v2i64:
3876           RC = &PPC::VRRCRegClass;
3877           break;
3878       }
3879 
3880       SDValue ArgValue;
3881       // Transform the arguments stored in physical registers into
3882       // virtual ones.
3883       if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
3884         assert(i + 1 < e && "No second half of double precision argument");
3885         unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
3886         unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
3887         SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
3888         SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
3889         if (!Subtarget.isLittleEndian())
3890           std::swap (ArgValueLo, ArgValueHi);
3891         ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
3892                                ArgValueHi);
3893       } else {
3894         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3895         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
3896                                       ValVT == MVT::i1 ? MVT::i32 : ValVT);
3897         if (ValVT == MVT::i1)
3898           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
3899       }
3900 
3901       InVals.push_back(ArgValue);
3902     } else {
3903       // Argument stored in memory.
3904       assert(VA.isMemLoc());
3905 
3906       // Get the extended size of the argument type in stack
3907       unsigned ArgSize = VA.getLocVT().getStoreSize();
3908       // Get the actual size of the argument type
3909       unsigned ObjSize = VA.getValVT().getStoreSize();
3910       unsigned ArgOffset = VA.getLocMemOffset();
3911       // Stack objects in PPC32 are right justified.
3912       ArgOffset += ArgSize - ObjSize;
3913       int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
3914 
3915       // Create load nodes to retrieve arguments from the stack.
3916       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3917       InVals.push_back(
3918           DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
3919     }
3920   }
3921 
3922   // Assign locations to all of the incoming aggregate by value arguments.
3923   // Aggregates passed by value are stored in the local variable space of the
3924   // caller's stack frame, right above the parameter list area.
3925   SmallVector<CCValAssign, 16> ByValArgLocs;
3926   CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
3927                       ByValArgLocs, *DAG.getContext());
3928 
3929   // Reserve stack space for the allocations in CCInfo.
3930   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
3931 
3932   CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
3933 
3934   // Area that is at least reserved in the caller of this function.
3935   unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
3936   MinReservedArea = std::max(MinReservedArea, LinkageSize);
3937 
3938   // Set the size that is at least reserved in caller of this function.  Tail
3939   // call optimized function's reserved stack space needs to be aligned so that
3940   // taking the difference between two stack areas will result in an aligned
3941   // stack.
3942   MinReservedArea =
3943       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
3944   FuncInfo->setMinReservedArea(MinReservedArea);
3945 
3946   SmallVector<SDValue, 8> MemOps;
3947 
3948   // If the function takes variable number of arguments, make a frame index for
3949   // the start of the first vararg value... for expansion of llvm.va_start.
3950   if (isVarArg) {
3951     static const MCPhysReg GPArgRegs[] = {
3952       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
3953       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
3954     };
3955     const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
3956 
3957     static const MCPhysReg FPArgRegs[] = {
3958       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
3959       PPC::F8
3960     };
3961     unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
3962 
3963     if (useSoftFloat() || hasSPE())
3964        NumFPArgRegs = 0;
3965 
3966     FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
3967     FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
3968 
3969     // Make room for NumGPArgRegs and NumFPArgRegs.
3970     int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
3971                 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
3972 
3973     FuncInfo->setVarArgsStackOffset(
3974       MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
3975                             CCInfo.getNextStackOffset(), true));
3976 
3977     FuncInfo->setVarArgsFrameIndex(
3978         MFI.CreateStackObject(Depth, Align(8), false));
3979     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3980 
3981     // The fixed integer arguments of a variadic function are stored to the
3982     // VarArgsFrameIndex on the stack so that they may be loaded by
3983     // dereferencing the result of va_next.
3984     for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
3985       // Get an existing live-in vreg, or add a new one.
3986       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
3987       if (!VReg)
3988         VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
3989 
3990       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
3991       SDValue Store =
3992           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
3993       MemOps.push_back(Store);
3994       // Increment the address by four for the next argument to store
3995       SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
3996       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
3997     }
3998 
3999     // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4000     // is set.
4001     // The double arguments are stored to the VarArgsFrameIndex
4002     // on the stack.
4003     for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4004       // Get an existing live-in vreg, or add a new one.
4005       unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4006       if (!VReg)
4007         VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4008 
4009       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4010       SDValue Store =
4011           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4012       MemOps.push_back(Store);
4013       // Increment the address by eight for the next argument to store
4014       SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4015                                          PtrVT);
4016       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4017     }
4018   }
4019 
4020   if (!MemOps.empty())
4021     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4022 
4023   return Chain;
4024 }
4025 
4026 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4027 // value to MVT::i64 and then truncate to the correct register size.
4028 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4029                                              EVT ObjectVT, SelectionDAG &DAG,
4030                                              SDValue ArgVal,
4031                                              const SDLoc &dl) const {
4032   if (Flags.isSExt())
4033     ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4034                          DAG.getValueType(ObjectVT));
4035   else if (Flags.isZExt())
4036     ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4037                          DAG.getValueType(ObjectVT));
4038 
4039   return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4040 }
4041 
4042 SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4043     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4044     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4045     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4046   // TODO: add description of PPC stack frame format, or at least some docs.
4047   //
4048   bool isELFv2ABI = Subtarget.isELFv2ABI();
4049   bool isLittleEndian = Subtarget.isLittleEndian();
4050   MachineFunction &MF = DAG.getMachineFunction();
4051   MachineFrameInfo &MFI = MF.getFrameInfo();
4052   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4053 
4054   assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4055          "fastcc not supported on varargs functions");
4056 
4057   EVT PtrVT = getPointerTy(MF.getDataLayout());
4058   // Potential tail calls could cause overwriting of argument stack slots.
4059   bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4060                        (CallConv == CallingConv::Fast));
4061   unsigned PtrByteSize = 8;
4062   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4063 
4064   static const MCPhysReg GPR[] = {
4065     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4066     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4067   };
4068   static const MCPhysReg VR[] = {
4069     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4070     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4071   };
4072 
4073   const unsigned Num_GPR_Regs = array_lengthof(GPR);
4074   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4075   const unsigned Num_VR_Regs  = array_lengthof(VR);
4076 
4077   // Do a first pass over the arguments to determine whether the ABI
4078   // guarantees that our caller has allocated the parameter save area
4079   // on its stack frame.  In the ELFv1 ABI, this is always the case;
4080   // in the ELFv2 ABI, it is true if this is a vararg function or if
4081   // any parameter is located in a stack slot.
4082 
4083   bool HasParameterArea = !isELFv2ABI || isVarArg;
4084   unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4085   unsigned NumBytes = LinkageSize;
4086   unsigned AvailableFPRs = Num_FPR_Regs;
4087   unsigned AvailableVRs = Num_VR_Regs;
4088   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4089     if (Ins[i].Flags.isNest())
4090       continue;
4091 
4092     if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4093                                PtrByteSize, LinkageSize, ParamAreaSize,
4094                                NumBytes, AvailableFPRs, AvailableVRs))
4095       HasParameterArea = true;
4096   }
4097 
4098   // Add DAG nodes to load the arguments or copy them out of registers.  On
4099   // entry to a function on PPC, the arguments start after the linkage area,
4100   // although the first ones are often in registers.
4101 
4102   unsigned ArgOffset = LinkageSize;
4103   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4104   SmallVector<SDValue, 8> MemOps;
4105   Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4106   unsigned CurArgIdx = 0;
4107   for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4108     SDValue ArgVal;
4109     bool needsLoad = false;
4110     EVT ObjectVT = Ins[ArgNo].VT;
4111     EVT OrigVT = Ins[ArgNo].ArgVT;
4112     unsigned ObjSize = ObjectVT.getStoreSize();
4113     unsigned ArgSize = ObjSize;
4114     ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4115     if (Ins[ArgNo].isOrigArg()) {
4116       std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4117       CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4118     }
4119     // We re-align the argument offset for each argument, except when using the
4120     // fast calling convention, when we need to make sure we do that only when
4121     // we'll actually use a stack slot.
4122     unsigned CurArgOffset;
4123     Align Alignment;
4124     auto ComputeArgOffset = [&]() {
4125       /* Respect alignment of argument on the stack.  */
4126       Alignment =
4127           CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4128       ArgOffset = alignTo(ArgOffset, Alignment);
4129       CurArgOffset = ArgOffset;
4130     };
4131 
4132     if (CallConv != CallingConv::Fast) {
4133       ComputeArgOffset();
4134 
4135       /* Compute GPR index associated with argument offset.  */
4136       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4137       GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4138     }
4139 
4140     // FIXME the codegen can be much improved in some cases.
4141     // We do not have to keep everything in memory.
4142     if (Flags.isByVal()) {
4143       assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4144 
4145       if (CallConv == CallingConv::Fast)
4146         ComputeArgOffset();
4147 
4148       // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4149       ObjSize = Flags.getByValSize();
4150       ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4151       // Empty aggregate parameters do not take up registers.  Examples:
4152       //   struct { } a;
4153       //   union  { } b;
4154       //   int c[0];
4155       // etc.  However, we have to provide a place-holder in InVals, so
4156       // pretend we have an 8-byte item at the current address for that
4157       // purpose.
4158       if (!ObjSize) {
4159         int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4160         SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4161         InVals.push_back(FIN);
4162         continue;
4163       }
4164 
4165       // Create a stack object covering all stack doublewords occupied
4166       // by the argument.  If the argument is (fully or partially) on
4167       // the stack, or if the argument is fully in registers but the
4168       // caller has allocated the parameter save anyway, we can refer
4169       // directly to the caller's stack frame.  Otherwise, create a
4170       // local copy in our own frame.
4171       int FI;
4172       if (HasParameterArea ||
4173           ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4174         FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4175       else
4176         FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4177       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4178 
4179       // Handle aggregates smaller than 8 bytes.
4180       if (ObjSize < PtrByteSize) {
4181         // The value of the object is its address, which differs from the
4182         // address of the enclosing doubleword on big-endian systems.
4183         SDValue Arg = FIN;
4184         if (!isLittleEndian) {
4185           SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4186           Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4187         }
4188         InVals.push_back(Arg);
4189 
4190         if (GPR_idx != Num_GPR_Regs) {
4191           unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4192           FuncInfo->addLiveInAttr(VReg, Flags);
4193           SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4194           SDValue Store;
4195 
4196           if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
4197             EVT ObjType = (ObjSize == 1 ? MVT::i8 :
4198                            (ObjSize == 2 ? MVT::i16 : MVT::i32));
4199             Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4200                                       MachinePointerInfo(&*FuncArg), ObjType);
4201           } else {
4202             // For sizes that don't fit a truncating store (3, 5, 6, 7),
4203             // store the whole register as-is to the parameter save area
4204             // slot.
4205             Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4206                                  MachinePointerInfo(&*FuncArg));
4207           }
4208 
4209           MemOps.push_back(Store);
4210         }
4211         // Whether we copied from a register or not, advance the offset
4212         // into the parameter save area by a full doubleword.
4213         ArgOffset += PtrByteSize;
4214         continue;
4215       }
4216 
4217       // The value of the object is its address, which is the address of
4218       // its first stack doubleword.
4219       InVals.push_back(FIN);
4220 
4221       // Store whatever pieces of the object are in registers to memory.
4222       for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4223         if (GPR_idx == Num_GPR_Regs)
4224           break;
4225 
4226         unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4227         FuncInfo->addLiveInAttr(VReg, Flags);
4228         SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4229         SDValue Addr = FIN;
4230         if (j) {
4231           SDValue Off = DAG.getConstant(j, dl, PtrVT);
4232           Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4233         }
4234         SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
4235                                      MachinePointerInfo(&*FuncArg, j));
4236         MemOps.push_back(Store);
4237         ++GPR_idx;
4238       }
4239       ArgOffset += ArgSize;
4240       continue;
4241     }
4242 
4243     switch (ObjectVT.getSimpleVT().SimpleTy) {
4244     default: llvm_unreachable("Unhandled argument type!");
4245     case MVT::i1:
4246     case MVT::i32:
4247     case MVT::i64:
4248       if (Flags.isNest()) {
4249         // The 'nest' parameter, if any, is passed in R11.
4250         unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4251         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4252 
4253         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4254           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4255 
4256         break;
4257       }
4258 
4259       // These can be scalar arguments or elements of an integer array type
4260       // passed directly.  Clang may use those instead of "byval" aggregate
4261       // types to avoid forcing arguments to memory unnecessarily.
4262       if (GPR_idx != Num_GPR_Regs) {
4263         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4264         FuncInfo->addLiveInAttr(VReg, Flags);
4265         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4266 
4267         if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4268           // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4269           // value to MVT::i64 and then truncate to the correct register size.
4270           ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4271       } else {
4272         if (CallConv == CallingConv::Fast)
4273           ComputeArgOffset();
4274 
4275         needsLoad = true;
4276         ArgSize = PtrByteSize;
4277       }
4278       if (CallConv != CallingConv::Fast || needsLoad)
4279         ArgOffset += 8;
4280       break;
4281 
4282     case MVT::f32:
4283     case MVT::f64:
4284       // These can be scalar arguments or elements of a float array type
4285       // passed directly.  The latter are used to implement ELFv2 homogenous
4286       // float aggregates.
4287       if (FPR_idx != Num_FPR_Regs) {
4288         unsigned VReg;
4289 
4290         if (ObjectVT == MVT::f32)
4291           VReg = MF.addLiveIn(FPR[FPR_idx],
4292                               Subtarget.hasP8Vector()
4293                                   ? &PPC::VSSRCRegClass
4294                                   : &PPC::F4RCRegClass);
4295         else
4296           VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4297                                                 ? &PPC::VSFRCRegClass
4298                                                 : &PPC::F8RCRegClass);
4299 
4300         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4301         ++FPR_idx;
4302       } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4303         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4304         // once we support fp <-> gpr moves.
4305 
4306         // This can only ever happen in the presence of f32 array types,
4307         // since otherwise we never run out of FPRs before running out
4308         // of GPRs.
4309         unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4310         FuncInfo->addLiveInAttr(VReg, Flags);
4311         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4312 
4313         if (ObjectVT == MVT::f32) {
4314           if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4315             ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4316                                  DAG.getConstant(32, dl, MVT::i32));
4317           ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4318         }
4319 
4320         ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4321       } else {
4322         if (CallConv == CallingConv::Fast)
4323           ComputeArgOffset();
4324 
4325         needsLoad = true;
4326       }
4327 
4328       // When passing an array of floats, the array occupies consecutive
4329       // space in the argument area; only round up to the next doubleword
4330       // at the end of the array.  Otherwise, each float takes 8 bytes.
4331       if (CallConv != CallingConv::Fast || needsLoad) {
4332         ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4333         ArgOffset += ArgSize;
4334         if (Flags.isInConsecutiveRegsLast())
4335           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4336       }
4337       break;
4338     case MVT::v4f32:
4339     case MVT::v4i32:
4340     case MVT::v8i16:
4341     case MVT::v16i8:
4342     case MVT::v2f64:
4343     case MVT::v2i64:
4344     case MVT::v1i128:
4345     case MVT::f128:
4346       // These can be scalar arguments or elements of a vector array type
4347       // passed directly.  The latter are used to implement ELFv2 homogenous
4348       // vector aggregates.
4349       if (VR_idx != Num_VR_Regs) {
4350         unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4351         ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4352         ++VR_idx;
4353       } else {
4354         if (CallConv == CallingConv::Fast)
4355           ComputeArgOffset();
4356         needsLoad = true;
4357       }
4358       if (CallConv != CallingConv::Fast || needsLoad)
4359         ArgOffset += 16;
4360       break;
4361     }
4362 
4363     // We need to load the argument to a virtual register if we determined
4364     // above that we ran out of physical registers of the appropriate type.
4365     if (needsLoad) {
4366       if (ObjSize < ArgSize && !isLittleEndian)
4367         CurArgOffset += ArgSize - ObjSize;
4368       int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4369       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4370       ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4371     }
4372 
4373     InVals.push_back(ArgVal);
4374   }
4375 
4376   // Area that is at least reserved in the caller of this function.
4377   unsigned MinReservedArea;
4378   if (HasParameterArea)
4379     MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4380   else
4381     MinReservedArea = LinkageSize;
4382 
4383   // Set the size that is at least reserved in caller of this function.  Tail
4384   // call optimized functions' reserved stack space needs to be aligned so that
4385   // taking the difference between two stack areas will result in an aligned
4386   // stack.
4387   MinReservedArea =
4388       EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4389   FuncInfo->setMinReservedArea(MinReservedArea);
4390 
4391   // If the function takes variable number of arguments, make a frame index for
4392   // the start of the first vararg value... for expansion of llvm.va_start.
4393   // On ELFv2ABI spec, it writes:
4394   // C programs that are intended to be *portable* across different compilers
4395   // and architectures must use the header file <stdarg.h> to deal with variable
4396   // argument lists.
4397   if (isVarArg && MFI.hasVAStart()) {
4398     int Depth = ArgOffset;
4399 
4400     FuncInfo->setVarArgsFrameIndex(
4401       MFI.CreateFixedObject(PtrByteSize, Depth, true));
4402     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4403 
4404     // If this function is vararg, store any remaining integer argument regs
4405     // to their spots on the stack so that they may be loaded by dereferencing
4406     // the result of va_next.
4407     for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4408          GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4409       unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4410       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4411       SDValue Store =
4412           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4413       MemOps.push_back(Store);
4414       // Increment the address by four for the next argument to store
4415       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4416       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4417     }
4418   }
4419 
4420   if (!MemOps.empty())
4421     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4422 
4423   return Chain;
4424 }
4425 
4426 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4427 /// adjusted to accommodate the arguments for the tailcall.
4428 static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4429                                    unsigned ParamSize) {
4430 
4431   if (!isTailCall) return 0;
4432 
4433   PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4434   unsigned CallerMinReservedArea = FI->getMinReservedArea();
4435   int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4436   // Remember only if the new adjustment is bigger.
4437   if (SPDiff < FI->getTailCallSPDelta())
4438     FI->setTailCallSPDelta(SPDiff);
4439 
4440   return SPDiff;
4441 }
4442 
4443 static bool isFunctionGlobalAddress(SDValue Callee);
4444 
4445 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4446                               const TargetMachine &TM) {
4447   // It does not make sense to call callsShareTOCBase() with a caller that
4448   // is PC Relative since PC Relative callers do not have a TOC.
4449 #ifndef NDEBUG
4450   const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4451   assert(!STICaller->isUsingPCRelativeCalls() &&
4452          "PC Relative callers do not have a TOC and cannot share a TOC Base");
4453 #endif
4454 
4455   // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4456   // don't have enough information to determine if the caller and callee share
4457   // the same  TOC base, so we have to pessimistically assume they don't for
4458   // correctness.
4459   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4460   if (!G)
4461     return false;
4462 
4463   const GlobalValue *GV = G->getGlobal();
4464 
4465   // If the callee is preemptable, then the static linker will use a plt-stub
4466   // which saves the toc to the stack, and needs a nop after the call
4467   // instruction to convert to a toc-restore.
4468   if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4469     return false;
4470 
4471   // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4472   // We may need a TOC restore in the situation where the caller requires a
4473   // valid TOC but the callee is PC Relative and does not.
4474   const Function *F = dyn_cast<Function>(GV);
4475   const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4476 
4477   // If we have an Alias we can try to get the function from there.
4478   if (Alias) {
4479     const GlobalObject *GlobalObj = Alias->getBaseObject();
4480     F = dyn_cast<Function>(GlobalObj);
4481   }
4482 
4483   // If we still have no valid function pointer we do not have enough
4484   // information to determine if the callee uses PC Relative calls so we must
4485   // assume that it does.
4486   if (!F)
4487     return false;
4488 
4489   // If the callee uses PC Relative we cannot guarantee that the callee won't
4490   // clobber the TOC of the caller and so we must assume that the two
4491   // functions do not share a TOC base.
4492   const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4493   if (STICallee->isUsingPCRelativeCalls())
4494     return false;
4495 
4496   // If the GV is not a strong definition then we need to assume it can be
4497   // replaced by another function at link time. The function that replaces
4498   // it may not share the same TOC as the caller since the callee may be
4499   // replaced by a PC Relative version of the same function.
4500   if (!GV->isStrongDefinitionForLinker())
4501     return false;
4502 
4503   // The medium and large code models are expected to provide a sufficiently
4504   // large TOC to provide all data addressing needs of a module with a
4505   // single TOC.
4506   if (CodeModel::Medium == TM.getCodeModel() ||
4507       CodeModel::Large == TM.getCodeModel())
4508     return true;
4509 
4510   // Any explicitly-specified sections and section prefixes must also match.
4511   // Also, if we're using -ffunction-sections, then each function is always in
4512   // a different section (the same is true for COMDAT functions).
4513   if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4514       GV->getSection() != Caller->getSection())
4515     return false;
4516   if (const auto *F = dyn_cast<Function>(GV)) {
4517     if (F->getSectionPrefix() != Caller->getSectionPrefix())
4518       return false;
4519   }
4520 
4521   return true;
4522 }
4523 
4524 static bool
4525 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4526                             const SmallVectorImpl<ISD::OutputArg> &Outs) {
4527   assert(Subtarget.is64BitELFABI());
4528 
4529   const unsigned PtrByteSize = 8;
4530   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4531 
4532   static const MCPhysReg GPR[] = {
4533     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4534     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4535   };
4536   static const MCPhysReg VR[] = {
4537     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4538     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4539   };
4540 
4541   const unsigned NumGPRs = array_lengthof(GPR);
4542   const unsigned NumFPRs = 13;
4543   const unsigned NumVRs = array_lengthof(VR);
4544   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4545 
4546   unsigned NumBytes = LinkageSize;
4547   unsigned AvailableFPRs = NumFPRs;
4548   unsigned AvailableVRs = NumVRs;
4549 
4550   for (const ISD::OutputArg& Param : Outs) {
4551     if (Param.Flags.isNest()) continue;
4552 
4553     if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4554                                LinkageSize, ParamAreaSize, NumBytes,
4555                                AvailableFPRs, AvailableVRs))
4556       return true;
4557   }
4558   return false;
4559 }
4560 
4561 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4562   if (CB.arg_size() != CallerFn->arg_size())
4563     return false;
4564 
4565   auto CalleeArgIter = CB.arg_begin();
4566   auto CalleeArgEnd = CB.arg_end();
4567   Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4568 
4569   for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4570     const Value* CalleeArg = *CalleeArgIter;
4571     const Value* CallerArg = &(*CallerArgIter);
4572     if (CalleeArg == CallerArg)
4573       continue;
4574 
4575     // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4576     //        tail call @callee([4 x i64] undef, [4 x i64] %b)
4577     //      }
4578     // 1st argument of callee is undef and has the same type as caller.
4579     if (CalleeArg->getType() == CallerArg->getType() &&
4580         isa<UndefValue>(CalleeArg))
4581       continue;
4582 
4583     return false;
4584   }
4585 
4586   return true;
4587 }
4588 
4589 // Returns true if TCO is possible between the callers and callees
4590 // calling conventions.
4591 static bool
4592 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4593                                     CallingConv::ID CalleeCC) {
4594   // Tail calls are possible with fastcc and ccc.
4595   auto isTailCallableCC  = [] (CallingConv::ID CC){
4596       return  CC == CallingConv::C || CC == CallingConv::Fast;
4597   };
4598   if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4599     return false;
4600 
4601   // We can safely tail call both fastcc and ccc callees from a c calling
4602   // convention caller. If the caller is fastcc, we may have less stack space
4603   // than a non-fastcc caller with the same signature so disable tail-calls in
4604   // that case.
4605   return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4606 }
4607 
4608 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4609     SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4610     const SmallVectorImpl<ISD::OutputArg> &Outs,
4611     const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4612   bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4613 
4614   if (DisableSCO && !TailCallOpt) return false;
4615 
4616   // Variadic argument functions are not supported.
4617   if (isVarArg) return false;
4618 
4619   auto &Caller = DAG.getMachineFunction().getFunction();
4620   // Check that the calling conventions are compatible for tco.
4621   if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4622     return false;
4623 
4624   // Caller contains any byval parameter is not supported.
4625   if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4626     return false;
4627 
4628   // Callee contains any byval parameter is not supported, too.
4629   // Note: This is a quick work around, because in some cases, e.g.
4630   // caller's stack size > callee's stack size, we are still able to apply
4631   // sibling call optimization. For example, gcc is able to do SCO for caller1
4632   // in the following example, but not for caller2.
4633   //   struct test {
4634   //     long int a;
4635   //     char ary[56];
4636   //   } gTest;
4637   //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
4638   //     b->a = v.a;
4639   //     return 0;
4640   //   }
4641   //   void caller1(struct test a, struct test c, struct test *b) {
4642   //     callee(gTest, b); }
4643   //   void caller2(struct test *b) { callee(gTest, b); }
4644   if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4645     return false;
4646 
4647   // If callee and caller use different calling conventions, we cannot pass
4648   // parameters on stack since offsets for the parameter area may be different.
4649   if (Caller.getCallingConv() != CalleeCC &&
4650       needStackSlotPassParameters(Subtarget, Outs))
4651     return false;
4652 
4653   // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4654   // the caller and callee share the same TOC for TCO/SCO. If the caller and
4655   // callee potentially have different TOC bases then we cannot tail call since
4656   // we need to restore the TOC pointer after the call.
4657   // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4658   // We cannot guarantee this for indirect calls or calls to external functions.
4659   // When PC-Relative addressing is used, the concept of the TOC is no longer
4660   // applicable so this check is not required.
4661   // Check first for indirect calls.
4662   if (!Subtarget.isUsingPCRelativeCalls() &&
4663       !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4664     return false;
4665 
4666   // Check if we share the TOC base.
4667   if (!Subtarget.isUsingPCRelativeCalls() &&
4668       !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4669     return false;
4670 
4671   // TCO allows altering callee ABI, so we don't have to check further.
4672   if (CalleeCC == CallingConv::Fast && TailCallOpt)
4673     return true;
4674 
4675   if (DisableSCO) return false;
4676 
4677   // If callee use the same argument list that caller is using, then we can
4678   // apply SCO on this case. If it is not, then we need to check if callee needs
4679   // stack for passing arguments.
4680   // PC Relative tail calls may not have a CallBase.
4681   // If there is no CallBase we cannot verify if we have the same argument
4682   // list so assume that we don't have the same argument list.
4683   if (CB && !hasSameArgumentList(&Caller, *CB) &&
4684       needStackSlotPassParameters(Subtarget, Outs))
4685     return false;
4686   else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4687     return false;
4688 
4689   return true;
4690 }
4691 
4692 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4693 /// for tail call optimization. Targets which want to do tail call
4694 /// optimization should implement this function.
4695 bool
4696 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
4697                                                      CallingConv::ID CalleeCC,
4698                                                      bool isVarArg,
4699                                       const SmallVectorImpl<ISD::InputArg> &Ins,
4700                                                      SelectionDAG& DAG) const {
4701   if (!getTargetMachine().Options.GuaranteedTailCallOpt)
4702     return false;
4703 
4704   // Variable argument functions are not supported.
4705   if (isVarArg)
4706     return false;
4707 
4708   MachineFunction &MF = DAG.getMachineFunction();
4709   CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
4710   if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
4711     // Functions containing by val parameters are not supported.
4712     for (unsigned i = 0; i != Ins.size(); i++) {
4713        ISD::ArgFlagsTy Flags = Ins[i].Flags;
4714        if (Flags.isByVal()) return false;
4715     }
4716 
4717     // Non-PIC/GOT tail calls are supported.
4718     if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
4719       return true;
4720 
4721     // At the moment we can only do local tail calls (in same module, hidden
4722     // or protected) if we are generating PIC.
4723     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
4724       return G->getGlobal()->hasHiddenVisibility()
4725           || G->getGlobal()->hasProtectedVisibility();
4726   }
4727 
4728   return false;
4729 }
4730 
4731 /// isCallCompatibleAddress - Return the immediate to use if the specified
4732 /// 32-bit value is representable in the immediate field of a BxA instruction.
4733 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
4734   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
4735   if (!C) return nullptr;
4736 
4737   int Addr = C->getZExtValue();
4738   if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
4739       SignExtend32<26>(Addr) != Addr)
4740     return nullptr;  // Top 6 bits have to be sext of immediate.
4741 
4742   return DAG
4743       .getConstant(
4744           (int)C->getZExtValue() >> 2, SDLoc(Op),
4745           DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
4746       .getNode();
4747 }
4748 
4749 namespace {
4750 
4751 struct TailCallArgumentInfo {
4752   SDValue Arg;
4753   SDValue FrameIdxOp;
4754   int FrameIdx = 0;
4755 
4756   TailCallArgumentInfo() = default;
4757 };
4758 
4759 } // end anonymous namespace
4760 
4761 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4762 static void StoreTailCallArgumentsToStackSlot(
4763     SelectionDAG &DAG, SDValue Chain,
4764     const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
4765     SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
4766   for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
4767     SDValue Arg = TailCallArgs[i].Arg;
4768     SDValue FIN = TailCallArgs[i].FrameIdxOp;
4769     int FI = TailCallArgs[i].FrameIdx;
4770     // Store relative to framepointer.
4771     MemOpChains.push_back(DAG.getStore(
4772         Chain, dl, Arg, FIN,
4773         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4774   }
4775 }
4776 
4777 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4778 /// the appropriate stack slot for the tail call optimized function call.
4779 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
4780                                              SDValue OldRetAddr, SDValue OldFP,
4781                                              int SPDiff, const SDLoc &dl) {
4782   if (SPDiff) {
4783     // Calculate the new stack slot for the return address.
4784     MachineFunction &MF = DAG.getMachineFunction();
4785     const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
4786     const PPCFrameLowering *FL = Subtarget.getFrameLowering();
4787     bool isPPC64 = Subtarget.isPPC64();
4788     int SlotSize = isPPC64 ? 8 : 4;
4789     int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
4790     int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
4791                                                          NewRetAddrLoc, true);
4792     EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4793     SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
4794     Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
4795                          MachinePointerInfo::getFixedStack(MF, NewRetAddr));
4796   }
4797   return Chain;
4798 }
4799 
4800 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4801 /// the position of the argument.
4802 static void
4803 CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
4804                          SDValue Arg, int SPDiff, unsigned ArgOffset,
4805                      SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
4806   int Offset = ArgOffset + SPDiff;
4807   uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
4808   int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4809   EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
4810   SDValue FIN = DAG.getFrameIndex(FI, VT);
4811   TailCallArgumentInfo Info;
4812   Info.Arg = Arg;
4813   Info.FrameIdxOp = FIN;
4814   Info.FrameIdx = FI;
4815   TailCallArguments.push_back(Info);
4816 }
4817 
4818 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4819 /// stack slot. Returns the chain as result and the loaded frame pointers in
4820 /// LROpOut/FPOpout. Used when tail calling.
4821 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4822     SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
4823     SDValue &FPOpOut, const SDLoc &dl) const {
4824   if (SPDiff) {
4825     // Load the LR and FP stack slot for later adjusting.
4826     EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
4827     LROpOut = getReturnAddrFrameIndex(DAG);
4828     LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
4829     Chain = SDValue(LROpOut.getNode(), 1);
4830   }
4831   return Chain;
4832 }
4833 
4834 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4835 /// by "Src" to address "Dst" of size "Size".  Alignment information is
4836 /// specified by the specific parameter attribute. The copy will be passed as
4837 /// a byval function parameter.
4838 /// Sometimes what we are copying is the end of a larger object, the part that
4839 /// does not fit in registers.
4840 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
4841                                          SDValue Chain, ISD::ArgFlagsTy Flags,
4842                                          SelectionDAG &DAG, const SDLoc &dl) {
4843   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
4844   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
4845                        Flags.getNonZeroByValAlign(), false, false, false,
4846                        MachinePointerInfo(), MachinePointerInfo());
4847 }
4848 
4849 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4850 /// tail calls.
4851 static void LowerMemOpCallTo(
4852     SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
4853     SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
4854     bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
4855     SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
4856   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4857   if (!isTailCall) {
4858     if (isVector) {
4859       SDValue StackPtr;
4860       if (isPPC64)
4861         StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
4862       else
4863         StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
4864       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
4865                            DAG.getConstant(ArgOffset, dl, PtrVT));
4866     }
4867     MemOpChains.push_back(
4868         DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
4869     // Calculate and remember argument location.
4870   } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
4871                                   TailCallArguments);
4872 }
4873 
4874 static void
4875 PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
4876                 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
4877                 SDValue FPOp,
4878                 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
4879   // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4880   // might overwrite each other in case of tail call optimization.
4881   SmallVector<SDValue, 8> MemOpChains2;
4882   // Do not flag preceding copytoreg stuff together with the following stuff.
4883   InFlag = SDValue();
4884   StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
4885                                     MemOpChains2, dl);
4886   if (!MemOpChains2.empty())
4887     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4888 
4889   // Store the return address to the appropriate stack slot.
4890   Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
4891 
4892   // Emit callseq_end just before tailcall node.
4893   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
4894                              DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4895   InFlag = Chain.getValue(1);
4896 }
4897 
4898 // Is this global address that of a function that can be called by name? (as
4899 // opposed to something that must hold a descriptor for an indirect call).
4900 static bool isFunctionGlobalAddress(SDValue Callee) {
4901   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4902     if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
4903         Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
4904       return false;
4905 
4906     return G->getGlobal()->getValueType()->isFunctionTy();
4907   }
4908 
4909   return false;
4910 }
4911 
4912 SDValue PPCTargetLowering::LowerCallResult(
4913     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
4914     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4915     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4916   SmallVector<CCValAssign, 16> RVLocs;
4917   CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4918                     *DAG.getContext());
4919 
4920   CCRetInfo.AnalyzeCallResult(
4921       Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
4922                ? RetCC_PPC_Cold
4923                : RetCC_PPC);
4924 
4925   // Copy all of the result registers out of their specified physreg.
4926   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4927     CCValAssign &VA = RVLocs[i];
4928     assert(VA.isRegLoc() && "Can only return in registers!");
4929 
4930     SDValue Val;
4931 
4932     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
4933       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4934                                       InFlag);
4935       Chain = Lo.getValue(1);
4936       InFlag = Lo.getValue(2);
4937       VA = RVLocs[++i]; // skip ahead to next loc
4938       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
4939                                       InFlag);
4940       Chain = Hi.getValue(1);
4941       InFlag = Hi.getValue(2);
4942       if (!Subtarget.isLittleEndian())
4943         std::swap (Lo, Hi);
4944       Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
4945     } else {
4946       Val = DAG.getCopyFromReg(Chain, dl,
4947                                VA.getLocReg(), VA.getLocVT(), InFlag);
4948       Chain = Val.getValue(1);
4949       InFlag = Val.getValue(2);
4950     }
4951 
4952     switch (VA.getLocInfo()) {
4953     default: llvm_unreachable("Unknown loc info!");
4954     case CCValAssign::Full: break;
4955     case CCValAssign::AExt:
4956       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4957       break;
4958     case CCValAssign::ZExt:
4959       Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
4960                         DAG.getValueType(VA.getValVT()));
4961       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4962       break;
4963     case CCValAssign::SExt:
4964       Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
4965                         DAG.getValueType(VA.getValVT()));
4966       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
4967       break;
4968     }
4969 
4970     InVals.push_back(Val);
4971   }
4972 
4973   return Chain;
4974 }
4975 
4976 static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
4977                            const PPCSubtarget &Subtarget, bool isPatchPoint) {
4978   // PatchPoint calls are not indirect.
4979   if (isPatchPoint)
4980     return false;
4981 
4982   if (isFunctionGlobalAddress(Callee) || dyn_cast<ExternalSymbolSDNode>(Callee))
4983     return false;
4984 
4985   // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
4986   // becuase the immediate function pointer points to a descriptor instead of
4987   // a function entry point. The ELFv2 ABI cannot use a BLA because the function
4988   // pointer immediate points to the global entry point, while the BLA would
4989   // need to jump to the local entry point (see rL211174).
4990   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
4991       isBLACompatibleAddress(Callee, DAG))
4992     return false;
4993 
4994   return true;
4995 }
4996 
4997 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
4998 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
4999   return Subtarget.isAIXABI() ||
5000          (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5001 }
5002 
5003 static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5004                               const Function &Caller,
5005                               const SDValue &Callee,
5006                               const PPCSubtarget &Subtarget,
5007                               const TargetMachine &TM) {
5008   if (CFlags.IsTailCall)
5009     return PPCISD::TC_RETURN;
5010 
5011   // This is a call through a function pointer.
5012   if (CFlags.IsIndirect) {
5013     // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer accross
5014     // indirect calls. The save of the caller's TOC pointer to the stack will be
5015     // inserted into the DAG as part of call lowering. The restore of the TOC
5016     // pointer is modeled by using a pseudo instruction for the call opcode that
5017     // represents the 2 instruction sequence of an indirect branch and link,
5018     // immediately followed by a load of the TOC pointer from the the stack save
5019     // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5020     // as it is not saved or used.
5021     return isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5022                                                : PPCISD::BCTRL;
5023   }
5024 
5025   if (Subtarget.isUsingPCRelativeCalls()) {
5026     assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5027     return PPCISD::CALL_NOTOC;
5028   }
5029 
5030   // The ABIs that maintain a TOC pointer accross calls need to have a nop
5031   // immediately following the call instruction if the caller and callee may
5032   // have different TOC bases. At link time if the linker determines the calls
5033   // may not share a TOC base, the call is redirected to a trampoline inserted
5034   // by the linker. The trampoline will (among other things) save the callers
5035   // TOC pointer at an ABI designated offset in the linkage area and the linker
5036   // will rewrite the nop to be a load of the TOC pointer from the linkage area
5037   // into gpr2.
5038   if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
5039     return callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
5040                                                   : PPCISD::CALL_NOP;
5041 
5042   return PPCISD::CALL;
5043 }
5044 
5045 static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5046                                const SDLoc &dl, const PPCSubtarget &Subtarget) {
5047   if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5048     if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5049       return SDValue(Dest, 0);
5050 
5051   // Returns true if the callee is local, and false otherwise.
5052   auto isLocalCallee = [&]() {
5053     const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5054     const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5055     const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5056 
5057     return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
5058            !dyn_cast_or_null<GlobalIFunc>(GV);
5059   };
5060 
5061   // The PLT is only used in 32-bit ELF PIC mode.  Attempting to use the PLT in
5062   // a static relocation model causes some versions of GNU LD (2.17.50, at
5063   // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5064   // built with secure-PLT.
5065   bool UsePlt =
5066       Subtarget.is32BitELFABI() && !isLocalCallee() &&
5067       Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5068 
5069   const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5070     const TargetMachine &TM = Subtarget.getTargetMachine();
5071     const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5072     MCSymbolXCOFF *S =
5073         cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5074 
5075     MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5076     return DAG.getMCSymbol(S, PtrVT);
5077   };
5078 
5079   if (isFunctionGlobalAddress(Callee)) {
5080     const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5081 
5082     if (Subtarget.isAIXABI()) {
5083       assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5084       return getAIXFuncEntryPointSymbolSDNode(GV);
5085     }
5086     return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5087                                       UsePlt ? PPCII::MO_PLT : 0);
5088   }
5089 
5090   if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5091     const char *SymName = S->getSymbol();
5092     if (Subtarget.isAIXABI()) {
5093       // If there exists a user-declared function whose name is the same as the
5094       // ExternalSymbol's, then we pick up the user-declared version.
5095       const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5096       if (const Function *F =
5097               dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5098         return getAIXFuncEntryPointSymbolSDNode(F);
5099 
5100       // On AIX, direct function calls reference the symbol for the function's
5101       // entry point, which is named by prepending a "." before the function's
5102       // C-linkage name. A Qualname is returned here because an external
5103       // function entry point is a csect with XTY_ER property.
5104       const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5105         auto &Context = DAG.getMachineFunction().getMMI().getContext();
5106         MCSectionXCOFF *Sec = Context.getXCOFFSection(
5107             (Twine(".") + Twine(SymName)).str(), XCOFF::XMC_PR, XCOFF::XTY_ER,
5108             SectionKind::getMetadata());
5109         return Sec->getQualNameSymbol();
5110       };
5111 
5112       SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5113     }
5114     return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5115                                        UsePlt ? PPCII::MO_PLT : 0);
5116   }
5117 
5118   // No transformation needed.
5119   assert(Callee.getNode() && "What no callee?");
5120   return Callee;
5121 }
5122 
5123 static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5124   assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5125          "Expected a CALLSEQ_STARTSDNode.");
5126 
5127   // The last operand is the chain, except when the node has glue. If the node
5128   // has glue, then the last operand is the glue, and the chain is the second
5129   // last operand.
5130   SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5131   if (LastValue.getValueType() != MVT::Glue)
5132     return LastValue;
5133 
5134   return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5135 }
5136 
5137 // Creates the node that moves a functions address into the count register
5138 // to prepare for an indirect call instruction.
5139 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5140                                 SDValue &Glue, SDValue &Chain,
5141                                 const SDLoc &dl) {
5142   SDValue MTCTROps[] = {Chain, Callee, Glue};
5143   EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5144   Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5145                       makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5146   // The glue is the second value produced.
5147   Glue = Chain.getValue(1);
5148 }
5149 
5150 static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5151                                           SDValue &Glue, SDValue &Chain,
5152                                           SDValue CallSeqStart,
5153                                           const CallBase *CB, const SDLoc &dl,
5154                                           bool hasNest,
5155                                           const PPCSubtarget &Subtarget) {
5156   // Function pointers in the 64-bit SVR4 ABI do not point to the function
5157   // entry point, but to the function descriptor (the function entry point
5158   // address is part of the function descriptor though).
5159   // The function descriptor is a three doubleword structure with the
5160   // following fields: function entry point, TOC base address and
5161   // environment pointer.
5162   // Thus for a call through a function pointer, the following actions need
5163   // to be performed:
5164   //   1. Save the TOC of the caller in the TOC save area of its stack
5165   //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5166   //   2. Load the address of the function entry point from the function
5167   //      descriptor.
5168   //   3. Load the TOC of the callee from the function descriptor into r2.
5169   //   4. Load the environment pointer from the function descriptor into
5170   //      r11.
5171   //   5. Branch to the function entry point address.
5172   //   6. On return of the callee, the TOC of the caller needs to be
5173   //      restored (this is done in FinishCall()).
5174   //
5175   // The loads are scheduled at the beginning of the call sequence, and the
5176   // register copies are flagged together to ensure that no other
5177   // operations can be scheduled in between. E.g. without flagging the
5178   // copies together, a TOC access in the caller could be scheduled between
5179   // the assignment of the callee TOC and the branch to the callee, which leads
5180   // to incorrect code.
5181 
5182   // Start by loading the function address from the descriptor.
5183   SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5184   auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5185                       ? (MachineMemOperand::MODereferenceable |
5186                          MachineMemOperand::MOInvariant)
5187                       : MachineMemOperand::MONone;
5188 
5189   MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5190 
5191   // Registers used in building the DAG.
5192   const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5193   const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5194 
5195   // Offsets of descriptor members.
5196   const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5197   const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5198 
5199   const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5200   const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;
5201 
5202   // One load for the functions entry point address.
5203   SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5204                                     Alignment, MMOFlags);
5205 
5206   // One for loading the TOC anchor for the module that contains the called
5207   // function.
5208   SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5209   SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5210   SDValue TOCPtr =
5211       DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5212                   MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5213 
5214   // One for loading the environment pointer.
5215   SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5216   SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5217   SDValue LoadEnvPtr =
5218       DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5219                   MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5220 
5221 
5222   // Then copy the newly loaded TOC anchor to the TOC pointer.
5223   SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5224   Chain = TOCVal.getValue(0);
5225   Glue = TOCVal.getValue(1);
5226 
5227   // If the function call has an explicit 'nest' parameter, it takes the
5228   // place of the environment pointer.
5229   assert((!hasNest || !Subtarget.isAIXABI()) &&
5230          "Nest parameter is not supported on AIX.");
5231   if (!hasNest) {
5232     SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5233     Chain = EnvVal.getValue(0);
5234     Glue = EnvVal.getValue(1);
5235   }
5236 
5237   // The rest of the indirect call sequence is the same as the non-descriptor
5238   // DAG.
5239   prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5240 }
5241 
5242 static void
5243 buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5244                   PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5245                   SelectionDAG &DAG,
5246                   SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5247                   SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5248                   const PPCSubtarget &Subtarget) {
5249   const bool IsPPC64 = Subtarget.isPPC64();
5250   // MVT for a general purpose register.
5251   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5252 
5253   // First operand is always the chain.
5254   Ops.push_back(Chain);
5255 
5256   // If it's a direct call pass the callee as the second operand.
5257   if (!CFlags.IsIndirect)
5258     Ops.push_back(Callee);
5259   else {
5260     assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5261 
5262     // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5263     // on the stack (this would have been done in `LowerCall_64SVR4` or
5264     // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5265     // represents both the indirect branch and a load that restores the TOC
5266     // pointer from the linkage area. The operand for the TOC restore is an add
5267     // of the TOC save offset to the stack pointer. This must be the second
5268     // operand: after the chain input but before any other variadic arguments.
5269     // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5270     // saved or used.
5271     if (isTOCSaveRestoreRequired(Subtarget)) {
5272       const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5273 
5274       SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5275       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5276       SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5277       SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5278       Ops.push_back(AddTOC);
5279     }
5280 
5281     // Add the register used for the environment pointer.
5282     if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5283       Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5284                                     RegVT));
5285 
5286 
5287     // Add CTR register as callee so a bctr can be emitted later.
5288     if (CFlags.IsTailCall)
5289       Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5290   }
5291 
5292   // If this is a tail call add stack pointer delta.
5293   if (CFlags.IsTailCall)
5294     Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5295 
5296   // Add argument registers to the end of the list so that they are known live
5297   // into the call.
5298   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5299     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5300                                   RegsToPass[i].second.getValueType()));
5301 
5302   // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5303   // no way to mark dependencies as implicit here.
5304   // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5305   if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5306        !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5307     Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5308 
5309   // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5310   if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5311     Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5312 
5313   // Add a register mask operand representing the call-preserved registers.
5314   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5315   const uint32_t *Mask =
5316       TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5317   assert(Mask && "Missing call preserved mask for calling convention");
5318   Ops.push_back(DAG.getRegisterMask(Mask));
5319 
5320   // If the glue is valid, it is the last operand.
5321   if (Glue.getNode())
5322     Ops.push_back(Glue);
5323 }
5324 
5325 SDValue PPCTargetLowering::FinishCall(
5326     CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5327     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5328     SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5329     unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5330     SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5331 
5332   if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5333       Subtarget.isAIXABI())
5334     setUsesTOCBasePtr(DAG);
5335 
5336   unsigned CallOpc =
5337       getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5338                     Subtarget, DAG.getTarget());
5339 
5340   if (!CFlags.IsIndirect)
5341     Callee = transformCallee(Callee, DAG, dl, Subtarget);
5342   else if (Subtarget.usesFunctionDescriptors())
5343     prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5344                                   dl, CFlags.HasNest, Subtarget);
5345   else
5346     prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5347 
5348   // Build the operand list for the call instruction.
5349   SmallVector<SDValue, 8> Ops;
5350   buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5351                     SPDiff, Subtarget);
5352 
5353   // Emit tail call.
5354   if (CFlags.IsTailCall) {
5355     // Indirect tail call when using PC Relative calls do not have the same
5356     // constraints.
5357     assert(((Callee.getOpcode() == ISD::Register &&
5358              cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5359             Callee.getOpcode() == ISD::TargetExternalSymbol ||
5360             Callee.getOpcode() == ISD::TargetGlobalAddress ||
5361             isa<ConstantSDNode>(Callee) ||
5362             (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5363            "Expecting a global address, external symbol, absolute value, "
5364            "register or an indirect tail call when PC Relative calls are "
5365            "used.");
5366     // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5367     assert(CallOpc == PPCISD::TC_RETURN &&
5368            "Unexpected call opcode for a tail call.");
5369     DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5370     return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5371   }
5372 
5373   std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5374   Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5375   DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5376   Glue = Chain.getValue(1);
5377 
5378   // When performing tail call optimization the callee pops its arguments off
5379   // the stack. Account for this here so these bytes can be pushed back on in
5380   // PPCFrameLowering::eliminateCallFramePseudoInstr.
5381   int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5382                          getTargetMachine().Options.GuaranteedTailCallOpt)
5383                             ? NumBytes
5384                             : 0;
5385 
5386   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
5387                              DAG.getIntPtrConstant(BytesCalleePops, dl, true),
5388                              Glue, dl);
5389   Glue = Chain.getValue(1);
5390 
5391   return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5392                          DAG, InVals);
5393 }
5394 
5395 SDValue
5396 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5397                              SmallVectorImpl<SDValue> &InVals) const {
5398   SelectionDAG &DAG                     = CLI.DAG;
5399   SDLoc &dl                             = CLI.DL;
5400   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5401   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
5402   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
5403   SDValue Chain                         = CLI.Chain;
5404   SDValue Callee                        = CLI.Callee;
5405   bool &isTailCall                      = CLI.IsTailCall;
5406   CallingConv::ID CallConv              = CLI.CallConv;
5407   bool isVarArg                         = CLI.IsVarArg;
5408   bool isPatchPoint                     = CLI.IsPatchPoint;
5409   const CallBase *CB                    = CLI.CB;
5410 
5411   if (isTailCall) {
5412     if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5413       isTailCall = false;
5414     else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5415       isTailCall = IsEligibleForTailCallOptimization_64SVR4(
5416           Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
5417     else
5418       isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
5419                                                      Ins, DAG);
5420     if (isTailCall) {
5421       ++NumTailCalls;
5422       if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5423         ++NumSiblingCalls;
5424 
5425       // PC Relative calls no longer guarantee that the callee is a Global
5426       // Address Node. The callee could be an indirect tail call in which
5427       // case the SDValue for the callee could be a load (to load the address
5428       // of a function pointer) or it may be a register copy (to move the
5429       // address of the callee from a function parameter into a virtual
5430       // register). It may also be an ExternalSymbolSDNode (ex memcopy).
5431       assert((Subtarget.isUsingPCRelativeCalls() ||
5432               isa<GlobalAddressSDNode>(Callee)) &&
5433              "Callee should be an llvm::Function object.");
5434 
5435       LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5436                         << "\nTCO callee: ");
5437       LLVM_DEBUG(Callee.dump());
5438     }
5439   }
5440 
5441   if (!isTailCall && CB && CB->isMustTailCall())
5442     report_fatal_error("failed to perform tail call elimination on a call "
5443                        "site marked musttail");
5444 
5445   // When long calls (i.e. indirect calls) are always used, calls are always
5446   // made via function pointer. If we have a function name, first translate it
5447   // into a pointer.
5448   if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5449       !isTailCall)
5450     Callee = LowerGlobalAddress(Callee, DAG);
5451 
5452   CallFlags CFlags(
5453       CallConv, isTailCall, isVarArg, isPatchPoint,
5454       isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5455       // hasNest
5456       Subtarget.is64BitELFABI() &&
5457           any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5458       CLI.NoMerge);
5459 
5460   if (Subtarget.isAIXABI())
5461     return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5462                          InVals, CB);
5463 
5464   assert(Subtarget.isSVR4ABI());
5465   if (Subtarget.isPPC64())
5466     return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5467                             InVals, CB);
5468   return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5469                           InVals, CB);
5470 }
5471 
5472 SDValue PPCTargetLowering::LowerCall_32SVR4(
5473     SDValue Chain, SDValue Callee, CallFlags CFlags,
5474     const SmallVectorImpl<ISD::OutputArg> &Outs,
5475     const SmallVectorImpl<SDValue> &OutVals,
5476     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5477     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5478     const CallBase *CB) const {
5479   // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5480   // of the 32-bit SVR4 ABI stack frame layout.
5481 
5482   const CallingConv::ID CallConv = CFlags.CallConv;
5483   const bool IsVarArg = CFlags.IsVarArg;
5484   const bool IsTailCall = CFlags.IsTailCall;
5485 
5486   assert((CallConv == CallingConv::C ||
5487           CallConv == CallingConv::Cold ||
5488           CallConv == CallingConv::Fast) && "Unknown calling convention!");
5489 
5490   const Align PtrAlign(4);
5491 
5492   MachineFunction &MF = DAG.getMachineFunction();
5493 
5494   // Mark this function as potentially containing a function that contains a
5495   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5496   // and restoring the callers stack pointer in this functions epilog. This is
5497   // done because by tail calling the called function might overwrite the value
5498   // in this function's (MF) stack pointer stack slot 0(SP).
5499   if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5500       CallConv == CallingConv::Fast)
5501     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5502 
5503   // Count how many bytes are to be pushed on the stack, including the linkage
5504   // area, parameter list area and the part of the local variable space which
5505   // contains copies of aggregates which are passed by value.
5506 
5507   // Assign locations to all of the outgoing arguments.
5508   SmallVector<CCValAssign, 16> ArgLocs;
5509   PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5510 
5511   // Reserve space for the linkage area on the stack.
5512   CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5513                        PtrAlign);
5514   if (useSoftFloat())
5515     CCInfo.PreAnalyzeCallOperands(Outs);
5516 
5517   if (IsVarArg) {
5518     // Handle fixed and variable vector arguments differently.
5519     // Fixed vector arguments go into registers as long as registers are
5520     // available. Variable vector arguments always go into memory.
5521     unsigned NumArgs = Outs.size();
5522 
5523     for (unsigned i = 0; i != NumArgs; ++i) {
5524       MVT ArgVT = Outs[i].VT;
5525       ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5526       bool Result;
5527 
5528       if (Outs[i].IsFixed) {
5529         Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5530                                CCInfo);
5531       } else {
5532         Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5533                                       ArgFlags, CCInfo);
5534       }
5535 
5536       if (Result) {
5537 #ifndef NDEBUG
5538         errs() << "Call operand #" << i << " has unhandled type "
5539              << EVT(ArgVT).getEVTString() << "\n";
5540 #endif
5541         llvm_unreachable(nullptr);
5542       }
5543     }
5544   } else {
5545     // All arguments are treated the same.
5546     CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5547   }
5548   CCInfo.clearWasPPCF128();
5549 
5550   // Assign locations to all of the outgoing aggregate by value arguments.
5551   SmallVector<CCValAssign, 16> ByValArgLocs;
5552   CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5553 
5554   // Reserve stack space for the allocations in CCInfo.
5555   CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5556 
5557   CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5558 
5559   // Size of the linkage area, parameter list area and the part of the local
5560   // space variable where copies of aggregates which are passed by value are
5561   // stored.
5562   unsigned NumBytes = CCByValInfo.getNextStackOffset();
5563 
5564   // Calculate by how many bytes the stack has to be adjusted in case of tail
5565   // call optimization.
5566   int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5567 
5568   // Adjust the stack pointer for the new arguments...
5569   // These operations are automatically eliminated by the prolog/epilog pass
5570   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5571   SDValue CallSeqStart = Chain;
5572 
5573   // Load the return address and frame pointer so it can be moved somewhere else
5574   // later.
5575   SDValue LROp, FPOp;
5576   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5577 
5578   // Set up a copy of the stack pointer for use loading and storing any
5579   // arguments that may not fit in the registers available for argument
5580   // passing.
5581   SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5582 
5583   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5584   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5585   SmallVector<SDValue, 8> MemOpChains;
5586 
5587   bool seenFloatArg = false;
5588   // Walk the register/memloc assignments, inserting copies/loads.
5589   // i - Tracks the index into the list of registers allocated for the call
5590   // RealArgIdx - Tracks the index into the list of actual function arguments
5591   // j - Tracks the index into the list of byval arguments
5592   for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5593        i != e;
5594        ++i, ++RealArgIdx) {
5595     CCValAssign &VA = ArgLocs[i];
5596     SDValue Arg = OutVals[RealArgIdx];
5597     ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5598 
5599     if (Flags.isByVal()) {
5600       // Argument is an aggregate which is passed by value, thus we need to
5601       // create a copy of it in the local variable space of the current stack
5602       // frame (which is the stack frame of the caller) and pass the address of
5603       // this copy to the callee.
5604       assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5605       CCValAssign &ByValVA = ByValArgLocs[j++];
5606       assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5607 
5608       // Memory reserved in the local variable space of the callers stack frame.
5609       unsigned LocMemOffset = ByValVA.getLocMemOffset();
5610 
5611       SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5612       PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5613                            StackPtr, PtrOff);
5614 
5615       // Create a copy of the argument in the local area of the current
5616       // stack frame.
5617       SDValue MemcpyCall =
5618         CreateCopyOfByValArgument(Arg, PtrOff,
5619                                   CallSeqStart.getNode()->getOperand(0),
5620                                   Flags, DAG, dl);
5621 
5622       // This must go outside the CALLSEQ_START..END.
5623       SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5624                                                      SDLoc(MemcpyCall));
5625       DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5626                              NewCallSeqStart.getNode());
5627       Chain = CallSeqStart = NewCallSeqStart;
5628 
5629       // Pass the address of the aggregate copy on the stack either in a
5630       // physical register or in the parameter list area of the current stack
5631       // frame to the callee.
5632       Arg = PtrOff;
5633     }
5634 
5635     // When useCRBits() is true, there can be i1 arguments.
5636     // It is because getRegisterType(MVT::i1) => MVT::i1,
5637     // and for other integer types getRegisterType() => MVT::i32.
5638     // Extend i1 and ensure callee will get i32.
5639     if (Arg.getValueType() == MVT::i1)
5640       Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
5641                         dl, MVT::i32, Arg);
5642 
5643     if (VA.isRegLoc()) {
5644       seenFloatArg |= VA.getLocVT().isFloatingPoint();
5645       // Put argument in a physical register.
5646       if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5647         bool IsLE = Subtarget.isLittleEndian();
5648         SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5649                         DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5650         RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5651         SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5652                            DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5653         RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5654                              SVal.getValue(0)));
5655       } else
5656         RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5657     } else {
5658       // Put argument in the parameter list area of the current stack frame.
5659       assert(VA.isMemLoc());
5660       unsigned LocMemOffset = VA.getLocMemOffset();
5661 
5662       if (!IsTailCall) {
5663         SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5664         PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5665                              StackPtr, PtrOff);
5666 
5667         MemOpChains.push_back(
5668             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5669       } else {
5670         // Calculate and remember argument location.
5671         CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5672                                  TailCallArguments);
5673       }
5674     }
5675   }
5676 
5677   if (!MemOpChains.empty())
5678     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5679 
5680   // Build a sequence of copy-to-reg nodes chained together with token chain
5681   // and flag operands which copy the outgoing args into the appropriate regs.
5682   SDValue InFlag;
5683   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5684     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5685                              RegsToPass[i].second, InFlag);
5686     InFlag = Chain.getValue(1);
5687   }
5688 
5689   // Set CR bit 6 to true if this is a vararg call with floating args passed in
5690   // registers.
5691   if (IsVarArg) {
5692     SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5693     SDValue Ops[] = { Chain, InFlag };
5694 
5695     Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5696                         dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5697 
5698     InFlag = Chain.getValue(1);
5699   }
5700 
5701   if (IsTailCall)
5702     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5703                     TailCallArguments);
5704 
5705   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5706                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
5707 }
5708 
5709 // Copy an argument into memory, being careful to do this outside the
5710 // call sequence for the call to which the argument belongs.
5711 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5712     SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5713     SelectionDAG &DAG, const SDLoc &dl) const {
5714   SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5715                         CallSeqStart.getNode()->getOperand(0),
5716                         Flags, DAG, dl);
5717   // The MEMCPY must go outside the CALLSEQ_START..END.
5718   int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5719   SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5720                                                  SDLoc(MemcpyCall));
5721   DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5722                          NewCallSeqStart.getNode());
5723   return NewCallSeqStart;
5724 }
5725 
5726 SDValue PPCTargetLowering::LowerCall_64SVR4(
5727     SDValue Chain, SDValue Callee, CallFlags CFlags,
5728     const SmallVectorImpl<ISD::OutputArg> &Outs,
5729     const SmallVectorImpl<SDValue> &OutVals,
5730     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5731     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5732     const CallBase *CB) const {
5733   bool isELFv2ABI = Subtarget.isELFv2ABI();
5734   bool isLittleEndian = Subtarget.isLittleEndian();
5735   unsigned NumOps = Outs.size();
5736   bool IsSibCall = false;
5737   bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5738 
5739   EVT PtrVT = getPointerTy(DAG.getDataLayout());
5740   unsigned PtrByteSize = 8;
5741 
5742   MachineFunction &MF = DAG.getMachineFunction();
5743 
5744   if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5745     IsSibCall = true;
5746 
5747   // Mark this function as potentially containing a function that contains a
5748   // tail call. As a consequence the frame pointer will be used for dynamicalloc
5749   // and restoring the callers stack pointer in this functions epilog. This is
5750   // done because by tail calling the called function might overwrite the value
5751   // in this function's (MF) stack pointer stack slot 0(SP).
5752   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5753     MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5754 
5755   assert(!(IsFastCall && CFlags.IsVarArg) &&
5756          "fastcc not supported on varargs functions");
5757 
5758   // Count how many bytes are to be pushed on the stack, including the linkage
5759   // area, and parameter passing area.  On ELFv1, the linkage area is 48 bytes
5760   // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5761   // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5762   unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5763   unsigned NumBytes = LinkageSize;
5764   unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5765 
5766   static const MCPhysReg GPR[] = {
5767     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5768     PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5769   };
5770   static const MCPhysReg VR[] = {
5771     PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5772     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5773   };
5774 
5775   const unsigned NumGPRs = array_lengthof(GPR);
5776   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5777   const unsigned NumVRs  = array_lengthof(VR);
5778 
5779   // On ELFv2, we can avoid allocating the parameter area if all the arguments
5780   // can be passed to the callee in registers.
5781   // For the fast calling convention, there is another check below.
5782   // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5783   bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5784   if (!HasParameterArea) {
5785     unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5786     unsigned AvailableFPRs = NumFPRs;
5787     unsigned AvailableVRs = NumVRs;
5788     unsigned NumBytesTmp = NumBytes;
5789     for (unsigned i = 0; i != NumOps; ++i) {
5790       if (Outs[i].Flags.isNest()) continue;
5791       if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5792                                  PtrByteSize, LinkageSize, ParamAreaSize,
5793                                  NumBytesTmp, AvailableFPRs, AvailableVRs))
5794         HasParameterArea = true;
5795     }
5796   }
5797 
5798   // When using the fast calling convention, we don't provide backing for
5799   // arguments that will be in registers.
5800   unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
5801 
5802   // Avoid allocating parameter area for fastcc functions if all the arguments
5803   // can be passed in the registers.
5804   if (IsFastCall)
5805     HasParameterArea = false;
5806 
5807   // Add up all the space actually used.
5808   for (unsigned i = 0; i != NumOps; ++i) {
5809     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5810     EVT ArgVT = Outs[i].VT;
5811     EVT OrigVT = Outs[i].ArgVT;
5812 
5813     if (Flags.isNest())
5814       continue;
5815 
5816     if (IsFastCall) {
5817       if (Flags.isByVal()) {
5818         NumGPRsUsed += (Flags.getByValSize()+7)/8;
5819         if (NumGPRsUsed > NumGPRs)
5820           HasParameterArea = true;
5821       } else {
5822         switch (ArgVT.getSimpleVT().SimpleTy) {
5823         default: llvm_unreachable("Unexpected ValueType for argument!");
5824         case MVT::i1:
5825         case MVT::i32:
5826         case MVT::i64:
5827           if (++NumGPRsUsed <= NumGPRs)
5828             continue;
5829           break;
5830         case MVT::v4i32:
5831         case MVT::v8i16:
5832         case MVT::v16i8:
5833         case MVT::v2f64:
5834         case MVT::v2i64:
5835         case MVT::v1i128:
5836         case MVT::f128:
5837           if (++NumVRsUsed <= NumVRs)
5838             continue;
5839           break;
5840         case MVT::v4f32:
5841           if (++NumVRsUsed <= NumVRs)
5842             continue;
5843           break;
5844         case MVT::f32:
5845         case MVT::f64:
5846           if (++NumFPRsUsed <= NumFPRs)
5847             continue;
5848           break;
5849         }
5850         HasParameterArea = true;
5851       }
5852     }
5853 
5854     /* Respect alignment of argument on the stack.  */
5855     auto Alignement =
5856         CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5857     NumBytes = alignTo(NumBytes, Alignement);
5858 
5859     NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
5860     if (Flags.isInConsecutiveRegsLast())
5861       NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
5862   }
5863 
5864   unsigned NumBytesActuallyUsed = NumBytes;
5865 
5866   // In the old ELFv1 ABI,
5867   // the prolog code of the callee may store up to 8 GPR argument registers to
5868   // the stack, allowing va_start to index over them in memory if its varargs.
5869   // Because we cannot tell if this is needed on the caller side, we have to
5870   // conservatively assume that it is needed.  As such, make sure we have at
5871   // least enough stack space for the caller to store the 8 GPRs.
5872   // In the ELFv2 ABI, we allocate the parameter area iff a callee
5873   // really requires memory operands, e.g. a vararg function.
5874   if (HasParameterArea)
5875     NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
5876   else
5877     NumBytes = LinkageSize;
5878 
5879   // Tail call needs the stack to be aligned.
5880   if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5881     NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
5882 
5883   int SPDiff = 0;
5884 
5885   // Calculate by how many bytes the stack has to be adjusted in case of tail
5886   // call optimization.
5887   if (!IsSibCall)
5888     SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
5889 
5890   // To protect arguments on the stack from being clobbered in a tail call,
5891   // force all the loads to happen before doing any other lowering.
5892   if (CFlags.IsTailCall)
5893     Chain = DAG.getStackArgumentTokenFactor(Chain);
5894 
5895   // Adjust the stack pointer for the new arguments...
5896   // These operations are automatically eliminated by the prolog/epilog pass
5897   if (!IsSibCall)
5898     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5899   SDValue CallSeqStart = Chain;
5900 
5901   // Load the return address and frame pointer so it can be move somewhere else
5902   // later.
5903   SDValue LROp, FPOp;
5904   Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5905 
5906   // Set up a copy of the stack pointer for use loading and storing any
5907   // arguments that may not fit in the registers available for argument
5908   // passing.
5909   SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5910 
5911   // Figure out which arguments are going to go in registers, and which in
5912   // memory.  Also, if this is a vararg function, floating point operations
5913   // must be stored to our stack, and loaded into integer regs as well, if
5914   // any integer regs are available for argument passing.
5915   unsigned ArgOffset = LinkageSize;
5916 
5917   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5918   SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5919 
5920   SmallVector<SDValue, 8> MemOpChains;
5921   for (unsigned i = 0; i != NumOps; ++i) {
5922     SDValue Arg = OutVals[i];
5923     ISD::ArgFlagsTy Flags = Outs[i].Flags;
5924     EVT ArgVT = Outs[i].VT;
5925     EVT OrigVT = Outs[i].ArgVT;
5926 
5927     // PtrOff will be used to store the current argument to the stack if a
5928     // register cannot be found for it.
5929     SDValue PtrOff;
5930 
5931     // We re-align the argument offset for each argument, except when using the
5932     // fast calling convention, when we need to make sure we do that only when
5933     // we'll actually use a stack slot.
5934     auto ComputePtrOff = [&]() {
5935       /* Respect alignment of argument on the stack.  */
5936       auto Alignment =
5937           CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
5938       ArgOffset = alignTo(ArgOffset, Alignment);
5939 
5940       PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
5941 
5942       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
5943     };
5944 
5945     if (!IsFastCall) {
5946       ComputePtrOff();
5947 
5948       /* Compute GPR index associated with argument offset.  */
5949       GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
5950       GPR_idx = std::min(GPR_idx, NumGPRs);
5951     }
5952 
5953     // Promote integers to 64-bit values.
5954     if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
5955       // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5956       unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
5957       Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
5958     }
5959 
5960     // FIXME memcpy is used way more than necessary.  Correctness first.
5961     // Note: "by value" is code for passing a structure by value, not
5962     // basic types.
5963     if (Flags.isByVal()) {
5964       // Note: Size includes alignment padding, so
5965       //   struct x { short a; char b; }
5966       // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
5967       // These are the proper values we need for right-justifying the
5968       // aggregate in a parameter register.
5969       unsigned Size = Flags.getByValSize();
5970 
5971       // An empty aggregate parameter takes up no storage and no
5972       // registers.
5973       if (Size == 0)
5974         continue;
5975 
5976       if (IsFastCall)
5977         ComputePtrOff();
5978 
5979       // All aggregates smaller than 8 bytes must be passed right-justified.
5980       if (Size==1 || Size==2 || Size==4) {
5981         EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
5982         if (GPR_idx != NumGPRs) {
5983           SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
5984                                         MachinePointerInfo(), VT);
5985           MemOpChains.push_back(Load.getValue(1));
5986           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
5987 
5988           ArgOffset += PtrByteSize;
5989           continue;
5990         }
5991       }
5992 
5993       if (GPR_idx == NumGPRs && Size < 8) {
5994         SDValue AddPtr = PtrOff;
5995         if (!isLittleEndian) {
5996           SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
5997                                           PtrOff.getValueType());
5998           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
5999         }
6000         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6001                                                           CallSeqStart,
6002                                                           Flags, DAG, dl);
6003         ArgOffset += PtrByteSize;
6004         continue;
6005       }
6006       // Copy entire object into memory.  There are cases where gcc-generated
6007       // code assumes it is there, even if it could be put entirely into
6008       // registers.  (This is not what the doc says.)
6009 
6010       // FIXME: The above statement is likely due to a misunderstanding of the
6011       // documents.  All arguments must be copied into the parameter area BY
6012       // THE CALLEE in the event that the callee takes the address of any
6013       // formal argument.  That has not yet been implemented.  However, it is
6014       // reasonable to use the stack area as a staging area for the register
6015       // load.
6016 
6017       // Skip this for small aggregates, as we will use the same slot for a
6018       // right-justified copy, below.
6019       if (Size >= 8)
6020         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6021                                                           CallSeqStart,
6022                                                           Flags, DAG, dl);
6023 
6024       // When a register is available, pass a small aggregate right-justified.
6025       if (Size < 8 && GPR_idx != NumGPRs) {
6026         // The easiest way to get this right-justified in a register
6027         // is to copy the structure into the rightmost portion of a
6028         // local variable slot, then load the whole slot into the
6029         // register.
6030         // FIXME: The memcpy seems to produce pretty awful code for
6031         // small aggregates, particularly for packed ones.
6032         // FIXME: It would be preferable to use the slot in the
6033         // parameter save area instead of a new local variable.
6034         SDValue AddPtr = PtrOff;
6035         if (!isLittleEndian) {
6036           SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6037           AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6038         }
6039         Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6040                                                           CallSeqStart,
6041                                                           Flags, DAG, dl);
6042 
6043         // Load the slot into the register.
6044         SDValue Load =
6045             DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6046         MemOpChains.push_back(Load.getValue(1));
6047         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6048 
6049         // Done with this argument.
6050         ArgOffset += PtrByteSize;
6051         continue;
6052       }
6053 
6054       // For aggregates larger than PtrByteSize, copy the pieces of the
6055       // object that fit into registers from the parameter save area.
6056       for (unsigned j=0; j<Size; j+=PtrByteSize) {
6057         SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6058         SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6059         if (GPR_idx != NumGPRs) {
6060           SDValue Load =
6061               DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6062           MemOpChains.push_back(Load.getValue(1));
6063           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6064           ArgOffset += PtrByteSize;
6065         } else {
6066           ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6067           break;
6068         }
6069       }
6070       continue;
6071     }
6072 
6073     switch (Arg.getSimpleValueType().SimpleTy) {
6074     default: llvm_unreachable("Unexpected ValueType for argument!");
6075     case MVT::i1:
6076     case MVT::i32:
6077     case MVT::i64:
6078       if (Flags.isNest()) {
6079         // The 'nest' parameter, if any, is passed in R11.
6080         RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6081         break;
6082       }
6083 
6084       // These can be scalar arguments or elements of an integer array type
6085       // passed directly.  Clang may use those instead of "byval" aggregate
6086       // types to avoid forcing arguments to memory unnecessarily.
6087       if (GPR_idx != NumGPRs) {
6088         RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6089       } else {
6090         if (IsFastCall)
6091           ComputePtrOff();
6092 
6093         assert(HasParameterArea &&
6094                "Parameter area must exist to pass an argument in memory.");
6095         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6096                          true, CFlags.IsTailCall, false, MemOpChains,
6097                          TailCallArguments, dl);
6098         if (IsFastCall)
6099           ArgOffset += PtrByteSize;
6100       }
6101       if (!IsFastCall)
6102         ArgOffset += PtrByteSize;
6103       break;
6104     case MVT::f32:
6105     case MVT::f64: {
6106       // These can be scalar arguments or elements of a float array type
6107       // passed directly.  The latter are used to implement ELFv2 homogenous
6108       // float aggregates.
6109 
6110       // Named arguments go into FPRs first, and once they overflow, the
6111       // remaining arguments go into GPRs and then the parameter save area.
6112       // Unnamed arguments for vararg functions always go to GPRs and
6113       // then the parameter save area.  For now, put all arguments to vararg
6114       // routines always in both locations (FPR *and* GPR or stack slot).
6115       bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6116       bool NeededLoad = false;
6117 
6118       // First load the argument into the next available FPR.
6119       if (FPR_idx != NumFPRs)
6120         RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6121 
6122       // Next, load the argument into GPR or stack slot if needed.
6123       if (!NeedGPROrStack)
6124         ;
6125       else if (GPR_idx != NumGPRs && !IsFastCall) {
6126         // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6127         // once we support fp <-> gpr moves.
6128 
6129         // In the non-vararg case, this can only ever happen in the
6130         // presence of f32 array types, since otherwise we never run
6131         // out of FPRs before running out of GPRs.
6132         SDValue ArgVal;
6133 
6134         // Double values are always passed in a single GPR.
6135         if (Arg.getValueType() != MVT::f32) {
6136           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6137 
6138         // Non-array float values are extended and passed in a GPR.
6139         } else if (!Flags.isInConsecutiveRegs()) {
6140           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6141           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6142 
6143         // If we have an array of floats, we collect every odd element
6144         // together with its predecessor into one GPR.
6145         } else if (ArgOffset % PtrByteSize != 0) {
6146           SDValue Lo, Hi;
6147           Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6148           Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6149           if (!isLittleEndian)
6150             std::swap(Lo, Hi);
6151           ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6152 
6153         // The final element, if even, goes into the first half of a GPR.
6154         } else if (Flags.isInConsecutiveRegsLast()) {
6155           ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6156           ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6157           if (!isLittleEndian)
6158             ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6159                                  DAG.getConstant(32, dl, MVT::i32));
6160 
6161         // Non-final even elements are skipped; they will be handled
6162         // together the with subsequent argument on the next go-around.
6163         } else
6164           ArgVal = SDValue();
6165 
6166         if (ArgVal.getNode())
6167           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6168       } else {
6169         if (IsFastCall)
6170           ComputePtrOff();
6171 
6172         // Single-precision floating-point values are mapped to the
6173         // second (rightmost) word of the stack doubleword.
6174         if (Arg.getValueType() == MVT::f32 &&
6175             !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6176           SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6177           PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6178         }
6179 
6180         assert(HasParameterArea &&
6181                "Parameter area must exist to pass an argument in memory.");
6182         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6183                          true, CFlags.IsTailCall, false, MemOpChains,
6184                          TailCallArguments, dl);
6185 
6186         NeededLoad = true;
6187       }
6188       // When passing an array of floats, the array occupies consecutive
6189       // space in the argument area; only round up to the next doubleword
6190       // at the end of the array.  Otherwise, each float takes 8 bytes.
6191       if (!IsFastCall || NeededLoad) {
6192         ArgOffset += (Arg.getValueType() == MVT::f32 &&
6193                       Flags.isInConsecutiveRegs()) ? 4 : 8;
6194         if (Flags.isInConsecutiveRegsLast())
6195           ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6196       }
6197       break;
6198     }
6199     case MVT::v4f32:
6200     case MVT::v4i32:
6201     case MVT::v8i16:
6202     case MVT::v16i8:
6203     case MVT::v2f64:
6204     case MVT::v2i64:
6205     case MVT::v1i128:
6206     case MVT::f128:
6207       // These can be scalar arguments or elements of a vector array type
6208       // passed directly.  The latter are used to implement ELFv2 homogenous
6209       // vector aggregates.
6210 
6211       // For a varargs call, named arguments go into VRs or on the stack as
6212       // usual; unnamed arguments always go to the stack or the corresponding
6213       // GPRs when within range.  For now, we always put the value in both
6214       // locations (or even all three).
6215       if (CFlags.IsVarArg) {
6216         assert(HasParameterArea &&
6217                "Parameter area must exist if we have a varargs call.");
6218         // We could elide this store in the case where the object fits
6219         // entirely in R registers.  Maybe later.
6220         SDValue Store =
6221             DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6222         MemOpChains.push_back(Store);
6223         if (VR_idx != NumVRs) {
6224           SDValue Load =
6225               DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6226           MemOpChains.push_back(Load.getValue(1));
6227           RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6228         }
6229         ArgOffset += 16;
6230         for (unsigned i=0; i<16; i+=PtrByteSize) {
6231           if (GPR_idx == NumGPRs)
6232             break;
6233           SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6234                                    DAG.getConstant(i, dl, PtrVT));
6235           SDValue Load =
6236               DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6237           MemOpChains.push_back(Load.getValue(1));
6238           RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6239         }
6240         break;
6241       }
6242 
6243       // Non-varargs Altivec params go into VRs or on the stack.
6244       if (VR_idx != NumVRs) {
6245         RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6246       } else {
6247         if (IsFastCall)
6248           ComputePtrOff();
6249 
6250         assert(HasParameterArea &&
6251                "Parameter area must exist to pass an argument in memory.");
6252         LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6253                          true, CFlags.IsTailCall, true, MemOpChains,
6254                          TailCallArguments, dl);
6255         if (IsFastCall)
6256           ArgOffset += 16;
6257       }
6258 
6259       if (!IsFastCall)
6260         ArgOffset += 16;
6261       break;
6262     }
6263   }
6264 
6265   assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6266          "mismatch in size of parameter area");
6267   (void)NumBytesActuallyUsed;
6268 
6269   if (!MemOpChains.empty())
6270     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6271 
6272   // Check if this is an indirect call (MTCTR/BCTRL).
6273   // See prepareDescriptorIndirectCall and buildCallOperands for more
6274   // information about calls through function pointers in the 64-bit SVR4 ABI.
6275   if (CFlags.IsIndirect) {
6276     // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6277     // caller in the TOC save area.
6278     if (isTOCSaveRestoreRequired(Subtarget)) {
6279       assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6280       // Load r2 into a virtual register and store it to the TOC save area.
6281       setUsesTOCBasePtr(DAG);
6282       SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6283       // TOC save area offset.
6284       unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6285       SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6286       SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6287       Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6288                            MachinePointerInfo::getStack(
6289                                DAG.getMachineFunction(), TOCSaveOffset));
6290     }
6291     // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6292     // This does not mean the MTCTR instruction must use R12; it's easier
6293     // to model this as an extra parameter, so do that.
6294     if (isELFv2ABI && !CFlags.IsPatchPoint)
6295       RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6296   }
6297 
6298   // Build a sequence of copy-to-reg nodes chained together with token chain
6299   // and flag operands which copy the outgoing args into the appropriate regs.
6300   SDValue InFlag;
6301   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6302     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6303                              RegsToPass[i].second, InFlag);
6304     InFlag = Chain.getValue(1);
6305   }
6306 
6307   if (CFlags.IsTailCall && !IsSibCall)
6308     PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6309                     TailCallArguments);
6310 
6311   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6312                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
6313 }
6314 
6315 static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6316                    CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6317                    CCState &State) {
6318 
6319   const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6320       State.getMachineFunction().getSubtarget());
6321   const bool IsPPC64 = Subtarget.isPPC64();
6322   const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6323   const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6324 
6325   if (ValVT.isVector() && !State.getMachineFunction()
6326                                .getTarget()
6327                                .Options.EnableAIXExtendedAltivecABI)
6328     report_fatal_error("the default Altivec AIX ABI is not yet supported");
6329 
6330   if (ValVT == MVT::f128)
6331     report_fatal_error("f128 is unimplemented on AIX.");
6332 
6333   if (ArgFlags.isNest())
6334     report_fatal_error("Nest arguments are unimplemented.");
6335 
6336   static const MCPhysReg GPR_32[] = {// 32-bit registers.
6337                                      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6338                                      PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6339   static const MCPhysReg GPR_64[] = {// 64-bit registers.
6340                                      PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6341                                      PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6342 
6343   static const MCPhysReg VR[] = {// Vector registers.
6344                                  PPC::V2,  PPC::V3,  PPC::V4,  PPC::V5,
6345                                  PPC::V6,  PPC::V7,  PPC::V8,  PPC::V9,
6346                                  PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6347 
6348   if (ArgFlags.isByVal()) {
6349     if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6350       report_fatal_error("Pass-by-value arguments with alignment greater than "
6351                          "register width are not supported.");
6352 
6353     const unsigned ByValSize = ArgFlags.getByValSize();
6354 
6355     // An empty aggregate parameter takes up no storage and no registers,
6356     // but needs a MemLoc for a stack slot for the formal arguments side.
6357     if (ByValSize == 0) {
6358       State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6359                                        State.getNextStackOffset(), RegVT,
6360                                        LocInfo));
6361       return false;
6362     }
6363 
6364     const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6365     unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6366     for (const unsigned E = Offset + StackSize; Offset < E;
6367          Offset += PtrAlign.value()) {
6368       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6369         State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6370       else {
6371         State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
6372                                          Offset, MVT::INVALID_SIMPLE_VALUE_TYPE,
6373                                          LocInfo));
6374         break;
6375       }
6376     }
6377     return false;
6378   }
6379 
6380   // Arguments always reserve parameter save area.
6381   switch (ValVT.SimpleTy) {
6382   default:
6383     report_fatal_error("Unhandled value type for argument.");
6384   case MVT::i64:
6385     // i64 arguments should have been split to i32 for PPC32.
6386     assert(IsPPC64 && "PPC32 should have split i64 values.");
6387     LLVM_FALLTHROUGH;
6388   case MVT::i1:
6389   case MVT::i32: {
6390     const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6391     // AIX integer arguments are always passed in register width.
6392     if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6393       LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6394                                   : CCValAssign::LocInfo::ZExt;
6395     if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6396       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6397     else
6398       State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6399 
6400     return false;
6401   }
6402   case MVT::f32:
6403   case MVT::f64: {
6404     // Parameter save area (PSA) is reserved even if the float passes in fpr.
6405     const unsigned StoreSize = LocVT.getStoreSize();
6406     // Floats are always 4-byte aligned in the PSA on AIX.
6407     // This includes f64 in 64-bit mode for ABI compatibility.
6408     const unsigned Offset =
6409         State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6410     unsigned FReg = State.AllocateReg(FPR);
6411     if (FReg)
6412       State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6413 
6414     // Reserve and initialize GPRs or initialize the PSA as required.
6415     for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6416       if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6417         assert(FReg && "An FPR should be available when a GPR is reserved.");
6418         if (State.isVarArg()) {
6419           // Successfully reserved GPRs are only initialized for vararg calls.
6420           // Custom handling is required for:
6421           //   f64 in PPC32 needs to be split into 2 GPRs.
6422           //   f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6423           State.addLoc(
6424               CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6425         }
6426       } else {
6427         // If there are insufficient GPRs, the PSA needs to be initialized.
6428         // Initialization occurs even if an FPR was initialized for
6429         // compatibility with the AIX XL compiler. The full memory for the
6430         // argument will be initialized even if a prior word is saved in GPR.
6431         // A custom memLoc is used when the argument also passes in FPR so
6432         // that the callee handling can skip over it easily.
6433         State.addLoc(
6434             FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6435                                              LocInfo)
6436                  : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6437         break;
6438       }
6439     }
6440 
6441     return false;
6442   }
6443   case MVT::v4f32:
6444   case MVT::v4i32:
6445   case MVT::v8i16:
6446   case MVT::v16i8:
6447   case MVT::v2i64:
6448   case MVT::v2f64:
6449   case MVT::v1i128: {
6450     if (State.isVarArg())
6451       report_fatal_error(
6452           "variadic arguments for vector types are unimplemented for AIX");
6453 
6454     if (unsigned VReg = State.AllocateReg(VR))
6455       State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6456     else {
6457       report_fatal_error(
6458           "passing vector parameters to the stack is unimplemented for AIX");
6459     }
6460     return false;
6461   }
6462   }
6463   return true;
6464 }
6465 
6466 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6467                                                     bool IsPPC64) {
6468   assert((IsPPC64 || SVT != MVT::i64) &&
6469          "i64 should have been split for 32-bit codegen.");
6470 
6471   switch (SVT) {
6472   default:
6473     report_fatal_error("Unexpected value type for formal argument");
6474   case MVT::i1:
6475   case MVT::i32:
6476   case MVT::i64:
6477     return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6478   case MVT::f32:
6479     return &PPC::F4RCRegClass;
6480   case MVT::f64:
6481     return &PPC::F8RCRegClass;
6482   case MVT::v4f32:
6483   case MVT::v4i32:
6484   case MVT::v8i16:
6485   case MVT::v16i8:
6486   case MVT::v2i64:
6487   case MVT::v2f64:
6488   case MVT::v1i128:
6489     return &PPC::VRRCRegClass;
6490   }
6491 }
6492 
6493 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
6494                                         SelectionDAG &DAG, SDValue ArgValue,
6495                                         MVT LocVT, const SDLoc &dl) {
6496   assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6497   assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6498 
6499   if (Flags.isSExt())
6500     ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6501                            DAG.getValueType(ValVT));
6502   else if (Flags.isZExt())
6503     ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6504                            DAG.getValueType(ValVT));
6505 
6506   return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6507 }
6508 
6509 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6510   const unsigned LASize = FL->getLinkageSize();
6511 
6512   if (PPC::GPRCRegClass.contains(Reg)) {
6513     assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6514            "Reg must be a valid argument register!");
6515     return LASize + 4 * (Reg - PPC::R3);
6516   }
6517 
6518   if (PPC::G8RCRegClass.contains(Reg)) {
6519     assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6520            "Reg must be a valid argument register!");
6521     return LASize + 8 * (Reg - PPC::X3);
6522   }
6523 
6524   llvm_unreachable("Only general purpose registers expected.");
6525 }
6526 
6527 //   AIX ABI Stack Frame Layout:
6528 //
6529 //   Low Memory +--------------------------------------------+
6530 //   SP   +---> | Back chain                                 | ---+
6531 //        |     +--------------------------------------------+    |
6532 //        |     | Saved Condition Register                   |    |
6533 //        |     +--------------------------------------------+    |
6534 //        |     | Saved Linkage Register                     |    |
6535 //        |     +--------------------------------------------+    | Linkage Area
6536 //        |     | Reserved for compilers                     |    |
6537 //        |     +--------------------------------------------+    |
6538 //        |     | Reserved for binders                       |    |
6539 //        |     +--------------------------------------------+    |
6540 //        |     | Saved TOC pointer                          | ---+
6541 //        |     +--------------------------------------------+
6542 //        |     | Parameter save area                        |
6543 //        |     +--------------------------------------------+
6544 //        |     | Alloca space                               |
6545 //        |     +--------------------------------------------+
6546 //        |     | Local variable space                       |
6547 //        |     +--------------------------------------------+
6548 //        |     | Float/int conversion temporary             |
6549 //        |     +--------------------------------------------+
6550 //        |     | Save area for AltiVec registers            |
6551 //        |     +--------------------------------------------+
6552 //        |     | AltiVec alignment padding                  |
6553 //        |     +--------------------------------------------+
6554 //        |     | Save area for VRSAVE register              |
6555 //        |     +--------------------------------------------+
6556 //        |     | Save area for General Purpose registers    |
6557 //        |     +--------------------------------------------+
6558 //        |     | Save area for Floating Point registers     |
6559 //        |     +--------------------------------------------+
6560 //        +---- | Back chain                                 |
6561 // High Memory  +--------------------------------------------+
6562 //
6563 //  Specifications:
6564 //  AIX 7.2 Assembler Language Reference
6565 //  Subroutine linkage convention
6566 
6567 SDValue PPCTargetLowering::LowerFormalArguments_AIX(
6568     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
6569     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6570     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
6571 
6572   assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
6573           CallConv == CallingConv::Fast) &&
6574          "Unexpected calling convention!");
6575 
6576   if (getTargetMachine().Options.GuaranteedTailCallOpt)
6577     report_fatal_error("Tail call support is unimplemented on AIX.");
6578 
6579   if (useSoftFloat())
6580     report_fatal_error("Soft float support is unimplemented on AIX.");
6581 
6582   const PPCSubtarget &Subtarget =
6583       static_cast<const PPCSubtarget &>(DAG.getSubtarget());
6584 
6585   const bool IsPPC64 = Subtarget.isPPC64();
6586   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6587 
6588   // Assign locations to all of the incoming arguments.
6589   SmallVector<CCValAssign, 16> ArgLocs;
6590   MachineFunction &MF = DAG.getMachineFunction();
6591   MachineFrameInfo &MFI = MF.getFrameInfo();
6592   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
6593   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
6594 
6595   const EVT PtrVT = getPointerTy(MF.getDataLayout());
6596   // Reserve space for the linkage area on the stack.
6597   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6598   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6599   CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
6600 
6601   SmallVector<SDValue, 8> MemOps;
6602 
6603   for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
6604     CCValAssign &VA = ArgLocs[I++];
6605     MVT LocVT = VA.getLocVT();
6606     ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
6607     if (VA.isMemLoc() && VA.getValVT().isVector())
6608       report_fatal_error(
6609           "passing vector parameters to the stack is unimplemented for AIX");
6610 
6611     // For compatibility with the AIX XL compiler, the float args in the
6612     // parameter save area are initialized even if the argument is available
6613     // in register.  The caller is required to initialize both the register
6614     // and memory, however, the callee can choose to expect it in either.
6615     // The memloc is dismissed here because the argument is retrieved from
6616     // the register.
6617     if (VA.isMemLoc() && VA.needsCustom())
6618       continue;
6619 
6620     if (VA.isRegLoc()) {
6621       if (VA.getValVT().isScalarInteger())
6622         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6623       else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector())
6624         FuncInfo->appendParameterType(VA.getValVT().SimpleTy == MVT::f32
6625                                           ? PPCFunctionInfo::ShortFloatPoint
6626                                           : PPCFunctionInfo::LongFloatPoint);
6627     }
6628 
6629     if (Flags.isByVal() && VA.isMemLoc()) {
6630       const unsigned Size =
6631           alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
6632                   PtrByteSize);
6633       const int FI = MF.getFrameInfo().CreateFixedObject(
6634           Size, VA.getLocMemOffset(), /* IsImmutable */ false,
6635           /* IsAliased */ true);
6636       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6637       InVals.push_back(FIN);
6638 
6639       continue;
6640     }
6641 
6642     if (Flags.isByVal()) {
6643       assert(VA.isRegLoc() && "MemLocs should already be handled.");
6644 
6645       const MCPhysReg ArgReg = VA.getLocReg();
6646       const PPCFrameLowering *FL = Subtarget.getFrameLowering();
6647 
6648       if (Flags.getNonZeroByValAlign() > PtrByteSize)
6649         report_fatal_error("Over aligned byvals not supported yet.");
6650 
6651       const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
6652       const int FI = MF.getFrameInfo().CreateFixedObject(
6653           StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
6654           /* IsAliased */ true);
6655       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6656       InVals.push_back(FIN);
6657 
6658       // Add live ins for all the RegLocs for the same ByVal.
6659       const TargetRegisterClass *RegClass =
6660           IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6661 
6662       auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
6663                                                unsigned Offset) {
6664         const unsigned VReg = MF.addLiveIn(PhysReg, RegClass);
6665         // Since the callers side has left justified the aggregate in the
6666         // register, we can simply store the entire register into the stack
6667         // slot.
6668         SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6669         // The store to the fixedstack object is needed becuase accessing a
6670         // field of the ByVal will use a gep and load. Ideally we will optimize
6671         // to extracting the value from the register directly, and elide the
6672         // stores when the arguments address is not taken, but that will need to
6673         // be future work.
6674         SDValue Store = DAG.getStore(
6675             CopyFrom.getValue(1), dl, CopyFrom,
6676             DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),
6677             MachinePointerInfo::getFixedStack(MF, FI, Offset));
6678 
6679         MemOps.push_back(Store);
6680       };
6681 
6682       unsigned Offset = 0;
6683       HandleRegLoc(VA.getLocReg(), Offset);
6684       Offset += PtrByteSize;
6685       for (; Offset != StackSize && ArgLocs[I].isRegLoc();
6686            Offset += PtrByteSize) {
6687         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6688                "RegLocs should be for ByVal argument.");
6689 
6690         const CCValAssign RL = ArgLocs[I++];
6691         HandleRegLoc(RL.getLocReg(), Offset);
6692         FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
6693       }
6694 
6695       if (Offset != StackSize) {
6696         assert(ArgLocs[I].getValNo() == VA.getValNo() &&
6697                "Expected MemLoc for remaining bytes.");
6698         assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
6699         // Consume the MemLoc.The InVal has already been emitted, so nothing
6700         // more needs to be done.
6701         ++I;
6702       }
6703 
6704       continue;
6705     }
6706 
6707     EVT ValVT = VA.getValVT();
6708     if (VA.isRegLoc() && !VA.needsCustom()) {
6709       MVT::SimpleValueType SVT = ValVT.getSimpleVT().SimpleTy;
6710       unsigned VReg =
6711           MF.addLiveIn(VA.getLocReg(), getRegClassForSVT(SVT, IsPPC64));
6712       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
6713       if (ValVT.isScalarInteger() &&
6714           (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
6715         ArgValue =
6716             truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
6717       }
6718       InVals.push_back(ArgValue);
6719       continue;
6720     }
6721     if (VA.isMemLoc()) {
6722       const unsigned LocSize = LocVT.getStoreSize();
6723       const unsigned ValSize = ValVT.getStoreSize();
6724       assert((ValSize <= LocSize) &&
6725              "Object size is larger than size of MemLoc");
6726       int CurArgOffset = VA.getLocMemOffset();
6727       // Objects are right-justified because AIX is big-endian.
6728       if (LocSize > ValSize)
6729         CurArgOffset += LocSize - ValSize;
6730       // Potential tail calls could cause overwriting of argument stack slots.
6731       const bool IsImmutable =
6732           !(getTargetMachine().Options.GuaranteedTailCallOpt &&
6733             (CallConv == CallingConv::Fast));
6734       int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
6735       SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
6736       SDValue ArgValue =
6737           DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
6738       InVals.push_back(ArgValue);
6739       continue;
6740     }
6741   }
6742 
6743   // On AIX a minimum of 8 words is saved to the parameter save area.
6744   const unsigned MinParameterSaveArea = 8 * PtrByteSize;
6745   // Area that is at least reserved in the caller of this function.
6746   unsigned CallerReservedArea =
6747       std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
6748 
6749   // Set the size that is at least reserved in caller of this function. Tail
6750   // call optimized function's reserved stack space needs to be aligned so
6751   // that taking the difference between two stack areas will result in an
6752   // aligned stack.
6753   CallerReservedArea =
6754       EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
6755   FuncInfo->setMinReservedArea(CallerReservedArea);
6756 
6757   if (isVarArg) {
6758     FuncInfo->setVarArgsFrameIndex(
6759         MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
6760     SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
6761 
6762     static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6763                                        PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6764 
6765     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6766                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6767     const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
6768 
6769     // The fixed integer arguments of a variadic function are stored to the
6770     // VarArgsFrameIndex on the stack so that they may be loaded by
6771     // dereferencing the result of va_next.
6772     for (unsigned GPRIndex =
6773              (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
6774          GPRIndex < NumGPArgRegs; ++GPRIndex) {
6775 
6776       const unsigned VReg =
6777           IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
6778                   : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
6779 
6780       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
6781       SDValue Store =
6782           DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
6783       MemOps.push_back(Store);
6784       // Increment the address for the next argument to store.
6785       SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
6786       FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
6787     }
6788   }
6789 
6790   if (!MemOps.empty())
6791     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
6792 
6793   return Chain;
6794 }
6795 
6796 SDValue PPCTargetLowering::LowerCall_AIX(
6797     SDValue Chain, SDValue Callee, CallFlags CFlags,
6798     const SmallVectorImpl<ISD::OutputArg> &Outs,
6799     const SmallVectorImpl<SDValue> &OutVals,
6800     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6801     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6802     const CallBase *CB) const {
6803   // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
6804   // AIX ABI stack frame layout.
6805 
6806   assert((CFlags.CallConv == CallingConv::C ||
6807           CFlags.CallConv == CallingConv::Cold ||
6808           CFlags.CallConv == CallingConv::Fast) &&
6809          "Unexpected calling convention!");
6810 
6811   if (CFlags.IsPatchPoint)
6812     report_fatal_error("This call type is unimplemented on AIX.");
6813 
6814   const PPCSubtarget& Subtarget =
6815       static_cast<const PPCSubtarget&>(DAG.getSubtarget());
6816 
6817   MachineFunction &MF = DAG.getMachineFunction();
6818   SmallVector<CCValAssign, 16> ArgLocs;
6819   CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
6820                  *DAG.getContext());
6821 
6822   // Reserve space for the linkage save area (LSA) on the stack.
6823   // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
6824   //   [SP][CR][LR][2 x reserved][TOC].
6825   // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
6826   const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6827   const bool IsPPC64 = Subtarget.isPPC64();
6828   const EVT PtrVT = getPointerTy(DAG.getDataLayout());
6829   const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
6830   CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
6831   CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
6832 
6833   // The prolog code of the callee may store up to 8 GPR argument registers to
6834   // the stack, allowing va_start to index over them in memory if the callee
6835   // is variadic.
6836   // Because we cannot tell if this is needed on the caller side, we have to
6837   // conservatively assume that it is needed.  As such, make sure we have at
6838   // least enough stack space for the caller to store the 8 GPRs.
6839   const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
6840   const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
6841                                      CCInfo.getNextStackOffset());
6842 
6843   // Adjust the stack pointer for the new arguments...
6844   // These operations are automatically eliminated by the prolog/epilog pass.
6845   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6846   SDValue CallSeqStart = Chain;
6847 
6848   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6849   SmallVector<SDValue, 8> MemOpChains;
6850 
6851   // Set up a copy of the stack pointer for loading and storing any
6852   // arguments that may not fit in the registers available for argument
6853   // passing.
6854   const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
6855                                    : DAG.getRegister(PPC::R1, MVT::i32);
6856 
6857   for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
6858     const unsigned ValNo = ArgLocs[I].getValNo();
6859     SDValue Arg = OutVals[ValNo];
6860     ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
6861 
6862     if (Flags.isByVal()) {
6863       const unsigned ByValSize = Flags.getByValSize();
6864 
6865       // Nothing to do for zero-sized ByVals on the caller side.
6866       if (!ByValSize) {
6867         ++I;
6868         continue;
6869       }
6870 
6871       auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
6872         return DAG.getExtLoad(
6873             ISD::ZEXTLOAD, dl, PtrVT, Chain,
6874             (LoadOffset != 0)
6875                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6876                 : Arg,
6877             MachinePointerInfo(), VT);
6878       };
6879 
6880       unsigned LoadOffset = 0;
6881 
6882       // Initialize registers, which are fully occupied by the by-val argument.
6883       while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
6884         SDValue Load = GetLoad(PtrVT, LoadOffset);
6885         MemOpChains.push_back(Load.getValue(1));
6886         LoadOffset += PtrByteSize;
6887         const CCValAssign &ByValVA = ArgLocs[I++];
6888         assert(ByValVA.getValNo() == ValNo &&
6889                "Unexpected location for pass-by-value argument.");
6890         RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
6891       }
6892 
6893       if (LoadOffset == ByValSize)
6894         continue;
6895 
6896       // There must be one more loc to handle the remainder.
6897       assert(ArgLocs[I].getValNo() == ValNo &&
6898              "Expected additional location for by-value argument.");
6899 
6900       if (ArgLocs[I].isMemLoc()) {
6901         assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
6902         const CCValAssign &ByValVA = ArgLocs[I++];
6903         ISD::ArgFlagsTy MemcpyFlags = Flags;
6904         // Only memcpy the bytes that don't pass in register.
6905         MemcpyFlags.setByValSize(ByValSize - LoadOffset);
6906         Chain = CallSeqStart = createMemcpyOutsideCallSeq(
6907             (LoadOffset != 0)
6908                 ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
6909                 : Arg,
6910             DAG.getObjectPtrOffset(dl, StackPtr,
6911                                    TypeSize::Fixed(ByValVA.getLocMemOffset())),
6912             CallSeqStart, MemcpyFlags, DAG, dl);
6913         continue;
6914       }
6915 
6916       // Initialize the final register residue.
6917       // Any residue that occupies the final by-val arg register must be
6918       // left-justified on AIX. Loads must be a power-of-2 size and cannot be
6919       // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
6920       // 2 and 1 byte loads.
6921       const unsigned ResidueBytes = ByValSize % PtrByteSize;
6922       assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
6923              "Unexpected register residue for by-value argument.");
6924       SDValue ResidueVal;
6925       for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
6926         const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
6927         const MVT VT =
6928             N == 1 ? MVT::i8
6929                    : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
6930         SDValue Load = GetLoad(VT, LoadOffset);
6931         MemOpChains.push_back(Load.getValue(1));
6932         LoadOffset += N;
6933         Bytes += N;
6934 
6935         // By-val arguments are passed left-justfied in register.
6936         // Every load here needs to be shifted, otherwise a full register load
6937         // should have been used.
6938         assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
6939                "Unexpected load emitted during handling of pass-by-value "
6940                "argument.");
6941         unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
6942         EVT ShiftAmountTy =
6943             getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
6944         SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
6945         SDValue ShiftedLoad =
6946             DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
6947         ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
6948                                               ShiftedLoad)
6949                                 : ShiftedLoad;
6950       }
6951 
6952       const CCValAssign &ByValVA = ArgLocs[I++];
6953       RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
6954       continue;
6955     }
6956 
6957     CCValAssign &VA = ArgLocs[I++];
6958     const MVT LocVT = VA.getLocVT();
6959     const MVT ValVT = VA.getValVT();
6960 
6961     if (VA.isMemLoc() && VA.getValVT().isVector())
6962       report_fatal_error(
6963           "passing vector parameters to the stack is unimplemented for AIX");
6964 
6965     switch (VA.getLocInfo()) {
6966     default:
6967       report_fatal_error("Unexpected argument extension type.");
6968     case CCValAssign::Full:
6969       break;
6970     case CCValAssign::ZExt:
6971       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
6972       break;
6973     case CCValAssign::SExt:
6974       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
6975       break;
6976     }
6977 
6978     if (VA.isRegLoc() && !VA.needsCustom()) {
6979       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6980       continue;
6981     }
6982 
6983     if (VA.isMemLoc()) {
6984       SDValue PtrOff =
6985           DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
6986       PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6987       MemOpChains.push_back(
6988           DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6989 
6990       continue;
6991     }
6992 
6993     // Custom handling is used for GPR initializations for vararg float
6994     // arguments.
6995     assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
6996            ValVT.isFloatingPoint() && LocVT.isInteger() &&
6997            "Unexpected register handling for calling convention.");
6998 
6999     SDValue ArgAsInt =
7000         DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7001 
7002     if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7003       // f32 in 32-bit GPR
7004       // f64 in 64-bit GPR
7005       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7006     else if (Arg.getValueType().getFixedSizeInBits() <
7007              LocVT.getFixedSizeInBits())
7008       // f32 in 64-bit GPR.
7009       RegsToPass.push_back(std::make_pair(
7010           VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7011     else {
7012       // f64 in two 32-bit GPRs
7013       // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7014       assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7015              "Unexpected custom register for argument!");
7016       CCValAssign &GPR1 = VA;
7017       SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7018                                      DAG.getConstant(32, dl, MVT::i8));
7019       RegsToPass.push_back(std::make_pair(
7020           GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7021 
7022       if (I != E) {
7023         // If only 1 GPR was available, there will only be one custom GPR and
7024         // the argument will also pass in memory.
7025         CCValAssign &PeekArg = ArgLocs[I];
7026         if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7027           assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7028           CCValAssign &GPR2 = ArgLocs[I++];
7029           RegsToPass.push_back(std::make_pair(
7030               GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7031         }
7032       }
7033     }
7034   }
7035 
7036   if (!MemOpChains.empty())
7037     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7038 
7039   // For indirect calls, we need to save the TOC base to the stack for
7040   // restoration after the call.
7041   if (CFlags.IsIndirect) {
7042     assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7043     const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7044     const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7045     const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7046     const unsigned TOCSaveOffset =
7047         Subtarget.getFrameLowering()->getTOCSaveOffset();
7048 
7049     setUsesTOCBasePtr(DAG);
7050     SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7051     SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7052     SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7053     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7054     Chain = DAG.getStore(
7055         Val.getValue(1), dl, Val, AddPtr,
7056         MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7057   }
7058 
7059   // Build a sequence of copy-to-reg nodes chained together with token chain
7060   // and flag operands which copy the outgoing args into the appropriate regs.
7061   SDValue InFlag;
7062   for (auto Reg : RegsToPass) {
7063     Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7064     InFlag = Chain.getValue(1);
7065   }
7066 
7067   const int SPDiff = 0;
7068   return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7069                     Callee, SPDiff, NumBytes, Ins, InVals, CB);
7070 }
7071 
7072 bool
7073 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7074                                   MachineFunction &MF, bool isVarArg,
7075                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
7076                                   LLVMContext &Context) const {
7077   SmallVector<CCValAssign, 16> RVLocs;
7078   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7079   return CCInfo.CheckReturn(
7080       Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7081                 ? RetCC_PPC_Cold
7082                 : RetCC_PPC);
7083 }
7084 
7085 SDValue
7086 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7087                                bool isVarArg,
7088                                const SmallVectorImpl<ISD::OutputArg> &Outs,
7089                                const SmallVectorImpl<SDValue> &OutVals,
7090                                const SDLoc &dl, SelectionDAG &DAG) const {
7091   SmallVector<CCValAssign, 16> RVLocs;
7092   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7093                  *DAG.getContext());
7094   CCInfo.AnalyzeReturn(Outs,
7095                        (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7096                            ? RetCC_PPC_Cold
7097                            : RetCC_PPC);
7098 
7099   SDValue Flag;
7100   SmallVector<SDValue, 4> RetOps(1, Chain);
7101 
7102   // Copy the result values into the output registers.
7103   for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7104     CCValAssign &VA = RVLocs[i];
7105     assert(VA.isRegLoc() && "Can only return in registers!");
7106 
7107     SDValue Arg = OutVals[RealResIdx];
7108 
7109     switch (VA.getLocInfo()) {
7110     default: llvm_unreachable("Unknown loc info!");
7111     case CCValAssign::Full: break;
7112     case CCValAssign::AExt:
7113       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7114       break;
7115     case CCValAssign::ZExt:
7116       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7117       break;
7118     case CCValAssign::SExt:
7119       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7120       break;
7121     }
7122     if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7123       bool isLittleEndian = Subtarget.isLittleEndian();
7124       // Legalize ret f64 -> ret 2 x i32.
7125       SDValue SVal =
7126           DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7127                       DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7128       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7129       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7130       SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7131                          DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7132       Flag = Chain.getValue(1);
7133       VA = RVLocs[++i]; // skip ahead to next loc
7134       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7135     } else
7136       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7137     Flag = Chain.getValue(1);
7138     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7139   }
7140 
7141   RetOps[0] = Chain;  // Update chain.
7142 
7143   // Add the flag if we have it.
7144   if (Flag.getNode())
7145     RetOps.push_back(Flag);
7146 
7147   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7148 }
7149 
7150 SDValue
7151 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7152                                                 SelectionDAG &DAG) const {
7153   SDLoc dl(Op);
7154 
7155   // Get the correct type for integers.
7156   EVT IntVT = Op.getValueType();
7157 
7158   // Get the inputs.
7159   SDValue Chain = Op.getOperand(0);
7160   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7161   // Build a DYNAREAOFFSET node.
7162   SDValue Ops[2] = {Chain, FPSIdx};
7163   SDVTList VTs = DAG.getVTList(IntVT);
7164   return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7165 }
7166 
7167 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7168                                              SelectionDAG &DAG) const {
7169   // When we pop the dynamic allocation we need to restore the SP link.
7170   SDLoc dl(Op);
7171 
7172   // Get the correct type for pointers.
7173   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7174 
7175   // Construct the stack pointer operand.
7176   bool isPPC64 = Subtarget.isPPC64();
7177   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7178   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7179 
7180   // Get the operands for the STACKRESTORE.
7181   SDValue Chain = Op.getOperand(0);
7182   SDValue SaveSP = Op.getOperand(1);
7183 
7184   // Load the old link SP.
7185   SDValue LoadLinkSP =
7186       DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7187 
7188   // Restore the stack pointer.
7189   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7190 
7191   // Store the old link SP.
7192   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7193 }
7194 
7195 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7196   MachineFunction &MF = DAG.getMachineFunction();
7197   bool isPPC64 = Subtarget.isPPC64();
7198   EVT PtrVT = getPointerTy(MF.getDataLayout());
7199 
7200   // Get current frame pointer save index.  The users of this index will be
7201   // primarily DYNALLOC instructions.
7202   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7203   int RASI = FI->getReturnAddrSaveIndex();
7204 
7205   // If the frame pointer save index hasn't been defined yet.
7206   if (!RASI) {
7207     // Find out what the fix offset of the frame pointer save area.
7208     int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7209     // Allocate the frame index for frame pointer save area.
7210     RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7211     // Save the result.
7212     FI->setReturnAddrSaveIndex(RASI);
7213   }
7214   return DAG.getFrameIndex(RASI, PtrVT);
7215 }
7216 
7217 SDValue
7218 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7219   MachineFunction &MF = DAG.getMachineFunction();
7220   bool isPPC64 = Subtarget.isPPC64();
7221   EVT PtrVT = getPointerTy(MF.getDataLayout());
7222 
7223   // Get current frame pointer save index.  The users of this index will be
7224   // primarily DYNALLOC instructions.
7225   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7226   int FPSI = FI->getFramePointerSaveIndex();
7227 
7228   // If the frame pointer save index hasn't been defined yet.
7229   if (!FPSI) {
7230     // Find out what the fix offset of the frame pointer save area.
7231     int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7232     // Allocate the frame index for frame pointer save area.
7233     FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7234     // Save the result.
7235     FI->setFramePointerSaveIndex(FPSI);
7236   }
7237   return DAG.getFrameIndex(FPSI, PtrVT);
7238 }
7239 
7240 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7241                                                    SelectionDAG &DAG) const {
7242   MachineFunction &MF = DAG.getMachineFunction();
7243   // Get the inputs.
7244   SDValue Chain = Op.getOperand(0);
7245   SDValue Size  = Op.getOperand(1);
7246   SDLoc dl(Op);
7247 
7248   // Get the correct type for pointers.
7249   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7250   // Negate the size.
7251   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7252                                 DAG.getConstant(0, dl, PtrVT), Size);
7253   // Construct a node for the frame pointer save index.
7254   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7255   SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7256   SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7257   if (hasInlineStackProbe(MF))
7258     return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7259   return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7260 }
7261 
7262 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7263                                                      SelectionDAG &DAG) const {
7264   MachineFunction &MF = DAG.getMachineFunction();
7265 
7266   bool isPPC64 = Subtarget.isPPC64();
7267   EVT PtrVT = getPointerTy(DAG.getDataLayout());
7268 
7269   int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7270   return DAG.getFrameIndex(FI, PtrVT);
7271 }
7272 
7273 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7274                                                SelectionDAG &DAG) const {
7275   SDLoc DL(Op);
7276   return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7277                      DAG.getVTList(MVT::i32, MVT::Other),
7278                      Op.getOperand(0), Op.getOperand(1));
7279 }
7280 
7281 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7282                                                 SelectionDAG &DAG) const {
7283   SDLoc DL(Op);
7284   return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7285                      Op.getOperand(0), Op.getOperand(1));
7286 }
7287 
7288 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7289   if (Op.getValueType().isVector())
7290     return LowerVectorLoad(Op, DAG);
7291 
7292   assert(Op.getValueType() == MVT::i1 &&
7293          "Custom lowering only for i1 loads");
7294 
7295   // First, load 8 bits into 32 bits, then truncate to 1 bit.
7296 
7297   SDLoc dl(Op);
7298   LoadSDNode *LD = cast<LoadSDNode>(Op);
7299 
7300   SDValue Chain = LD->getChain();
7301   SDValue BasePtr = LD->getBasePtr();
7302   MachineMemOperand *MMO = LD->getMemOperand();
7303 
7304   SDValue NewLD =
7305       DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7306                      BasePtr, MVT::i8, MMO);
7307   SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7308 
7309   SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7310   return DAG.getMergeValues(Ops, dl);
7311 }
7312 
7313 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7314   if (Op.getOperand(1).getValueType().isVector())
7315     return LowerVectorStore(Op, DAG);
7316 
7317   assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7318          "Custom lowering only for i1 stores");
7319 
7320   // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7321 
7322   SDLoc dl(Op);
7323   StoreSDNode *ST = cast<StoreSDNode>(Op);
7324 
7325   SDValue Chain = ST->getChain();
7326   SDValue BasePtr = ST->getBasePtr();
7327   SDValue Value = ST->getValue();
7328   MachineMemOperand *MMO = ST->getMemOperand();
7329 
7330   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
7331                       Value);
7332   return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7333 }
7334 
7335 // FIXME: Remove this once the ANDI glue bug is fixed:
7336 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7337   assert(Op.getValueType() == MVT::i1 &&
7338          "Custom lowering only for i1 results");
7339 
7340   SDLoc DL(Op);
7341   return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7342 }
7343 
7344 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7345                                                SelectionDAG &DAG) const {
7346 
7347   // Implements a vector truncate that fits in a vector register as a shuffle.
7348   // We want to legalize vector truncates down to where the source fits in
7349   // a vector register (and target is therefore smaller than vector register
7350   // size).  At that point legalization will try to custom lower the sub-legal
7351   // result and get here - where we can contain the truncate as a single target
7352   // operation.
7353 
7354   // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7355   //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7356   //
7357   // We will implement it for big-endian ordering as this (where x denotes
7358   // undefined):
7359   //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7360   //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7361   //
7362   // The same operation in little-endian ordering will be:
7363   //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7364   //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7365 
7366   EVT TrgVT = Op.getValueType();
7367   assert(TrgVT.isVector() && "Vector type expected.");
7368   unsigned TrgNumElts = TrgVT.getVectorNumElements();
7369   EVT EltVT = TrgVT.getVectorElementType();
7370   if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7371       TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7372       !isPowerOf2_32(EltVT.getSizeInBits()))
7373     return SDValue();
7374 
7375   SDValue N1 = Op.getOperand(0);
7376   EVT SrcVT = N1.getValueType();
7377   unsigned SrcSize = SrcVT.getSizeInBits();
7378   if (SrcSize > 256 ||
7379       !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7380       !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
7381     return SDValue();
7382   if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7383     return SDValue();
7384 
7385   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7386   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7387 
7388   SDLoc DL(Op);
7389   SDValue Op1, Op2;
7390   if (SrcSize == 256) {
7391     EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7392     EVT SplitVT =
7393         N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
7394     unsigned SplitNumElts = SplitVT.getVectorNumElements();
7395     Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7396                       DAG.getConstant(0, DL, VecIdxTy));
7397     Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7398                       DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7399   }
7400   else {
7401     Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7402     Op2 = DAG.getUNDEF(WideVT);
7403   }
7404 
7405   // First list the elements we want to keep.
7406   unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7407   SmallVector<int, 16> ShuffV;
7408   if (Subtarget.isLittleEndian())
7409     for (unsigned i = 0; i < TrgNumElts; ++i)
7410       ShuffV.push_back(i * SizeMult);
7411   else
7412     for (unsigned i = 1; i <= TrgNumElts; ++i)
7413       ShuffV.push_back(i * SizeMult - 1);
7414 
7415   // Populate the remaining elements with undefs.
7416   for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7417     // ShuffV.push_back(i + WideNumElts);
7418     ShuffV.push_back(WideNumElts + 1);
7419 
7420   Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7421   Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7422   return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7423 }
7424 
7425 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7426 /// possible.
7427 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7428   // Not FP, or using SPE? Not a fsel.
7429   if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
7430       !Op.getOperand(2).getValueType().isFloatingPoint() || Subtarget.hasSPE())
7431     return Op;
7432 
7433   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7434 
7435   EVT ResVT = Op.getValueType();
7436   EVT CmpVT = Op.getOperand(0).getValueType();
7437   SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7438   SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
7439   SDLoc dl(Op);
7440   SDNodeFlags Flags = Op.getNode()->getFlags();
7441 
7442   // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7443   // presence of infinities.
7444   if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7445     switch (CC) {
7446     default:
7447       break;
7448     case ISD::SETOGT:
7449     case ISD::SETGT:
7450       return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7451     case ISD::SETOLT:
7452     case ISD::SETLT:
7453       return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7454     }
7455   }
7456 
7457   // We might be able to do better than this under some circumstances, but in
7458   // general, fsel-based lowering of select is a finite-math-only optimization.
7459   // For more information, see section F.3 of the 2.06 ISA specification.
7460   // With ISA 3.0
7461   if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7462       (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7463     return Op;
7464 
7465   // If the RHS of the comparison is a 0.0, we don't need to do the
7466   // subtraction at all.
7467   SDValue Sel1;
7468   if (isFloatingPointZero(RHS))
7469     switch (CC) {
7470     default: break;       // SETUO etc aren't handled by fsel.
7471     case ISD::SETNE:
7472       std::swap(TV, FV);
7473       LLVM_FALLTHROUGH;
7474     case ISD::SETEQ:
7475       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7476         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7477       Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7478       if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
7479         Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7480       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7481                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7482     case ISD::SETULT:
7483     case ISD::SETLT:
7484       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
7485       LLVM_FALLTHROUGH;
7486     case ISD::SETOGE:
7487     case ISD::SETGE:
7488       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7489         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7490       return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7491     case ISD::SETUGT:
7492     case ISD::SETGT:
7493       std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
7494       LLVM_FALLTHROUGH;
7495     case ISD::SETOLE:
7496     case ISD::SETLE:
7497       if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
7498         LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7499       return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7500                          DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7501     }
7502 
7503   SDValue Cmp;
7504   switch (CC) {
7505   default: break;       // SETUO etc aren't handled by fsel.
7506   case ISD::SETNE:
7507     std::swap(TV, FV);
7508     LLVM_FALLTHROUGH;
7509   case ISD::SETEQ:
7510     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7511     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7512       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7513     Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7514     if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
7515       Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7516     return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7517                        DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7518   case ISD::SETULT:
7519   case ISD::SETLT:
7520     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7521     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7522       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7523     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7524   case ISD::SETOGE:
7525   case ISD::SETGE:
7526     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7527     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7528       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7529     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7530   case ISD::SETUGT:
7531   case ISD::SETGT:
7532     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7533     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7534       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7535     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7536   case ISD::SETOLE:
7537   case ISD::SETLE:
7538     Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7539     if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
7540       Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7541     return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7542   }
7543   return Op;
7544 }
7545 
7546 static unsigned getPPCStrictOpcode(unsigned Opc) {
7547   switch (Opc) {
7548   default:
7549     llvm_unreachable("No strict version of this opcode!");
7550   case PPCISD::FCTIDZ:
7551     return PPCISD::STRICT_FCTIDZ;
7552   case PPCISD::FCTIWZ:
7553     return PPCISD::STRICT_FCTIWZ;
7554   case PPCISD::FCTIDUZ:
7555     return PPCISD::STRICT_FCTIDUZ;
7556   case PPCISD::FCTIWUZ:
7557     return PPCISD::STRICT_FCTIWUZ;
7558   case PPCISD::FCFID:
7559     return PPCISD::STRICT_FCFID;
7560   case PPCISD::FCFIDU:
7561     return PPCISD::STRICT_FCFIDU;
7562   case PPCISD::FCFIDS:
7563     return PPCISD::STRICT_FCFIDS;
7564   case PPCISD::FCFIDUS:
7565     return PPCISD::STRICT_FCFIDUS;
7566   }
7567 }
7568 
7569 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
7570                               const PPCSubtarget &Subtarget) {
7571   SDLoc dl(Op);
7572   bool IsStrict = Op->isStrictFPOpcode();
7573   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7574                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7575 
7576   // TODO: Any other flags to propagate?
7577   SDNodeFlags Flags;
7578   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7579 
7580   // For strict nodes, source is the second operand.
7581   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7582   SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
7583   assert(Src.getValueType().isFloatingPoint());
7584   if (Src.getValueType() == MVT::f32) {
7585     if (IsStrict) {
7586       Src =
7587           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
7588                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
7589       Chain = Src.getValue(1);
7590     } else
7591       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
7592   }
7593   SDValue Conv;
7594   unsigned Opc = ISD::DELETED_NODE;
7595   switch (Op.getSimpleValueType().SimpleTy) {
7596   default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7597   case MVT::i32:
7598     Opc = IsSigned ? PPCISD::FCTIWZ
7599                    : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
7600     break;
7601   case MVT::i64:
7602     assert((IsSigned || Subtarget.hasFPCVT()) &&
7603            "i64 FP_TO_UINT is supported only with FPCVT");
7604     Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
7605   }
7606   if (IsStrict) {
7607     Opc = getPPCStrictOpcode(Opc);
7608     Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
7609                        {Chain, Src}, Flags);
7610   } else {
7611     Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
7612   }
7613   return Conv;
7614 }
7615 
7616 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
7617                                                SelectionDAG &DAG,
7618                                                const SDLoc &dl) const {
7619   SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
7620   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7621                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7622   bool IsStrict = Op->isStrictFPOpcode();
7623 
7624   // Convert the FP value to an int value through memory.
7625   bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
7626                   (IsSigned || Subtarget.hasFPCVT());
7627   SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
7628   int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
7629   MachinePointerInfo MPI =
7630       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
7631 
7632   // Emit a store to the stack slot.
7633   SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
7634   Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
7635   if (i32Stack) {
7636     MachineFunction &MF = DAG.getMachineFunction();
7637     Alignment = Align(4);
7638     MachineMemOperand *MMO =
7639         MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
7640     SDValue Ops[] = { Chain, Tmp, FIPtr };
7641     Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
7642               DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
7643   } else
7644     Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
7645 
7646   // Result is a load from the stack slot.  If loading 4 bytes, make sure to
7647   // add in a bias on big endian.
7648   if (Op.getValueType() == MVT::i32 && !i32Stack) {
7649     FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
7650                         DAG.getConstant(4, dl, FIPtr.getValueType()));
7651     MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
7652   }
7653 
7654   RLI.Chain = Chain;
7655   RLI.Ptr = FIPtr;
7656   RLI.MPI = MPI;
7657   RLI.Alignment = Alignment;
7658 }
7659 
7660 /// Custom lowers floating point to integer conversions to use
7661 /// the direct move instructions available in ISA 2.07 to avoid the
7662 /// need for load/store combinations.
7663 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
7664                                                     SelectionDAG &DAG,
7665                                                     const SDLoc &dl) const {
7666   SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
7667   SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
7668   if (Op->isStrictFPOpcode())
7669     return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
7670   else
7671     return Mov;
7672 }
7673 
7674 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
7675                                           const SDLoc &dl) const {
7676   bool IsStrict = Op->isStrictFPOpcode();
7677   bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
7678                   Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
7679   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7680   EVT SrcVT = Src.getValueType();
7681   EVT DstVT = Op.getValueType();
7682 
7683   // FP to INT conversions are legal for f128.
7684   if (SrcVT == MVT::f128)
7685     return Subtarget.hasP9Vector() ? Op : SDValue();
7686 
7687   // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7688   // PPC (the libcall is not available).
7689   if (SrcVT == MVT::ppcf128) {
7690     if (DstVT == MVT::i32) {
7691       // TODO: Conservatively pass only nofpexcept flag here. Need to check and
7692       // set other fast-math flags to FP operations in both strict and
7693       // non-strict cases. (FP_TO_SINT, FSUB)
7694       SDNodeFlags Flags;
7695       Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7696 
7697       if (IsSigned) {
7698         SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7699                                  DAG.getIntPtrConstant(0, dl));
7700         SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
7701                                  DAG.getIntPtrConstant(1, dl));
7702 
7703         // Add the two halves of the long double in round-to-zero mode, and use
7704         // a smaller FP_TO_SINT.
7705         if (IsStrict) {
7706           SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
7707                                     DAG.getVTList(MVT::f64, MVT::Other),
7708                                     {Op.getOperand(0), Lo, Hi}, Flags);
7709           return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7710                              DAG.getVTList(MVT::i32, MVT::Other),
7711                              {Res.getValue(1), Res}, Flags);
7712         } else {
7713           SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
7714           return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
7715         }
7716       } else {
7717         const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
7718         APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
7719         SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
7720         SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
7721         if (IsStrict) {
7722           // Sel = Src < 0x80000000
7723           // FltOfs = select Sel, 0.0, 0x80000000
7724           // IntOfs = select Sel, 0, 0x80000000
7725           // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
7726           SDValue Chain = Op.getOperand(0);
7727           EVT SetCCVT =
7728               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
7729           EVT DstSetCCVT =
7730               getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
7731           SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
7732                                      Chain, true);
7733           Chain = Sel.getValue(1);
7734 
7735           SDValue FltOfs = DAG.getSelect(
7736               dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
7737           Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
7738 
7739           SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
7740                                     DAG.getVTList(SrcVT, MVT::Other),
7741                                     {Chain, Src, FltOfs}, Flags);
7742           Chain = Val.getValue(1);
7743           SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
7744                                      DAG.getVTList(DstVT, MVT::Other),
7745                                      {Chain, Val}, Flags);
7746           Chain = SInt.getValue(1);
7747           SDValue IntOfs = DAG.getSelect(
7748               dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
7749           SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
7750           return DAG.getMergeValues({Result, Chain}, dl);
7751         } else {
7752           // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7753           // FIXME: generated code sucks.
7754           SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
7755           True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
7756           True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
7757           SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
7758           return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
7759         }
7760       }
7761     }
7762 
7763     return SDValue();
7764   }
7765 
7766   if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
7767     return LowerFP_TO_INTDirectMove(Op, DAG, dl);
7768 
7769   ReuseLoadInfo RLI;
7770   LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7771 
7772   return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
7773                      RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
7774 }
7775 
7776 // We're trying to insert a regular store, S, and then a load, L. If the
7777 // incoming value, O, is a load, we might just be able to have our load use the
7778 // address used by O. However, we don't know if anything else will store to
7779 // that address before we can load from it. To prevent this situation, we need
7780 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7781 // the same chain operand as O, we create a token factor from the chain results
7782 // of O and L, and we replace all uses of O's chain result with that token
7783 // factor (see spliceIntoChain below for this last part).
7784 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
7785                                             ReuseLoadInfo &RLI,
7786                                             SelectionDAG &DAG,
7787                                             ISD::LoadExtType ET) const {
7788   // Conservatively skip reusing for constrained FP nodes.
7789   if (Op->isStrictFPOpcode())
7790     return false;
7791 
7792   SDLoc dl(Op);
7793   bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
7794                        (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
7795   if (ET == ISD::NON_EXTLOAD &&
7796       (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
7797       isOperationLegalOrCustom(Op.getOpcode(),
7798                                Op.getOperand(0).getValueType())) {
7799 
7800     LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
7801     return true;
7802   }
7803 
7804   LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
7805   if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
7806       LD->isNonTemporal())
7807     return false;
7808   if (LD->getMemoryVT() != MemVT)
7809     return false;
7810 
7811   // If the result of the load is an illegal type, then we can't build a
7812   // valid chain for reuse since the legalised loads and token factor node that
7813   // ties the legalised loads together uses a different output chain then the
7814   // illegal load.
7815   if (!isTypeLegal(LD->getValueType(0)))
7816     return false;
7817 
7818   RLI.Ptr = LD->getBasePtr();
7819   if (LD->isIndexed() && !LD->getOffset().isUndef()) {
7820     assert(LD->getAddressingMode() == ISD::PRE_INC &&
7821            "Non-pre-inc AM on PPC?");
7822     RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
7823                           LD->getOffset());
7824   }
7825 
7826   RLI.Chain = LD->getChain();
7827   RLI.MPI = LD->getPointerInfo();
7828   RLI.IsDereferenceable = LD->isDereferenceable();
7829   RLI.IsInvariant = LD->isInvariant();
7830   RLI.Alignment = LD->getAlign();
7831   RLI.AAInfo = LD->getAAInfo();
7832   RLI.Ranges = LD->getRanges();
7833 
7834   RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
7835   return true;
7836 }
7837 
7838 // Given the head of the old chain, ResChain, insert a token factor containing
7839 // it and NewResChain, and make users of ResChain now be users of that token
7840 // factor.
7841 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7842 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
7843                                         SDValue NewResChain,
7844                                         SelectionDAG &DAG) const {
7845   if (!ResChain)
7846     return;
7847 
7848   SDLoc dl(NewResChain);
7849 
7850   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
7851                            NewResChain, DAG.getUNDEF(MVT::Other));
7852   assert(TF.getNode() != NewResChain.getNode() &&
7853          "A new TF really is required here");
7854 
7855   DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
7856   DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
7857 }
7858 
7859 /// Analyze profitability of direct move
7860 /// prefer float load to int load plus direct move
7861 /// when there is no integer use of int load
7862 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
7863   SDNode *Origin = Op.getOperand(0).getNode();
7864   if (Origin->getOpcode() != ISD::LOAD)
7865     return true;
7866 
7867   // If there is no LXSIBZX/LXSIHZX, like Power8,
7868   // prefer direct move if the memory size is 1 or 2 bytes.
7869   MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
7870   if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
7871     return true;
7872 
7873   for (SDNode::use_iterator UI = Origin->use_begin(),
7874                             UE = Origin->use_end();
7875        UI != UE; ++UI) {
7876 
7877     // Only look at the users of the loaded value.
7878     if (UI.getUse().get().getResNo() != 0)
7879       continue;
7880 
7881     if (UI->getOpcode() != ISD::SINT_TO_FP &&
7882         UI->getOpcode() != ISD::UINT_TO_FP &&
7883         UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
7884         UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
7885       return true;
7886   }
7887 
7888   return false;
7889 }
7890 
7891 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
7892                               const PPCSubtarget &Subtarget,
7893                               SDValue Chain = SDValue()) {
7894   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
7895                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7896   SDLoc dl(Op);
7897 
7898   // TODO: Any other flags to propagate?
7899   SDNodeFlags Flags;
7900   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7901 
7902   // If we have FCFIDS, then use it when converting to single-precision.
7903   // Otherwise, convert to double-precision and then round.
7904   bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
7905   unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
7906                               : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
7907   EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
7908   if (Op->isStrictFPOpcode()) {
7909     if (!Chain)
7910       Chain = Op.getOperand(0);
7911     return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
7912                        DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
7913   } else
7914     return DAG.getNode(ConvOpc, dl, ConvTy, Src);
7915 }
7916 
7917 /// Custom lowers integer to floating point conversions to use
7918 /// the direct move instructions available in ISA 2.07 to avoid the
7919 /// need for load/store combinations.
7920 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
7921                                                     SelectionDAG &DAG,
7922                                                     const SDLoc &dl) const {
7923   assert((Op.getValueType() == MVT::f32 ||
7924           Op.getValueType() == MVT::f64) &&
7925          "Invalid floating point type as target of conversion");
7926   assert(Subtarget.hasFPCVT() &&
7927          "Int to FP conversions with direct moves require FPCVT");
7928   SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7929   bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
7930   bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
7931                 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
7932   unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
7933   SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
7934   return convertIntToFP(Op, Mov, DAG, Subtarget);
7935 }
7936 
7937 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
7938 
7939   EVT VecVT = Vec.getValueType();
7940   assert(VecVT.isVector() && "Expected a vector type.");
7941   assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
7942 
7943   EVT EltVT = VecVT.getVectorElementType();
7944   unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7945   EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7946 
7947   unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
7948   SmallVector<SDValue, 16> Ops(NumConcat);
7949   Ops[0] = Vec;
7950   SDValue UndefVec = DAG.getUNDEF(VecVT);
7951   for (unsigned i = 1; i < NumConcat; ++i)
7952     Ops[i] = UndefVec;
7953 
7954   return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
7955 }
7956 
7957 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
7958                                                 const SDLoc &dl) const {
7959   bool IsStrict = Op->isStrictFPOpcode();
7960   unsigned Opc = Op.getOpcode();
7961   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
7962   assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
7963           Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
7964          "Unexpected conversion type");
7965   assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
7966          "Supports conversions to v2f64/v4f32 only.");
7967 
7968   // TODO: Any other flags to propagate?
7969   SDNodeFlags Flags;
7970   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
7971 
7972   bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
7973   bool FourEltRes = Op.getValueType() == MVT::v4f32;
7974 
7975   SDValue Wide = widenVec(DAG, Src, dl);
7976   EVT WideVT = Wide.getValueType();
7977   unsigned WideNumElts = WideVT.getVectorNumElements();
7978   MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
7979 
7980   SmallVector<int, 16> ShuffV;
7981   for (unsigned i = 0; i < WideNumElts; ++i)
7982     ShuffV.push_back(i + WideNumElts);
7983 
7984   int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
7985   int SaveElts = FourEltRes ? 4 : 2;
7986   if (Subtarget.isLittleEndian())
7987     for (int i = 0; i < SaveElts; i++)
7988       ShuffV[i * Stride] = i;
7989   else
7990     for (int i = 1; i <= SaveElts; i++)
7991       ShuffV[i * Stride - 1] = i - 1;
7992 
7993   SDValue ShuffleSrc2 =
7994       SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
7995   SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
7996 
7997   SDValue Extend;
7998   if (SignedConv) {
7999     Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8000     EVT ExtVT = Src.getValueType();
8001     if (Subtarget.hasP9Altivec())
8002       ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8003                                IntermediateVT.getVectorNumElements());
8004 
8005     Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8006                          DAG.getValueType(ExtVT));
8007   } else
8008     Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8009 
8010   if (IsStrict)
8011     return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8012                        {Op.getOperand(0), Extend}, Flags);
8013 
8014   return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8015 }
8016 
8017 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8018                                           SelectionDAG &DAG) const {
8019   SDLoc dl(Op);
8020   bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8021                   Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8022   bool IsStrict = Op->isStrictFPOpcode();
8023   SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8024   SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8025 
8026   // TODO: Any other flags to propagate?
8027   SDNodeFlags Flags;
8028   Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8029 
8030   EVT InVT = Src.getValueType();
8031   EVT OutVT = Op.getValueType();
8032   if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8033       isOperationCustom(Op.getOpcode(), InVT))
8034     return LowerINT_TO_FPVector(Op, DAG, dl);
8035 
8036   // Conversions to f128 are legal.
8037   if (Op.getValueType() == MVT::f128)
8038     return Subtarget.hasP9Vector() ? Op : SDValue();
8039 
8040   // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8041   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8042     return SDValue();
8043 
8044   if (Src.getValueType() == MVT::i1) {
8045     SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8046                               DAG.getConstantFP(1.0, dl, Op.getValueType()),
8047                               DAG.getConstantFP(0.0, dl, Op.getValueType()));
8048     if (IsStrict)
8049       return DAG.getMergeValues({Sel, Chain}, dl);
8050     else
8051       return Sel;
8052   }
8053 
8054   // If we have direct moves, we can do all the conversion, skip the store/load
8055   // however, without FPCVT we can't do most conversions.
8056   if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8057       Subtarget.isPPC64() && Subtarget.hasFPCVT())
8058     return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8059 
8060   assert((IsSigned || Subtarget.hasFPCVT()) &&
8061          "UINT_TO_FP is supported only with FPCVT");
8062 
8063   if (Src.getValueType() == MVT::i64) {
8064     SDValue SINT = Src;
8065     // When converting to single-precision, we actually need to convert
8066     // to double-precision first and then round to single-precision.
8067     // To avoid double-rounding effects during that operation, we have
8068     // to prepare the input operand.  Bits that might be truncated when
8069     // converting to double-precision are replaced by a bit that won't
8070     // be lost at this stage, but is below the single-precision rounding
8071     // position.
8072     //
8073     // However, if -enable-unsafe-fp-math is in effect, accept double
8074     // rounding to avoid the extra overhead.
8075     if (Op.getValueType() == MVT::f32 &&
8076         !Subtarget.hasFPCVT() &&
8077         !DAG.getTarget().Options.UnsafeFPMath) {
8078 
8079       // Twiddle input to make sure the low 11 bits are zero.  (If this
8080       // is the case, we are guaranteed the value will fit into the 53 bit
8081       // mantissa of an IEEE double-precision value without rounding.)
8082       // If any of those low 11 bits were not zero originally, make sure
8083       // bit 12 (value 2048) is set instead, so that the final rounding
8084       // to single-precision gets the correct result.
8085       SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8086                                   SINT, DAG.getConstant(2047, dl, MVT::i64));
8087       Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8088                           Round, DAG.getConstant(2047, dl, MVT::i64));
8089       Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8090       Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8091                           Round, DAG.getConstant(-2048, dl, MVT::i64));
8092 
8093       // However, we cannot use that value unconditionally: if the magnitude
8094       // of the input value is small, the bit-twiddling we did above might
8095       // end up visibly changing the output.  Fortunately, in that case, we
8096       // don't need to twiddle bits since the original input will convert
8097       // exactly to double-precision floating-point already.  Therefore,
8098       // construct a conditional to use the original value if the top 11
8099       // bits are all sign-bit copies, and use the rounded value computed
8100       // above otherwise.
8101       SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8102                                  SINT, DAG.getConstant(53, dl, MVT::i32));
8103       Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8104                          Cond, DAG.getConstant(1, dl, MVT::i64));
8105       Cond = DAG.getSetCC(
8106           dl,
8107           getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8108           Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8109 
8110       SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8111     }
8112 
8113     ReuseLoadInfo RLI;
8114     SDValue Bits;
8115 
8116     MachineFunction &MF = DAG.getMachineFunction();
8117     if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8118       Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8119                          RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8120       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8121     } else if (Subtarget.hasLFIWAX() &&
8122                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8123       MachineMemOperand *MMO =
8124         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8125                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8126       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8127       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8128                                      DAG.getVTList(MVT::f64, MVT::Other),
8129                                      Ops, MVT::i32, MMO);
8130       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8131     } else if (Subtarget.hasFPCVT() &&
8132                canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8133       MachineMemOperand *MMO =
8134         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8135                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8136       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8137       Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8138                                      DAG.getVTList(MVT::f64, MVT::Other),
8139                                      Ops, MVT::i32, MMO);
8140       spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8141     } else if (((Subtarget.hasLFIWAX() &&
8142                  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8143                 (Subtarget.hasFPCVT() &&
8144                  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8145                SINT.getOperand(0).getValueType() == MVT::i32) {
8146       MachineFrameInfo &MFI = MF.getFrameInfo();
8147       EVT PtrVT = getPointerTy(DAG.getDataLayout());
8148 
8149       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8150       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8151 
8152       SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8153                                    MachinePointerInfo::getFixedStack(
8154                                        DAG.getMachineFunction(), FrameIdx));
8155       Chain = Store;
8156 
8157       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8158              "Expected an i32 store");
8159 
8160       RLI.Ptr = FIdx;
8161       RLI.Chain = Chain;
8162       RLI.MPI =
8163           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8164       RLI.Alignment = Align(4);
8165 
8166       MachineMemOperand *MMO =
8167         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8168                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8169       SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8170       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8171                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8172                                      dl, DAG.getVTList(MVT::f64, MVT::Other),
8173                                      Ops, MVT::i32, MMO);
8174       Chain = Bits.getValue(1);
8175     } else
8176       Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8177 
8178     SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8179     if (IsStrict)
8180       Chain = FP.getValue(1);
8181 
8182     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8183       if (IsStrict)
8184         FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8185                          DAG.getVTList(MVT::f32, MVT::Other),
8186                          {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8187       else
8188         FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8189                          DAG.getIntPtrConstant(0, dl));
8190     }
8191     return FP;
8192   }
8193 
8194   assert(Src.getValueType() == MVT::i32 &&
8195          "Unhandled INT_TO_FP type in custom expander!");
8196   // Since we only generate this in 64-bit mode, we can take advantage of
8197   // 64-bit registers.  In particular, sign extend the input value into the
8198   // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8199   // then lfd it and fcfid it.
8200   MachineFunction &MF = DAG.getMachineFunction();
8201   MachineFrameInfo &MFI = MF.getFrameInfo();
8202   EVT PtrVT = getPointerTy(MF.getDataLayout());
8203 
8204   SDValue Ld;
8205   if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8206     ReuseLoadInfo RLI;
8207     bool ReusingLoad;
8208     if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8209       int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8210       SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8211 
8212       SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8213                                    MachinePointerInfo::getFixedStack(
8214                                        DAG.getMachineFunction(), FrameIdx));
8215       Chain = Store;
8216 
8217       assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8218              "Expected an i32 store");
8219 
8220       RLI.Ptr = FIdx;
8221       RLI.Chain = Chain;
8222       RLI.MPI =
8223           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8224       RLI.Alignment = Align(4);
8225     }
8226 
8227     MachineMemOperand *MMO =
8228       MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8229                               RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8230     SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8231     Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8232                                  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8233                                  MVT::i32, MMO);
8234     Chain = Ld.getValue(1);
8235     if (ReusingLoad)
8236       spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8237   } else {
8238     assert(Subtarget.isPPC64() &&
8239            "i32->FP without LFIWAX supported only on PPC64");
8240 
8241     int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8242     SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8243 
8244     SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8245 
8246     // STD the extended value into the stack slot.
8247     SDValue Store = DAG.getStore(
8248         Chain, dl, Ext64, FIdx,
8249         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8250     Chain = Store;
8251 
8252     // Load the value as a double.
8253     Ld = DAG.getLoad(
8254         MVT::f64, dl, Chain, FIdx,
8255         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8256     Chain = Ld.getValue(1);
8257   }
8258 
8259   // FCFID it and return it.
8260   SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8261   if (IsStrict)
8262     Chain = FP.getValue(1);
8263   if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8264     if (IsStrict)
8265       FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8266                        DAG.getVTList(MVT::f32, MVT::Other),
8267                        {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8268     else
8269       FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8270                        DAG.getIntPtrConstant(0, dl));
8271   }
8272   return FP;
8273 }
8274 
8275 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8276                                             SelectionDAG &DAG) const {
8277   SDLoc dl(Op);
8278   /*
8279    The rounding mode is in bits 30:31 of FPSR, and has the following
8280    settings:
8281      00 Round to nearest
8282      01 Round to 0
8283      10 Round to +inf
8284      11 Round to -inf
8285 
8286   FLT_ROUNDS, on the other hand, expects the following:
8287     -1 Undefined
8288      0 Round to 0
8289      1 Round to nearest
8290      2 Round to +inf
8291      3 Round to -inf
8292 
8293   To perform the conversion, we do:
8294     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8295   */
8296 
8297   MachineFunction &MF = DAG.getMachineFunction();
8298   EVT VT = Op.getValueType();
8299   EVT PtrVT = getPointerTy(MF.getDataLayout());
8300 
8301   // Save FP Control Word to register
8302   SDValue Chain = Op.getOperand(0);
8303   SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8304   Chain = MFFS.getValue(1);
8305 
8306   SDValue CWD;
8307   if (isTypeLegal(MVT::i64)) {
8308     CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8309                       DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8310   } else {
8311     // Save FP register to stack slot
8312     int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8313     SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8314     Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8315 
8316     // Load FP Control Word from low 32 bits of stack slot.
8317     assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
8318            "Stack slot adjustment is valid only on big endian subtargets!");
8319     SDValue Four = DAG.getConstant(4, dl, PtrVT);
8320     SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8321     CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8322     Chain = CWD.getValue(1);
8323   }
8324 
8325   // Transform as necessary
8326   SDValue CWD1 =
8327     DAG.getNode(ISD::AND, dl, MVT::i32,
8328                 CWD, DAG.getConstant(3, dl, MVT::i32));
8329   SDValue CWD2 =
8330     DAG.getNode(ISD::SRL, dl, MVT::i32,
8331                 DAG.getNode(ISD::AND, dl, MVT::i32,
8332                             DAG.getNode(ISD::XOR, dl, MVT::i32,
8333                                         CWD, DAG.getConstant(3, dl, MVT::i32)),
8334                             DAG.getConstant(3, dl, MVT::i32)),
8335                 DAG.getConstant(1, dl, MVT::i32));
8336 
8337   SDValue RetVal =
8338     DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8339 
8340   RetVal =
8341       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
8342                   dl, VT, RetVal);
8343 
8344   return DAG.getMergeValues({RetVal, Chain}, dl);
8345 }
8346 
8347 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8348   EVT VT = Op.getValueType();
8349   unsigned BitWidth = VT.getSizeInBits();
8350   SDLoc dl(Op);
8351   assert(Op.getNumOperands() == 3 &&
8352          VT == Op.getOperand(1).getValueType() &&
8353          "Unexpected SHL!");
8354 
8355   // Expand into a bunch of logical ops.  Note that these ops
8356   // depend on the PPC behavior for oversized shift amounts.
8357   SDValue Lo = Op.getOperand(0);
8358   SDValue Hi = Op.getOperand(1);
8359   SDValue Amt = Op.getOperand(2);
8360   EVT AmtVT = Amt.getValueType();
8361 
8362   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8363                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8364   SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8365   SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8366   SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8367   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8368                              DAG.getConstant(-BitWidth, dl, AmtVT));
8369   SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8370   SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8371   SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8372   SDValue OutOps[] = { OutLo, OutHi };
8373   return DAG.getMergeValues(OutOps, dl);
8374 }
8375 
8376 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8377   EVT VT = Op.getValueType();
8378   SDLoc dl(Op);
8379   unsigned BitWidth = VT.getSizeInBits();
8380   assert(Op.getNumOperands() == 3 &&
8381          VT == Op.getOperand(1).getValueType() &&
8382          "Unexpected SRL!");
8383 
8384   // Expand into a bunch of logical ops.  Note that these ops
8385   // depend on the PPC behavior for oversized shift amounts.
8386   SDValue Lo = Op.getOperand(0);
8387   SDValue Hi = Op.getOperand(1);
8388   SDValue Amt = Op.getOperand(2);
8389   EVT AmtVT = Amt.getValueType();
8390 
8391   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8392                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8393   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8394   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8395   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8396   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8397                              DAG.getConstant(-BitWidth, dl, AmtVT));
8398   SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8399   SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8400   SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8401   SDValue OutOps[] = { OutLo, OutHi };
8402   return DAG.getMergeValues(OutOps, dl);
8403 }
8404 
8405 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8406   SDLoc dl(Op);
8407   EVT VT = Op.getValueType();
8408   unsigned BitWidth = VT.getSizeInBits();
8409   assert(Op.getNumOperands() == 3 &&
8410          VT == Op.getOperand(1).getValueType() &&
8411          "Unexpected SRA!");
8412 
8413   // Expand into a bunch of logical ops, followed by a select_cc.
8414   SDValue Lo = Op.getOperand(0);
8415   SDValue Hi = Op.getOperand(1);
8416   SDValue Amt = Op.getOperand(2);
8417   EVT AmtVT = Amt.getValueType();
8418 
8419   SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8420                              DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8421   SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8422   SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8423   SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8424   SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8425                              DAG.getConstant(-BitWidth, dl, AmtVT));
8426   SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8427   SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8428   SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8429                                   Tmp4, Tmp6, ISD::SETLE);
8430   SDValue OutOps[] = { OutLo, OutHi };
8431   return DAG.getMergeValues(OutOps, dl);
8432 }
8433 
8434 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8435                                             SelectionDAG &DAG) const {
8436   SDLoc dl(Op);
8437   EVT VT = Op.getValueType();
8438   unsigned BitWidth = VT.getSizeInBits();
8439 
8440   bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8441   SDValue X = Op.getOperand(0);
8442   SDValue Y = Op.getOperand(1);
8443   SDValue Z = Op.getOperand(2);
8444   EVT AmtVT = Z.getValueType();
8445 
8446   // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8447   // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8448   // This is simpler than TargetLowering::expandFunnelShift because we can rely
8449   // on PowerPC shift by BW being well defined.
8450   Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8451                   DAG.getConstant(BitWidth - 1, dl, AmtVT));
8452   SDValue SubZ =
8453       DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8454   X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8455   Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8456   return DAG.getNode(ISD::OR, dl, VT, X, Y);
8457 }
8458 
8459 //===----------------------------------------------------------------------===//
8460 // Vector related lowering.
8461 //
8462 
8463 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8464 /// element size of SplatSize. Cast the result to VT.
8465 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8466                                       SelectionDAG &DAG, const SDLoc &dl) {
8467   static const MVT VTys[] = { // canonical VT to use for each size.
8468     MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
8469   };
8470 
8471   EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8472 
8473   // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8474   if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8475     SplatSize = 1;
8476     Val = 0xFF;
8477   }
8478 
8479   EVT CanonicalVT = VTys[SplatSize-1];
8480 
8481   // Build a canonical splat for this value.
8482   return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8483 }
8484 
8485 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8486 /// specified intrinsic ID.
8487 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8488                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
8489   if (DestVT == MVT::Other) DestVT = Op.getValueType();
8490   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8491                      DAG.getConstant(IID, dl, MVT::i32), Op);
8492 }
8493 
8494 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8495 /// specified intrinsic ID.
8496 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
8497                                 SelectionDAG &DAG, const SDLoc &dl,
8498                                 EVT DestVT = MVT::Other) {
8499   if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8500   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8501                      DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8502 }
8503 
8504 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8505 /// specified intrinsic ID.
8506 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8507                                 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8508                                 EVT DestVT = MVT::Other) {
8509   if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8510   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8511                      DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8512 }
8513 
8514 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8515 /// amount.  The result has the specified value type.
8516 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8517                            SelectionDAG &DAG, const SDLoc &dl) {
8518   // Force LHS/RHS to be the right type.
8519   LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8520   RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8521 
8522   int Ops[16];
8523   for (unsigned i = 0; i != 16; ++i)
8524     Ops[i] = i + Amt;
8525   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8526   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8527 }
8528 
8529 /// Do we have an efficient pattern in a .td file for this node?
8530 ///
8531 /// \param V - pointer to the BuildVectorSDNode being matched
8532 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8533 ///
8534 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8535 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8536 /// the opposite is true (expansion is beneficial) are:
8537 /// - The node builds a vector out of integers that are not 32 or 64-bits
8538 /// - The node builds a vector out of constants
8539 /// - The node is a "load-and-splat"
8540 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8541 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
8542                                             bool HasDirectMove,
8543                                             bool HasP8Vector) {
8544   EVT VecVT = V->getValueType(0);
8545   bool RightType = VecVT == MVT::v2f64 ||
8546     (HasP8Vector && VecVT == MVT::v4f32) ||
8547     (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8548   if (!RightType)
8549     return false;
8550 
8551   bool IsSplat = true;
8552   bool IsLoad = false;
8553   SDValue Op0 = V->getOperand(0);
8554 
8555   // This function is called in a block that confirms the node is not a constant
8556   // splat. So a constant BUILD_VECTOR here means the vector is built out of
8557   // different constants.
8558   if (V->isConstant())
8559     return false;
8560   for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
8561     if (V->getOperand(i).isUndef())
8562       return false;
8563     // We want to expand nodes that represent load-and-splat even if the
8564     // loaded value is a floating point truncation or conversion to int.
8565     if (V->getOperand(i).getOpcode() == ISD::LOAD ||
8566         (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
8567          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8568         (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
8569          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
8570         (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
8571          V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
8572       IsLoad = true;
8573     // If the operands are different or the input is not a load and has more
8574     // uses than just this BV node, then it isn't a splat.
8575     if (V->getOperand(i) != Op0 ||
8576         (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
8577       IsSplat = false;
8578   }
8579   return !(IsSplat && IsLoad);
8580 }
8581 
8582 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
8583 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
8584 
8585   SDLoc dl(Op);
8586   SDValue Op0 = Op->getOperand(0);
8587 
8588   if ((Op.getValueType() != MVT::f128) ||
8589       (Op0.getOpcode() != ISD::BUILD_PAIR) ||
8590       (Op0.getOperand(0).getValueType() != MVT::i64) ||
8591       (Op0.getOperand(1).getValueType() != MVT::i64))
8592     return SDValue();
8593 
8594   return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
8595                      Op0.getOperand(1));
8596 }
8597 
8598 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
8599   const SDValue *InputLoad = &Op;
8600   if (InputLoad->getOpcode() == ISD::BITCAST)
8601     InputLoad = &InputLoad->getOperand(0);
8602   if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
8603       InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
8604     IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
8605     InputLoad = &InputLoad->getOperand(0);
8606   }
8607   if (InputLoad->getOpcode() != ISD::LOAD)
8608     return nullptr;
8609   LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8610   return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
8611 }
8612 
8613 // Convert the argument APFloat to a single precision APFloat if there is no
8614 // loss in information during the conversion to single precision APFloat and the
8615 // resulting number is not a denormal number. Return true if successful.
8616 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
8617   APFloat APFloatToConvert = ArgAPFloat;
8618   bool LosesInfo = true;
8619   APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
8620                            &LosesInfo);
8621   bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
8622   if (Success)
8623     ArgAPFloat = APFloatToConvert;
8624   return Success;
8625 }
8626 
8627 // Bitcast the argument APInt to a double and convert it to a single precision
8628 // APFloat, bitcast the APFloat to an APInt and assign it to the original
8629 // argument if there is no loss in information during the conversion from
8630 // double to single precision APFloat and the resulting number is not a denormal
8631 // number. Return true if successful.
8632 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
8633   double DpValue = ArgAPInt.bitsToDouble();
8634   APFloat APFloatDp(DpValue);
8635   bool Success = convertToNonDenormSingle(APFloatDp);
8636   if (Success)
8637     ArgAPInt = APFloatDp.bitcastToAPInt();
8638   return Success;
8639 }
8640 
8641 // If this is a case we can't handle, return null and let the default
8642 // expansion code take care of it.  If we CAN select this case, and if it
8643 // selects to a single instruction, return Op.  Otherwise, if we can codegen
8644 // this case more efficiently than a constant pool load, lower it to the
8645 // sequence of ops that should be used.
8646 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
8647                                              SelectionDAG &DAG) const {
8648   SDLoc dl(Op);
8649   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
8650   assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8651 
8652   // Check if this is a splat of a constant value.
8653   APInt APSplatBits, APSplatUndef;
8654   unsigned SplatBitSize;
8655   bool HasAnyUndefs;
8656   bool BVNIsConstantSplat =
8657       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
8658                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
8659 
8660   // If it is a splat of a double, check if we can shrink it to a 32 bit
8661   // non-denormal float which when converted back to double gives us the same
8662   // double. This is to exploit the XXSPLTIDP instruction.
8663   // If we lose precision, we use XXSPLTI32DX.
8664   if (BVNIsConstantSplat && (SplatBitSize == 64) &&
8665       Subtarget.hasPrefixInstrs()) {
8666     // Check the type first to short-circuit so we don't modify APSplatBits if
8667     // this block isn't executed.
8668     if ((Op->getValueType(0) == MVT::v2f64) &&
8669         convertToNonDenormSingle(APSplatBits)) {
8670       SDValue SplatNode = DAG.getNode(
8671           PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
8672           DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
8673       return DAG.getBitcast(Op.getValueType(), SplatNode);
8674     } else {
8675       // We may lose precision, so we have to use XXSPLTI32DX.
8676 
8677       uint32_t Hi =
8678           (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
8679       uint32_t Lo =
8680           (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
8681       SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
8682 
8683       if (!Hi || !Lo)
8684         // If either load is 0, then we should generate XXLXOR to set to 0.
8685         SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
8686 
8687       if (Hi)
8688         SplatNode = DAG.getNode(
8689             PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8690             DAG.getTargetConstant(0, dl, MVT::i32),
8691             DAG.getTargetConstant(Hi, dl, MVT::i32));
8692 
8693       if (Lo)
8694         SplatNode =
8695             DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
8696                         DAG.getTargetConstant(1, dl, MVT::i32),
8697                         DAG.getTargetConstant(Lo, dl, MVT::i32));
8698 
8699       return DAG.getBitcast(Op.getValueType(), SplatNode);
8700     }
8701   }
8702 
8703   if (!BVNIsConstantSplat || SplatBitSize > 32) {
8704 
8705     bool IsPermutedLoad = false;
8706     const SDValue *InputLoad =
8707         getNormalLoadInput(Op.getOperand(0), IsPermutedLoad);
8708     // Handle load-and-splat patterns as we have instructions that will do this
8709     // in one go.
8710     if (InputLoad && DAG.isSplatValue(Op, true)) {
8711       LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
8712 
8713       // We have handling for 4 and 8 byte elements.
8714       unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();
8715 
8716       // Checking for a single use of this load, we have to check for vector
8717       // width (128 bits) / ElementSize uses (since each operand of the
8718       // BUILD_VECTOR is a separate use of the value.
8719       unsigned NumUsesOfInputLD = 128 / ElementSize;
8720       for (SDValue BVInOp : Op->ops())
8721         if (BVInOp.isUndef())
8722           NumUsesOfInputLD--;
8723       assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
8724       if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
8725           ((Subtarget.hasVSX() && ElementSize == 64) ||
8726            (Subtarget.hasP9Vector() && ElementSize == 32))) {
8727         SDValue Ops[] = {
8728           LD->getChain(),    // Chain
8729           LD->getBasePtr(),  // Ptr
8730           DAG.getValueType(Op.getValueType()) // VT
8731         };
8732         SDValue LdSplt = DAG.getMemIntrinsicNode(
8733             PPCISD::LD_SPLAT, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8734             Ops, LD->getMemoryVT(), LD->getMemOperand());
8735         // Replace all uses of the output chain of the original load with the
8736         // output chain of the new load.
8737         DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
8738                                       LdSplt.getValue(1));
8739         return LdSplt;
8740       }
8741     }
8742 
8743     // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
8744     // 32-bits can be lowered to VSX instructions under certain conditions.
8745     // Without VSX, there is no pattern more efficient than expanding the node.
8746     if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
8747         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
8748                                         Subtarget.hasP8Vector()))
8749       return Op;
8750     return SDValue();
8751   }
8752 
8753   uint64_t SplatBits = APSplatBits.getZExtValue();
8754   uint64_t SplatUndef = APSplatUndef.getZExtValue();
8755   unsigned SplatSize = SplatBitSize / 8;
8756 
8757   // First, handle single instruction cases.
8758 
8759   // All zeros?
8760   if (SplatBits == 0) {
8761     // Canonicalize all zero vectors to be v4i32.
8762     if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
8763       SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
8764       Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
8765     }
8766     return Op;
8767   }
8768 
8769   // We have XXSPLTIW for constant splats four bytes wide.
8770   // Given vector length is a multiple of 4, 2-byte splats can be replaced
8771   // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
8772   // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
8773   // turned into a 4-byte splat of 0xABABABAB.
8774   if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
8775     return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
8776                                   Op.getValueType(), DAG, dl);
8777 
8778   if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
8779     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8780                                   dl);
8781 
8782   // We have XXSPLTIB for constant splats one byte wide.
8783   if (Subtarget.hasP9Vector() && SplatSize == 1)
8784     return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
8785                                   dl);
8786 
8787   // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8788   int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
8789                     (32-SplatBitSize));
8790   if (SextVal >= -16 && SextVal <= 15)
8791     return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
8792                                   dl);
8793 
8794   // Two instruction sequences.
8795 
8796   // If this value is in the range [-32,30] and is even, use:
8797   //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8798   // If this value is in the range [17,31] and is odd, use:
8799   //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8800   // If this value is in the range [-31,-17] and is odd, use:
8801   //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8802   // Note the last two are three-instruction sequences.
8803   if (SextVal >= -32 && SextVal <= 31) {
8804     // To avoid having these optimizations undone by constant folding,
8805     // we convert to a pseudo that will be expanded later into one of
8806     // the above forms.
8807     SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
8808     EVT VT = (SplatSize == 1 ? MVT::v16i8 :
8809               (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
8810     SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
8811     SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
8812     if (VT == Op.getValueType())
8813       return RetVal;
8814     else
8815       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
8816   }
8817 
8818   // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
8819   // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
8820   // for fneg/fabs.
8821   if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
8822     // Make -1 and vspltisw -1:
8823     SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
8824 
8825     // Make the VSLW intrinsic, computing 0x8000_0000.
8826     SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
8827                                    OnesV, DAG, dl);
8828 
8829     // xor by OnesV to invert it.
8830     Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
8831     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8832   }
8833 
8834   // Check to see if this is a wide variety of vsplti*, binop self cases.
8835   static const signed char SplatCsts[] = {
8836     -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8837     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8838   };
8839 
8840   for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
8841     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8842     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
8843     int i = SplatCsts[idx];
8844 
8845     // Figure out what shift amount will be used by altivec if shifted by i in
8846     // this splat size.
8847     unsigned TypeShiftAmt = i & (SplatBitSize-1);
8848 
8849     // vsplti + shl self.
8850     if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
8851       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8852       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8853         Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
8854         Intrinsic::ppc_altivec_vslw
8855       };
8856       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8857       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8858     }
8859 
8860     // vsplti + srl self.
8861     if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
8862       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8863       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8864         Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
8865         Intrinsic::ppc_altivec_vsrw
8866       };
8867       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8868       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8869     }
8870 
8871     // vsplti + rol self.
8872     if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
8873                          ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
8874       SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
8875       static const unsigned IIDs[] = { // Intrinsic to use for each size.
8876         Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
8877         Intrinsic::ppc_altivec_vrlw
8878       };
8879       Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
8880       return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
8881     }
8882 
8883     // t = vsplti c, result = vsldoi t, t, 1
8884     if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
8885       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8886       unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
8887       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8888     }
8889     // t = vsplti c, result = vsldoi t, t, 2
8890     if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
8891       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8892       unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
8893       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8894     }
8895     // t = vsplti c, result = vsldoi t, t, 3
8896     if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
8897       SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
8898       unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
8899       return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
8900     }
8901   }
8902 
8903   return SDValue();
8904 }
8905 
8906 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8907 /// the specified operations to build the shuffle.
8908 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8909                                       SDValue RHS, SelectionDAG &DAG,
8910                                       const SDLoc &dl) {
8911   unsigned OpNum = (PFEntry >> 26) & 0x0F;
8912   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8913   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
8914 
8915   enum {
8916     OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8917     OP_VMRGHW,
8918     OP_VMRGLW,
8919     OP_VSPLTISW0,
8920     OP_VSPLTISW1,
8921     OP_VSPLTISW2,
8922     OP_VSPLTISW3,
8923     OP_VSLDOI4,
8924     OP_VSLDOI8,
8925     OP_VSLDOI12
8926   };
8927 
8928   if (OpNum == OP_COPY) {
8929     if (LHSID == (1*9+2)*9+3) return LHS;
8930     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8931     return RHS;
8932   }
8933 
8934   SDValue OpLHS, OpRHS;
8935   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8936   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8937 
8938   int ShufIdxs[16];
8939   switch (OpNum) {
8940   default: llvm_unreachable("Unknown i32 permute!");
8941   case OP_VMRGHW:
8942     ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
8943     ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
8944     ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
8945     ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
8946     break;
8947   case OP_VMRGLW:
8948     ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
8949     ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
8950     ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
8951     ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
8952     break;
8953   case OP_VSPLTISW0:
8954     for (unsigned i = 0; i != 16; ++i)
8955       ShufIdxs[i] = (i&3)+0;
8956     break;
8957   case OP_VSPLTISW1:
8958     for (unsigned i = 0; i != 16; ++i)
8959       ShufIdxs[i] = (i&3)+4;
8960     break;
8961   case OP_VSPLTISW2:
8962     for (unsigned i = 0; i != 16; ++i)
8963       ShufIdxs[i] = (i&3)+8;
8964     break;
8965   case OP_VSPLTISW3:
8966     for (unsigned i = 0; i != 16; ++i)
8967       ShufIdxs[i] = (i&3)+12;
8968     break;
8969   case OP_VSLDOI4:
8970     return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
8971   case OP_VSLDOI8:
8972     return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
8973   case OP_VSLDOI12:
8974     return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
8975   }
8976   EVT VT = OpLHS.getValueType();
8977   OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
8978   OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
8979   SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
8980   return DAG.getNode(ISD::BITCAST, dl, VT, T);
8981 }
8982 
8983 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8984 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8985 /// SDValue.
8986 SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
8987                                            SelectionDAG &DAG) const {
8988   const unsigned BytesInVector = 16;
8989   bool IsLE = Subtarget.isLittleEndian();
8990   SDLoc dl(N);
8991   SDValue V1 = N->getOperand(0);
8992   SDValue V2 = N->getOperand(1);
8993   unsigned ShiftElts = 0, InsertAtByte = 0;
8994   bool Swap = false;
8995 
8996   // Shifts required to get the byte we want at element 7.
8997   unsigned LittleEndianShifts[] = {8, 7,  6,  5,  4,  3,  2,  1,
8998                                    0, 15, 14, 13, 12, 11, 10, 9};
8999   unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9000                                 1, 2,  3,  4,  5,  6,  7,  8};
9001 
9002   ArrayRef<int> Mask = N->getMask();
9003   int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9004 
9005   // For each mask element, find out if we're just inserting something
9006   // from V2 into V1 or vice versa.
9007   // Possible permutations inserting an element from V2 into V1:
9008   //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9009   //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9010   //   ...
9011   //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9012   // Inserting from V1 into V2 will be similar, except mask range will be
9013   // [16,31].
9014 
9015   bool FoundCandidate = false;
9016   // If both vector operands for the shuffle are the same vector, the mask
9017   // will contain only elements from the first one and the second one will be
9018   // undef.
9019   unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9020   // Go through the mask of half-words to find an element that's being moved
9021   // from one vector to the other.
9022   for (unsigned i = 0; i < BytesInVector; ++i) {
9023     unsigned CurrentElement = Mask[i];
9024     // If 2nd operand is undefined, we should only look for element 7 in the
9025     // Mask.
9026     if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9027       continue;
9028 
9029     bool OtherElementsInOrder = true;
9030     // Examine the other elements in the Mask to see if they're in original
9031     // order.
9032     for (unsigned j = 0; j < BytesInVector; ++j) {
9033       if (j == i)
9034         continue;
9035       // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
9036       // from V2 [16,31] and vice versa.  Unless the 2nd operand is undefined,
9037       // in which we always assume we're always picking from the 1st operand.
9038       int MaskOffset =
9039           (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9040       if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9041         OtherElementsInOrder = false;
9042         break;
9043       }
9044     }
9045     // If other elements are in original order, we record the number of shifts
9046     // we need to get the element we want into element 7. Also record which byte
9047     // in the vector we should insert into.
9048     if (OtherElementsInOrder) {
9049       // If 2nd operand is undefined, we assume no shifts and no swapping.
9050       if (V2.isUndef()) {
9051         ShiftElts = 0;
9052         Swap = false;
9053       } else {
9054         // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9055         ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9056                          : BigEndianShifts[CurrentElement & 0xF];
9057         Swap = CurrentElement < BytesInVector;
9058       }
9059       InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9060       FoundCandidate = true;
9061       break;
9062     }
9063   }
9064 
9065   if (!FoundCandidate)
9066     return SDValue();
9067 
9068   // Candidate found, construct the proper SDAG sequence with VINSERTB,
9069   // optionally with VECSHL if shift is required.
9070   if (Swap)
9071     std::swap(V1, V2);
9072   if (V2.isUndef())
9073     V2 = V1;
9074   if (ShiftElts) {
9075     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9076                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9077     return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9078                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
9079   }
9080   return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9081                      DAG.getConstant(InsertAtByte, dl, MVT::i32));
9082 }
9083 
9084 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9085 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9086 /// SDValue.
9087 SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9088                                            SelectionDAG &DAG) const {
9089   const unsigned NumHalfWords = 8;
9090   const unsigned BytesInVector = NumHalfWords * 2;
9091   // Check that the shuffle is on half-words.
9092   if (!isNByteElemShuffleMask(N, 2, 1))
9093     return SDValue();
9094 
9095   bool IsLE = Subtarget.isLittleEndian();
9096   SDLoc dl(N);
9097   SDValue V1 = N->getOperand(0);
9098   SDValue V2 = N->getOperand(1);
9099   unsigned ShiftElts = 0, InsertAtByte = 0;
9100   bool Swap = false;
9101 
9102   // Shifts required to get the half-word we want at element 3.
9103   unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9104   unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9105 
9106   uint32_t Mask = 0;
9107   uint32_t OriginalOrderLow = 0x1234567;
9108   uint32_t OriginalOrderHigh = 0x89ABCDEF;
9109   // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
9110   // 32-bit space, only need 4-bit nibbles per element.
9111   for (unsigned i = 0; i < NumHalfWords; ++i) {
9112     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9113     Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9114   }
9115 
9116   // For each mask element, find out if we're just inserting something
9117   // from V2 into V1 or vice versa.  Possible permutations inserting an element
9118   // from V2 into V1:
9119   //   X, 1, 2, 3, 4, 5, 6, 7
9120   //   0, X, 2, 3, 4, 5, 6, 7
9121   //   0, 1, X, 3, 4, 5, 6, 7
9122   //   0, 1, 2, X, 4, 5, 6, 7
9123   //   0, 1, 2, 3, X, 5, 6, 7
9124   //   0, 1, 2, 3, 4, X, 6, 7
9125   //   0, 1, 2, 3, 4, 5, X, 7
9126   //   0, 1, 2, 3, 4, 5, 6, X
9127   // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9128 
9129   bool FoundCandidate = false;
9130   // Go through the mask of half-words to find an element that's being moved
9131   // from one vector to the other.
9132   for (unsigned i = 0; i < NumHalfWords; ++i) {
9133     unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9134     uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9135     uint32_t MaskOtherElts = ~(0xF << MaskShift);
9136     uint32_t TargetOrder = 0x0;
9137 
9138     // If both vector operands for the shuffle are the same vector, the mask
9139     // will contain only elements from the first one and the second one will be
9140     // undef.
9141     if (V2.isUndef()) {
9142       ShiftElts = 0;
9143       unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9144       TargetOrder = OriginalOrderLow;
9145       Swap = false;
9146       // Skip if not the correct element or mask of other elements don't equal
9147       // to our expected order.
9148       if (MaskOneElt == VINSERTHSrcElem &&
9149           (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9150         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9151         FoundCandidate = true;
9152         break;
9153       }
9154     } else { // If both operands are defined.
9155       // Target order is [8,15] if the current mask is between [0,7].
9156       TargetOrder =
9157           (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9158       // Skip if mask of other elements don't equal our expected order.
9159       if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9160         // We only need the last 3 bits for the number of shifts.
9161         ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9162                          : BigEndianShifts[MaskOneElt & 0x7];
9163         InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9164         Swap = MaskOneElt < NumHalfWords;
9165         FoundCandidate = true;
9166         break;
9167       }
9168     }
9169   }
9170 
9171   if (!FoundCandidate)
9172     return SDValue();
9173 
9174   // Candidate found, construct the proper SDAG sequence with VINSERTH,
9175   // optionally with VECSHL if shift is required.
9176   if (Swap)
9177     std::swap(V1, V2);
9178   if (V2.isUndef())
9179     V2 = V1;
9180   SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9181   if (ShiftElts) {
9182     // Double ShiftElts because we're left shifting on v16i8 type.
9183     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9184                               DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9185     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9186     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9187                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9188     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9189   }
9190   SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9191   SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9192                             DAG.getConstant(InsertAtByte, dl, MVT::i32));
9193   return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9194 }
9195 
9196 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9197 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9198 /// return the default SDValue.
9199 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9200                                               SelectionDAG &DAG) const {
9201   // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9202   // to v16i8. Peek through the bitcasts to get the actual operands.
9203   SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9204   SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9205 
9206   auto ShuffleMask = SVN->getMask();
9207   SDValue VecShuffle(SVN, 0);
9208   SDLoc DL(SVN);
9209 
9210   // Check that we have a four byte shuffle.
9211   if (!isNByteElemShuffleMask(SVN, 4, 1))
9212     return SDValue();
9213 
9214   // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9215   if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9216     std::swap(LHS, RHS);
9217     VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9218     ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9219   }
9220 
9221   // Ensure that the RHS is a vector of constants.
9222   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9223   if (!BVN)
9224     return SDValue();
9225 
9226   // Check if RHS is a splat of 4-bytes (or smaller).
9227   APInt APSplatValue, APSplatUndef;
9228   unsigned SplatBitSize;
9229   bool HasAnyUndefs;
9230   if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9231                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9232       SplatBitSize > 32)
9233     return SDValue();
9234 
9235   // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9236   // The instruction splats a constant C into two words of the source vector
9237   // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9238   // Thus we check that the shuffle mask is the equivalent  of
9239   // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9240   // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9241   // within each word are consecutive, so we only need to check the first byte.
9242   SDValue Index;
9243   bool IsLE = Subtarget.isLittleEndian();
9244   if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9245       (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9246        ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9247     Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9248   else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9249            (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9250             ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9251     Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9252   else
9253     return SDValue();
9254 
9255   // If the splat is narrower than 32-bits, we need to get the 32-bit value
9256   // for XXSPLTI32DX.
9257   unsigned SplatVal = APSplatValue.getZExtValue();
9258   for (; SplatBitSize < 32; SplatBitSize <<= 1)
9259     SplatVal |= (SplatVal << SplatBitSize);
9260 
9261   SDValue SplatNode = DAG.getNode(
9262       PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
9263       Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9264   return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9265 }
9266 
9267 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9268 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9269 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9270 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9271 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9272   assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9273   assert(Op.getValueType() == MVT::v1i128 &&
9274          "Only set v1i128 as custom, other type shouldn't reach here!");
9275   SDLoc dl(Op);
9276   SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9277   SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9278   unsigned SHLAmt = N1.getConstantOperandVal(0);
9279   if (SHLAmt % 8 == 0) {
9280     SmallVector<int, 16> Mask(16, 0);
9281     std::iota(Mask.begin(), Mask.end(), 0);
9282     std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9283     if (SDValue Shuffle =
9284             DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
9285                                  DAG.getUNDEF(MVT::v16i8), Mask))
9286       return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9287   }
9288   SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9289   SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9290                               DAG.getConstant(SHLAmt, dl, MVT::i32));
9291   SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9292                               DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9293   SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9294   return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9295 }
9296 
9297 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
9298 /// is a shuffle we can handle in a single instruction, return it.  Otherwise,
9299 /// return the code it can be lowered into.  Worst case, it can always be
9300 /// lowered into a vperm.
9301 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9302                                                SelectionDAG &DAG) const {
9303   SDLoc dl(Op);
9304   SDValue V1 = Op.getOperand(0);
9305   SDValue V2 = Op.getOperand(1);
9306   ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9307 
9308   // Any nodes that were combined in the target-independent combiner prior
9309   // to vector legalization will not be sent to the target combine. Try to
9310   // combine it here.
9311   if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9312     if (!isa<ShuffleVectorSDNode>(NewShuffle))
9313       return NewShuffle;
9314     Op = NewShuffle;
9315     SVOp = cast<ShuffleVectorSDNode>(Op);
9316     V1 = Op.getOperand(0);
9317     V2 = Op.getOperand(1);
9318   }
9319   EVT VT = Op.getValueType();
9320   bool isLittleEndian = Subtarget.isLittleEndian();
9321 
9322   unsigned ShiftElts, InsertAtByte;
9323   bool Swap = false;
9324 
9325   // If this is a load-and-splat, we can do that with a single instruction
9326   // in some cases. However if the load has multiple uses, we don't want to
9327   // combine it because that will just produce multiple loads.
9328   bool IsPermutedLoad = false;
9329   const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9330   if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9331       (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9332       InputLoad->hasOneUse()) {
9333     bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9334     int SplatIdx =
9335       PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9336 
9337     // The splat index for permuted loads will be in the left half of the vector
9338     // which is strictly wider than the loaded value by 8 bytes. So we need to
9339     // adjust the splat index to point to the correct address in memory.
9340     if (IsPermutedLoad) {
9341       assert(isLittleEndian && "Unexpected permuted load on big endian target");
9342       SplatIdx += IsFourByte ? 2 : 1;
9343       assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9344              "Splat of a value outside of the loaded memory");
9345     }
9346 
9347     LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9348     // For 4-byte load-and-splat, we need Power9.
9349     if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9350       uint64_t Offset = 0;
9351       if (IsFourByte)
9352         Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9353       else
9354         Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9355 
9356       SDValue BasePtr = LD->getBasePtr();
9357       if (Offset != 0)
9358         BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
9359                               BasePtr, DAG.getIntPtrConstant(Offset, dl));
9360       SDValue Ops[] = {
9361         LD->getChain(),    // Chain
9362         BasePtr,           // BasePtr
9363         DAG.getValueType(Op.getValueType()) // VT
9364       };
9365       SDVTList VTL =
9366         DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9367       SDValue LdSplt =
9368         DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
9369                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
9370       DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9371       if (LdSplt.getValueType() != SVOp->getValueType(0))
9372         LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9373       return LdSplt;
9374     }
9375   }
9376   if (Subtarget.hasP9Vector() &&
9377       PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9378                            isLittleEndian)) {
9379     if (Swap)
9380       std::swap(V1, V2);
9381     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9382     SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9383     if (ShiftElts) {
9384       SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9385                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
9386       SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9387                                 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9388       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9389     }
9390     SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9391                               DAG.getConstant(InsertAtByte, dl, MVT::i32));
9392     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9393   }
9394 
9395   if (Subtarget.hasPrefixInstrs()) {
9396     SDValue SplatInsertNode;
9397     if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9398       return SplatInsertNode;
9399   }
9400 
9401   if (Subtarget.hasP9Altivec()) {
9402     SDValue NewISDNode;
9403     if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9404       return NewISDNode;
9405 
9406     if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9407       return NewISDNode;
9408   }
9409 
9410   if (Subtarget.hasVSX() &&
9411       PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9412     if (Swap)
9413       std::swap(V1, V2);
9414     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9415     SDValue Conv2 =
9416         DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9417 
9418     SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9419                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9420     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9421   }
9422 
9423   if (Subtarget.hasVSX() &&
9424     PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9425     if (Swap)
9426       std::swap(V1, V2);
9427     SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9428     SDValue Conv2 =
9429         DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9430 
9431     SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9432                               DAG.getConstant(ShiftElts, dl, MVT::i32));
9433     return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9434   }
9435 
9436   if (Subtarget.hasP9Vector()) {
9437      if (PPC::isXXBRHShuffleMask(SVOp)) {
9438       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9439       SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9440       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9441     } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9442       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9443       SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9444       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9445     } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9446       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9447       SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9448       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9449     } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9450       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9451       SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9452       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9453     }
9454   }
9455 
9456   if (Subtarget.hasVSX()) {
9457     if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9458       int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9459 
9460       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9461       SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9462                                   DAG.getConstant(SplatIdx, dl, MVT::i32));
9463       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9464     }
9465 
9466     // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9467     if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9468       SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9469       SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
9470       return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
9471     }
9472   }
9473 
9474   // Cases that are handled by instructions that take permute immediates
9475   // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9476   // selected by the instruction selector.
9477   if (V2.isUndef()) {
9478     if (PPC::isSplatShuffleMask(SVOp, 1) ||
9479         PPC::isSplatShuffleMask(SVOp, 2) ||
9480         PPC::isSplatShuffleMask(SVOp, 4) ||
9481         PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
9482         PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
9483         PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
9484         PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
9485         PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
9486         PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
9487         PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
9488         PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
9489         PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
9490         (Subtarget.hasP8Altivec() && (
9491          PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
9492          PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
9493          PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
9494       return Op;
9495     }
9496   }
9497 
9498   // Altivec has a variety of "shuffle immediates" that take two vector inputs
9499   // and produce a fixed permutation.  If any of these match, do not lower to
9500   // VPERM.
9501   unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
9502   if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9503       PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9504       PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
9505       PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9506       PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9507       PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9508       PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
9509       PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
9510       PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
9511       (Subtarget.hasP8Altivec() && (
9512        PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
9513        PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
9514        PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
9515     return Op;
9516 
9517   // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
9518   // perfect shuffle table to emit an optimal matching sequence.
9519   ArrayRef<int> PermMask = SVOp->getMask();
9520 
9521   unsigned PFIndexes[4];
9522   bool isFourElementShuffle = true;
9523   for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
9524     unsigned EltNo = 8;   // Start out undef.
9525     for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
9526       if (PermMask[i*4+j] < 0)
9527         continue;   // Undef, ignore it.
9528 
9529       unsigned ByteSource = PermMask[i*4+j];
9530       if ((ByteSource & 3) != j) {
9531         isFourElementShuffle = false;
9532         break;
9533       }
9534 
9535       if (EltNo == 8) {
9536         EltNo = ByteSource/4;
9537       } else if (EltNo != ByteSource/4) {
9538         isFourElementShuffle = false;
9539         break;
9540       }
9541     }
9542     PFIndexes[i] = EltNo;
9543   }
9544 
9545   // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9546   // perfect shuffle vector to determine if it is cost effective to do this as
9547   // discrete instructions, or whether we should use a vperm.
9548   // For now, we skip this for little endian until such time as we have a
9549   // little-endian perfect shuffle table.
9550   if (isFourElementShuffle && !isLittleEndian) {
9551     // Compute the index in the perfect shuffle table.
9552     unsigned PFTableIndex =
9553       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
9554 
9555     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9556     unsigned Cost  = (PFEntry >> 30);
9557 
9558     // Determining when to avoid vperm is tricky.  Many things affect the cost
9559     // of vperm, particularly how many times the perm mask needs to be computed.
9560     // For example, if the perm mask can be hoisted out of a loop or is already
9561     // used (perhaps because there are multiple permutes with the same shuffle
9562     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
9563     // the loop requires an extra register.
9564     //
9565     // As a compromise, we only emit discrete instructions if the shuffle can be
9566     // generated in 3 or fewer operations.  When we have loop information
9567     // available, if this block is within a loop, we should avoid using vperm
9568     // for 3-operation perms and use a constant pool load instead.
9569     if (Cost < 3)
9570       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9571   }
9572 
9573   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9574   // vector that will get spilled to the constant pool.
9575   if (V2.isUndef()) V2 = V1;
9576 
9577   // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9578   // that it is in input element units, not in bytes.  Convert now.
9579 
9580   // For little endian, the order of the input vectors is reversed, and
9581   // the permutation mask is complemented with respect to 31.  This is
9582   // necessary to produce proper semantics with the big-endian-biased vperm
9583   // instruction.
9584   EVT EltVT = V1.getValueType().getVectorElementType();
9585   unsigned BytesPerElement = EltVT.getSizeInBits()/8;
9586 
9587   SmallVector<SDValue, 16> ResultMask;
9588   for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
9589     unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
9590 
9591     for (unsigned j = 0; j != BytesPerElement; ++j)
9592       if (isLittleEndian)
9593         ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
9594                                              dl, MVT::i32));
9595       else
9596         ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
9597                                              MVT::i32));
9598   }
9599 
9600   ShufflesHandledWithVPERM++;
9601   SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
9602   LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
9603   LLVM_DEBUG(SVOp->dump());
9604   LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
9605   LLVM_DEBUG(VPermMask.dump());
9606 
9607   if (isLittleEndian)
9608     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9609                        V2, V1, VPermMask);
9610   else
9611     return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
9612                        V1, V2, VPermMask);
9613 }
9614 
9615 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9616 /// vector comparison.  If it is, return true and fill in Opc/isDot with
9617 /// information about the intrinsic.
9618 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
9619                                  bool &isDot, const PPCSubtarget &Subtarget) {
9620   unsigned IntrinsicID =
9621       cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
9622   CompareOpc = -1;
9623   isDot = false;
9624   switch (IntrinsicID) {
9625   default:
9626     return false;
9627   // Comparison predicates.
9628   case Intrinsic::ppc_altivec_vcmpbfp_p:
9629     CompareOpc = 966;
9630     isDot = true;
9631     break;
9632   case Intrinsic::ppc_altivec_vcmpeqfp_p:
9633     CompareOpc = 198;
9634     isDot = true;
9635     break;
9636   case Intrinsic::ppc_altivec_vcmpequb_p:
9637     CompareOpc = 6;
9638     isDot = true;
9639     break;
9640   case Intrinsic::ppc_altivec_vcmpequh_p:
9641     CompareOpc = 70;
9642     isDot = true;
9643     break;
9644   case Intrinsic::ppc_altivec_vcmpequw_p:
9645     CompareOpc = 134;
9646     isDot = true;
9647     break;
9648   case Intrinsic::ppc_altivec_vcmpequd_p:
9649     if (Subtarget.hasP8Altivec()) {
9650       CompareOpc = 199;
9651       isDot = true;
9652     } else
9653       return false;
9654     break;
9655   case Intrinsic::ppc_altivec_vcmpneb_p:
9656   case Intrinsic::ppc_altivec_vcmpneh_p:
9657   case Intrinsic::ppc_altivec_vcmpnew_p:
9658   case Intrinsic::ppc_altivec_vcmpnezb_p:
9659   case Intrinsic::ppc_altivec_vcmpnezh_p:
9660   case Intrinsic::ppc_altivec_vcmpnezw_p:
9661     if (Subtarget.hasP9Altivec()) {
9662       switch (IntrinsicID) {
9663       default:
9664         llvm_unreachable("Unknown comparison intrinsic.");
9665       case Intrinsic::ppc_altivec_vcmpneb_p:
9666         CompareOpc = 7;
9667         break;
9668       case Intrinsic::ppc_altivec_vcmpneh_p:
9669         CompareOpc = 71;
9670         break;
9671       case Intrinsic::ppc_altivec_vcmpnew_p:
9672         CompareOpc = 135;
9673         break;
9674       case Intrinsic::ppc_altivec_vcmpnezb_p:
9675         CompareOpc = 263;
9676         break;
9677       case Intrinsic::ppc_altivec_vcmpnezh_p:
9678         CompareOpc = 327;
9679         break;
9680       case Intrinsic::ppc_altivec_vcmpnezw_p:
9681         CompareOpc = 391;
9682         break;
9683       }
9684       isDot = true;
9685     } else
9686       return false;
9687     break;
9688   case Intrinsic::ppc_altivec_vcmpgefp_p:
9689     CompareOpc = 454;
9690     isDot = true;
9691     break;
9692   case Intrinsic::ppc_altivec_vcmpgtfp_p:
9693     CompareOpc = 710;
9694     isDot = true;
9695     break;
9696   case Intrinsic::ppc_altivec_vcmpgtsb_p:
9697     CompareOpc = 774;
9698     isDot = true;
9699     break;
9700   case Intrinsic::ppc_altivec_vcmpgtsh_p:
9701     CompareOpc = 838;
9702     isDot = true;
9703     break;
9704   case Intrinsic::ppc_altivec_vcmpgtsw_p:
9705     CompareOpc = 902;
9706     isDot = true;
9707     break;
9708   case Intrinsic::ppc_altivec_vcmpgtsd_p:
9709     if (Subtarget.hasP8Altivec()) {
9710       CompareOpc = 967;
9711       isDot = true;
9712     } else
9713       return false;
9714     break;
9715   case Intrinsic::ppc_altivec_vcmpgtub_p:
9716     CompareOpc = 518;
9717     isDot = true;
9718     break;
9719   case Intrinsic::ppc_altivec_vcmpgtuh_p:
9720     CompareOpc = 582;
9721     isDot = true;
9722     break;
9723   case Intrinsic::ppc_altivec_vcmpgtuw_p:
9724     CompareOpc = 646;
9725     isDot = true;
9726     break;
9727   case Intrinsic::ppc_altivec_vcmpgtud_p:
9728     if (Subtarget.hasP8Altivec()) {
9729       CompareOpc = 711;
9730       isDot = true;
9731     } else
9732       return false;
9733     break;
9734 
9735   case Intrinsic::ppc_altivec_vcmpequq:
9736   case Intrinsic::ppc_altivec_vcmpgtsq:
9737   case Intrinsic::ppc_altivec_vcmpgtuq:
9738     if (!Subtarget.isISA3_1())
9739       return false;
9740     switch (IntrinsicID) {
9741     default:
9742       llvm_unreachable("Unknown comparison intrinsic.");
9743     case Intrinsic::ppc_altivec_vcmpequq:
9744       CompareOpc = 455;
9745       break;
9746     case Intrinsic::ppc_altivec_vcmpgtsq:
9747       CompareOpc = 903;
9748       break;
9749     case Intrinsic::ppc_altivec_vcmpgtuq:
9750       CompareOpc = 647;
9751       break;
9752     }
9753     break;
9754 
9755   // VSX predicate comparisons use the same infrastructure
9756   case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9757   case Intrinsic::ppc_vsx_xvcmpgedp_p:
9758   case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9759   case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9760   case Intrinsic::ppc_vsx_xvcmpgesp_p:
9761   case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9762     if (Subtarget.hasVSX()) {
9763       switch (IntrinsicID) {
9764       case Intrinsic::ppc_vsx_xvcmpeqdp_p:
9765         CompareOpc = 99;
9766         break;
9767       case Intrinsic::ppc_vsx_xvcmpgedp_p:
9768         CompareOpc = 115;
9769         break;
9770       case Intrinsic::ppc_vsx_xvcmpgtdp_p:
9771         CompareOpc = 107;
9772         break;
9773       case Intrinsic::ppc_vsx_xvcmpeqsp_p:
9774         CompareOpc = 67;
9775         break;
9776       case Intrinsic::ppc_vsx_xvcmpgesp_p:
9777         CompareOpc = 83;
9778         break;
9779       case Intrinsic::ppc_vsx_xvcmpgtsp_p:
9780         CompareOpc = 75;
9781         break;
9782       }
9783       isDot = true;
9784     } else
9785       return false;
9786     break;
9787 
9788   // Normal Comparisons.
9789   case Intrinsic::ppc_altivec_vcmpbfp:
9790     CompareOpc = 966;
9791     break;
9792   case Intrinsic::ppc_altivec_vcmpeqfp:
9793     CompareOpc = 198;
9794     break;
9795   case Intrinsic::ppc_altivec_vcmpequb:
9796     CompareOpc = 6;
9797     break;
9798   case Intrinsic::ppc_altivec_vcmpequh:
9799     CompareOpc = 70;
9800     break;
9801   case Intrinsic::ppc_altivec_vcmpequw:
9802     CompareOpc = 134;
9803     break;
9804   case Intrinsic::ppc_altivec_vcmpequd:
9805     if (Subtarget.hasP8Altivec())
9806       CompareOpc = 199;
9807     else
9808       return false;
9809     break;
9810   case Intrinsic::ppc_altivec_vcmpneb:
9811   case Intrinsic::ppc_altivec_vcmpneh:
9812   case Intrinsic::ppc_altivec_vcmpnew:
9813   case Intrinsic::ppc_altivec_vcmpnezb:
9814   case Intrinsic::ppc_altivec_vcmpnezh:
9815   case Intrinsic::ppc_altivec_vcmpnezw:
9816     if (Subtarget.hasP9Altivec())
9817       switch (IntrinsicID) {
9818       default:
9819         llvm_unreachable("Unknown comparison intrinsic.");
9820       case Intrinsic::ppc_altivec_vcmpneb:
9821         CompareOpc = 7;
9822         break;
9823       case Intrinsic::ppc_altivec_vcmpneh:
9824         CompareOpc = 71;
9825         break;
9826       case Intrinsic::ppc_altivec_vcmpnew:
9827         CompareOpc = 135;
9828         break;
9829       case Intrinsic::ppc_altivec_vcmpnezb:
9830         CompareOpc = 263;
9831         break;
9832       case Intrinsic::ppc_altivec_vcmpnezh:
9833         CompareOpc = 327;
9834         break;
9835       case Intrinsic::ppc_altivec_vcmpnezw:
9836         CompareOpc = 391;
9837         break;
9838       }
9839     else
9840       return false;
9841     break;
9842   case Intrinsic::ppc_altivec_vcmpgefp:
9843     CompareOpc = 454;
9844     break;
9845   case Intrinsic::ppc_altivec_vcmpgtfp:
9846     CompareOpc = 710;
9847     break;
9848   case Intrinsic::ppc_altivec_vcmpgtsb:
9849     CompareOpc = 774;
9850     break;
9851   case Intrinsic::ppc_altivec_vcmpgtsh:
9852     CompareOpc = 838;
9853     break;
9854   case Intrinsic::ppc_altivec_vcmpgtsw:
9855     CompareOpc = 902;
9856     break;
9857   case Intrinsic::ppc_altivec_vcmpgtsd:
9858     if (Subtarget.hasP8Altivec())
9859       CompareOpc = 967;
9860     else
9861       return false;
9862     break;
9863   case Intrinsic::ppc_altivec_vcmpgtub:
9864     CompareOpc = 518;
9865     break;
9866   case Intrinsic::ppc_altivec_vcmpgtuh:
9867     CompareOpc = 582;
9868     break;
9869   case Intrinsic::ppc_altivec_vcmpgtuw:
9870     CompareOpc = 646;
9871     break;
9872   case Intrinsic::ppc_altivec_vcmpgtud:
9873     if (Subtarget.hasP8Altivec())
9874       CompareOpc = 711;
9875     else
9876       return false;
9877     break;
9878   case Intrinsic::ppc_altivec_vcmpequq_p:
9879   case Intrinsic::ppc_altivec_vcmpgtsq_p:
9880   case Intrinsic::ppc_altivec_vcmpgtuq_p:
9881     if (!Subtarget.isISA3_1())
9882       return false;
9883     switch (IntrinsicID) {
9884     default:
9885       llvm_unreachable("Unknown comparison intrinsic.");
9886     case Intrinsic::ppc_altivec_vcmpequq_p:
9887       CompareOpc = 455;
9888       break;
9889     case Intrinsic::ppc_altivec_vcmpgtsq_p:
9890       CompareOpc = 903;
9891       break;
9892     case Intrinsic::ppc_altivec_vcmpgtuq_p:
9893       CompareOpc = 647;
9894       break;
9895     }
9896     isDot = true;
9897     break;
9898   }
9899   return true;
9900 }
9901 
9902 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9903 /// lower, do it, otherwise return null.
9904 SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
9905                                                    SelectionDAG &DAG) const {
9906   unsigned IntrinsicID =
9907     cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
9908 
9909   SDLoc dl(Op);
9910 
9911   switch (IntrinsicID) {
9912   case Intrinsic::thread_pointer:
9913     // Reads the thread pointer register, used for __builtin_thread_pointer.
9914     if (Subtarget.isPPC64())
9915       return DAG.getRegister(PPC::X13, MVT::i64);
9916     return DAG.getRegister(PPC::R2, MVT::i32);
9917 
9918   case Intrinsic::ppc_mma_disassemble_acc:
9919   case Intrinsic::ppc_vsx_disassemble_pair: {
9920     int NumVecs = 2;
9921     SDValue WideVec = Op.getOperand(1);
9922     if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
9923       NumVecs = 4;
9924       WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
9925     }
9926     SmallVector<SDValue, 4> RetOps;
9927     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
9928       SDValue Extract = DAG.getNode(
9929           PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
9930           DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
9931                                                      : VecNo,
9932                           dl, MVT::i64));
9933       RetOps.push_back(Extract);
9934     }
9935     return DAG.getMergeValues(RetOps, dl);
9936   }
9937   }
9938 
9939   // If this is a lowered altivec predicate compare, CompareOpc is set to the
9940   // opcode number of the comparison.
9941   int CompareOpc;
9942   bool isDot;
9943   if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
9944     return SDValue();    // Don't custom lower most intrinsics.
9945 
9946   // If this is a non-dot comparison, make the VCMP node and we are done.
9947   if (!isDot) {
9948     SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
9949                               Op.getOperand(1), Op.getOperand(2),
9950                               DAG.getConstant(CompareOpc, dl, MVT::i32));
9951     return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
9952   }
9953 
9954   // Create the PPCISD altivec 'dot' comparison node.
9955   SDValue Ops[] = {
9956     Op.getOperand(2),  // LHS
9957     Op.getOperand(3),  // RHS
9958     DAG.getConstant(CompareOpc, dl, MVT::i32)
9959   };
9960   EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
9961   SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
9962 
9963   // Now that we have the comparison, emit a copy from the CR to a GPR.
9964   // This is flagged to the above dot comparison.
9965   SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
9966                                 DAG.getRegister(PPC::CR6, MVT::i32),
9967                                 CompNode.getValue(1));
9968 
9969   // Unpack the result based on how the target uses it.
9970   unsigned BitNo;   // Bit # of CR6.
9971   bool InvertBit;   // Invert result?
9972   switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
9973   default:  // Can't happen, don't crash on invalid number though.
9974   case 0:   // Return the value of the EQ bit of CR6.
9975     BitNo = 0; InvertBit = false;
9976     break;
9977   case 1:   // Return the inverted value of the EQ bit of CR6.
9978     BitNo = 0; InvertBit = true;
9979     break;
9980   case 2:   // Return the value of the LT bit of CR6.
9981     BitNo = 2; InvertBit = false;
9982     break;
9983   case 3:   // Return the inverted value of the LT bit of CR6.
9984     BitNo = 2; InvertBit = true;
9985     break;
9986   }
9987 
9988   // Shift the bit into the low position.
9989   Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
9990                       DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
9991   // Isolate the bit.
9992   Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
9993                       DAG.getConstant(1, dl, MVT::i32));
9994 
9995   // If we are supposed to, toggle the bit.
9996   if (InvertBit)
9997     Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
9998                         DAG.getConstant(1, dl, MVT::i32));
9999   return Flags;
10000 }
10001 
10002 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10003                                                SelectionDAG &DAG) const {
10004   // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10005   // the beginning of the argument list.
10006   int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10007   SDLoc DL(Op);
10008   switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10009   case Intrinsic::ppc_cfence: {
10010     assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10011     assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10012     return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10013                                       DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
10014                                                   Op.getOperand(ArgStart + 1)),
10015                                       Op.getOperand(0)),
10016                    0);
10017   }
10018   default:
10019     break;
10020   }
10021   return SDValue();
10022 }
10023 
10024 // Lower scalar BSWAP64 to xxbrd.
10025 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10026   SDLoc dl(Op);
10027   // MTVSRDD
10028   Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10029                    Op.getOperand(0));
10030   // XXBRD
10031   Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10032   // MFVSRD
10033   int VectorIndex = 0;
10034   if (Subtarget.isLittleEndian())
10035     VectorIndex = 1;
10036   Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
10037                    DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10038   return Op;
10039 }
10040 
10041 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10042 // compared to a value that is atomically loaded (atomic loads zero-extend).
10043 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10044                                                 SelectionDAG &DAG) const {
10045   assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10046          "Expecting an atomic compare-and-swap here.");
10047   SDLoc dl(Op);
10048   auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10049   EVT MemVT = AtomicNode->getMemoryVT();
10050   if (MemVT.getSizeInBits() >= 32)
10051     return Op;
10052 
10053   SDValue CmpOp = Op.getOperand(2);
10054   // If this is already correctly zero-extended, leave it alone.
10055   auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10056   if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10057     return Op;
10058 
10059   // Clear the high bits of the compare operand.
10060   unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10061   SDValue NewCmpOp =
10062     DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10063                 DAG.getConstant(MaskVal, dl, MVT::i32));
10064 
10065   // Replace the existing compare operand with the properly zero-extended one.
10066   SmallVector<SDValue, 4> Ops;
10067   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10068     Ops.push_back(AtomicNode->getOperand(i));
10069   Ops[2] = NewCmpOp;
10070   MachineMemOperand *MMO = AtomicNode->getMemOperand();
10071   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10072   auto NodeTy =
10073     (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
10074   return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10075 }
10076 
10077 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10078                                                  SelectionDAG &DAG) const {
10079   SDLoc dl(Op);
10080   // Create a stack slot that is 16-byte aligned.
10081   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10082   int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10083   EVT PtrVT = getPointerTy(DAG.getDataLayout());
10084   SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10085 
10086   // Store the input value into Value#0 of the stack slot.
10087   SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10088                                MachinePointerInfo());
10089   // Load it out.
10090   return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10091 }
10092 
10093 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10094                                                   SelectionDAG &DAG) const {
10095   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10096          "Should only be called for ISD::INSERT_VECTOR_ELT");
10097 
10098   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10099   // We have legal lowering for constant indices but not for variable ones.
10100   if (!C)
10101     return SDValue();
10102 
10103   EVT VT = Op.getValueType();
10104   SDLoc dl(Op);
10105   SDValue V1 = Op.getOperand(0);
10106   SDValue V2 = Op.getOperand(1);
10107   // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10108   if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10109     SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10110     unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10111     unsigned InsertAtElement = C->getZExtValue();
10112     unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10113     if (Subtarget.isLittleEndian()) {
10114       InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10115     }
10116     return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10117                        DAG.getConstant(InsertAtByte, dl, MVT::i32));
10118   }
10119   return Op;
10120 }
10121 
10122 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10123                                            SelectionDAG &DAG) const {
10124   SDLoc dl(Op);
10125   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10126   SDValue LoadChain = LN->getChain();
10127   SDValue BasePtr = LN->getBasePtr();
10128   EVT VT = Op.getValueType();
10129 
10130   if (VT != MVT::v256i1 && VT != MVT::v512i1)
10131     return Op;
10132 
10133   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10134   // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10135   // 2 or 4 vsx registers.
10136   assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10137          "Type unsupported without MMA");
10138   assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10139          "Type unsupported without paired vector support");
10140   Align Alignment = LN->getAlign();
10141   SmallVector<SDValue, 4> Loads;
10142   SmallVector<SDValue, 4> LoadChains;
10143   unsigned NumVecs = VT.getSizeInBits() / 128;
10144   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10145     SDValue Load =
10146         DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10147                     LN->getPointerInfo().getWithOffset(Idx * 16),
10148                     commonAlignment(Alignment, Idx * 16),
10149                     LN->getMemOperand()->getFlags(), LN->getAAInfo());
10150     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10151                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10152     Loads.push_back(Load);
10153     LoadChains.push_back(Load.getValue(1));
10154   }
10155   if (Subtarget.isLittleEndian()) {
10156     std::reverse(Loads.begin(), Loads.end());
10157     std::reverse(LoadChains.begin(), LoadChains.end());
10158   }
10159   SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10160   SDValue Value =
10161       DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
10162                   dl, VT, Loads);
10163   SDValue RetOps[] = {Value, TF};
10164   return DAG.getMergeValues(RetOps, dl);
10165 }
10166 
10167 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10168                                             SelectionDAG &DAG) const {
10169   SDLoc dl(Op);
10170   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10171   SDValue StoreChain = SN->getChain();
10172   SDValue BasePtr = SN->getBasePtr();
10173   SDValue Value = SN->getValue();
10174   EVT StoreVT = Value.getValueType();
10175 
10176   if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10177     return Op;
10178 
10179   // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10180   // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10181   // underlying registers individually.
10182   assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10183          "Type unsupported without MMA");
10184   assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10185          "Type unsupported without paired vector support");
10186   Align Alignment = SN->getAlign();
10187   SmallVector<SDValue, 4> Stores;
10188   unsigned NumVecs = 2;
10189   if (StoreVT == MVT::v512i1) {
10190     Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
10191     NumVecs = 4;
10192   }
10193   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10194     unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10195     SDValue Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
10196                               DAG.getConstant(VecNum, dl, MVT::i64));
10197     SDValue Store =
10198         DAG.getStore(StoreChain, dl, Elt, BasePtr,
10199                      SN->getPointerInfo().getWithOffset(Idx * 16),
10200                      commonAlignment(Alignment, Idx * 16),
10201                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
10202     BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10203                           DAG.getConstant(16, dl, BasePtr.getValueType()));
10204     Stores.push_back(Store);
10205   }
10206   SDValue TF = DAG.getTokenFactor(dl, Stores);
10207   return TF;
10208 }
10209 
10210 SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
10211   SDLoc dl(Op);
10212   if (Op.getValueType() == MVT::v4i32) {
10213     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10214 
10215     SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
10216     // +16 as shift amt.
10217     SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
10218     SDValue RHSSwap =   // = vrlw RHS, 16
10219       BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
10220 
10221     // Shrinkify inputs to v8i16.
10222     LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
10223     RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
10224     RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
10225 
10226     // Low parts multiplied together, generating 32-bit results (we ignore the
10227     // top parts).
10228     SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
10229                                         LHS, RHS, DAG, dl, MVT::v4i32);
10230 
10231     SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
10232                                       LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
10233     // Shift the high parts up 16 bits.
10234     HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
10235                               Neg16, DAG, dl);
10236     return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
10237   } else if (Op.getValueType() == MVT::v16i8) {
10238     SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
10239     bool isLittleEndian = Subtarget.isLittleEndian();
10240 
10241     // Multiply the even 8-bit parts, producing 16-bit sums.
10242     SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
10243                                            LHS, RHS, DAG, dl, MVT::v8i16);
10244     EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
10245 
10246     // Multiply the odd 8-bit parts, producing 16-bit sums.
10247     SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
10248                                           LHS, RHS, DAG, dl, MVT::v8i16);
10249     OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
10250 
10251     // Merge the results together.  Because vmuleub and vmuloub are
10252     // instructions with a big-endian bias, we must reverse the
10253     // element numbering and reverse the meaning of "odd" and "even"
10254     // when generating little endian code.
10255     int Ops[16];
10256     for (unsigned i = 0; i != 8; ++i) {
10257       if (isLittleEndian) {
10258         Ops[i*2  ] = 2*i;
10259         Ops[i*2+1] = 2*i+16;
10260       } else {
10261         Ops[i*2  ] = 2*i+1;
10262         Ops[i*2+1] = 2*i+1+16;
10263       }
10264     }
10265     if (isLittleEndian)
10266       return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
10267     else
10268       return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
10269   } else {
10270     llvm_unreachable("Unknown mul to lower!");
10271   }
10272 }
10273 
10274 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10275   bool IsStrict = Op->isStrictFPOpcode();
10276   if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10277       !Subtarget.hasP9Vector())
10278     return SDValue();
10279 
10280   return Op;
10281 }
10282 
10283 // Custom lowering for fpext vf32 to v2f64
10284 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10285 
10286   assert(Op.getOpcode() == ISD::FP_EXTEND &&
10287          "Should only be called for ISD::FP_EXTEND");
10288 
10289   // FIXME: handle extends from half precision float vectors on P9.
10290   // We only want to custom lower an extend from v2f32 to v2f64.
10291   if (Op.getValueType() != MVT::v2f64 ||
10292       Op.getOperand(0).getValueType() != MVT::v2f32)
10293     return SDValue();
10294 
10295   SDLoc dl(Op);
10296   SDValue Op0 = Op.getOperand(0);
10297 
10298   switch (Op0.getOpcode()) {
10299   default:
10300     return SDValue();
10301   case ISD::EXTRACT_SUBVECTOR: {
10302     assert(Op0.getNumOperands() == 2 &&
10303            isa<ConstantSDNode>(Op0->getOperand(1)) &&
10304            "Node should have 2 operands with second one being a constant!");
10305 
10306     if (Op0.getOperand(0).getValueType() != MVT::v4f32)
10307       return SDValue();
10308 
10309     // Custom lower is only done for high or low doubleword.
10310     int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
10311     if (Idx % 2 != 0)
10312       return SDValue();
10313 
10314     // Since input is v4f32, at this point Idx is either 0 or 2.
10315     // Shift to get the doubleword position we want.
10316     int DWord = Idx >> 1;
10317 
10318     // High and low word positions are different on little endian.
10319     if (Subtarget.isLittleEndian())
10320       DWord ^= 0x1;
10321 
10322     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
10323                        Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
10324   }
10325   case ISD::FADD:
10326   case ISD::FMUL:
10327   case ISD::FSUB: {
10328     SDValue NewLoad[2];
10329     for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
10330       // Ensure both input are loads.
10331       SDValue LdOp = Op0.getOperand(i);
10332       if (LdOp.getOpcode() != ISD::LOAD)
10333         return SDValue();
10334       // Generate new load node.
10335       LoadSDNode *LD = cast<LoadSDNode>(LdOp);
10336       SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10337       NewLoad[i] = DAG.getMemIntrinsicNode(
10338           PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10339           LD->getMemoryVT(), LD->getMemOperand());
10340     }
10341     SDValue NewOp =
10342         DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
10343                     NewLoad[1], Op0.getNode()->getFlags());
10344     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
10345                        DAG.getConstant(0, dl, MVT::i32));
10346   }
10347   case ISD::LOAD: {
10348     LoadSDNode *LD = cast<LoadSDNode>(Op0);
10349     SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
10350     SDValue NewLd = DAG.getMemIntrinsicNode(
10351         PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
10352         LD->getMemoryVT(), LD->getMemOperand());
10353     return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
10354                        DAG.getConstant(0, dl, MVT::i32));
10355   }
10356   }
10357   llvm_unreachable("ERROR:Should return for all cases within swtich.");
10358 }
10359 
10360 /// LowerOperation - Provide custom lowering hooks for some operations.
10361 ///
10362 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10363   switch (Op.getOpcode()) {
10364   default: llvm_unreachable("Wasn't expecting to be able to lower this!");
10365   case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
10366   case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
10367   case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
10368   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
10369   case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
10370   case ISD::SETCC:              return LowerSETCC(Op, DAG);
10371   case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
10372   case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);
10373 
10374   case ISD::INLINEASM:
10375   case ISD::INLINEASM_BR:       return LowerINLINEASM(Op, DAG);
10376   // Variable argument lowering.
10377   case ISD::VASTART:            return LowerVASTART(Op, DAG);
10378   case ISD::VAARG:              return LowerVAARG(Op, DAG);
10379   case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
10380 
10381   case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
10382   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
10383   case ISD::GET_DYNAMIC_AREA_OFFSET:
10384     return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
10385 
10386   // Exception handling lowering.
10387   case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
10388   case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
10389   case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);
10390 
10391   case ISD::LOAD:               return LowerLOAD(Op, DAG);
10392   case ISD::STORE:              return LowerSTORE(Op, DAG);
10393   case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
10394   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
10395   case ISD::STRICT_FP_TO_UINT:
10396   case ISD::STRICT_FP_TO_SINT:
10397   case ISD::FP_TO_UINT:
10398   case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
10399   case ISD::STRICT_UINT_TO_FP:
10400   case ISD::STRICT_SINT_TO_FP:
10401   case ISD::UINT_TO_FP:
10402   case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
10403   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
10404 
10405   // Lower 64-bit shifts.
10406   case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
10407   case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
10408   case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);
10409 
10410   case ISD::FSHL:               return LowerFunnelShift(Op, DAG);
10411   case ISD::FSHR:               return LowerFunnelShift(Op, DAG);
10412 
10413   // Vector-related lowering.
10414   case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
10415   case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
10416   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
10417   case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
10418   case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
10419   case ISD::MUL:                return LowerMUL(Op, DAG);
10420   case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
10421   case ISD::STRICT_FP_ROUND:
10422   case ISD::FP_ROUND:
10423     return LowerFP_ROUND(Op, DAG);
10424   case ISD::ROTL:               return LowerROTL(Op, DAG);
10425 
10426   // For counter-based loop handling.
10427   case ISD::INTRINSIC_W_CHAIN:  return SDValue();
10428 
10429   case ISD::BITCAST:            return LowerBITCAST(Op, DAG);
10430 
10431   // Frame & Return address.
10432   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
10433   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
10434 
10435   case ISD::INTRINSIC_VOID:
10436     return LowerINTRINSIC_VOID(Op, DAG);
10437   case ISD::BSWAP:
10438     return LowerBSWAP(Op, DAG);
10439   case ISD::ATOMIC_CMP_SWAP:
10440     return LowerATOMIC_CMP_SWAP(Op, DAG);
10441   }
10442 }
10443 
10444 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
10445                                            SmallVectorImpl<SDValue>&Results,
10446                                            SelectionDAG &DAG) const {
10447   SDLoc dl(N);
10448   switch (N->getOpcode()) {
10449   default:
10450     llvm_unreachable("Do not know how to custom type legalize this operation!");
10451   case ISD::READCYCLECOUNTER: {
10452     SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
10453     SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
10454 
10455     Results.push_back(
10456         DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
10457     Results.push_back(RTB.getValue(2));
10458     break;
10459   }
10460   case ISD::INTRINSIC_W_CHAIN: {
10461     if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
10462         Intrinsic::loop_decrement)
10463       break;
10464 
10465     assert(N->getValueType(0) == MVT::i1 &&
10466            "Unexpected result type for CTR decrement intrinsic");
10467     EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
10468                                  N->getValueType(0));
10469     SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
10470     SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
10471                                  N->getOperand(1));
10472 
10473     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
10474     Results.push_back(NewInt.getValue(1));
10475     break;
10476   }
10477   case ISD::VAARG: {
10478     if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
10479       return;
10480 
10481     EVT VT = N->getValueType(0);
10482 
10483     if (VT == MVT::i64) {
10484       SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
10485 
10486       Results.push_back(NewNode);
10487       Results.push_back(NewNode.getValue(1));
10488     }
10489     return;
10490   }
10491   case ISD::STRICT_FP_TO_SINT:
10492   case ISD::STRICT_FP_TO_UINT:
10493   case ISD::FP_TO_SINT:
10494   case ISD::FP_TO_UINT:
10495     // LowerFP_TO_INT() can only handle f32 and f64.
10496     if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
10497         MVT::ppcf128)
10498       return;
10499     Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
10500     return;
10501   case ISD::TRUNCATE: {
10502     if (!N->getValueType(0).isVector())
10503       return;
10504     SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
10505     if (Lowered)
10506       Results.push_back(Lowered);
10507     return;
10508   }
10509   case ISD::FSHL:
10510   case ISD::FSHR:
10511     // Don't handle funnel shifts here.
10512     return;
10513   case ISD::BITCAST:
10514     // Don't handle bitcast here.
10515     return;
10516   case ISD::FP_EXTEND:
10517     SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
10518     if (Lowered)
10519       Results.push_back(Lowered);
10520     return;
10521   }
10522 }
10523 
10524 //===----------------------------------------------------------------------===//
10525 //  Other Lowering Code
10526 //===----------------------------------------------------------------------===//
10527 
10528 static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
10529   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
10530   Function *Func = Intrinsic::getDeclaration(M, Id);
10531   return Builder.CreateCall(Func, {});
10532 }
10533 
10534 // The mappings for emitLeading/TrailingFence is taken from
10535 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10536 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
10537                                                  Instruction *Inst,
10538                                                  AtomicOrdering Ord) const {
10539   if (Ord == AtomicOrdering::SequentiallyConsistent)
10540     return callIntrinsic(Builder, Intrinsic::ppc_sync);
10541   if (isReleaseOrStronger(Ord))
10542     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10543   return nullptr;
10544 }
10545 
10546 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
10547                                                   Instruction *Inst,
10548                                                   AtomicOrdering Ord) const {
10549   if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
10550     // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10551     // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10552     // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10553     if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
10554       return Builder.CreateCall(
10555           Intrinsic::getDeclaration(
10556               Builder.GetInsertBlock()->getParent()->getParent(),
10557               Intrinsic::ppc_cfence, {Inst->getType()}),
10558           {Inst});
10559     // FIXME: Can use isync for rmw operation.
10560     return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
10561   }
10562   return nullptr;
10563 }
10564 
10565 MachineBasicBlock *
10566 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
10567                                     unsigned AtomicSize,
10568                                     unsigned BinOpcode,
10569                                     unsigned CmpOpcode,
10570                                     unsigned CmpPred) const {
10571   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10572   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10573 
10574   auto LoadMnemonic = PPC::LDARX;
10575   auto StoreMnemonic = PPC::STDCX;
10576   switch (AtomicSize) {
10577   default:
10578     llvm_unreachable("Unexpected size of atomic entity");
10579   case 1:
10580     LoadMnemonic = PPC::LBARX;
10581     StoreMnemonic = PPC::STBCX;
10582     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10583     break;
10584   case 2:
10585     LoadMnemonic = PPC::LHARX;
10586     StoreMnemonic = PPC::STHCX;
10587     assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
10588     break;
10589   case 4:
10590     LoadMnemonic = PPC::LWARX;
10591     StoreMnemonic = PPC::STWCX;
10592     break;
10593   case 8:
10594     LoadMnemonic = PPC::LDARX;
10595     StoreMnemonic = PPC::STDCX;
10596     break;
10597   }
10598 
10599   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10600   MachineFunction *F = BB->getParent();
10601   MachineFunction::iterator It = ++BB->getIterator();
10602 
10603   Register dest = MI.getOperand(0).getReg();
10604   Register ptrA = MI.getOperand(1).getReg();
10605   Register ptrB = MI.getOperand(2).getReg();
10606   Register incr = MI.getOperand(3).getReg();
10607   DebugLoc dl = MI.getDebugLoc();
10608 
10609   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10610   MachineBasicBlock *loop2MBB =
10611     CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10612   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10613   F->insert(It, loopMBB);
10614   if (CmpOpcode)
10615     F->insert(It, loop2MBB);
10616   F->insert(It, exitMBB);
10617   exitMBB->splice(exitMBB->begin(), BB,
10618                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
10619   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10620 
10621   MachineRegisterInfo &RegInfo = F->getRegInfo();
10622   Register TmpReg = (!BinOpcode) ? incr :
10623     RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
10624                                            : &PPC::GPRCRegClass);
10625 
10626   //  thisMBB:
10627   //   ...
10628   //   fallthrough --> loopMBB
10629   BB->addSuccessor(loopMBB);
10630 
10631   //  loopMBB:
10632   //   l[wd]arx dest, ptr
10633   //   add r0, dest, incr
10634   //   st[wd]cx. r0, ptr
10635   //   bne- loopMBB
10636   //   fallthrough --> exitMBB
10637 
10638   // For max/min...
10639   //  loopMBB:
10640   //   l[wd]arx dest, ptr
10641   //   cmpl?[wd] incr, dest
10642   //   bgt exitMBB
10643   //  loop2MBB:
10644   //   st[wd]cx. dest, ptr
10645   //   bne- loopMBB
10646   //   fallthrough --> exitMBB
10647 
10648   BB = loopMBB;
10649   BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
10650     .addReg(ptrA).addReg(ptrB);
10651   if (BinOpcode)
10652     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
10653   if (CmpOpcode) {
10654     // Signed comparisons of byte or halfword values must be sign-extended.
10655     if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
10656       Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10657       BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
10658               ExtReg).addReg(dest);
10659       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10660         .addReg(incr).addReg(ExtReg);
10661     } else
10662       BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10663         .addReg(incr).addReg(dest);
10664 
10665     BuildMI(BB, dl, TII->get(PPC::BCC))
10666       .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
10667     BB->addSuccessor(loop2MBB);
10668     BB->addSuccessor(exitMBB);
10669     BB = loop2MBB;
10670   }
10671   BuildMI(BB, dl, TII->get(StoreMnemonic))
10672     .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
10673   BuildMI(BB, dl, TII->get(PPC::BCC))
10674     .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
10675   BB->addSuccessor(loopMBB);
10676   BB->addSuccessor(exitMBB);
10677 
10678   //  exitMBB:
10679   //   ...
10680   BB = exitMBB;
10681   return BB;
10682 }
10683 
10684 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
10685   switch(MI.getOpcode()) {
10686   default:
10687     return false;
10688   case PPC::COPY:
10689     return TII->isSignExtended(MI);
10690   case PPC::LHA:
10691   case PPC::LHA8:
10692   case PPC::LHAU:
10693   case PPC::LHAU8:
10694   case PPC::LHAUX:
10695   case PPC::LHAUX8:
10696   case PPC::LHAX:
10697   case PPC::LHAX8:
10698   case PPC::LWA:
10699   case PPC::LWAUX:
10700   case PPC::LWAX:
10701   case PPC::LWAX_32:
10702   case PPC::LWA_32:
10703   case PPC::PLHA:
10704   case PPC::PLHA8:
10705   case PPC::PLHA8pc:
10706   case PPC::PLHApc:
10707   case PPC::PLWA:
10708   case PPC::PLWA8:
10709   case PPC::PLWA8pc:
10710   case PPC::PLWApc:
10711   case PPC::EXTSB:
10712   case PPC::EXTSB8:
10713   case PPC::EXTSB8_32_64:
10714   case PPC::EXTSB8_rec:
10715   case PPC::EXTSB_rec:
10716   case PPC::EXTSH:
10717   case PPC::EXTSH8:
10718   case PPC::EXTSH8_32_64:
10719   case PPC::EXTSH8_rec:
10720   case PPC::EXTSH_rec:
10721   case PPC::EXTSW:
10722   case PPC::EXTSWSLI:
10723   case PPC::EXTSWSLI_32_64:
10724   case PPC::EXTSWSLI_32_64_rec:
10725   case PPC::EXTSWSLI_rec:
10726   case PPC::EXTSW_32:
10727   case PPC::EXTSW_32_64:
10728   case PPC::EXTSW_32_64_rec:
10729   case PPC::EXTSW_rec:
10730   case PPC::SRAW:
10731   case PPC::SRAWI:
10732   case PPC::SRAWI_rec:
10733   case PPC::SRAW_rec:
10734     return true;
10735   }
10736   return false;
10737 }
10738 
10739 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
10740     MachineInstr &MI, MachineBasicBlock *BB,
10741     bool is8bit, // operation
10742     unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
10743   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10744   const PPCInstrInfo *TII = Subtarget.getInstrInfo();
10745 
10746   // If this is a signed comparison and the value being compared is not known
10747   // to be sign extended, sign extend it here.
10748   DebugLoc dl = MI.getDebugLoc();
10749   MachineFunction *F = BB->getParent();
10750   MachineRegisterInfo &RegInfo = F->getRegInfo();
10751   Register incr = MI.getOperand(3).getReg();
10752   bool IsSignExtended = Register::isVirtualRegister(incr) &&
10753     isSignExtended(*RegInfo.getVRegDef(incr), TII);
10754 
10755   if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
10756     Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
10757     BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
10758         .addReg(MI.getOperand(3).getReg());
10759     MI.getOperand(3).setReg(ValueReg);
10760   }
10761   // If we support part-word atomic mnemonics, just use them
10762   if (Subtarget.hasPartwordAtomics())
10763     return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
10764                             CmpPred);
10765 
10766   // In 64 bit mode we have to use 64 bits for addresses, even though the
10767   // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
10768   // registers without caring whether they're 32 or 64, but here we're
10769   // doing actual arithmetic on the addresses.
10770   bool is64bit = Subtarget.isPPC64();
10771   bool isLittleEndian = Subtarget.isLittleEndian();
10772   unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
10773 
10774   const BasicBlock *LLVM_BB = BB->getBasicBlock();
10775   MachineFunction::iterator It = ++BB->getIterator();
10776 
10777   Register dest = MI.getOperand(0).getReg();
10778   Register ptrA = MI.getOperand(1).getReg();
10779   Register ptrB = MI.getOperand(2).getReg();
10780 
10781   MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
10782   MachineBasicBlock *loop2MBB =
10783       CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
10784   MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
10785   F->insert(It, loopMBB);
10786   if (CmpOpcode)
10787     F->insert(It, loop2MBB);
10788   F->insert(It, exitMBB);
10789   exitMBB->splice(exitMBB->begin(), BB,
10790                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
10791   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
10792 
10793   const TargetRegisterClass *RC =
10794       is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
10795   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
10796 
10797   Register PtrReg = RegInfo.createVirtualRegister(RC);
10798   Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
10799   Register ShiftReg =
10800       isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
10801   Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
10802   Register MaskReg = RegInfo.createVirtualRegister(GPRC);
10803   Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
10804   Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
10805   Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
10806   Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
10807   Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
10808   Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
10809   Register Ptr1Reg;
10810   Register TmpReg =
10811       (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
10812 
10813   //  thisMBB:
10814   //   ...
10815   //   fallthrough --> loopMBB
10816   BB->addSuccessor(loopMBB);
10817 
10818   // The 4-byte load must be aligned, while a char or short may be
10819   // anywhere in the word.  Hence all this nasty bookkeeping code.
10820   //   add ptr1, ptrA, ptrB [copy if ptrA==0]
10821   //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10822   //   xori shift, shift1, 24 [16]
10823   //   rlwinm ptr, ptr1, 0, 0, 29
10824   //   slw incr2, incr, shift
10825   //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10826   //   slw mask, mask2, shift
10827   //  loopMBB:
10828   //   lwarx tmpDest, ptr
10829   //   add tmp, tmpDest, incr2
10830   //   andc tmp2, tmpDest, mask
10831   //   and tmp3, tmp, mask
10832   //   or tmp4, tmp3, tmp2
10833   //   stwcx. tmp4, ptr
10834   //   bne- loopMBB
10835   //   fallthrough --> exitMBB
10836   //   srw dest, tmpDest, shift
10837   if (ptrA != ZeroReg) {
10838     Ptr1Reg = RegInfo.createVirtualRegister(RC);
10839     BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
10840         .addReg(ptrA)
10841         .addReg(ptrB);
10842   } else {
10843     Ptr1Reg = ptrB;
10844   }
10845   // We need use 32-bit subregister to avoid mismatch register class in 64-bit
10846   // mode.
10847   BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
10848       .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
10849       .addImm(3)
10850       .addImm(27)
10851       .addImm(is8bit ? 28 : 27);
10852   if (!isLittleEndian)
10853     BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
10854         .addReg(Shift1Reg)
10855         .addImm(is8bit ? 24 : 16);
10856   if (is64bit)
10857     BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
10858         .addReg(Ptr1Reg)
10859         .addImm(0)
10860         .addImm(61);
10861   else
10862     BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
10863         .addReg(Ptr1Reg)
10864         .addImm(0)
10865         .addImm(0)
10866         .addImm(29);
10867   BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
10868   if (is8bit)
10869     BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
10870   else {
10871     BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
10872     BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
10873         .addReg(Mask3Reg)
10874         .addImm(65535);
10875   }
10876   BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
10877       .addReg(Mask2Reg)
10878       .addReg(ShiftReg);
10879 
10880   BB = loopMBB;
10881   BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
10882       .addReg(ZeroReg)
10883       .addReg(PtrReg);
10884   if (BinOpcode)
10885     BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
10886         .addReg(Incr2Reg)
10887         .addReg(TmpDestReg);
10888   BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
10889       .addReg(TmpDestReg)
10890       .addReg(MaskReg);
10891   BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
10892   if (CmpOpcode) {
10893     // For unsigned comparisons, we can directly compare the shifted values.
10894     // For signed comparisons we shift and sign extend.
10895     Register SReg = RegInfo.createVirtualRegister(GPRC);
10896     BuildMI(BB, dl, TII->get(PPC::AND), SReg)
10897         .addReg(TmpDestReg)
10898         .addReg(MaskReg);
10899     unsigned ValueReg = SReg;
10900     unsigned CmpReg = Incr2Reg;
10901     if (CmpOpcode == PPC::CMPW) {
10902       ValueReg = RegInfo.createVirtualRegister(GPRC);
10903       BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
10904           .addReg(SReg)
10905           .addReg(ShiftReg);
10906       Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
10907       BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
10908           .addReg(ValueReg);
10909       ValueReg = ValueSReg;
10910       CmpReg = incr;
10911     }
10912     BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
10913         .addReg(CmpReg)
10914         .addReg(ValueReg);
10915     BuildMI(BB, dl, TII->get(PPC::BCC))
10916         .addImm(CmpPred)
10917         .addReg(PPC::CR0)
10918         .addMBB(exitMBB);
10919     BB->addSuccessor(loop2MBB);
10920     BB->addSuccessor(exitMBB);
10921     BB = loop2MBB;
10922   }
10923   BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
10924   BuildMI(BB, dl, TII->get(PPC::STWCX))
10925       .addReg(Tmp4Reg)
10926       .addReg(ZeroReg)
10927       .addReg(PtrReg);
10928   BuildMI(BB, dl, TII->get(PPC::BCC))
10929       .addImm(PPC::PRED_NE)
10930       .addReg(PPC::CR0)
10931       .addMBB(loopMBB);
10932   BB->addSuccessor(loopMBB);
10933   BB->addSuccessor(exitMBB);
10934 
10935   //  exitMBB:
10936   //   ...
10937   BB = exitMBB;
10938   BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
10939       .addReg(TmpDestReg)
10940       .addReg(ShiftReg);
10941   return BB;
10942 }
10943 
10944 llvm::MachineBasicBlock *
10945 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
10946                                     MachineBasicBlock *MBB) const {
10947   DebugLoc DL = MI.getDebugLoc();
10948   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
10949   const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
10950 
10951   MachineFunction *MF = MBB->getParent();
10952   MachineRegisterInfo &MRI = MF->getRegInfo();
10953 
10954   const BasicBlock *BB = MBB->getBasicBlock();
10955   MachineFunction::iterator I = ++MBB->getIterator();
10956 
10957   Register DstReg = MI.getOperand(0).getReg();
10958   const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
10959   assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
10960   Register mainDstReg = MRI.createVirtualRegister(RC);
10961   Register restoreDstReg = MRI.createVirtualRegister(RC);
10962 
10963   MVT PVT = getPointerTy(MF->getDataLayout());
10964   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
10965          "Invalid Pointer Size!");
10966   // For v = setjmp(buf), we generate
10967   //
10968   // thisMBB:
10969   //  SjLjSetup mainMBB
10970   //  bl mainMBB
10971   //  v_restore = 1
10972   //  b sinkMBB
10973   //
10974   // mainMBB:
10975   //  buf[LabelOffset] = LR
10976   //  v_main = 0
10977   //
10978   // sinkMBB:
10979   //  v = phi(main, restore)
10980   //
10981 
10982   MachineBasicBlock *thisMBB = MBB;
10983   MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
10984   MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
10985   MF->insert(I, mainMBB);
10986   MF->insert(I, sinkMBB);
10987 
10988   MachineInstrBuilder MIB;
10989 
10990   // Transfer the remainder of BB and its successor edges to sinkMBB.
10991   sinkMBB->splice(sinkMBB->begin(), MBB,
10992                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
10993   sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
10994 
10995   // Note that the structure of the jmp_buf used here is not compatible
10996   // with that used by libc, and is not designed to be. Specifically, it
10997   // stores only those 'reserved' registers that LLVM does not otherwise
10998   // understand how to spill. Also, by convention, by the time this
10999   // intrinsic is called, Clang has already stored the frame address in the
11000   // first slot of the buffer and stack address in the third. Following the
11001   // X86 target code, we'll store the jump address in the second slot. We also
11002   // need to save the TOC pointer (R2) to handle jumps between shared
11003   // libraries, and that will be stored in the fourth slot. The thread
11004   // identifier (R13) is not affected.
11005 
11006   // thisMBB:
11007   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11008   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11009   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11010 
11011   // Prepare IP either in reg.
11012   const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11013   Register LabelReg = MRI.createVirtualRegister(PtrRC);
11014   Register BufReg = MI.getOperand(1).getReg();
11015 
11016   if (Subtarget.is64BitELFABI()) {
11017     setUsesTOCBasePtr(*MBB->getParent());
11018     MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11019               .addReg(PPC::X2)
11020               .addImm(TOCOffset)
11021               .addReg(BufReg)
11022               .cloneMemRefs(MI);
11023   }
11024 
11025   // Naked functions never have a base pointer, and so we use r1. For all
11026   // other functions, this decision must be delayed until during PEI.
11027   unsigned BaseReg;
11028   if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11029     BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11030   else
11031     BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11032 
11033   MIB = BuildMI(*thisMBB, MI, DL,
11034                 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11035             .addReg(BaseReg)
11036             .addImm(BPOffset)
11037             .addReg(BufReg)
11038             .cloneMemRefs(MI);
11039 
11040   // Setup
11041   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11042   MIB.addRegMask(TRI->getNoPreservedMask());
11043 
11044   BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11045 
11046   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11047           .addMBB(mainMBB);
11048   MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11049 
11050   thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11051   thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11052 
11053   // mainMBB:
11054   //  mainDstReg = 0
11055   MIB =
11056       BuildMI(mainMBB, DL,
11057               TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11058 
11059   // Store IP
11060   if (Subtarget.isPPC64()) {
11061     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11062             .addReg(LabelReg)
11063             .addImm(LabelOffset)
11064             .addReg(BufReg);
11065   } else {
11066     MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11067             .addReg(LabelReg)
11068             .addImm(LabelOffset)
11069             .addReg(BufReg);
11070   }
11071   MIB.cloneMemRefs(MI);
11072 
11073   BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11074   mainMBB->addSuccessor(sinkMBB);
11075 
11076   // sinkMBB:
11077   BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11078           TII->get(PPC::PHI), DstReg)
11079     .addReg(mainDstReg).addMBB(mainMBB)
11080     .addReg(restoreDstReg).addMBB(thisMBB);
11081 
11082   MI.eraseFromParent();
11083   return sinkMBB;
11084 }
11085 
11086 MachineBasicBlock *
11087 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
11088                                      MachineBasicBlock *MBB) const {
11089   DebugLoc DL = MI.getDebugLoc();
11090   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11091 
11092   MachineFunction *MF = MBB->getParent();
11093   MachineRegisterInfo &MRI = MF->getRegInfo();
11094 
11095   MVT PVT = getPointerTy(MF->getDataLayout());
11096   assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11097          "Invalid Pointer Size!");
11098 
11099   const TargetRegisterClass *RC =
11100     (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11101   Register Tmp = MRI.createVirtualRegister(RC);
11102   // Since FP is only updated here but NOT referenced, it's treated as GPR.
11103   unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11104   unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11105   unsigned BP =
11106       (PVT == MVT::i64)
11107           ? PPC::X30
11108           : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11109                                                               : PPC::R30);
11110 
11111   MachineInstrBuilder MIB;
11112 
11113   const int64_t LabelOffset = 1 * PVT.getStoreSize();
11114   const int64_t SPOffset    = 2 * PVT.getStoreSize();
11115   const int64_t TOCOffset   = 3 * PVT.getStoreSize();
11116   const int64_t BPOffset    = 4 * PVT.getStoreSize();
11117 
11118   Register BufReg = MI.getOperand(0).getReg();
11119 
11120   // Reload FP (the jumped-to function may not have had a
11121   // frame pointer, and if so, then its r31 will be restored
11122   // as necessary).
11123   if (PVT == MVT::i64) {
11124     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11125             .addImm(0)
11126             .addReg(BufReg);
11127   } else {
11128     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11129             .addImm(0)
11130             .addReg(BufReg);
11131   }
11132   MIB.cloneMemRefs(MI);
11133 
11134   // Reload IP
11135   if (PVT == MVT::i64) {
11136     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11137             .addImm(LabelOffset)
11138             .addReg(BufReg);
11139   } else {
11140     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11141             .addImm(LabelOffset)
11142             .addReg(BufReg);
11143   }
11144   MIB.cloneMemRefs(MI);
11145 
11146   // Reload SP
11147   if (PVT == MVT::i64) {
11148     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11149             .addImm(SPOffset)
11150             .addReg(BufReg);
11151   } else {
11152     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11153             .addImm(SPOffset)
11154             .addReg(BufReg);
11155   }
11156   MIB.cloneMemRefs(MI);
11157 
11158   // Reload BP
11159   if (PVT == MVT::i64) {
11160     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11161             .addImm(BPOffset)
11162             .addReg(BufReg);
11163   } else {
11164     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11165             .addImm(BPOffset)
11166             .addReg(BufReg);
11167   }
11168   MIB.cloneMemRefs(MI);
11169 
11170   // Reload TOC
11171   if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11172     setUsesTOCBasePtr(*MBB->getParent());
11173     MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11174               .addImm(TOCOffset)
11175               .addReg(BufReg)
11176               .cloneMemRefs(MI);
11177   }
11178 
11179   // Jump
11180   BuildMI(*MBB, MI, DL,
11181           TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11182   BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11183 
11184   MI.eraseFromParent();
11185   return MBB;
11186 }
11187 
11188 bool PPCTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
11189   // If the function specifically requests inline stack probes, emit them.
11190   if (MF.getFunction().hasFnAttribute("probe-stack"))
11191     return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11192            "inline-asm";
11193   return false;
11194 }
11195 
11196 unsigned PPCTargetLowering::getStackProbeSize(MachineFunction &MF) const {
11197   const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11198   unsigned StackAlign = TFI->getStackAlignment();
11199   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
11200          "Unexpected stack alignment");
11201   // The default stack probe size is 4096 if the function has no
11202   // stack-probe-size attribute.
11203   unsigned StackProbeSize = 4096;
11204   const Function &Fn = MF.getFunction();
11205   if (Fn.hasFnAttribute("stack-probe-size"))
11206     Fn.getFnAttribute("stack-probe-size")
11207         .getValueAsString()
11208         .getAsInteger(0, StackProbeSize);
11209   // Round down to the stack alignment.
11210   StackProbeSize &= ~(StackAlign - 1);
11211   return StackProbeSize ? StackProbeSize : StackAlign;
11212 }
11213 
11214 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is splitted
11215 // into three phases. In the first phase, it uses pseudo instruction
11216 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11217 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11218 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
11219 // MaxCallFrameSize so that it can calculate correct data area pointer.
11220 MachineBasicBlock *
11221 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
11222                                     MachineBasicBlock *MBB) const {
11223   const bool isPPC64 = Subtarget.isPPC64();
11224   MachineFunction *MF = MBB->getParent();
11225   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11226   DebugLoc DL = MI.getDebugLoc();
11227   const unsigned ProbeSize = getStackProbeSize(*MF);
11228   const BasicBlock *ProbedBB = MBB->getBasicBlock();
11229   MachineRegisterInfo &MRI = MF->getRegInfo();
11230   // The CFG of probing stack looks as
11231   //         +-----+
11232   //         | MBB |
11233   //         +--+--+
11234   //            |
11235   //       +----v----+
11236   //  +--->+ TestMBB +---+
11237   //  |    +----+----+   |
11238   //  |         |        |
11239   //  |   +-----v----+   |
11240   //  +---+ BlockMBB |   |
11241   //      +----------+   |
11242   //                     |
11243   //       +---------+   |
11244   //       | TailMBB +<--+
11245   //       +---------+
11246   // In MBB, calculate previous frame pointer and final stack pointer.
11247   // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11248   // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11249   // TailMBB is spliced via \p MI.
11250   MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11251   MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11252   MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11253 
11254   MachineFunction::iterator MBBIter = ++MBB->getIterator();
11255   MF->insert(MBBIter, TestMBB);
11256   MF->insert(MBBIter, BlockMBB);
11257   MF->insert(MBBIter, TailMBB);
11258 
11259   const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11260   const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11261 
11262   Register DstReg = MI.getOperand(0).getReg();
11263   Register NegSizeReg = MI.getOperand(1).getReg();
11264   Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11265   Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11266   Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11267   Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11268 
11269   // Since value of NegSizeReg might be realigned in prologepilog, insert a
11270   // PREPARE_PROBED_ALLOCA pseudo instruction to get actual FramePointer and
11271   // NegSize.
11272   unsigned ProbeOpc;
11273   if (!MRI.hasOneNonDBGUse(NegSizeReg))
11274     ProbeOpc =
11275         isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
11276   else
11277     // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
11278     // and NegSizeReg will be allocated in the same phyreg to avoid
11279     // redundant copy when NegSizeReg has only one use which is current MI and
11280     // will be replaced by PREPARE_PROBED_ALLOCA then.
11281     ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
11282                        : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
11283   BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
11284       .addDef(ActualNegSizeReg)
11285       .addReg(NegSizeReg)
11286       .add(MI.getOperand(2))
11287       .add(MI.getOperand(3));
11288 
11289   // Calculate final stack pointer, which equals to SP + ActualNegSize.
11290   BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
11291           FinalStackPtr)
11292       .addReg(SPReg)
11293       .addReg(ActualNegSizeReg);
11294 
11295   // Materialize a scratch register for update.
11296   int64_t NegProbeSize = -(int64_t)ProbeSize;
11297   assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
11298   Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11299   if (!isInt<16>(NegProbeSize)) {
11300     Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11301     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
11302         .addImm(NegProbeSize >> 16);
11303     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
11304             ScratchReg)
11305         .addReg(TempReg)
11306         .addImm(NegProbeSize & 0xFFFF);
11307   } else
11308     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
11309         .addImm(NegProbeSize);
11310 
11311   {
11312     // Probing leading residual part.
11313     Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11314     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
11315         .addReg(ActualNegSizeReg)
11316         .addReg(ScratchReg);
11317     Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11318     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
11319         .addReg(Div)
11320         .addReg(ScratchReg);
11321     Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11322     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
11323         .addReg(Mul)
11324         .addReg(ActualNegSizeReg);
11325     BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11326         .addReg(FramePointer)
11327         .addReg(SPReg)
11328         .addReg(NegMod);
11329   }
11330 
11331   {
11332     // Remaining part should be multiple of ProbeSize.
11333     Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
11334     BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
11335         .addReg(SPReg)
11336         .addReg(FinalStackPtr);
11337     BuildMI(TestMBB, DL, TII->get(PPC::BCC))
11338         .addImm(PPC::PRED_EQ)
11339         .addReg(CmpResult)
11340         .addMBB(TailMBB);
11341     TestMBB->addSuccessor(BlockMBB);
11342     TestMBB->addSuccessor(TailMBB);
11343   }
11344 
11345   {
11346     // Touch the block.
11347     // |P...|P...|P...
11348     BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
11349         .addReg(FramePointer)
11350         .addReg(SPReg)
11351         .addReg(ScratchReg);
11352     BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
11353     BlockMBB->addSuccessor(TestMBB);
11354   }
11355 
11356   // Calculation of MaxCallFrameSize is deferred to prologepilog, use
11357   // DYNAREAOFFSET pseudo instruction to get the future result.
11358   Register MaxCallFrameSizeReg =
11359       MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11360   BuildMI(TailMBB, DL,
11361           TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
11362           MaxCallFrameSizeReg)
11363       .add(MI.getOperand(2))
11364       .add(MI.getOperand(3));
11365   BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
11366       .addReg(SPReg)
11367       .addReg(MaxCallFrameSizeReg);
11368 
11369   // Splice instructions after MI to TailMBB.
11370   TailMBB->splice(TailMBB->end(), MBB,
11371                   std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11372   TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
11373   MBB->addSuccessor(TestMBB);
11374 
11375   // Delete the pseudo instruction.
11376   MI.eraseFromParent();
11377 
11378   ++NumDynamicAllocaProbed;
11379   return TailMBB;
11380 }
11381 
11382 MachineBasicBlock *
11383 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11384                                                MachineBasicBlock *BB) const {
11385   if (MI.getOpcode() == TargetOpcode::STACKMAP ||
11386       MI.getOpcode() == TargetOpcode::PATCHPOINT) {
11387     if (Subtarget.is64BitELFABI() &&
11388         MI.getOpcode() == TargetOpcode::PATCHPOINT &&
11389         !Subtarget.isUsingPCRelativeCalls()) {
11390       // Call lowering should have added an r2 operand to indicate a dependence
11391       // on the TOC base pointer value. It can't however, because there is no
11392       // way to mark the dependence as implicit there, and so the stackmap code
11393       // will confuse it with a regular operand. Instead, add the dependence
11394       // here.
11395       MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
11396     }
11397 
11398     return emitPatchPoint(MI, BB);
11399   }
11400 
11401   if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
11402       MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
11403     return emitEHSjLjSetJmp(MI, BB);
11404   } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
11405              MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
11406     return emitEHSjLjLongJmp(MI, BB);
11407   }
11408 
11409   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11410 
11411   // To "insert" these instructions we actually have to insert their
11412   // control-flow patterns.
11413   const BasicBlock *LLVM_BB = BB->getBasicBlock();
11414   MachineFunction::iterator It = ++BB->getIterator();
11415 
11416   MachineFunction *F = BB->getParent();
11417 
11418   if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11419       MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
11420       MI.getOpcode() == PPC::SELECT_I8) {
11421     SmallVector<MachineOperand, 2> Cond;
11422     if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
11423         MI.getOpcode() == PPC::SELECT_CC_I8)
11424       Cond.push_back(MI.getOperand(4));
11425     else
11426       Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
11427     Cond.push_back(MI.getOperand(1));
11428 
11429     DebugLoc dl = MI.getDebugLoc();
11430     TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
11431                       MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
11432   } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
11433              MI.getOpcode() == PPC::SELECT_CC_F8 ||
11434              MI.getOpcode() == PPC::SELECT_CC_F16 ||
11435              MI.getOpcode() == PPC::SELECT_CC_VRRC ||
11436              MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
11437              MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
11438              MI.getOpcode() == PPC::SELECT_CC_VSRC ||
11439              MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
11440              MI.getOpcode() == PPC::SELECT_CC_SPE ||
11441              MI.getOpcode() == PPC::SELECT_F4 ||
11442              MI.getOpcode() == PPC::SELECT_F8 ||
11443              MI.getOpcode() == PPC::SELECT_F16 ||
11444              MI.getOpcode() == PPC::SELECT_SPE ||
11445              MI.getOpcode() == PPC::SELECT_SPE4 ||
11446              MI.getOpcode() == PPC::SELECT_VRRC ||
11447              MI.getOpcode() == PPC::SELECT_VSFRC ||
11448              MI.getOpcode() == PPC::SELECT_VSSRC ||
11449              MI.getOpcode() == PPC::SELECT_VSRC) {
11450     // The incoming instruction knows the destination vreg to set, the
11451     // condition code register to branch on, the true/false values to
11452     // select between, and a branch opcode to use.
11453 
11454     //  thisMBB:
11455     //  ...
11456     //   TrueVal = ...
11457     //   cmpTY ccX, r1, r2
11458     //   bCC copy1MBB
11459     //   fallthrough --> copy0MBB
11460     MachineBasicBlock *thisMBB = BB;
11461     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
11462     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11463     DebugLoc dl = MI.getDebugLoc();
11464     F->insert(It, copy0MBB);
11465     F->insert(It, sinkMBB);
11466 
11467     // Transfer the remainder of BB and its successor edges to sinkMBB.
11468     sinkMBB->splice(sinkMBB->begin(), BB,
11469                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11470     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11471 
11472     // Next, add the true and fallthrough blocks as its successors.
11473     BB->addSuccessor(copy0MBB);
11474     BB->addSuccessor(sinkMBB);
11475 
11476     if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
11477         MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
11478         MI.getOpcode() == PPC::SELECT_F16 ||
11479         MI.getOpcode() == PPC::SELECT_SPE4 ||
11480         MI.getOpcode() == PPC::SELECT_SPE ||
11481         MI.getOpcode() == PPC::SELECT_VRRC ||
11482         MI.getOpcode() == PPC::SELECT_VSFRC ||
11483         MI.getOpcode() == PPC::SELECT_VSSRC ||
11484         MI.getOpcode() == PPC::SELECT_VSRC) {
11485       BuildMI(BB, dl, TII->get(PPC::BC))
11486           .addReg(MI.getOperand(1).getReg())
11487           .addMBB(sinkMBB);
11488     } else {
11489       unsigned SelectPred = MI.getOperand(4).getImm();
11490       BuildMI(BB, dl, TII->get(PPC::BCC))
11491           .addImm(SelectPred)
11492           .addReg(MI.getOperand(1).getReg())
11493           .addMBB(sinkMBB);
11494     }
11495 
11496     //  copy0MBB:
11497     //   %FalseValue = ...
11498     //   # fallthrough to sinkMBB
11499     BB = copy0MBB;
11500 
11501     // Update machine-CFG edges
11502     BB->addSuccessor(sinkMBB);
11503 
11504     //  sinkMBB:
11505     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
11506     //  ...
11507     BB = sinkMBB;
11508     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
11509         .addReg(MI.getOperand(3).getReg())
11510         .addMBB(copy0MBB)
11511         .addReg(MI.getOperand(2).getReg())
11512         .addMBB(thisMBB);
11513   } else if (MI.getOpcode() == PPC::ReadTB) {
11514     // To read the 64-bit time-base register on a 32-bit target, we read the
11515     // two halves. Should the counter have wrapped while it was being read, we
11516     // need to try again.
11517     // ...
11518     // readLoop:
11519     // mfspr Rx,TBU # load from TBU
11520     // mfspr Ry,TB  # load from TB
11521     // mfspr Rz,TBU # load from TBU
11522     // cmpw crX,Rx,Rz # check if 'old'='new'
11523     // bne readLoop   # branch if they're not equal
11524     // ...
11525 
11526     MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
11527     MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
11528     DebugLoc dl = MI.getDebugLoc();
11529     F->insert(It, readMBB);
11530     F->insert(It, sinkMBB);
11531 
11532     // Transfer the remainder of BB and its successor edges to sinkMBB.
11533     sinkMBB->splice(sinkMBB->begin(), BB,
11534                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11535     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
11536 
11537     BB->addSuccessor(readMBB);
11538     BB = readMBB;
11539 
11540     MachineRegisterInfo &RegInfo = F->getRegInfo();
11541     Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11542     Register LoReg = MI.getOperand(0).getReg();
11543     Register HiReg = MI.getOperand(1).getReg();
11544 
11545     BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
11546     BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
11547     BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
11548 
11549     Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
11550 
11551     BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
11552         .addReg(HiReg)
11553         .addReg(ReadAgainReg);
11554     BuildMI(BB, dl, TII->get(PPC::BCC))
11555         .addImm(PPC::PRED_NE)
11556         .addReg(CmpReg)
11557         .addMBB(readMBB);
11558 
11559     BB->addSuccessor(readMBB);
11560     BB->addSuccessor(sinkMBB);
11561   } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
11562     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
11563   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
11564     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
11565   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
11566     BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
11567   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
11568     BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
11569 
11570   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
11571     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
11572   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
11573     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
11574   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
11575     BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
11576   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
11577     BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
11578 
11579   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
11580     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
11581   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
11582     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
11583   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
11584     BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
11585   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
11586     BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
11587 
11588   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
11589     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
11590   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
11591     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
11592   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
11593     BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
11594   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
11595     BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
11596 
11597   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
11598     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
11599   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
11600     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
11601   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
11602     BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
11603   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
11604     BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
11605 
11606   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
11607     BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
11608   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
11609     BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
11610   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
11611     BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
11612   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
11613     BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
11614 
11615   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
11616     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
11617   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
11618     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
11619   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
11620     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
11621   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
11622     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
11623 
11624   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
11625     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
11626   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
11627     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
11628   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
11629     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
11630   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
11631     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
11632 
11633   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
11634     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
11635   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
11636     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
11637   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
11638     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
11639   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
11640     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
11641 
11642   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
11643     BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
11644   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
11645     BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
11646   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
11647     BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
11648   else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
11649     BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
11650 
11651   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
11652     BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
11653   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
11654     BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
11655   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
11656     BB = EmitAtomicBinary(MI, BB, 4, 0);
11657   else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
11658     BB = EmitAtomicBinary(MI, BB, 8, 0);
11659   else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
11660            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
11661            (Subtarget.hasPartwordAtomics() &&
11662             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
11663            (Subtarget.hasPartwordAtomics() &&
11664             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
11665     bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
11666 
11667     auto LoadMnemonic = PPC::LDARX;
11668     auto StoreMnemonic = PPC::STDCX;
11669     switch (MI.getOpcode()) {
11670     default:
11671       llvm_unreachable("Compare and swap of unknown size");
11672     case PPC::ATOMIC_CMP_SWAP_I8:
11673       LoadMnemonic = PPC::LBARX;
11674       StoreMnemonic = PPC::STBCX;
11675       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11676       break;
11677     case PPC::ATOMIC_CMP_SWAP_I16:
11678       LoadMnemonic = PPC::LHARX;
11679       StoreMnemonic = PPC::STHCX;
11680       assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
11681       break;
11682     case PPC::ATOMIC_CMP_SWAP_I32:
11683       LoadMnemonic = PPC::LWARX;
11684       StoreMnemonic = PPC::STWCX;
11685       break;
11686     case PPC::ATOMIC_CMP_SWAP_I64:
11687       LoadMnemonic = PPC::LDARX;
11688       StoreMnemonic = PPC::STDCX;
11689       break;
11690     }
11691     Register dest = MI.getOperand(0).getReg();
11692     Register ptrA = MI.getOperand(1).getReg();
11693     Register ptrB = MI.getOperand(2).getReg();
11694     Register oldval = MI.getOperand(3).getReg();
11695     Register newval = MI.getOperand(4).getReg();
11696     DebugLoc dl = MI.getDebugLoc();
11697 
11698     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11699     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11700     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11701     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11702     F->insert(It, loop1MBB);
11703     F->insert(It, loop2MBB);
11704     F->insert(It, midMBB);
11705     F->insert(It, exitMBB);
11706     exitMBB->splice(exitMBB->begin(), BB,
11707                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11708     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11709 
11710     //  thisMBB:
11711     //   ...
11712     //   fallthrough --> loopMBB
11713     BB->addSuccessor(loop1MBB);
11714 
11715     // loop1MBB:
11716     //   l[bhwd]arx dest, ptr
11717     //   cmp[wd] dest, oldval
11718     //   bne- midMBB
11719     // loop2MBB:
11720     //   st[bhwd]cx. newval, ptr
11721     //   bne- loopMBB
11722     //   b exitBB
11723     // midMBB:
11724     //   st[bhwd]cx. dest, ptr
11725     // exitBB:
11726     BB = loop1MBB;
11727     BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
11728     BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
11729         .addReg(oldval)
11730         .addReg(dest);
11731     BuildMI(BB, dl, TII->get(PPC::BCC))
11732         .addImm(PPC::PRED_NE)
11733         .addReg(PPC::CR0)
11734         .addMBB(midMBB);
11735     BB->addSuccessor(loop2MBB);
11736     BB->addSuccessor(midMBB);
11737 
11738     BB = loop2MBB;
11739     BuildMI(BB, dl, TII->get(StoreMnemonic))
11740         .addReg(newval)
11741         .addReg(ptrA)
11742         .addReg(ptrB);
11743     BuildMI(BB, dl, TII->get(PPC::BCC))
11744         .addImm(PPC::PRED_NE)
11745         .addReg(PPC::CR0)
11746         .addMBB(loop1MBB);
11747     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11748     BB->addSuccessor(loop1MBB);
11749     BB->addSuccessor(exitMBB);
11750 
11751     BB = midMBB;
11752     BuildMI(BB, dl, TII->get(StoreMnemonic))
11753         .addReg(dest)
11754         .addReg(ptrA)
11755         .addReg(ptrB);
11756     BB->addSuccessor(exitMBB);
11757 
11758     //  exitMBB:
11759     //   ...
11760     BB = exitMBB;
11761   } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
11762              MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
11763     // We must use 64-bit registers for addresses when targeting 64-bit,
11764     // since we're actually doing arithmetic on them.  Other registers
11765     // can be 32-bit.
11766     bool is64bit = Subtarget.isPPC64();
11767     bool isLittleEndian = Subtarget.isLittleEndian();
11768     bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
11769 
11770     Register dest = MI.getOperand(0).getReg();
11771     Register ptrA = MI.getOperand(1).getReg();
11772     Register ptrB = MI.getOperand(2).getReg();
11773     Register oldval = MI.getOperand(3).getReg();
11774     Register newval = MI.getOperand(4).getReg();
11775     DebugLoc dl = MI.getDebugLoc();
11776 
11777     MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
11778     MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
11779     MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
11780     MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11781     F->insert(It, loop1MBB);
11782     F->insert(It, loop2MBB);
11783     F->insert(It, midMBB);
11784     F->insert(It, exitMBB);
11785     exitMBB->splice(exitMBB->begin(), BB,
11786                     std::next(MachineBasicBlock::iterator(MI)), BB->end());
11787     exitMBB->transferSuccessorsAndUpdatePHIs(BB);
11788 
11789     MachineRegisterInfo &RegInfo = F->getRegInfo();
11790     const TargetRegisterClass *RC =
11791         is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11792     const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11793 
11794     Register PtrReg = RegInfo.createVirtualRegister(RC);
11795     Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
11796     Register ShiftReg =
11797         isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11798     Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
11799     Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
11800     Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
11801     Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
11802     Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11803     Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11804     Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11805     Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11806     Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11807     Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11808     Register Ptr1Reg;
11809     Register TmpReg = RegInfo.createVirtualRegister(GPRC);
11810     Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11811     //  thisMBB:
11812     //   ...
11813     //   fallthrough --> loopMBB
11814     BB->addSuccessor(loop1MBB);
11815 
11816     // The 4-byte load must be aligned, while a char or short may be
11817     // anywhere in the word.  Hence all this nasty bookkeeping code.
11818     //   add ptr1, ptrA, ptrB [copy if ptrA==0]
11819     //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11820     //   xori shift, shift1, 24 [16]
11821     //   rlwinm ptr, ptr1, 0, 0, 29
11822     //   slw newval2, newval, shift
11823     //   slw oldval2, oldval,shift
11824     //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11825     //   slw mask, mask2, shift
11826     //   and newval3, newval2, mask
11827     //   and oldval3, oldval2, mask
11828     // loop1MBB:
11829     //   lwarx tmpDest, ptr
11830     //   and tmp, tmpDest, mask
11831     //   cmpw tmp, oldval3
11832     //   bne- midMBB
11833     // loop2MBB:
11834     //   andc tmp2, tmpDest, mask
11835     //   or tmp4, tmp2, newval3
11836     //   stwcx. tmp4, ptr
11837     //   bne- loop1MBB
11838     //   b exitBB
11839     // midMBB:
11840     //   stwcx. tmpDest, ptr
11841     // exitBB:
11842     //   srw dest, tmpDest, shift
11843     if (ptrA != ZeroReg) {
11844       Ptr1Reg = RegInfo.createVirtualRegister(RC);
11845       BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11846           .addReg(ptrA)
11847           .addReg(ptrB);
11848     } else {
11849       Ptr1Reg = ptrB;
11850     }
11851 
11852     // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11853     // mode.
11854     BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11855         .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11856         .addImm(3)
11857         .addImm(27)
11858         .addImm(is8bit ? 28 : 27);
11859     if (!isLittleEndian)
11860       BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11861           .addReg(Shift1Reg)
11862           .addImm(is8bit ? 24 : 16);
11863     if (is64bit)
11864       BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11865           .addReg(Ptr1Reg)
11866           .addImm(0)
11867           .addImm(61);
11868     else
11869       BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11870           .addReg(Ptr1Reg)
11871           .addImm(0)
11872           .addImm(0)
11873           .addImm(29);
11874     BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
11875         .addReg(newval)
11876         .addReg(ShiftReg);
11877     BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
11878         .addReg(oldval)
11879         .addReg(ShiftReg);
11880     if (is8bit)
11881       BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11882     else {
11883       BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11884       BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11885           .addReg(Mask3Reg)
11886           .addImm(65535);
11887     }
11888     BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11889         .addReg(Mask2Reg)
11890         .addReg(ShiftReg);
11891     BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
11892         .addReg(NewVal2Reg)
11893         .addReg(MaskReg);
11894     BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
11895         .addReg(OldVal2Reg)
11896         .addReg(MaskReg);
11897 
11898     BB = loop1MBB;
11899     BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11900         .addReg(ZeroReg)
11901         .addReg(PtrReg);
11902     BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
11903         .addReg(TmpDestReg)
11904         .addReg(MaskReg);
11905     BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
11906         .addReg(TmpReg)
11907         .addReg(OldVal3Reg);
11908     BuildMI(BB, dl, TII->get(PPC::BCC))
11909         .addImm(PPC::PRED_NE)
11910         .addReg(PPC::CR0)
11911         .addMBB(midMBB);
11912     BB->addSuccessor(loop2MBB);
11913     BB->addSuccessor(midMBB);
11914 
11915     BB = loop2MBB;
11916     BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11917         .addReg(TmpDestReg)
11918         .addReg(MaskReg);
11919     BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
11920         .addReg(Tmp2Reg)
11921         .addReg(NewVal3Reg);
11922     BuildMI(BB, dl, TII->get(PPC::STWCX))
11923         .addReg(Tmp4Reg)
11924         .addReg(ZeroReg)
11925         .addReg(PtrReg);
11926     BuildMI(BB, dl, TII->get(PPC::BCC))
11927         .addImm(PPC::PRED_NE)
11928         .addReg(PPC::CR0)
11929         .addMBB(loop1MBB);
11930     BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
11931     BB->addSuccessor(loop1MBB);
11932     BB->addSuccessor(exitMBB);
11933 
11934     BB = midMBB;
11935     BuildMI(BB, dl, TII->get(PPC::STWCX))
11936         .addReg(TmpDestReg)
11937         .addReg(ZeroReg)
11938         .addReg(PtrReg);
11939     BB->addSuccessor(exitMBB);
11940 
11941     //  exitMBB:
11942     //   ...
11943     BB = exitMBB;
11944     BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
11945         .addReg(TmpReg)
11946         .addReg(ShiftReg);
11947   } else if (MI.getOpcode() == PPC::FADDrtz) {
11948     // This pseudo performs an FADD with rounding mode temporarily forced
11949     // to round-to-zero.  We emit this via custom inserter since the FPSCR
11950     // is not modeled at the SelectionDAG level.
11951     Register Dest = MI.getOperand(0).getReg();
11952     Register Src1 = MI.getOperand(1).getReg();
11953     Register Src2 = MI.getOperand(2).getReg();
11954     DebugLoc dl = MI.getDebugLoc();
11955 
11956     MachineRegisterInfo &RegInfo = F->getRegInfo();
11957     Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
11958 
11959     // Save FPSCR value.
11960     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
11961 
11962     // Set rounding mode to round-to-zero.
11963     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
11964         .addImm(31)
11965         .addReg(PPC::RM, RegState::ImplicitDefine);
11966 
11967     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
11968         .addImm(30)
11969         .addReg(PPC::RM, RegState::ImplicitDefine);
11970 
11971     // Perform addition.
11972     auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
11973                    .addReg(Src1)
11974                    .addReg(Src2);
11975     if (MI.getFlag(MachineInstr::NoFPExcept))
11976       MIB.setMIFlag(MachineInstr::NoFPExcept);
11977 
11978     // Restore FPSCR value.
11979     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
11980   } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11981              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
11982              MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11983              MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
11984     unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
11985                        MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
11986                           ? PPC::ANDI8_rec
11987                           : PPC::ANDI_rec;
11988     bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
11989                  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
11990 
11991     MachineRegisterInfo &RegInfo = F->getRegInfo();
11992     Register Dest = RegInfo.createVirtualRegister(
11993         Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
11994 
11995     DebugLoc Dl = MI.getDebugLoc();
11996     BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
11997         .addReg(MI.getOperand(1).getReg())
11998         .addImm(1);
11999     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12000             MI.getOperand(0).getReg())
12001         .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12002   } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12003     DebugLoc Dl = MI.getDebugLoc();
12004     MachineRegisterInfo &RegInfo = F->getRegInfo();
12005     Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12006     BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12007     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12008             MI.getOperand(0).getReg())
12009         .addReg(CRReg);
12010   } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12011     DebugLoc Dl = MI.getDebugLoc();
12012     unsigned Imm = MI.getOperand(1).getImm();
12013     BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12014     BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12015             MI.getOperand(0).getReg())
12016         .addReg(PPC::CR0EQ);
12017   } else if (MI.getOpcode() == PPC::SETRNDi) {
12018     DebugLoc dl = MI.getDebugLoc();
12019     Register OldFPSCRReg = MI.getOperand(0).getReg();
12020 
12021     // Save FPSCR value.
12022     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12023 
12024     // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12025     // the following settings:
12026     //   00 Round to nearest
12027     //   01 Round to 0
12028     //   10 Round to +inf
12029     //   11 Round to -inf
12030 
12031     // When the operand is immediate, using the two least significant bits of
12032     // the immediate to set the bits 62:63 of FPSCR.
12033     unsigned Mode = MI.getOperand(1).getImm();
12034     BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12035         .addImm(31)
12036         .addReg(PPC::RM, RegState::ImplicitDefine);
12037 
12038     BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12039         .addImm(30)
12040         .addReg(PPC::RM, RegState::ImplicitDefine);
12041   } else if (MI.getOpcode() == PPC::SETRND) {
12042     DebugLoc dl = MI.getDebugLoc();
12043 
12044     // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12045     // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12046     // If the target doesn't have DirectMove, we should use stack to do the
12047     // conversion, because the target doesn't have the instructions like mtvsrd
12048     // or mfvsrd to do this conversion directly.
12049     auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12050       if (Subtarget.hasDirectMove()) {
12051         BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12052           .addReg(SrcReg);
12053       } else {
12054         // Use stack to do the register copy.
12055         unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12056         MachineRegisterInfo &RegInfo = F->getRegInfo();
12057         const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12058         if (RC == &PPC::F8RCRegClass) {
12059           // Copy register from F8RCRegClass to G8RCRegclass.
12060           assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12061                  "Unsupported RegClass.");
12062 
12063           StoreOp = PPC::STFD;
12064           LoadOp = PPC::LD;
12065         } else {
12066           // Copy register from G8RCRegClass to F8RCRegclass.
12067           assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12068                  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12069                  "Unsupported RegClass.");
12070         }
12071 
12072         MachineFrameInfo &MFI = F->getFrameInfo();
12073         int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12074 
12075         MachineMemOperand *MMOStore = F->getMachineMemOperand(
12076             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12077             MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
12078             MFI.getObjectAlign(FrameIdx));
12079 
12080         // Store the SrcReg into the stack.
12081         BuildMI(*BB, MI, dl, TII->get(StoreOp))
12082           .addReg(SrcReg)
12083           .addImm(0)
12084           .addFrameIndex(FrameIdx)
12085           .addMemOperand(MMOStore);
12086 
12087         MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12088             MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12089             MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
12090             MFI.getObjectAlign(FrameIdx));
12091 
12092         // Load from the stack where SrcReg is stored, and save to DestReg,
12093         // so we have done the RegClass conversion from RegClass::SrcReg to
12094         // RegClass::DestReg.
12095         BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12096           .addImm(0)
12097           .addFrameIndex(FrameIdx)
12098           .addMemOperand(MMOLoad);
12099       }
12100     };
12101 
12102     Register OldFPSCRReg = MI.getOperand(0).getReg();
12103 
12104     // Save FPSCR value.
12105     BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12106 
12107     // When the operand is gprc register, use two least significant bits of the
12108     // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12109     //
12110     // copy OldFPSCRTmpReg, OldFPSCRReg
12111     // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12112     // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12113     // copy NewFPSCRReg, NewFPSCRTmpReg
12114     // mtfsf 255, NewFPSCRReg
12115     MachineOperand SrcOp = MI.getOperand(1);
12116     MachineRegisterInfo &RegInfo = F->getRegInfo();
12117     Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12118 
12119     copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12120 
12121     Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12122     Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12123 
12124     // The first operand of INSERT_SUBREG should be a register which has
12125     // subregisters, we only care about its RegClass, so we should use an
12126     // IMPLICIT_DEF register.
12127     BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12128     BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12129       .addReg(ImDefReg)
12130       .add(SrcOp)
12131       .addImm(1);
12132 
12133     Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12134     BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12135       .addReg(OldFPSCRTmpReg)
12136       .addReg(ExtSrcReg)
12137       .addImm(0)
12138       .addImm(62);
12139 
12140     Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12141     copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12142 
12143     // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12144     // bits of FPSCR.
12145     BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12146       .addImm(255)
12147       .addReg(NewFPSCRReg)
12148       .addImm(0)
12149       .addImm(0);
12150   } else if (MI.getOpcode() == PPC::SETFLM) {
12151     DebugLoc Dl = MI.getDebugLoc();
12152 
12153     // Result of setflm is previous FPSCR content, so we need to save it first.
12154     Register OldFPSCRReg = MI.getOperand(0).getReg();
12155     BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12156 
12157     // Put bits in 32:63 to FPSCR.
12158     Register NewFPSCRReg = MI.getOperand(1).getReg();
12159     BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12160         .addImm(255)
12161         .addReg(NewFPSCRReg)
12162         .addImm(0)
12163         .addImm(0);
12164   } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12165              MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12166     return emitProbedAlloca(MI, BB);
12167   } else {
12168     llvm_unreachable("Unexpected instr type to insert");
12169   }
12170 
12171   MI.eraseFromParent(); // The pseudo instruction is gone now.
12172   return BB;
12173 }
12174 
12175 //===----------------------------------------------------------------------===//
12176 // Target Optimization Hooks
12177 //===----------------------------------------------------------------------===//
12178 
12179 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12180   // For the estimates, convergence is quadratic, so we essentially double the
12181   // number of digits correct after every iteration. For both FRE and FRSQRTE,
12182   // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12183   // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12184   int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12185   if (VT.getScalarType() == MVT::f64)
12186     RefinementSteps++;
12187   return RefinementSteps;
12188 }
12189 
12190 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12191                                             const DenormalMode &Mode) const {
12192   // We only have VSX Vector Test for software Square Root.
12193   EVT VT = Op.getValueType();
12194   if (!isTypeLegal(MVT::i1) ||
12195       (VT != MVT::f64 &&
12196        ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12197     return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
12198 
12199   SDLoc DL(Op);
12200   // The output register of FTSQRT is CR field.
12201   SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
12202   // ftsqrt BF,FRB
12203   // Let e_b be the unbiased exponent of the double-precision
12204   // floating-point operand in register FRB.
12205   // fe_flag is set to 1 if either of the following conditions occurs.
12206   //   - The double-precision floating-point operand in register FRB is a zero,
12207   //     a NaN, or an infinity, or a negative value.
12208   //   - e_b is less than or equal to -970.
12209   // Otherwise fe_flag is set to 0.
12210   // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
12211   // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12212   // exponent is less than -970)
12213   SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12214   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12215                                     FTSQRT, SRIdxVal),
12216                  0);
12217 }
12218 
12219 SDValue
12220 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12221                                                SelectionDAG &DAG) const {
12222   // We only have VSX Vector Square Root.
12223   EVT VT = Op.getValueType();
12224   if (VT != MVT::f64 &&
12225       ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12226     return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
12227 
12228   return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12229 }
12230 
12231 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12232                                            int Enabled, int &RefinementSteps,
12233                                            bool &UseOneConstNR,
12234                                            bool Reciprocal) const {
12235   EVT VT = Operand.getValueType();
12236   if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
12237       (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
12238       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12239       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12240     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12241       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12242 
12243     // The Newton-Raphson computation with a single constant does not provide
12244     // enough accuracy on some CPUs.
12245     UseOneConstNR = !Subtarget.needsTwoConstNR();
12246     return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
12247   }
12248   return SDValue();
12249 }
12250 
12251 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
12252                                             int Enabled,
12253                                             int &RefinementSteps) const {
12254   EVT VT = Operand.getValueType();
12255   if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
12256       (VT == MVT::f64 && Subtarget.hasFRE()) ||
12257       (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
12258       (VT == MVT::v2f64 && Subtarget.hasVSX())) {
12259     if (RefinementSteps == ReciprocalEstimate::Unspecified)
12260       RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
12261     return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
12262   }
12263   return SDValue();
12264 }
12265 
12266 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
12267   // Note: This functionality is used only when unsafe-fp-math is enabled, and
12268   // on cores with reciprocal estimates (which are used when unsafe-fp-math is
12269   // enabled for division), this functionality is redundant with the default
12270   // combiner logic (once the division -> reciprocal/multiply transformation
12271   // has taken place). As a result, this matters more for older cores than for
12272   // newer ones.
12273 
12274   // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12275   // reciprocal if there are two or more FDIVs (for embedded cores with only
12276   // one FP pipeline) for three or more FDIVs (for generic OOO cores).
12277   switch (Subtarget.getCPUDirective()) {
12278   default:
12279     return 3;
12280   case PPC::DIR_440:
12281   case PPC::DIR_A2:
12282   case PPC::DIR_E500:
12283   case PPC::DIR_E500mc:
12284   case PPC::DIR_E5500:
12285     return 2;
12286   }
12287 }
12288 
12289 // isConsecutiveLSLoc needs to work even if all adds have not yet been
12290 // collapsed, and so we need to look through chains of them.
12291 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
12292                                      int64_t& Offset, SelectionDAG &DAG) {
12293   if (DAG.isBaseWithConstantOffset(Loc)) {
12294     Base = Loc.getOperand(0);
12295     Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
12296 
12297     // The base might itself be a base plus an offset, and if so, accumulate
12298     // that as well.
12299     getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
12300   }
12301 }
12302 
12303 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
12304                             unsigned Bytes, int Dist,
12305                             SelectionDAG &DAG) {
12306   if (VT.getSizeInBits() / 8 != Bytes)
12307     return false;
12308 
12309   SDValue BaseLoc = Base->getBasePtr();
12310   if (Loc.getOpcode() == ISD::FrameIndex) {
12311     if (BaseLoc.getOpcode() != ISD::FrameIndex)
12312       return false;
12313     const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
12314     int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
12315     int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
12316     int FS  = MFI.getObjectSize(FI);
12317     int BFS = MFI.getObjectSize(BFI);
12318     if (FS != BFS || FS != (int)Bytes) return false;
12319     return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
12320   }
12321 
12322   SDValue Base1 = Loc, Base2 = BaseLoc;
12323   int64_t Offset1 = 0, Offset2 = 0;
12324   getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
12325   getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
12326   if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
12327     return true;
12328 
12329   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12330   const GlobalValue *GV1 = nullptr;
12331   const GlobalValue *GV2 = nullptr;
12332   Offset1 = 0;
12333   Offset2 = 0;
12334   bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
12335   bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
12336   if (isGA1 && isGA2 && GV1 == GV2)
12337     return Offset1 == (Offset2 + Dist*Bytes);
12338   return false;
12339 }
12340 
12341 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
12342 // not enforce equality of the chain operands.
12343 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
12344                             unsigned Bytes, int Dist,
12345                             SelectionDAG &DAG) {
12346   if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
12347     EVT VT = LS->getMemoryVT();
12348     SDValue Loc = LS->getBasePtr();
12349     return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
12350   }
12351 
12352   if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
12353     EVT VT;
12354     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12355     default: return false;
12356     case Intrinsic::ppc_altivec_lvx:
12357     case Intrinsic::ppc_altivec_lvxl:
12358     case Intrinsic::ppc_vsx_lxvw4x:
12359     case Intrinsic::ppc_vsx_lxvw4x_be:
12360       VT = MVT::v4i32;
12361       break;
12362     case Intrinsic::ppc_vsx_lxvd2x:
12363     case Intrinsic::ppc_vsx_lxvd2x_be:
12364       VT = MVT::v2f64;
12365       break;
12366     case Intrinsic::ppc_altivec_lvebx:
12367       VT = MVT::i8;
12368       break;
12369     case Intrinsic::ppc_altivec_lvehx:
12370       VT = MVT::i16;
12371       break;
12372     case Intrinsic::ppc_altivec_lvewx:
12373       VT = MVT::i32;
12374       break;
12375     }
12376 
12377     return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
12378   }
12379 
12380   if (N->getOpcode() == ISD::INTRINSIC_VOID) {
12381     EVT VT;
12382     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
12383     default: return false;
12384     case Intrinsic::ppc_altivec_stvx:
12385     case Intrinsic::ppc_altivec_stvxl:
12386     case Intrinsic::ppc_vsx_stxvw4x:
12387       VT = MVT::v4i32;
12388       break;
12389     case Intrinsic::ppc_vsx_stxvd2x:
12390       VT = MVT::v2f64;
12391       break;
12392     case Intrinsic::ppc_vsx_stxvw4x_be:
12393       VT = MVT::v4i32;
12394       break;
12395     case Intrinsic::ppc_vsx_stxvd2x_be:
12396       VT = MVT::v2f64;
12397       break;
12398     case Intrinsic::ppc_altivec_stvebx:
12399       VT = MVT::i8;
12400       break;
12401     case Intrinsic::ppc_altivec_stvehx:
12402       VT = MVT::i16;
12403       break;
12404     case Intrinsic::ppc_altivec_stvewx:
12405       VT = MVT::i32;
12406       break;
12407     }
12408 
12409     return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
12410   }
12411 
12412   return false;
12413 }
12414 
12415 // Return true is there is a nearyby consecutive load to the one provided
12416 // (regardless of alignment). We search up and down the chain, looking though
12417 // token factors and other loads (but nothing else). As a result, a true result
12418 // indicates that it is safe to create a new consecutive load adjacent to the
12419 // load provided.
12420 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
12421   SDValue Chain = LD->getChain();
12422   EVT VT = LD->getMemoryVT();
12423 
12424   SmallSet<SDNode *, 16> LoadRoots;
12425   SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
12426   SmallSet<SDNode *, 16> Visited;
12427 
12428   // First, search up the chain, branching to follow all token-factor operands.
12429   // If we find a consecutive load, then we're done, otherwise, record all
12430   // nodes just above the top-level loads and token factors.
12431   while (!Queue.empty()) {
12432     SDNode *ChainNext = Queue.pop_back_val();
12433     if (!Visited.insert(ChainNext).second)
12434       continue;
12435 
12436     if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
12437       if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12438         return true;
12439 
12440       if (!Visited.count(ChainLD->getChain().getNode()))
12441         Queue.push_back(ChainLD->getChain().getNode());
12442     } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
12443       for (const SDUse &O : ChainNext->ops())
12444         if (!Visited.count(O.getNode()))
12445           Queue.push_back(O.getNode());
12446     } else
12447       LoadRoots.insert(ChainNext);
12448   }
12449 
12450   // Second, search down the chain, starting from the top-level nodes recorded
12451   // in the first phase. These top-level nodes are the nodes just above all
12452   // loads and token factors. Starting with their uses, recursively look though
12453   // all loads (just the chain uses) and token factors to find a consecutive
12454   // load.
12455   Visited.clear();
12456   Queue.clear();
12457 
12458   for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
12459        IE = LoadRoots.end(); I != IE; ++I) {
12460     Queue.push_back(*I);
12461 
12462     while (!Queue.empty()) {
12463       SDNode *LoadRoot = Queue.pop_back_val();
12464       if (!Visited.insert(LoadRoot).second)
12465         continue;
12466 
12467       if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
12468         if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
12469           return true;
12470 
12471       for (SDNode::use_iterator UI = LoadRoot->use_begin(),
12472            UE = LoadRoot->use_end(); UI != UE; ++UI)
12473         if (((isa<MemSDNode>(*UI) &&
12474             cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
12475             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
12476           Queue.push_back(*UI);
12477     }
12478   }
12479 
12480   return false;
12481 }
12482 
12483 /// This function is called when we have proved that a SETCC node can be replaced
12484 /// by subtraction (and other supporting instructions) so that the result of
12485 /// comparison is kept in a GPR instead of CR. This function is purely for
12486 /// codegen purposes and has some flags to guide the codegen process.
12487 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
12488                                      bool Swap, SDLoc &DL, SelectionDAG &DAG) {
12489   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12490 
12491   // Zero extend the operands to the largest legal integer. Originally, they
12492   // must be of a strictly smaller size.
12493   auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
12494                          DAG.getConstant(Size, DL, MVT::i32));
12495   auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
12496                          DAG.getConstant(Size, DL, MVT::i32));
12497 
12498   // Swap if needed. Depends on the condition code.
12499   if (Swap)
12500     std::swap(Op0, Op1);
12501 
12502   // Subtract extended integers.
12503   auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
12504 
12505   // Move the sign bit to the least significant position and zero out the rest.
12506   // Now the least significant bit carries the result of original comparison.
12507   auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
12508                              DAG.getConstant(Size - 1, DL, MVT::i32));
12509   auto Final = Shifted;
12510 
12511   // Complement the result if needed. Based on the condition code.
12512   if (Complement)
12513     Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
12514                         DAG.getConstant(1, DL, MVT::i64));
12515 
12516   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
12517 }
12518 
12519 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
12520                                                   DAGCombinerInfo &DCI) const {
12521   assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
12522 
12523   SelectionDAG &DAG = DCI.DAG;
12524   SDLoc DL(N);
12525 
12526   // Size of integers being compared has a critical role in the following
12527   // analysis, so we prefer to do this when all types are legal.
12528   if (!DCI.isAfterLegalizeDAG())
12529     return SDValue();
12530 
12531   // If all users of SETCC extend its value to a legal integer type
12532   // then we replace SETCC with a subtraction
12533   for (SDNode::use_iterator UI = N->use_begin(),
12534        UE = N->use_end(); UI != UE; ++UI) {
12535     if (UI->getOpcode() != ISD::ZERO_EXTEND)
12536       return SDValue();
12537   }
12538 
12539   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
12540   auto OpSize = N->getOperand(0).getValueSizeInBits();
12541 
12542   unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
12543 
12544   if (OpSize < Size) {
12545     switch (CC) {
12546     default: break;
12547     case ISD::SETULT:
12548       return generateEquivalentSub(N, Size, false, false, DL, DAG);
12549     case ISD::SETULE:
12550       return generateEquivalentSub(N, Size, true, true, DL, DAG);
12551     case ISD::SETUGT:
12552       return generateEquivalentSub(N, Size, false, true, DL, DAG);
12553     case ISD::SETUGE:
12554       return generateEquivalentSub(N, Size, true, false, DL, DAG);
12555     }
12556   }
12557 
12558   return SDValue();
12559 }
12560 
12561 SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
12562                                                   DAGCombinerInfo &DCI) const {
12563   SelectionDAG &DAG = DCI.DAG;
12564   SDLoc dl(N);
12565 
12566   assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
12567   // If we're tracking CR bits, we need to be careful that we don't have:
12568   //   trunc(binary-ops(zext(x), zext(y)))
12569   // or
12570   //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
12571   // such that we're unnecessarily moving things into GPRs when it would be
12572   // better to keep them in CR bits.
12573 
12574   // Note that trunc here can be an actual i1 trunc, or can be the effective
12575   // truncation that comes from a setcc or select_cc.
12576   if (N->getOpcode() == ISD::TRUNCATE &&
12577       N->getValueType(0) != MVT::i1)
12578     return SDValue();
12579 
12580   if (N->getOperand(0).getValueType() != MVT::i32 &&
12581       N->getOperand(0).getValueType() != MVT::i64)
12582     return SDValue();
12583 
12584   if (N->getOpcode() == ISD::SETCC ||
12585       N->getOpcode() == ISD::SELECT_CC) {
12586     // If we're looking at a comparison, then we need to make sure that the
12587     // high bits (all except for the first) don't matter the result.
12588     ISD::CondCode CC =
12589       cast<CondCodeSDNode>(N->getOperand(
12590         N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
12591     unsigned OpBits = N->getOperand(0).getValueSizeInBits();
12592 
12593     if (ISD::isSignedIntSetCC(CC)) {
12594       if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
12595           DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
12596         return SDValue();
12597     } else if (ISD::isUnsignedIntSetCC(CC)) {
12598       if (!DAG.MaskedValueIsZero(N->getOperand(0),
12599                                  APInt::getHighBitsSet(OpBits, OpBits-1)) ||
12600           !DAG.MaskedValueIsZero(N->getOperand(1),
12601                                  APInt::getHighBitsSet(OpBits, OpBits-1)))
12602         return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
12603                                              : SDValue());
12604     } else {
12605       // This is neither a signed nor an unsigned comparison, just make sure
12606       // that the high bits are equal.
12607       KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
12608       KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
12609 
12610       // We don't really care about what is known about the first bit (if
12611       // anything), so pretend that it is known zero for both to ensure they can
12612       // be compared as constants.
12613       Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
12614       Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
12615 
12616       if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
12617           Op1Known.getConstant() != Op2Known.getConstant())
12618         return SDValue();
12619     }
12620   }
12621 
12622   // We now know that the higher-order bits are irrelevant, we just need to
12623   // make sure that all of the intermediate operations are bit operations, and
12624   // all inputs are extensions.
12625   if (N->getOperand(0).getOpcode() != ISD::AND &&
12626       N->getOperand(0).getOpcode() != ISD::OR  &&
12627       N->getOperand(0).getOpcode() != ISD::XOR &&
12628       N->getOperand(0).getOpcode() != ISD::SELECT &&
12629       N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
12630       N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
12631       N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
12632       N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
12633       N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
12634     return SDValue();
12635 
12636   if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
12637       N->getOperand(1).getOpcode() != ISD::AND &&
12638       N->getOperand(1).getOpcode() != ISD::OR  &&
12639       N->getOperand(1).getOpcode() != ISD::XOR &&
12640       N->getOperand(1).getOpcode() != ISD::SELECT &&
12641       N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
12642       N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
12643       N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
12644       N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
12645       N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
12646     return SDValue();
12647 
12648   SmallVector<SDValue, 4> Inputs;
12649   SmallVector<SDValue, 8> BinOps, PromOps;
12650   SmallPtrSet<SDNode *, 16> Visited;
12651 
12652   for (unsigned i = 0; i < 2; ++i) {
12653     if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12654           N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12655           N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12656           N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12657         isa<ConstantSDNode>(N->getOperand(i)))
12658       Inputs.push_back(N->getOperand(i));
12659     else
12660       BinOps.push_back(N->getOperand(i));
12661 
12662     if (N->getOpcode() == ISD::TRUNCATE)
12663       break;
12664   }
12665 
12666   // Visit all inputs, collect all binary operations (and, or, xor and
12667   // select) that are all fed by extensions.
12668   while (!BinOps.empty()) {
12669     SDValue BinOp = BinOps.pop_back_val();
12670 
12671     if (!Visited.insert(BinOp.getNode()).second)
12672       continue;
12673 
12674     PromOps.push_back(BinOp);
12675 
12676     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12677       // The condition of the select is not promoted.
12678       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12679         continue;
12680       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12681         continue;
12682 
12683       if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12684             BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12685             BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
12686            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
12687           isa<ConstantSDNode>(BinOp.getOperand(i))) {
12688         Inputs.push_back(BinOp.getOperand(i));
12689       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12690                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
12691                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12692                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12693                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
12694                  BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12695                  BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
12696                  BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
12697                  BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
12698         BinOps.push_back(BinOp.getOperand(i));
12699       } else {
12700         // We have an input that is not an extension or another binary
12701         // operation; we'll abort this transformation.
12702         return SDValue();
12703       }
12704     }
12705   }
12706 
12707   // Make sure that this is a self-contained cluster of operations (which
12708   // is not quite the same thing as saying that everything has only one
12709   // use).
12710   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12711     if (isa<ConstantSDNode>(Inputs[i]))
12712       continue;
12713 
12714     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12715                               UE = Inputs[i].getNode()->use_end();
12716          UI != UE; ++UI) {
12717       SDNode *User = *UI;
12718       if (User != N && !Visited.count(User))
12719         return SDValue();
12720 
12721       // Make sure that we're not going to promote the non-output-value
12722       // operand(s) or SELECT or SELECT_CC.
12723       // FIXME: Although we could sometimes handle this, and it does occur in
12724       // practice that one of the condition inputs to the select is also one of
12725       // the outputs, we currently can't deal with this.
12726       if (User->getOpcode() == ISD::SELECT) {
12727         if (User->getOperand(0) == Inputs[i])
12728           return SDValue();
12729       } else if (User->getOpcode() == ISD::SELECT_CC) {
12730         if (User->getOperand(0) == Inputs[i] ||
12731             User->getOperand(1) == Inputs[i])
12732           return SDValue();
12733       }
12734     }
12735   }
12736 
12737   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12738     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12739                               UE = PromOps[i].getNode()->use_end();
12740          UI != UE; ++UI) {
12741       SDNode *User = *UI;
12742       if (User != N && !Visited.count(User))
12743         return SDValue();
12744 
12745       // Make sure that we're not going to promote the non-output-value
12746       // operand(s) or SELECT or SELECT_CC.
12747       // FIXME: Although we could sometimes handle this, and it does occur in
12748       // practice that one of the condition inputs to the select is also one of
12749       // the outputs, we currently can't deal with this.
12750       if (User->getOpcode() == ISD::SELECT) {
12751         if (User->getOperand(0) == PromOps[i])
12752           return SDValue();
12753       } else if (User->getOpcode() == ISD::SELECT_CC) {
12754         if (User->getOperand(0) == PromOps[i] ||
12755             User->getOperand(1) == PromOps[i])
12756           return SDValue();
12757       }
12758     }
12759   }
12760 
12761   // Replace all inputs with the extension operand.
12762   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12763     // Constants may have users outside the cluster of to-be-promoted nodes,
12764     // and so we need to replace those as we do the promotions.
12765     if (isa<ConstantSDNode>(Inputs[i]))
12766       continue;
12767     else
12768       DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
12769   }
12770 
12771   std::list<HandleSDNode> PromOpHandles;
12772   for (auto &PromOp : PromOps)
12773     PromOpHandles.emplace_back(PromOp);
12774 
12775   // Replace all operations (these are all the same, but have a different
12776   // (i1) return type). DAG.getNode will validate that the types of
12777   // a binary operator match, so go through the list in reverse so that
12778   // we've likely promoted both operands first. Any intermediate truncations or
12779   // extensions disappear.
12780   while (!PromOpHandles.empty()) {
12781     SDValue PromOp = PromOpHandles.back().getValue();
12782     PromOpHandles.pop_back();
12783 
12784     if (PromOp.getOpcode() == ISD::TRUNCATE ||
12785         PromOp.getOpcode() == ISD::SIGN_EXTEND ||
12786         PromOp.getOpcode() == ISD::ZERO_EXTEND ||
12787         PromOp.getOpcode() == ISD::ANY_EXTEND) {
12788       if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
12789           PromOp.getOperand(0).getValueType() != MVT::i1) {
12790         // The operand is not yet ready (see comment below).
12791         PromOpHandles.emplace_front(PromOp);
12792         continue;
12793       }
12794 
12795       SDValue RepValue = PromOp.getOperand(0);
12796       if (isa<ConstantSDNode>(RepValue))
12797         RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
12798 
12799       DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
12800       continue;
12801     }
12802 
12803     unsigned C;
12804     switch (PromOp.getOpcode()) {
12805     default:             C = 0; break;
12806     case ISD::SELECT:    C = 1; break;
12807     case ISD::SELECT_CC: C = 2; break;
12808     }
12809 
12810     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
12811          PromOp.getOperand(C).getValueType() != MVT::i1) ||
12812         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
12813          PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
12814       // The to-be-promoted operands of this node have not yet been
12815       // promoted (this should be rare because we're going through the
12816       // list backward, but if one of the operands has several users in
12817       // this cluster of to-be-promoted nodes, it is possible).
12818       PromOpHandles.emplace_front(PromOp);
12819       continue;
12820     }
12821 
12822     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
12823                                 PromOp.getNode()->op_end());
12824 
12825     // If there are any constant inputs, make sure they're replaced now.
12826     for (unsigned i = 0; i < 2; ++i)
12827       if (isa<ConstantSDNode>(Ops[C+i]))
12828         Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
12829 
12830     DAG.ReplaceAllUsesOfValueWith(PromOp,
12831       DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
12832   }
12833 
12834   // Now we're left with the initial truncation itself.
12835   if (N->getOpcode() == ISD::TRUNCATE)
12836     return N->getOperand(0);
12837 
12838   // Otherwise, this is a comparison. The operands to be compared have just
12839   // changed type (to i1), but everything else is the same.
12840   return SDValue(N, 0);
12841 }
12842 
12843 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
12844                                                   DAGCombinerInfo &DCI) const {
12845   SelectionDAG &DAG = DCI.DAG;
12846   SDLoc dl(N);
12847 
12848   // If we're tracking CR bits, we need to be careful that we don't have:
12849   //   zext(binary-ops(trunc(x), trunc(y)))
12850   // or
12851   //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12852   // such that we're unnecessarily moving things into CR bits that can more
12853   // efficiently stay in GPRs. Note that if we're not certain that the high
12854   // bits are set as required by the final extension, we still may need to do
12855   // some masking to get the proper behavior.
12856 
12857   // This same functionality is important on PPC64 when dealing with
12858   // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12859   // the return values of functions. Because it is so similar, it is handled
12860   // here as well.
12861 
12862   if (N->getValueType(0) != MVT::i32 &&
12863       N->getValueType(0) != MVT::i64)
12864     return SDValue();
12865 
12866   if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
12867         (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
12868     return SDValue();
12869 
12870   if (N->getOperand(0).getOpcode() != ISD::AND &&
12871       N->getOperand(0).getOpcode() != ISD::OR  &&
12872       N->getOperand(0).getOpcode() != ISD::XOR &&
12873       N->getOperand(0).getOpcode() != ISD::SELECT &&
12874       N->getOperand(0).getOpcode() != ISD::SELECT_CC)
12875     return SDValue();
12876 
12877   SmallVector<SDValue, 4> Inputs;
12878   SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
12879   SmallPtrSet<SDNode *, 16> Visited;
12880 
12881   // Visit all inputs, collect all binary operations (and, or, xor and
12882   // select) that are all fed by truncations.
12883   while (!BinOps.empty()) {
12884     SDValue BinOp = BinOps.pop_back_val();
12885 
12886     if (!Visited.insert(BinOp.getNode()).second)
12887       continue;
12888 
12889     PromOps.push_back(BinOp);
12890 
12891     for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
12892       // The condition of the select is not promoted.
12893       if (BinOp.getOpcode() == ISD::SELECT && i == 0)
12894         continue;
12895       if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
12896         continue;
12897 
12898       if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
12899           isa<ConstantSDNode>(BinOp.getOperand(i))) {
12900         Inputs.push_back(BinOp.getOperand(i));
12901       } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
12902                  BinOp.getOperand(i).getOpcode() == ISD::OR  ||
12903                  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
12904                  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
12905                  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
12906         BinOps.push_back(BinOp.getOperand(i));
12907       } else {
12908         // We have an input that is not a truncation or another binary
12909         // operation; we'll abort this transformation.
12910         return SDValue();
12911       }
12912     }
12913   }
12914 
12915   // The operands of a select that must be truncated when the select is
12916   // promoted because the operand is actually part of the to-be-promoted set.
12917   DenseMap<SDNode *, EVT> SelectTruncOp[2];
12918 
12919   // Make sure that this is a self-contained cluster of operations (which
12920   // is not quite the same thing as saying that everything has only one
12921   // use).
12922   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12923     if (isa<ConstantSDNode>(Inputs[i]))
12924       continue;
12925 
12926     for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
12927                               UE = Inputs[i].getNode()->use_end();
12928          UI != UE; ++UI) {
12929       SDNode *User = *UI;
12930       if (User != N && !Visited.count(User))
12931         return SDValue();
12932 
12933       // If we're going to promote the non-output-value operand(s) or SELECT or
12934       // SELECT_CC, record them for truncation.
12935       if (User->getOpcode() == ISD::SELECT) {
12936         if (User->getOperand(0) == Inputs[i])
12937           SelectTruncOp[0].insert(std::make_pair(User,
12938                                     User->getOperand(0).getValueType()));
12939       } else if (User->getOpcode() == ISD::SELECT_CC) {
12940         if (User->getOperand(0) == Inputs[i])
12941           SelectTruncOp[0].insert(std::make_pair(User,
12942                                     User->getOperand(0).getValueType()));
12943         if (User->getOperand(1) == Inputs[i])
12944           SelectTruncOp[1].insert(std::make_pair(User,
12945                                     User->getOperand(1).getValueType()));
12946       }
12947     }
12948   }
12949 
12950   for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
12951     for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
12952                               UE = PromOps[i].getNode()->use_end();
12953          UI != UE; ++UI) {
12954       SDNode *User = *UI;
12955       if (User != N && !Visited.count(User))
12956         return SDValue();
12957 
12958       // If we're going to promote the non-output-value operand(s) or SELECT or
12959       // SELECT_CC, record them for truncation.
12960       if (User->getOpcode() == ISD::SELECT) {
12961         if (User->getOperand(0) == PromOps[i])
12962           SelectTruncOp[0].insert(std::make_pair(User,
12963                                     User->getOperand(0).getValueType()));
12964       } else if (User->getOpcode() == ISD::SELECT_CC) {
12965         if (User->getOperand(0) == PromOps[i])
12966           SelectTruncOp[0].insert(std::make_pair(User,
12967                                     User->getOperand(0).getValueType()));
12968         if (User->getOperand(1) == PromOps[i])
12969           SelectTruncOp[1].insert(std::make_pair(User,
12970                                     User->getOperand(1).getValueType()));
12971       }
12972     }
12973   }
12974 
12975   unsigned PromBits = N->getOperand(0).getValueSizeInBits();
12976   bool ReallyNeedsExt = false;
12977   if (N->getOpcode() != ISD::ANY_EXTEND) {
12978     // If all of the inputs are not already sign/zero extended, then
12979     // we'll still need to do that at the end.
12980     for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
12981       if (isa<ConstantSDNode>(Inputs[i]))
12982         continue;
12983 
12984       unsigned OpBits =
12985         Inputs[i].getOperand(0).getValueSizeInBits();
12986       assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
12987 
12988       if ((N->getOpcode() == ISD::ZERO_EXTEND &&
12989            !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
12990                                   APInt::getHighBitsSet(OpBits,
12991                                                         OpBits-PromBits))) ||
12992           (N->getOpcode() == ISD::SIGN_EXTEND &&
12993            DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
12994              (OpBits-(PromBits-1)))) {
12995         ReallyNeedsExt = true;
12996         break;
12997       }
12998     }
12999   }
13000 
13001   // Replace all inputs, either with the truncation operand, or a
13002   // truncation or extension to the final output type.
13003   for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13004     // Constant inputs need to be replaced with the to-be-promoted nodes that
13005     // use them because they might have users outside of the cluster of
13006     // promoted nodes.
13007     if (isa<ConstantSDNode>(Inputs[i]))
13008       continue;
13009 
13010     SDValue InSrc = Inputs[i].getOperand(0);
13011     if (Inputs[i].getValueType() == N->getValueType(0))
13012       DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13013     else if (N->getOpcode() == ISD::SIGN_EXTEND)
13014       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13015         DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13016     else if (N->getOpcode() == ISD::ZERO_EXTEND)
13017       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13018         DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13019     else
13020       DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13021         DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13022   }
13023 
13024   std::list<HandleSDNode> PromOpHandles;
13025   for (auto &PromOp : PromOps)
13026     PromOpHandles.emplace_back(PromOp);
13027 
13028   // Replace all operations (these are all the same, but have a different
13029   // (promoted) return type). DAG.getNode will validate that the types of
13030   // a binary operator match, so go through the list in reverse so that
13031   // we've likely promoted both operands first.
13032   while (!PromOpHandles.empty()) {
13033     SDValue PromOp = PromOpHandles.back().getValue();
13034     PromOpHandles.pop_back();
13035 
13036     unsigned C;
13037     switch (PromOp.getOpcode()) {
13038     default:             C = 0; break;
13039     case ISD::SELECT:    C = 1; break;
13040     case ISD::SELECT_CC: C = 2; break;
13041     }
13042 
13043     if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13044          PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13045         (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13046          PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13047       // The to-be-promoted operands of this node have not yet been
13048       // promoted (this should be rare because we're going through the
13049       // list backward, but if one of the operands has several users in
13050       // this cluster of to-be-promoted nodes, it is possible).
13051       PromOpHandles.emplace_front(PromOp);
13052       continue;
13053     }
13054 
13055     // For SELECT and SELECT_CC nodes, we do a similar check for any
13056     // to-be-promoted comparison inputs.
13057     if (PromOp.getOpcode() == ISD::SELECT ||
13058         PromOp.getOpcode() == ISD::SELECT_CC) {
13059       if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13060            PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13061           (SelectTruncOp[1].count(PromOp.getNode()) &&
13062            PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13063         PromOpHandles.emplace_front(PromOp);
13064         continue;
13065       }
13066     }
13067 
13068     SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13069                                 PromOp.getNode()->op_end());
13070 
13071     // If this node has constant inputs, then they'll need to be promoted here.
13072     for (unsigned i = 0; i < 2; ++i) {
13073       if (!isa<ConstantSDNode>(Ops[C+i]))
13074         continue;
13075       if (Ops[C+i].getValueType() == N->getValueType(0))
13076         continue;
13077 
13078       if (N->getOpcode() == ISD::SIGN_EXTEND)
13079         Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13080       else if (N->getOpcode() == ISD::ZERO_EXTEND)
13081         Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13082       else
13083         Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13084     }
13085 
13086     // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13087     // truncate them again to the original value type.
13088     if (PromOp.getOpcode() == ISD::SELECT ||
13089         PromOp.getOpcode() == ISD::SELECT_CC) {
13090       auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13091       if (SI0 != SelectTruncOp[0].end())
13092         Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13093       auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13094       if (SI1 != SelectTruncOp[1].end())
13095         Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13096     }
13097 
13098     DAG.ReplaceAllUsesOfValueWith(PromOp,
13099       DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13100   }
13101 
13102   // Now we're left with the initial extension itself.
13103   if (!ReallyNeedsExt)
13104     return N->getOperand(0);
13105 
13106   // To zero extend, just mask off everything except for the first bit (in the
13107   // i1 case).
13108   if (N->getOpcode() == ISD::ZERO_EXTEND)
13109     return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13110                        DAG.getConstant(APInt::getLowBitsSet(
13111                                          N->getValueSizeInBits(0), PromBits),
13112                                        dl, N->getValueType(0)));
13113 
13114   assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13115          "Invalid extension type");
13116   EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13117   SDValue ShiftCst =
13118       DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13119   return DAG.getNode(
13120       ISD::SRA, dl, N->getValueType(0),
13121       DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13122       ShiftCst);
13123 }
13124 
13125 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13126                                         DAGCombinerInfo &DCI) const {
13127   assert(N->getOpcode() == ISD::SETCC &&
13128          "Should be called with a SETCC node");
13129 
13130   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13131   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13132     SDValue LHS = N->getOperand(0);
13133     SDValue RHS = N->getOperand(1);
13134 
13135     // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13136     if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13137         LHS.hasOneUse())
13138       std::swap(LHS, RHS);
13139 
13140     // x == 0-y --> x+y == 0
13141     // x != 0-y --> x+y != 0
13142     if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13143         RHS.hasOneUse()) {
13144       SDLoc DL(N);
13145       SelectionDAG &DAG = DCI.DAG;
13146       EVT VT = N->getValueType(0);
13147       EVT OpVT = LHS.getValueType();
13148       SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13149       return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13150     }
13151   }
13152 
13153   return DAGCombineTruncBoolExt(N, DCI);
13154 }
13155 
13156 // Is this an extending load from an f32 to an f64?
13157 static bool isFPExtLoad(SDValue Op) {
13158   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13159     return LD->getExtensionType() == ISD::EXTLOAD &&
13160       Op.getValueType() == MVT::f64;
13161   return false;
13162 }
13163 
13164 /// Reduces the number of fp-to-int conversion when building a vector.
13165 ///
13166 /// If this vector is built out of floating to integer conversions,
13167 /// transform it to a vector built out of floating point values followed by a
13168 /// single floating to integer conversion of the vector.
13169 /// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
13170 /// becomes (fptosi (build_vector ($A, $B, ...)))
13171 SDValue PPCTargetLowering::
13172 combineElementTruncationToVectorTruncation(SDNode *N,
13173                                            DAGCombinerInfo &DCI) const {
13174   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13175          "Should be called with a BUILD_VECTOR node");
13176 
13177   SelectionDAG &DAG = DCI.DAG;
13178   SDLoc dl(N);
13179 
13180   SDValue FirstInput = N->getOperand(0);
13181   assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13182          "The input operand must be an fp-to-int conversion.");
13183 
13184   // This combine happens after legalization so the fp_to_[su]i nodes are
13185   // already converted to PPCSISD nodes.
13186   unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13187   if (FirstConversion == PPCISD::FCTIDZ ||
13188       FirstConversion == PPCISD::FCTIDUZ ||
13189       FirstConversion == PPCISD::FCTIWZ ||
13190       FirstConversion == PPCISD::FCTIWUZ) {
13191     bool IsSplat = true;
13192     bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13193       FirstConversion == PPCISD::FCTIWUZ;
13194     EVT SrcVT = FirstInput.getOperand(0).getValueType();
13195     SmallVector<SDValue, 4> Ops;
13196     EVT TargetVT = N->getValueType(0);
13197     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13198       SDValue NextOp = N->getOperand(i);
13199       if (NextOp.getOpcode() != PPCISD::MFVSR)
13200         return SDValue();
13201       unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13202       if (NextConversion != FirstConversion)
13203         return SDValue();
13204       // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13205       // This is not valid if the input was originally double precision. It is
13206       // also not profitable to do unless this is an extending load in which
13207       // case doing this combine will allow us to combine consecutive loads.
13208       if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13209         return SDValue();
13210       if (N->getOperand(i) != FirstInput)
13211         IsSplat = false;
13212     }
13213 
13214     // If this is a splat, we leave it as-is since there will be only a single
13215     // fp-to-int conversion followed by a splat of the integer. This is better
13216     // for 32-bit and smaller ints and neutral for 64-bit ints.
13217     if (IsSplat)
13218       return SDValue();
13219 
13220     // Now that we know we have the right type of node, get its operands
13221     for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13222       SDValue In = N->getOperand(i).getOperand(0);
13223       if (Is32Bit) {
13224         // For 32-bit values, we need to add an FP_ROUND node (if we made it
13225         // here, we know that all inputs are extending loads so this is safe).
13226         if (In.isUndef())
13227           Ops.push_back(DAG.getUNDEF(SrcVT));
13228         else {
13229           SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13230                                       MVT::f32, In.getOperand(0),
13231                                       DAG.getIntPtrConstant(1, dl));
13232           Ops.push_back(Trunc);
13233         }
13234       } else
13235         Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13236     }
13237 
13238     unsigned Opcode;
13239     if (FirstConversion == PPCISD::FCTIDZ ||
13240         FirstConversion == PPCISD::FCTIWZ)
13241       Opcode = ISD::FP_TO_SINT;
13242     else
13243       Opcode = ISD::FP_TO_UINT;
13244 
13245     EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13246     SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13247     return DAG.getNode(Opcode, dl, TargetVT, BV);
13248   }
13249   return SDValue();
13250 }
13251 
13252 /// Reduce the number of loads when building a vector.
13253 ///
13254 /// Building a vector out of multiple loads can be converted to a load
13255 /// of the vector type if the loads are consecutive. If the loads are
13256 /// consecutive but in descending order, a shuffle is added at the end
13257 /// to reorder the vector.
13258 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
13259   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13260          "Should be called with a BUILD_VECTOR node");
13261 
13262   SDLoc dl(N);
13263 
13264   // Return early for non byte-sized type, as they can't be consecutive.
13265   if (!N->getValueType(0).getVectorElementType().isByteSized())
13266     return SDValue();
13267 
13268   bool InputsAreConsecutiveLoads = true;
13269   bool InputsAreReverseConsecutive = true;
13270   unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
13271   SDValue FirstInput = N->getOperand(0);
13272   bool IsRoundOfExtLoad = false;
13273 
13274   if (FirstInput.getOpcode() == ISD::FP_ROUND &&
13275       FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
13276     LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
13277     IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
13278   }
13279   // Not a build vector of (possibly fp_rounded) loads.
13280   if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
13281       N->getNumOperands() == 1)
13282     return SDValue();
13283 
13284   for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
13285     // If any inputs are fp_round(extload), they all must be.
13286     if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
13287       return SDValue();
13288 
13289     SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
13290       N->getOperand(i);
13291     if (NextInput.getOpcode() != ISD::LOAD)
13292       return SDValue();
13293 
13294     SDValue PreviousInput =
13295       IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
13296     LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
13297     LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
13298 
13299     // If any inputs are fp_round(extload), they all must be.
13300     if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
13301       return SDValue();
13302 
13303     if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
13304       InputsAreConsecutiveLoads = false;
13305     if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
13306       InputsAreReverseConsecutive = false;
13307 
13308     // Exit early if the loads are neither consecutive nor reverse consecutive.
13309     if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
13310       return SDValue();
13311   }
13312 
13313   assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
13314          "The loads cannot be both consecutive and reverse consecutive.");
13315 
13316   SDValue FirstLoadOp =
13317     IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
13318   SDValue LastLoadOp =
13319     IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
13320                        N->getOperand(N->getNumOperands()-1);
13321 
13322   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
13323   LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
13324   if (InputsAreConsecutiveLoads) {
13325     assert(LD1 && "Input needs to be a LoadSDNode.");
13326     return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
13327                        LD1->getBasePtr(), LD1->getPointerInfo(),
13328                        LD1->getAlignment());
13329   }
13330   if (InputsAreReverseConsecutive) {
13331     assert(LDL && "Input needs to be a LoadSDNode.");
13332     SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
13333                                LDL->getBasePtr(), LDL->getPointerInfo(),
13334                                LDL->getAlignment());
13335     SmallVector<int, 16> Ops;
13336     for (int i = N->getNumOperands() - 1; i >= 0; i--)
13337       Ops.push_back(i);
13338 
13339     return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
13340                                 DAG.getUNDEF(N->getValueType(0)), Ops);
13341   }
13342   return SDValue();
13343 }
13344 
13345 // This function adds the required vector_shuffle needed to get
13346 // the elements of the vector extract in the correct position
13347 // as specified by the CorrectElems encoding.
13348 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
13349                                       SDValue Input, uint64_t Elems,
13350                                       uint64_t CorrectElems) {
13351   SDLoc dl(N);
13352 
13353   unsigned NumElems = Input.getValueType().getVectorNumElements();
13354   SmallVector<int, 16> ShuffleMask(NumElems, -1);
13355 
13356   // Knowing the element indices being extracted from the original
13357   // vector and the order in which they're being inserted, just put
13358   // them at element indices required for the instruction.
13359   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13360     if (DAG.getDataLayout().isLittleEndian())
13361       ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
13362     else
13363       ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
13364     CorrectElems = CorrectElems >> 8;
13365     Elems = Elems >> 8;
13366   }
13367 
13368   SDValue Shuffle =
13369       DAG.getVectorShuffle(Input.getValueType(), dl, Input,
13370                            DAG.getUNDEF(Input.getValueType()), ShuffleMask);
13371 
13372   EVT VT = N->getValueType(0);
13373   SDValue Conv = DAG.getBitcast(VT, Shuffle);
13374 
13375   EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
13376                                Input.getValueType().getVectorElementType(),
13377                                VT.getVectorNumElements());
13378   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
13379                      DAG.getValueType(ExtVT));
13380 }
13381 
13382 // Look for build vector patterns where input operands come from sign
13383 // extended vector_extract elements of specific indices. If the correct indices
13384 // aren't used, add a vector shuffle to fix up the indices and create
13385 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
13386 // during instruction selection.
13387 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
13388   // This array encodes the indices that the vector sign extend instructions
13389   // extract from when extending from one type to another for both BE and LE.
13390   // The right nibble of each byte corresponds to the LE incides.
13391   // and the left nibble of each byte corresponds to the BE incides.
13392   // For example: 0x3074B8FC  byte->word
13393   // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
13394   // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
13395   // For example: 0x000070F8  byte->double word
13396   // For LE: the allowed indices are: 0x0,0x8
13397   // For BE: the allowed indices are: 0x7,0xF
13398   uint64_t TargetElems[] = {
13399       0x3074B8FC, // b->w
13400       0x000070F8, // b->d
13401       0x10325476, // h->w
13402       0x00003074, // h->d
13403       0x00001032, // w->d
13404   };
13405 
13406   uint64_t Elems = 0;
13407   int Index;
13408   SDValue Input;
13409 
13410   auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
13411     if (!Op)
13412       return false;
13413     if (Op.getOpcode() != ISD::SIGN_EXTEND &&
13414         Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
13415       return false;
13416 
13417     // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
13418     // of the right width.
13419     SDValue Extract = Op.getOperand(0);
13420     if (Extract.getOpcode() == ISD::ANY_EXTEND)
13421       Extract = Extract.getOperand(0);
13422     if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13423       return false;
13424 
13425     ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
13426     if (!ExtOp)
13427       return false;
13428 
13429     Index = ExtOp->getZExtValue();
13430     if (Input && Input != Extract.getOperand(0))
13431       return false;
13432 
13433     if (!Input)
13434       Input = Extract.getOperand(0);
13435 
13436     Elems = Elems << 8;
13437     Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
13438     Elems |= Index;
13439 
13440     return true;
13441   };
13442 
13443   // If the build vector operands aren't sign extended vector extracts,
13444   // of the same input vector, then return.
13445   for (unsigned i = 0; i < N->getNumOperands(); i++) {
13446     if (!isSExtOfVecExtract(N->getOperand(i))) {
13447       return SDValue();
13448     }
13449   }
13450 
13451   // If the vector extract indicies are not correct, add the appropriate
13452   // vector_shuffle.
13453   int TgtElemArrayIdx;
13454   int InputSize = Input.getValueType().getScalarSizeInBits();
13455   int OutputSize = N->getValueType(0).getScalarSizeInBits();
13456   if (InputSize + OutputSize == 40)
13457     TgtElemArrayIdx = 0;
13458   else if (InputSize + OutputSize == 72)
13459     TgtElemArrayIdx = 1;
13460   else if (InputSize + OutputSize == 48)
13461     TgtElemArrayIdx = 2;
13462   else if (InputSize + OutputSize == 80)
13463     TgtElemArrayIdx = 3;
13464   else if (InputSize + OutputSize == 96)
13465     TgtElemArrayIdx = 4;
13466   else
13467     return SDValue();
13468 
13469   uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
13470   CorrectElems = DAG.getDataLayout().isLittleEndian()
13471                      ? CorrectElems & 0x0F0F0F0F0F0F0F0F
13472                      : CorrectElems & 0xF0F0F0F0F0F0F0F0;
13473   if (Elems != CorrectElems) {
13474     return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
13475   }
13476 
13477   // Regular lowering will catch cases where a shuffle is not needed.
13478   return SDValue();
13479 }
13480 
13481 // Look for the pattern of a load from a narrow width to i128, feeding
13482 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
13483 // (LXVRZX). This node represents a zero extending load that will be matched
13484 // to the Load VSX Vector Rightmost instructions.
13485 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
13486   SDLoc DL(N);
13487 
13488   // This combine is only eligible for a BUILD_VECTOR of v1i128.
13489   if (N->getValueType(0) != MVT::v1i128)
13490     return SDValue();
13491 
13492   SDValue Operand = N->getOperand(0);
13493   // Proceed with the transformation if the operand to the BUILD_VECTOR
13494   // is a load instruction.
13495   if (Operand.getOpcode() != ISD::LOAD)
13496     return SDValue();
13497 
13498   LoadSDNode *LD = dyn_cast<LoadSDNode>(Operand);
13499   EVT MemoryType = LD->getMemoryVT();
13500 
13501   // This transformation is only valid if the we are loading either a byte,
13502   // halfword, word, or doubleword.
13503   bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
13504                      MemoryType == MVT::i32 || MemoryType == MVT::i64;
13505 
13506   // Ensure that the load from the narrow width is being zero extended to i128.
13507   if (!ValidLDType ||
13508       (LD->getExtensionType() != ISD::ZEXTLOAD &&
13509        LD->getExtensionType() != ISD::EXTLOAD))
13510     return SDValue();
13511 
13512   SDValue LoadOps[] = {
13513       LD->getChain(), LD->getBasePtr(),
13514       DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
13515 
13516   return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
13517                                  DAG.getVTList(MVT::v1i128, MVT::Other),
13518                                  LoadOps, MemoryType, LD->getMemOperand());
13519 }
13520 
13521 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
13522                                                  DAGCombinerInfo &DCI) const {
13523   assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13524          "Should be called with a BUILD_VECTOR node");
13525 
13526   SelectionDAG &DAG = DCI.DAG;
13527   SDLoc dl(N);
13528 
13529   if (!Subtarget.hasVSX())
13530     return SDValue();
13531 
13532   // The target independent DAG combiner will leave a build_vector of
13533   // float-to-int conversions intact. We can generate MUCH better code for
13534   // a float-to-int conversion of a vector of floats.
13535   SDValue FirstInput = N->getOperand(0);
13536   if (FirstInput.getOpcode() == PPCISD::MFVSR) {
13537     SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
13538     if (Reduced)
13539       return Reduced;
13540   }
13541 
13542   // If we're building a vector out of consecutive loads, just load that
13543   // vector type.
13544   SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
13545   if (Reduced)
13546     return Reduced;
13547 
13548   // If we're building a vector out of extended elements from another vector
13549   // we have P9 vector integer extend instructions. The code assumes legal
13550   // input types (i.e. it can't handle things like v4i16) so do not run before
13551   // legalization.
13552   if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
13553     Reduced = combineBVOfVecSExt(N, DAG);
13554     if (Reduced)
13555       return Reduced;
13556   }
13557 
13558   // On Power10, the Load VSX Vector Rightmost instructions can be utilized
13559   // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
13560   // is a load from <valid narrow width> to i128.
13561   if (Subtarget.isISA3_1()) {
13562     SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
13563     if (BVOfZLoad)
13564       return BVOfZLoad;
13565   }
13566 
13567   if (N->getValueType(0) != MVT::v2f64)
13568     return SDValue();
13569 
13570   // Looking for:
13571   // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
13572   if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
13573       FirstInput.getOpcode() != ISD::UINT_TO_FP)
13574     return SDValue();
13575   if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
13576       N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
13577     return SDValue();
13578   if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
13579     return SDValue();
13580 
13581   SDValue Ext1 = FirstInput.getOperand(0);
13582   SDValue Ext2 = N->getOperand(1).getOperand(0);
13583   if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
13584      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
13585     return SDValue();
13586 
13587   ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
13588   ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
13589   if (!Ext1Op || !Ext2Op)
13590     return SDValue();
13591   if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
13592       Ext1.getOperand(0) != Ext2.getOperand(0))
13593     return SDValue();
13594 
13595   int FirstElem = Ext1Op->getZExtValue();
13596   int SecondElem = Ext2Op->getZExtValue();
13597   int SubvecIdx;
13598   if (FirstElem == 0 && SecondElem == 1)
13599     SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
13600   else if (FirstElem == 2 && SecondElem == 3)
13601     SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
13602   else
13603     return SDValue();
13604 
13605   SDValue SrcVec = Ext1.getOperand(0);
13606   auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
13607     PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
13608   return DAG.getNode(NodeType, dl, MVT::v2f64,
13609                      SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
13610 }
13611 
13612 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
13613                                               DAGCombinerInfo &DCI) const {
13614   assert((N->getOpcode() == ISD::SINT_TO_FP ||
13615           N->getOpcode() == ISD::UINT_TO_FP) &&
13616          "Need an int -> FP conversion node here");
13617 
13618   if (useSoftFloat() || !Subtarget.has64BitSupport())
13619     return SDValue();
13620 
13621   SelectionDAG &DAG = DCI.DAG;
13622   SDLoc dl(N);
13623   SDValue Op(N, 0);
13624 
13625   // Don't handle ppc_fp128 here or conversions that are out-of-range capable
13626   // from the hardware.
13627   if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
13628     return SDValue();
13629   if (!Op.getOperand(0).getValueType().isSimple())
13630     return SDValue();
13631   if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
13632       Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
13633     return SDValue();
13634 
13635   SDValue FirstOperand(Op.getOperand(0));
13636   bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
13637     (FirstOperand.getValueType() == MVT::i8 ||
13638      FirstOperand.getValueType() == MVT::i16);
13639   if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
13640     bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
13641     bool DstDouble = Op.getValueType() == MVT::f64;
13642     unsigned ConvOp = Signed ?
13643       (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
13644       (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
13645     SDValue WidthConst =
13646       DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
13647                             dl, false);
13648     LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
13649     SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
13650     SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
13651                                          DAG.getVTList(MVT::f64, MVT::Other),
13652                                          Ops, MVT::i8, LDN->getMemOperand());
13653 
13654     // For signed conversion, we need to sign-extend the value in the VSR
13655     if (Signed) {
13656       SDValue ExtOps[] = { Ld, WidthConst };
13657       SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
13658       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
13659     } else
13660       return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
13661   }
13662 
13663 
13664   // For i32 intermediate values, unfortunately, the conversion functions
13665   // leave the upper 32 bits of the value are undefined. Within the set of
13666   // scalar instructions, we have no method for zero- or sign-extending the
13667   // value. Thus, we cannot handle i32 intermediate values here.
13668   if (Op.getOperand(0).getValueType() == MVT::i32)
13669     return SDValue();
13670 
13671   assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
13672          "UINT_TO_FP is supported only with FPCVT");
13673 
13674   // If we have FCFIDS, then use it when converting to single-precision.
13675   // Otherwise, convert to double-precision and then round.
13676   unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13677                        ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
13678                                                             : PPCISD::FCFIDS)
13679                        : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
13680                                                             : PPCISD::FCFID);
13681   MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
13682                   ? MVT::f32
13683                   : MVT::f64;
13684 
13685   // If we're converting from a float, to an int, and back to a float again,
13686   // then we don't need the store/load pair at all.
13687   if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
13688        Subtarget.hasFPCVT()) ||
13689       (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
13690     SDValue Src = Op.getOperand(0).getOperand(0);
13691     if (Src.getValueType() == MVT::f32) {
13692       Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
13693       DCI.AddToWorklist(Src.getNode());
13694     } else if (Src.getValueType() != MVT::f64) {
13695       // Make sure that we don't pick up a ppc_fp128 source value.
13696       return SDValue();
13697     }
13698 
13699     unsigned FCTOp =
13700       Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
13701                                                         PPCISD::FCTIDUZ;
13702 
13703     SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
13704     SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
13705 
13706     if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
13707       FP = DAG.getNode(ISD::FP_ROUND, dl,
13708                        MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
13709       DCI.AddToWorklist(FP.getNode());
13710     }
13711 
13712     return FP;
13713   }
13714 
13715   return SDValue();
13716 }
13717 
13718 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
13719 // builtins) into loads with swaps.
13720 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
13721                                               DAGCombinerInfo &DCI) const {
13722   SelectionDAG &DAG = DCI.DAG;
13723   SDLoc dl(N);
13724   SDValue Chain;
13725   SDValue Base;
13726   MachineMemOperand *MMO;
13727 
13728   switch (N->getOpcode()) {
13729   default:
13730     llvm_unreachable("Unexpected opcode for little endian VSX load");
13731   case ISD::LOAD: {
13732     LoadSDNode *LD = cast<LoadSDNode>(N);
13733     Chain = LD->getChain();
13734     Base = LD->getBasePtr();
13735     MMO = LD->getMemOperand();
13736     // If the MMO suggests this isn't a load of a full vector, leave
13737     // things alone.  For a built-in, we have to make the change for
13738     // correctness, so if there is a size problem that will be a bug.
13739     if (MMO->getSize() < 16)
13740       return SDValue();
13741     break;
13742   }
13743   case ISD::INTRINSIC_W_CHAIN: {
13744     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13745     Chain = Intrin->getChain();
13746     // Similarly to the store case below, Intrin->getBasePtr() doesn't get
13747     // us what we want. Get operand 2 instead.
13748     Base = Intrin->getOperand(2);
13749     MMO = Intrin->getMemOperand();
13750     break;
13751   }
13752   }
13753 
13754   MVT VecTy = N->getValueType(0).getSimpleVT();
13755 
13756   // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
13757   // aligned and the type is a vector with elements up to 4 bytes
13758   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13759       VecTy.getScalarSizeInBits() <= 32) {
13760     return SDValue();
13761   }
13762 
13763   SDValue LoadOps[] = { Chain, Base };
13764   SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
13765                                          DAG.getVTList(MVT::v2f64, MVT::Other),
13766                                          LoadOps, MVT::v2f64, MMO);
13767 
13768   DCI.AddToWorklist(Load.getNode());
13769   Chain = Load.getValue(1);
13770   SDValue Swap = DAG.getNode(
13771       PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
13772   DCI.AddToWorklist(Swap.getNode());
13773 
13774   // Add a bitcast if the resulting load type doesn't match v2f64.
13775   if (VecTy != MVT::v2f64) {
13776     SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
13777     DCI.AddToWorklist(N.getNode());
13778     // Package {bitcast value, swap's chain} to match Load's shape.
13779     return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
13780                        N, Swap.getValue(1));
13781   }
13782 
13783   return Swap;
13784 }
13785 
13786 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13787 // builtins) into stores with swaps.
13788 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
13789                                                DAGCombinerInfo &DCI) const {
13790   SelectionDAG &DAG = DCI.DAG;
13791   SDLoc dl(N);
13792   SDValue Chain;
13793   SDValue Base;
13794   unsigned SrcOpnd;
13795   MachineMemOperand *MMO;
13796 
13797   switch (N->getOpcode()) {
13798   default:
13799     llvm_unreachable("Unexpected opcode for little endian VSX store");
13800   case ISD::STORE: {
13801     StoreSDNode *ST = cast<StoreSDNode>(N);
13802     Chain = ST->getChain();
13803     Base = ST->getBasePtr();
13804     MMO = ST->getMemOperand();
13805     SrcOpnd = 1;
13806     // If the MMO suggests this isn't a store of a full vector, leave
13807     // things alone.  For a built-in, we have to make the change for
13808     // correctness, so if there is a size problem that will be a bug.
13809     if (MMO->getSize() < 16)
13810       return SDValue();
13811     break;
13812   }
13813   case ISD::INTRINSIC_VOID: {
13814     MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
13815     Chain = Intrin->getChain();
13816     // Intrin->getBasePtr() oddly does not get what we want.
13817     Base = Intrin->getOperand(3);
13818     MMO = Intrin->getMemOperand();
13819     SrcOpnd = 2;
13820     break;
13821   }
13822   }
13823 
13824   SDValue Src = N->getOperand(SrcOpnd);
13825   MVT VecTy = Src.getValueType().getSimpleVT();
13826 
13827   // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
13828   // aligned and the type is a vector with elements up to 4 bytes
13829   if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
13830       VecTy.getScalarSizeInBits() <= 32) {
13831     return SDValue();
13832   }
13833 
13834   // All stores are done as v2f64 and possible bit cast.
13835   if (VecTy != MVT::v2f64) {
13836     Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
13837     DCI.AddToWorklist(Src.getNode());
13838   }
13839 
13840   SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
13841                              DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
13842   DCI.AddToWorklist(Swap.getNode());
13843   Chain = Swap.getValue(1);
13844   SDValue StoreOps[] = { Chain, Swap, Base };
13845   SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
13846                                           DAG.getVTList(MVT::Other),
13847                                           StoreOps, VecTy, MMO);
13848   DCI.AddToWorklist(Store.getNode());
13849   return Store;
13850 }
13851 
13852 // Handle DAG combine for STORE (FP_TO_INT F).
13853 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
13854                                                DAGCombinerInfo &DCI) const {
13855 
13856   SelectionDAG &DAG = DCI.DAG;
13857   SDLoc dl(N);
13858   unsigned Opcode = N->getOperand(1).getOpcode();
13859 
13860   assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
13861          && "Not a FP_TO_INT Instruction!");
13862 
13863   SDValue Val = N->getOperand(1).getOperand(0);
13864   EVT Op1VT = N->getOperand(1).getValueType();
13865   EVT ResVT = Val.getValueType();
13866 
13867   if (!isTypeLegal(ResVT))
13868     return SDValue();
13869 
13870   // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13871   bool ValidTypeForStoreFltAsInt =
13872         (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
13873          (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
13874 
13875   if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
13876       cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
13877     return SDValue();
13878 
13879   // Extend f32 values to f64
13880   if (ResVT.getScalarSizeInBits() == 32) {
13881     Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
13882     DCI.AddToWorklist(Val.getNode());
13883   }
13884 
13885   // Set signed or unsigned conversion opcode.
13886   unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
13887                           PPCISD::FP_TO_SINT_IN_VSR :
13888                           PPCISD::FP_TO_UINT_IN_VSR;
13889 
13890   Val = DAG.getNode(ConvOpcode,
13891                     dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
13892   DCI.AddToWorklist(Val.getNode());
13893 
13894   // Set number of bytes being converted.
13895   unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
13896   SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
13897                     DAG.getIntPtrConstant(ByteSize, dl, false),
13898                     DAG.getValueType(Op1VT) };
13899 
13900   Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
13901           DAG.getVTList(MVT::Other), Ops,
13902           cast<StoreSDNode>(N)->getMemoryVT(),
13903           cast<StoreSDNode>(N)->getMemOperand());
13904 
13905   DCI.AddToWorklist(Val.getNode());
13906   return Val;
13907 }
13908 
13909 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
13910   // Check that the source of the element keeps flipping
13911   // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
13912   bool PrevElemFromFirstVec = Mask[0] < NumElts;
13913   for (int i = 1, e = Mask.size(); i < e; i++) {
13914     if (PrevElemFromFirstVec && Mask[i] < NumElts)
13915       return false;
13916     if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
13917       return false;
13918     PrevElemFromFirstVec = !PrevElemFromFirstVec;
13919   }
13920   return true;
13921 }
13922 
13923 static bool isSplatBV(SDValue Op) {
13924   if (Op.getOpcode() != ISD::BUILD_VECTOR)
13925     return false;
13926   SDValue FirstOp;
13927 
13928   // Find first non-undef input.
13929   for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
13930     FirstOp = Op.getOperand(i);
13931     if (!FirstOp.isUndef())
13932       break;
13933   }
13934 
13935   // All inputs are undef or the same as the first non-undef input.
13936   for (int i = 1, e = Op.getNumOperands(); i < e; i++)
13937     if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
13938       return false;
13939   return true;
13940 }
13941 
13942 static SDValue isScalarToVec(SDValue Op) {
13943   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13944     return Op;
13945   if (Op.getOpcode() != ISD::BITCAST)
13946     return SDValue();
13947   Op = Op.getOperand(0);
13948   if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
13949     return Op;
13950   return SDValue();
13951 }
13952 
13953 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
13954                                             int LHSMaxIdx, int RHSMinIdx,
13955                                             int RHSMaxIdx, int HalfVec) {
13956   for (int i = 0, e = ShuffV.size(); i < e; i++) {
13957     int Idx = ShuffV[i];
13958     if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
13959       ShuffV[i] += HalfVec;
13960   }
13961 }
13962 
13963 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
13964 // the original is:
13965 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
13966 // In such a case, just change the shuffle mask to extract the element
13967 // from the permuted index.
13968 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG) {
13969   SDLoc dl(OrigSToV);
13970   EVT VT = OrigSToV.getValueType();
13971   assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
13972          "Expecting a SCALAR_TO_VECTOR here");
13973   SDValue Input = OrigSToV.getOperand(0);
13974 
13975   if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13976     ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
13977     SDValue OrigVector = Input.getOperand(0);
13978 
13979     // Can't handle non-const element indices or different vector types
13980     // for the input to the extract and the output of the scalar_to_vector.
13981     if (Idx && VT == OrigVector.getValueType()) {
13982       SmallVector<int, 16> NewMask(VT.getVectorNumElements(), -1);
13983       NewMask[VT.getVectorNumElements() / 2] = Idx->getZExtValue();
13984       return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
13985     }
13986   }
13987   return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
13988                      OrigSToV.getOperand(0));
13989 }
13990 
13991 // On little endian subtargets, combine shuffles such as:
13992 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
13993 // into:
13994 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
13995 // because the latter can be matched to a single instruction merge.
13996 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
13997 // to put the value into element zero. Adjust the shuffle mask so that the
13998 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
13999 SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
14000                                                 SelectionDAG &DAG) const {
14001   SDValue LHS = SVN->getOperand(0);
14002   SDValue RHS = SVN->getOperand(1);
14003   auto Mask = SVN->getMask();
14004   int NumElts = LHS.getValueType().getVectorNumElements();
14005   SDValue Res(SVN, 0);
14006   SDLoc dl(SVN);
14007 
14008   // None of these combines are useful on big endian systems since the ISA
14009   // already has a big endian bias.
14010   if (!Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14011     return Res;
14012 
14013   // If this is not a shuffle of a shuffle and the first element comes from
14014   // the second vector, canonicalize to the commuted form. This will make it
14015   // more likely to match one of the single instruction patterns.
14016   if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
14017       RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
14018     std::swap(LHS, RHS);
14019     Res = DAG.getCommutedVectorShuffle(*SVN);
14020     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14021   }
14022 
14023   // Adjust the shuffle mask if either input vector comes from a
14024   // SCALAR_TO_VECTOR and keep the respective input vector in permuted
14025   // form (to prevent the need for a swap).
14026   SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
14027   SDValue SToVLHS = isScalarToVec(LHS);
14028   SDValue SToVRHS = isScalarToVec(RHS);
14029   if (SToVLHS || SToVRHS) {
14030     int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
14031                             : SToVRHS.getValueType().getVectorNumElements();
14032     int NumEltsOut = ShuffV.size();
14033 
14034     // Initially assume that neither input is permuted. These will be adjusted
14035     // accordingly if either input is.
14036     int LHSMaxIdx = -1;
14037     int RHSMinIdx = -1;
14038     int RHSMaxIdx = -1;
14039     int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
14040 
14041     // Get the permuted scalar to vector nodes for the source(s) that come from
14042     // ISD::SCALAR_TO_VECTOR.
14043     if (SToVLHS) {
14044       // Set up the values for the shuffle vector fixup.
14045       LHSMaxIdx = NumEltsOut / NumEltsIn;
14046       SToVLHS = getSToVPermuted(SToVLHS, DAG);
14047       if (SToVLHS.getValueType() != LHS.getValueType())
14048         SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
14049       LHS = SToVLHS;
14050     }
14051     if (SToVRHS) {
14052       RHSMinIdx = NumEltsOut;
14053       RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
14054       SToVRHS = getSToVPermuted(SToVRHS, DAG);
14055       if (SToVRHS.getValueType() != RHS.getValueType())
14056         SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
14057       RHS = SToVRHS;
14058     }
14059 
14060     // Fix up the shuffle mask to reflect where the desired element actually is.
14061     // The minimum and maximum indices that correspond to element zero for both
14062     // the LHS and RHS are computed and will control which shuffle mask entries
14063     // are to be changed. For example, if the RHS is permuted, any shuffle mask
14064     // entries in the range [RHSMinIdx,RHSMaxIdx) will be incremented by
14065     // HalfVec to refer to the corresponding element in the permuted vector.
14066     fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
14067                                     HalfVec);
14068     Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14069 
14070     // We may have simplified away the shuffle. We won't be able to do anything
14071     // further with it here.
14072     if (!isa<ShuffleVectorSDNode>(Res))
14073       return Res;
14074     Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
14075   }
14076 
14077   // The common case after we commuted the shuffle is that the RHS is a splat
14078   // and we have elements coming in from the splat at indices that are not
14079   // conducive to using a merge.
14080   // Example:
14081   // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
14082   if (!isSplatBV(RHS))
14083     return Res;
14084 
14085   // We are looking for a mask such that all even elements are from
14086   // one vector and all odd elements from the other.
14087   if (!isAlternatingShuffMask(Mask, NumElts))
14088     return Res;
14089 
14090   // Adjust the mask so we are pulling in the same index from the splat
14091   // as the index from the interesting vector in consecutive elements.
14092   // Example (even elements from first vector):
14093   // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
14094   if (Mask[0] < NumElts)
14095     for (int i = 1, e = Mask.size(); i < e; i += 2)
14096       ShuffV[i] = (ShuffV[i - 1] + NumElts);
14097   // Example (odd elements from first vector):
14098   // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
14099   else
14100     for (int i = 0, e = Mask.size(); i < e; i += 2)
14101       ShuffV[i] = (ShuffV[i + 1] + NumElts);
14102 
14103   // If the RHS has undefs, we need to remove them since we may have created
14104   // a shuffle that adds those instead of the splat value.
14105   SDValue SplatVal = cast<BuildVectorSDNode>(RHS.getNode())->getSplatValue();
14106   RHS = DAG.getSplatBuildVector(RHS.getValueType(), dl, SplatVal);
14107 
14108   Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
14109   return Res;
14110 }
14111 
14112 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14113                                                 LSBaseSDNode *LSBase,
14114                                                 DAGCombinerInfo &DCI) const {
14115   assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14116         "Not a reverse memop pattern!");
14117 
14118   auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14119     auto Mask = SVN->getMask();
14120     int i = 0;
14121     auto I = Mask.rbegin();
14122     auto E = Mask.rend();
14123 
14124     for (; I != E; ++I) {
14125       if (*I != i)
14126         return false;
14127       i++;
14128     }
14129     return true;
14130   };
14131 
14132   SelectionDAG &DAG = DCI.DAG;
14133   EVT VT = SVN->getValueType(0);
14134 
14135   if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14136     return SDValue();
14137 
14138   // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14139   // See comment in PPCVSXSwapRemoval.cpp.
14140   // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14141   if (!Subtarget.hasP9Vector())
14142     return SDValue();
14143 
14144   if(!IsElementReverse(SVN))
14145     return SDValue();
14146 
14147   if (LSBase->getOpcode() == ISD::LOAD) {
14148     SDLoc dl(SVN);
14149     SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14150     return DAG.getMemIntrinsicNode(
14151         PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14152         LSBase->getMemoryVT(), LSBase->getMemOperand());
14153   }
14154 
14155   if (LSBase->getOpcode() == ISD::STORE) {
14156     SDLoc dl(LSBase);
14157     SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14158                           LSBase->getBasePtr()};
14159     return DAG.getMemIntrinsicNode(
14160         PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14161         LSBase->getMemoryVT(), LSBase->getMemOperand());
14162   }
14163 
14164   llvm_unreachable("Expected a load or store node here");
14165 }
14166 
14167 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
14168                                              DAGCombinerInfo &DCI) const {
14169   SelectionDAG &DAG = DCI.DAG;
14170   SDLoc dl(N);
14171   switch (N->getOpcode()) {
14172   default: break;
14173   case ISD::ADD:
14174     return combineADD(N, DCI);
14175   case ISD::SHL:
14176     return combineSHL(N, DCI);
14177   case ISD::SRA:
14178     return combineSRA(N, DCI);
14179   case ISD::SRL:
14180     return combineSRL(N, DCI);
14181   case ISD::MUL:
14182     return combineMUL(N, DCI);
14183   case ISD::FMA:
14184   case PPCISD::FNMSUB:
14185     return combineFMALike(N, DCI);
14186   case PPCISD::SHL:
14187     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
14188         return N->getOperand(0);
14189     break;
14190   case PPCISD::SRL:
14191     if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
14192         return N->getOperand(0);
14193     break;
14194   case PPCISD::SRA:
14195     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
14196       if (C->isNullValue() ||   //  0 >>s V -> 0.
14197           C->isAllOnesValue())    // -1 >>s V -> -1.
14198         return N->getOperand(0);
14199     }
14200     break;
14201   case ISD::SIGN_EXTEND:
14202   case ISD::ZERO_EXTEND:
14203   case ISD::ANY_EXTEND:
14204     return DAGCombineExtBoolTrunc(N, DCI);
14205   case ISD::TRUNCATE:
14206     return combineTRUNCATE(N, DCI);
14207   case ISD::SETCC:
14208     if (SDValue CSCC = combineSetCC(N, DCI))
14209       return CSCC;
14210     LLVM_FALLTHROUGH;
14211   case ISD::SELECT_CC:
14212     return DAGCombineTruncBoolExt(N, DCI);
14213   case ISD::SINT_TO_FP:
14214   case ISD::UINT_TO_FP:
14215     return combineFPToIntToFP(N, DCI);
14216   case ISD::VECTOR_SHUFFLE:
14217     if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
14218       LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
14219       return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
14220     }
14221     return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
14222   case ISD::STORE: {
14223 
14224     EVT Op1VT = N->getOperand(1).getValueType();
14225     unsigned Opcode = N->getOperand(1).getOpcode();
14226 
14227     if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
14228       SDValue Val= combineStoreFPToInt(N, DCI);
14229       if (Val)
14230         return Val;
14231     }
14232 
14233     if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
14234       ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
14235       SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
14236       if (Val)
14237         return Val;
14238     }
14239 
14240     // Turn STORE (BSWAP) -> sthbrx/stwbrx.
14241     if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
14242         N->getOperand(1).getNode()->hasOneUse() &&
14243         (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
14244          (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
14245 
14246       // STBRX can only handle simple types and it makes no sense to store less
14247       // two bytes in byte-reversed order.
14248       EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
14249       if (mVT.isExtended() || mVT.getSizeInBits() < 16)
14250         break;
14251 
14252       SDValue BSwapOp = N->getOperand(1).getOperand(0);
14253       // Do an any-extend to 32-bits if this is a half-word input.
14254       if (BSwapOp.getValueType() == MVT::i16)
14255         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
14256 
14257       // If the type of BSWAP operand is wider than stored memory width
14258       // it need to be shifted to the right side before STBRX.
14259       if (Op1VT.bitsGT(mVT)) {
14260         int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
14261         BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
14262                               DAG.getConstant(Shift, dl, MVT::i32));
14263         // Need to truncate if this is a bswap of i64 stored as i32/i16.
14264         if (Op1VT == MVT::i64)
14265           BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
14266       }
14267 
14268       SDValue Ops[] = {
14269         N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
14270       };
14271       return
14272         DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
14273                                 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
14274                                 cast<StoreSDNode>(N)->getMemOperand());
14275     }
14276 
14277     // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
14278     // So it can increase the chance of CSE constant construction.
14279     if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
14280         isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
14281       // Need to sign-extended to 64-bits to handle negative values.
14282       EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
14283       uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
14284                                     MemVT.getSizeInBits());
14285       SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
14286 
14287       // DAG.getTruncStore() can't be used here because it doesn't accept
14288       // the general (base + offset) addressing mode.
14289       // So we use UpdateNodeOperands and setTruncatingStore instead.
14290       DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
14291                              N->getOperand(3));
14292       cast<StoreSDNode>(N)->setTruncatingStore(true);
14293       return SDValue(N, 0);
14294     }
14295 
14296     // For little endian, VSX stores require generating xxswapd/lxvd2x.
14297     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14298     if (Op1VT.isSimple()) {
14299       MVT StoreVT = Op1VT.getSimpleVT();
14300       if (Subtarget.needsSwapsForVSXMemOps() &&
14301           (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
14302            StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
14303         return expandVSXStoreForLE(N, DCI);
14304     }
14305     break;
14306   }
14307   case ISD::LOAD: {
14308     LoadSDNode *LD = cast<LoadSDNode>(N);
14309     EVT VT = LD->getValueType(0);
14310 
14311     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14312     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14313     if (VT.isSimple()) {
14314       MVT LoadVT = VT.getSimpleVT();
14315       if (Subtarget.needsSwapsForVSXMemOps() &&
14316           (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
14317            LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
14318         return expandVSXLoadForLE(N, DCI);
14319     }
14320 
14321     // We sometimes end up with a 64-bit integer load, from which we extract
14322     // two single-precision floating-point numbers. This happens with
14323     // std::complex<float>, and other similar structures, because of the way we
14324     // canonicalize structure copies. However, if we lack direct moves,
14325     // then the final bitcasts from the extracted integer values to the
14326     // floating-point numbers turn into store/load pairs. Even with direct moves,
14327     // just loading the two floating-point numbers is likely better.
14328     auto ReplaceTwoFloatLoad = [&]() {
14329       if (VT != MVT::i64)
14330         return false;
14331 
14332       if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
14333           LD->isVolatile())
14334         return false;
14335 
14336       //  We're looking for a sequence like this:
14337       //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
14338       //      t16: i64 = srl t13, Constant:i32<32>
14339       //    t17: i32 = truncate t16
14340       //  t18: f32 = bitcast t17
14341       //    t19: i32 = truncate t13
14342       //  t20: f32 = bitcast t19
14343 
14344       if (!LD->hasNUsesOfValue(2, 0))
14345         return false;
14346 
14347       auto UI = LD->use_begin();
14348       while (UI.getUse().getResNo() != 0) ++UI;
14349       SDNode *Trunc = *UI++;
14350       while (UI.getUse().getResNo() != 0) ++UI;
14351       SDNode *RightShift = *UI;
14352       if (Trunc->getOpcode() != ISD::TRUNCATE)
14353         std::swap(Trunc, RightShift);
14354 
14355       if (Trunc->getOpcode() != ISD::TRUNCATE ||
14356           Trunc->getValueType(0) != MVT::i32 ||
14357           !Trunc->hasOneUse())
14358         return false;
14359       if (RightShift->getOpcode() != ISD::SRL ||
14360           !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
14361           RightShift->getConstantOperandVal(1) != 32 ||
14362           !RightShift->hasOneUse())
14363         return false;
14364 
14365       SDNode *Trunc2 = *RightShift->use_begin();
14366       if (Trunc2->getOpcode() != ISD::TRUNCATE ||
14367           Trunc2->getValueType(0) != MVT::i32 ||
14368           !Trunc2->hasOneUse())
14369         return false;
14370 
14371       SDNode *Bitcast = *Trunc->use_begin();
14372       SDNode *Bitcast2 = *Trunc2->use_begin();
14373 
14374       if (Bitcast->getOpcode() != ISD::BITCAST ||
14375           Bitcast->getValueType(0) != MVT::f32)
14376         return false;
14377       if (Bitcast2->getOpcode() != ISD::BITCAST ||
14378           Bitcast2->getValueType(0) != MVT::f32)
14379         return false;
14380 
14381       if (Subtarget.isLittleEndian())
14382         std::swap(Bitcast, Bitcast2);
14383 
14384       // Bitcast has the second float (in memory-layout order) and Bitcast2
14385       // has the first one.
14386 
14387       SDValue BasePtr = LD->getBasePtr();
14388       if (LD->isIndexed()) {
14389         assert(LD->getAddressingMode() == ISD::PRE_INC &&
14390                "Non-pre-inc AM on PPC?");
14391         BasePtr =
14392           DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
14393                       LD->getOffset());
14394       }
14395 
14396       auto MMOFlags =
14397           LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
14398       SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
14399                                       LD->getPointerInfo(), LD->getAlignment(),
14400                                       MMOFlags, LD->getAAInfo());
14401       SDValue AddPtr =
14402         DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
14403                     BasePtr, DAG.getIntPtrConstant(4, dl));
14404       SDValue FloatLoad2 = DAG.getLoad(
14405           MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
14406           LD->getPointerInfo().getWithOffset(4),
14407           MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
14408 
14409       if (LD->isIndexed()) {
14410         // Note that DAGCombine should re-form any pre-increment load(s) from
14411         // what is produced here if that makes sense.
14412         DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
14413       }
14414 
14415       DCI.CombineTo(Bitcast2, FloatLoad);
14416       DCI.CombineTo(Bitcast, FloatLoad2);
14417 
14418       DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
14419                                     SDValue(FloatLoad2.getNode(), 1));
14420       return true;
14421     };
14422 
14423     if (ReplaceTwoFloatLoad())
14424       return SDValue(N, 0);
14425 
14426     EVT MemVT = LD->getMemoryVT();
14427     Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
14428     Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
14429     if (LD->isUnindexed() && VT.isVector() &&
14430         ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
14431           // P8 and later hardware should just use LOAD.
14432           !Subtarget.hasP8Vector() &&
14433           (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
14434            VT == MVT::v4f32))) &&
14435         LD->getAlign() < ABIAlignment) {
14436       // This is a type-legal unaligned Altivec load.
14437       SDValue Chain = LD->getChain();
14438       SDValue Ptr = LD->getBasePtr();
14439       bool isLittleEndian = Subtarget.isLittleEndian();
14440 
14441       // This implements the loading of unaligned vectors as described in
14442       // the venerable Apple Velocity Engine overview. Specifically:
14443       // https://developer.apple.com/hardwaredrivers/ve/alignment.html
14444       // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
14445       //
14446       // The general idea is to expand a sequence of one or more unaligned
14447       // loads into an alignment-based permutation-control instruction (lvsl
14448       // or lvsr), a series of regular vector loads (which always truncate
14449       // their input address to an aligned address), and a series of
14450       // permutations.  The results of these permutations are the requested
14451       // loaded values.  The trick is that the last "extra" load is not taken
14452       // from the address you might suspect (sizeof(vector) bytes after the
14453       // last requested load), but rather sizeof(vector) - 1 bytes after the
14454       // last requested vector. The point of this is to avoid a page fault if
14455       // the base address happened to be aligned. This works because if the
14456       // base address is aligned, then adding less than a full vector length
14457       // will cause the last vector in the sequence to be (re)loaded.
14458       // Otherwise, the next vector will be fetched as you might suspect was
14459       // necessary.
14460 
14461       // We might be able to reuse the permutation generation from
14462       // a different base address offset from this one by an aligned amount.
14463       // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
14464       // optimization later.
14465       Intrinsic::ID Intr, IntrLD, IntrPerm;
14466       MVT PermCntlTy, PermTy, LDTy;
14467       Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14468                             : Intrinsic::ppc_altivec_lvsl;
14469       IntrLD = Intrinsic::ppc_altivec_lvx;
14470       IntrPerm = Intrinsic::ppc_altivec_vperm;
14471       PermCntlTy = MVT::v16i8;
14472       PermTy = MVT::v4i32;
14473       LDTy = MVT::v4i32;
14474 
14475       SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
14476 
14477       // Create the new MMO for the new base load. It is like the original MMO,
14478       // but represents an area in memory almost twice the vector size centered
14479       // on the original address. If the address is unaligned, we might start
14480       // reading up to (sizeof(vector)-1) bytes below the address of the
14481       // original unaligned load.
14482       MachineFunction &MF = DAG.getMachineFunction();
14483       MachineMemOperand *BaseMMO =
14484         MF.getMachineMemOperand(LD->getMemOperand(),
14485                                 -(long)MemVT.getStoreSize()+1,
14486                                 2*MemVT.getStoreSize()-1);
14487 
14488       // Create the new base load.
14489       SDValue LDXIntID =
14490           DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
14491       SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
14492       SDValue BaseLoad =
14493         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14494                                 DAG.getVTList(PermTy, MVT::Other),
14495                                 BaseLoadOps, LDTy, BaseMMO);
14496 
14497       // Note that the value of IncOffset (which is provided to the next
14498       // load's pointer info offset value, and thus used to calculate the
14499       // alignment), and the value of IncValue (which is actually used to
14500       // increment the pointer value) are different! This is because we
14501       // require the next load to appear to be aligned, even though it
14502       // is actually offset from the base pointer by a lesser amount.
14503       int IncOffset = VT.getSizeInBits() / 8;
14504       int IncValue = IncOffset;
14505 
14506       // Walk (both up and down) the chain looking for another load at the real
14507       // (aligned) offset (the alignment of the other load does not matter in
14508       // this case). If found, then do not use the offset reduction trick, as
14509       // that will prevent the loads from being later combined (as they would
14510       // otherwise be duplicates).
14511       if (!findConsecutiveLoad(LD, DAG))
14512         --IncValue;
14513 
14514       SDValue Increment =
14515           DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
14516       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
14517 
14518       MachineMemOperand *ExtraMMO =
14519         MF.getMachineMemOperand(LD->getMemOperand(),
14520                                 1, 2*MemVT.getStoreSize()-1);
14521       SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
14522       SDValue ExtraLoad =
14523         DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
14524                                 DAG.getVTList(PermTy, MVT::Other),
14525                                 ExtraLoadOps, LDTy, ExtraMMO);
14526 
14527       SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
14528         BaseLoad.getValue(1), ExtraLoad.getValue(1));
14529 
14530       // Because vperm has a big-endian bias, we must reverse the order
14531       // of the input vectors and complement the permute control vector
14532       // when generating little endian code.  We have already handled the
14533       // latter by using lvsr instead of lvsl, so just reverse BaseLoad
14534       // and ExtraLoad here.
14535       SDValue Perm;
14536       if (isLittleEndian)
14537         Perm = BuildIntrinsicOp(IntrPerm,
14538                                 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
14539       else
14540         Perm = BuildIntrinsicOp(IntrPerm,
14541                                 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
14542 
14543       if (VT != PermTy)
14544         Perm = Subtarget.hasAltivec()
14545                    ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
14546                    : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
14547                                  DAG.getTargetConstant(1, dl, MVT::i64));
14548                                // second argument is 1 because this rounding
14549                                // is always exact.
14550 
14551       // The output of the permutation is our loaded result, the TokenFactor is
14552       // our new chain.
14553       DCI.CombineTo(N, Perm, TF);
14554       return SDValue(N, 0);
14555     }
14556     }
14557     break;
14558     case ISD::INTRINSIC_WO_CHAIN: {
14559       bool isLittleEndian = Subtarget.isLittleEndian();
14560       unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
14561       Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
14562                                            : Intrinsic::ppc_altivec_lvsl);
14563       if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
14564         SDValue Add = N->getOperand(1);
14565 
14566         int Bits = 4 /* 16 byte alignment */;
14567 
14568         if (DAG.MaskedValueIsZero(Add->getOperand(1),
14569                                   APInt::getAllOnesValue(Bits /* alignment */)
14570                                       .zext(Add.getScalarValueSizeInBits()))) {
14571           SDNode *BasePtr = Add->getOperand(0).getNode();
14572           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14573                                     UE = BasePtr->use_end();
14574                UI != UE; ++UI) {
14575             if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14576                 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
14577                     IID) {
14578               // We've found another LVSL/LVSR, and this address is an aligned
14579               // multiple of that one. The results will be the same, so use the
14580               // one we've just found instead.
14581 
14582               return SDValue(*UI, 0);
14583             }
14584           }
14585         }
14586 
14587         if (isa<ConstantSDNode>(Add->getOperand(1))) {
14588           SDNode *BasePtr = Add->getOperand(0).getNode();
14589           for (SDNode::use_iterator UI = BasePtr->use_begin(),
14590                UE = BasePtr->use_end(); UI != UE; ++UI) {
14591             if (UI->getOpcode() == ISD::ADD &&
14592                 isa<ConstantSDNode>(UI->getOperand(1)) &&
14593                 (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
14594                  cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
14595                 (1ULL << Bits) == 0) {
14596               SDNode *OtherAdd = *UI;
14597               for (SDNode::use_iterator VI = OtherAdd->use_begin(),
14598                    VE = OtherAdd->use_end(); VI != VE; ++VI) {
14599                 if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14600                     cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() == IID) {
14601                   return SDValue(*VI, 0);
14602                 }
14603               }
14604             }
14605           }
14606         }
14607       }
14608 
14609       // Combine vmaxsw/h/b(a, a's negation) to abs(a)
14610       // Expose the vabsduw/h/b opportunity for down stream
14611       if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
14612           (IID == Intrinsic::ppc_altivec_vmaxsw ||
14613            IID == Intrinsic::ppc_altivec_vmaxsh ||
14614            IID == Intrinsic::ppc_altivec_vmaxsb)) {
14615         SDValue V1 = N->getOperand(1);
14616         SDValue V2 = N->getOperand(2);
14617         if ((V1.getSimpleValueType() == MVT::v4i32 ||
14618              V1.getSimpleValueType() == MVT::v8i16 ||
14619              V1.getSimpleValueType() == MVT::v16i8) &&
14620             V1.getSimpleValueType() == V2.getSimpleValueType()) {
14621           // (0-a, a)
14622           if (V1.getOpcode() == ISD::SUB &&
14623               ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
14624               V1.getOperand(1) == V2) {
14625             return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
14626           }
14627           // (a, 0-a)
14628           if (V2.getOpcode() == ISD::SUB &&
14629               ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
14630               V2.getOperand(1) == V1) {
14631             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14632           }
14633           // (x-y, y-x)
14634           if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
14635               V1.getOperand(0) == V2.getOperand(1) &&
14636               V1.getOperand(1) == V2.getOperand(0)) {
14637             return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
14638           }
14639         }
14640       }
14641     }
14642 
14643     break;
14644   case ISD::INTRINSIC_W_CHAIN:
14645     // For little endian, VSX loads require generating lxvd2x/xxswapd.
14646     // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
14647     if (Subtarget.needsSwapsForVSXMemOps()) {
14648       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14649       default:
14650         break;
14651       case Intrinsic::ppc_vsx_lxvw4x:
14652       case Intrinsic::ppc_vsx_lxvd2x:
14653         return expandVSXLoadForLE(N, DCI);
14654       }
14655     }
14656     break;
14657   case ISD::INTRINSIC_VOID:
14658     // For little endian, VSX stores require generating xxswapd/stxvd2x.
14659     // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
14660     if (Subtarget.needsSwapsForVSXMemOps()) {
14661       switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
14662       default:
14663         break;
14664       case Intrinsic::ppc_vsx_stxvw4x:
14665       case Intrinsic::ppc_vsx_stxvd2x:
14666         return expandVSXStoreForLE(N, DCI);
14667       }
14668     }
14669     break;
14670   case ISD::BSWAP:
14671     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
14672     if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
14673         N->getOperand(0).hasOneUse() &&
14674         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
14675          (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
14676           N->getValueType(0) == MVT::i64))) {
14677       SDValue Load = N->getOperand(0);
14678       LoadSDNode *LD = cast<LoadSDNode>(Load);
14679       // Create the byte-swapping load.
14680       SDValue Ops[] = {
14681         LD->getChain(),    // Chain
14682         LD->getBasePtr(),  // Ptr
14683         DAG.getValueType(N->getValueType(0)) // VT
14684       };
14685       SDValue BSLoad =
14686         DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
14687                                 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
14688                                               MVT::i64 : MVT::i32, MVT::Other),
14689                                 Ops, LD->getMemoryVT(), LD->getMemOperand());
14690 
14691       // If this is an i16 load, insert the truncate.
14692       SDValue ResVal = BSLoad;
14693       if (N->getValueType(0) == MVT::i16)
14694         ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
14695 
14696       // First, combine the bswap away.  This makes the value produced by the
14697       // load dead.
14698       DCI.CombineTo(N, ResVal);
14699 
14700       // Next, combine the load away, we give it a bogus result value but a real
14701       // chain result.  The result value is dead because the bswap is dead.
14702       DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
14703 
14704       // Return N so it doesn't get rechecked!
14705       return SDValue(N, 0);
14706     }
14707     break;
14708   case PPCISD::VCMP:
14709     // If a VCMP_rec node already exists with exactly the same operands as this
14710     // node, use its result instead of this node (VCMP_rec computes both a CR6
14711     // and a normal output).
14712     //
14713     if (!N->getOperand(0).hasOneUse() &&
14714         !N->getOperand(1).hasOneUse() &&
14715         !N->getOperand(2).hasOneUse()) {
14716 
14717       // Scan all of the users of the LHS, looking for VCMP_rec's that match.
14718       SDNode *VCMPrecNode = nullptr;
14719 
14720       SDNode *LHSN = N->getOperand(0).getNode();
14721       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
14722            UI != E; ++UI)
14723         if (UI->getOpcode() == PPCISD::VCMP_rec &&
14724             UI->getOperand(1) == N->getOperand(1) &&
14725             UI->getOperand(2) == N->getOperand(2) &&
14726             UI->getOperand(0) == N->getOperand(0)) {
14727           VCMPrecNode = *UI;
14728           break;
14729         }
14730 
14731       // If there is no VCMP_rec node, or if the flag value has a single use,
14732       // don't transform this.
14733       if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
14734         break;
14735 
14736       // Look at the (necessarily single) use of the flag value.  If it has a
14737       // chain, this transformation is more complex.  Note that multiple things
14738       // could use the value result, which we should ignore.
14739       SDNode *FlagUser = nullptr;
14740       for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
14741            FlagUser == nullptr; ++UI) {
14742         assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
14743         SDNode *User = *UI;
14744         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
14745           if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
14746             FlagUser = User;
14747             break;
14748           }
14749         }
14750       }
14751 
14752       // If the user is a MFOCRF instruction, we know this is safe.
14753       // Otherwise we give up for right now.
14754       if (FlagUser->getOpcode() == PPCISD::MFOCRF)
14755         return SDValue(VCMPrecNode, 0);
14756     }
14757     break;
14758   case ISD::BRCOND: {
14759     SDValue Cond = N->getOperand(1);
14760     SDValue Target = N->getOperand(2);
14761 
14762     if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14763         cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
14764           Intrinsic::loop_decrement) {
14765 
14766       // We now need to make the intrinsic dead (it cannot be instruction
14767       // selected).
14768       DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
14769       assert(Cond.getNode()->hasOneUse() &&
14770              "Counter decrement has more than one use");
14771 
14772       return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
14773                          N->getOperand(0), Target);
14774     }
14775   }
14776   break;
14777   case ISD::BR_CC: {
14778     // If this is a branch on an altivec predicate comparison, lower this so
14779     // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
14780     // lowering is done pre-legalize, because the legalizer lowers the predicate
14781     // compare down to code that is difficult to reassemble.
14782     ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
14783     SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
14784 
14785     // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
14786     // value. If so, pass-through the AND to get to the intrinsic.
14787     if (LHS.getOpcode() == ISD::AND &&
14788         LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14789         cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
14790           Intrinsic::loop_decrement &&
14791         isa<ConstantSDNode>(LHS.getOperand(1)) &&
14792         !isNullConstant(LHS.getOperand(1)))
14793       LHS = LHS.getOperand(0);
14794 
14795     if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
14796         cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
14797           Intrinsic::loop_decrement &&
14798         isa<ConstantSDNode>(RHS)) {
14799       assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
14800              "Counter decrement comparison is not EQ or NE");
14801 
14802       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14803       bool isBDNZ = (CC == ISD::SETEQ && Val) ||
14804                     (CC == ISD::SETNE && !Val);
14805 
14806       // We now need to make the intrinsic dead (it cannot be instruction
14807       // selected).
14808       DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
14809       assert(LHS.getNode()->hasOneUse() &&
14810              "Counter decrement has more than one use");
14811 
14812       return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
14813                          N->getOperand(0), N->getOperand(4));
14814     }
14815 
14816     int CompareOpc;
14817     bool isDot;
14818 
14819     if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
14820         isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
14821         getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
14822       assert(isDot && "Can't compare against a vector result!");
14823 
14824       // If this is a comparison against something other than 0/1, then we know
14825       // that the condition is never/always true.
14826       unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
14827       if (Val != 0 && Val != 1) {
14828         if (CC == ISD::SETEQ)      // Cond never true, remove branch.
14829           return N->getOperand(0);
14830         // Always !=, turn it into an unconditional branch.
14831         return DAG.getNode(ISD::BR, dl, MVT::Other,
14832                            N->getOperand(0), N->getOperand(4));
14833       }
14834 
14835       bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
14836 
14837       // Create the PPCISD altivec 'dot' comparison node.
14838       SDValue Ops[] = {
14839         LHS.getOperand(2),  // LHS of compare
14840         LHS.getOperand(3),  // RHS of compare
14841         DAG.getConstant(CompareOpc, dl, MVT::i32)
14842       };
14843       EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
14844       SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
14845 
14846       // Unpack the result based on how the target uses it.
14847       PPC::Predicate CompOpc;
14848       switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
14849       default:  // Can't happen, don't crash on invalid number though.
14850       case 0:   // Branch on the value of the EQ bit of CR6.
14851         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
14852         break;
14853       case 1:   // Branch on the inverted value of the EQ bit of CR6.
14854         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
14855         break;
14856       case 2:   // Branch on the value of the LT bit of CR6.
14857         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
14858         break;
14859       case 3:   // Branch on the inverted value of the LT bit of CR6.
14860         CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
14861         break;
14862       }
14863 
14864       return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
14865                          DAG.getConstant(CompOpc, dl, MVT::i32),
14866                          DAG.getRegister(PPC::CR6, MVT::i32),
14867                          N->getOperand(4), CompNode.getValue(1));
14868     }
14869     break;
14870   }
14871   case ISD::BUILD_VECTOR:
14872     return DAGCombineBuildVector(N, DCI);
14873   case ISD::ABS:
14874     return combineABS(N, DCI);
14875   case ISD::VSELECT:
14876     return combineVSelect(N, DCI);
14877   }
14878 
14879   return SDValue();
14880 }
14881 
14882 SDValue
14883 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
14884                                  SelectionDAG &DAG,
14885                                  SmallVectorImpl<SDNode *> &Created) const {
14886   // fold (sdiv X, pow2)
14887   EVT VT = N->getValueType(0);
14888   if (VT == MVT::i64 && !Subtarget.isPPC64())
14889     return SDValue();
14890   if ((VT != MVT::i32 && VT != MVT::i64) ||
14891       !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
14892     return SDValue();
14893 
14894   SDLoc DL(N);
14895   SDValue N0 = N->getOperand(0);
14896 
14897   bool IsNegPow2 = (-Divisor).isPowerOf2();
14898   unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
14899   SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
14900 
14901   SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
14902   Created.push_back(Op.getNode());
14903 
14904   if (IsNegPow2) {
14905     Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
14906     Created.push_back(Op.getNode());
14907   }
14908 
14909   return Op;
14910 }
14911 
14912 //===----------------------------------------------------------------------===//
14913 // Inline Assembly Support
14914 //===----------------------------------------------------------------------===//
14915 
14916 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
14917                                                       KnownBits &Known,
14918                                                       const APInt &DemandedElts,
14919                                                       const SelectionDAG &DAG,
14920                                                       unsigned Depth) const {
14921   Known.resetAll();
14922   switch (Op.getOpcode()) {
14923   default: break;
14924   case PPCISD::LBRX: {
14925     // lhbrx is known to have the top bits cleared out.
14926     if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
14927       Known.Zero = 0xFFFF0000;
14928     break;
14929   }
14930   case ISD::INTRINSIC_WO_CHAIN: {
14931     switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
14932     default: break;
14933     case Intrinsic::ppc_altivec_vcmpbfp_p:
14934     case Intrinsic::ppc_altivec_vcmpeqfp_p:
14935     case Intrinsic::ppc_altivec_vcmpequb_p:
14936     case Intrinsic::ppc_altivec_vcmpequh_p:
14937     case Intrinsic::ppc_altivec_vcmpequw_p:
14938     case Intrinsic::ppc_altivec_vcmpequd_p:
14939     case Intrinsic::ppc_altivec_vcmpequq_p:
14940     case Intrinsic::ppc_altivec_vcmpgefp_p:
14941     case Intrinsic::ppc_altivec_vcmpgtfp_p:
14942     case Intrinsic::ppc_altivec_vcmpgtsb_p:
14943     case Intrinsic::ppc_altivec_vcmpgtsh_p:
14944     case Intrinsic::ppc_altivec_vcmpgtsw_p:
14945     case Intrinsic::ppc_altivec_vcmpgtsd_p:
14946     case Intrinsic::ppc_altivec_vcmpgtsq_p:
14947     case Intrinsic::ppc_altivec_vcmpgtub_p:
14948     case Intrinsic::ppc_altivec_vcmpgtuh_p:
14949     case Intrinsic::ppc_altivec_vcmpgtuw_p:
14950     case Intrinsic::ppc_altivec_vcmpgtud_p:
14951     case Intrinsic::ppc_altivec_vcmpgtuq_p:
14952       Known.Zero = ~1U;  // All bits but the low one are known to be zero.
14953       break;
14954     }
14955   }
14956   }
14957 }
14958 
14959 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
14960   switch (Subtarget.getCPUDirective()) {
14961   default: break;
14962   case PPC::DIR_970:
14963   case PPC::DIR_PWR4:
14964   case PPC::DIR_PWR5:
14965   case PPC::DIR_PWR5X:
14966   case PPC::DIR_PWR6:
14967   case PPC::DIR_PWR6X:
14968   case PPC::DIR_PWR7:
14969   case PPC::DIR_PWR8:
14970   case PPC::DIR_PWR9:
14971   case PPC::DIR_PWR10:
14972   case PPC::DIR_PWR_FUTURE: {
14973     if (!ML)
14974       break;
14975 
14976     if (!DisableInnermostLoopAlign32) {
14977       // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
14978       // so that we can decrease cache misses and branch-prediction misses.
14979       // Actual alignment of the loop will depend on the hotness check and other
14980       // logic in alignBlocks.
14981       if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
14982         return Align(32);
14983     }
14984 
14985     const PPCInstrInfo *TII = Subtarget.getInstrInfo();
14986 
14987     // For small loops (between 5 and 8 instructions), align to a 32-byte
14988     // boundary so that the entire loop fits in one instruction-cache line.
14989     uint64_t LoopSize = 0;
14990     for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
14991       for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
14992         LoopSize += TII->getInstSizeInBytes(*J);
14993         if (LoopSize > 32)
14994           break;
14995       }
14996 
14997     if (LoopSize > 16 && LoopSize <= 32)
14998       return Align(32);
14999 
15000     break;
15001   }
15002   }
15003 
15004   return TargetLowering::getPrefLoopAlignment(ML);
15005 }
15006 
15007 /// getConstraintType - Given a constraint, return the type of
15008 /// constraint it is for this target.
15009 PPCTargetLowering::ConstraintType
15010 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
15011   if (Constraint.size() == 1) {
15012     switch (Constraint[0]) {
15013     default: break;
15014     case 'b':
15015     case 'r':
15016     case 'f':
15017     case 'd':
15018     case 'v':
15019     case 'y':
15020       return C_RegisterClass;
15021     case 'Z':
15022       // FIXME: While Z does indicate a memory constraint, it specifically
15023       // indicates an r+r address (used in conjunction with the 'y' modifier
15024       // in the replacement string). Currently, we're forcing the base
15025       // register to be r0 in the asm printer (which is interpreted as zero)
15026       // and forming the complete address in the second register. This is
15027       // suboptimal.
15028       return C_Memory;
15029     }
15030   } else if (Constraint == "wc") { // individual CR bits.
15031     return C_RegisterClass;
15032   } else if (Constraint == "wa" || Constraint == "wd" ||
15033              Constraint == "wf" || Constraint == "ws" ||
15034              Constraint == "wi" || Constraint == "ww") {
15035     return C_RegisterClass; // VSX registers.
15036   }
15037   return TargetLowering::getConstraintType(Constraint);
15038 }
15039 
15040 /// Examine constraint type and operand type and determine a weight value.
15041 /// This object must already have been set up with the operand type
15042 /// and the current alternative constraint selected.
15043 TargetLowering::ConstraintWeight
15044 PPCTargetLowering::getSingleConstraintMatchWeight(
15045     AsmOperandInfo &info, const char *constraint) const {
15046   ConstraintWeight weight = CW_Invalid;
15047   Value *CallOperandVal = info.CallOperandVal;
15048     // If we don't have a value, we can't do a match,
15049     // but allow it at the lowest weight.
15050   if (!CallOperandVal)
15051     return CW_Default;
15052   Type *type = CallOperandVal->getType();
15053 
15054   // Look at the constraint type.
15055   if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
15056     return CW_Register; // an individual CR bit.
15057   else if ((StringRef(constraint) == "wa" ||
15058             StringRef(constraint) == "wd" ||
15059             StringRef(constraint) == "wf") &&
15060            type->isVectorTy())
15061     return CW_Register;
15062   else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
15063     return CW_Register; // just hold 64-bit integers data.
15064   else if (StringRef(constraint) == "ws" && type->isDoubleTy())
15065     return CW_Register;
15066   else if (StringRef(constraint) == "ww" && type->isFloatTy())
15067     return CW_Register;
15068 
15069   switch (*constraint) {
15070   default:
15071     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
15072     break;
15073   case 'b':
15074     if (type->isIntegerTy())
15075       weight = CW_Register;
15076     break;
15077   case 'f':
15078     if (type->isFloatTy())
15079       weight = CW_Register;
15080     break;
15081   case 'd':
15082     if (type->isDoubleTy())
15083       weight = CW_Register;
15084     break;
15085   case 'v':
15086     if (type->isVectorTy())
15087       weight = CW_Register;
15088     break;
15089   case 'y':
15090     weight = CW_Register;
15091     break;
15092   case 'Z':
15093     weight = CW_Memory;
15094     break;
15095   }
15096   return weight;
15097 }
15098 
15099 std::pair<unsigned, const TargetRegisterClass *>
15100 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
15101                                                 StringRef Constraint,
15102                                                 MVT VT) const {
15103   if (Constraint.size() == 1) {
15104     // GCC RS6000 Constraint Letters
15105     switch (Constraint[0]) {
15106     case 'b':   // R1-R31
15107       if (VT == MVT::i64 && Subtarget.isPPC64())
15108         return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
15109       return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
15110     case 'r':   // R0-R31
15111       if (VT == MVT::i64 && Subtarget.isPPC64())
15112         return std::make_pair(0U, &PPC::G8RCRegClass);
15113       return std::make_pair(0U, &PPC::GPRCRegClass);
15114     // 'd' and 'f' constraints are both defined to be "the floating point
15115     // registers", where one is for 32-bit and the other for 64-bit. We don't
15116     // really care overly much here so just give them all the same reg classes.
15117     case 'd':
15118     case 'f':
15119       if (Subtarget.hasSPE()) {
15120         if (VT == MVT::f32 || VT == MVT::i32)
15121           return std::make_pair(0U, &PPC::GPRCRegClass);
15122         if (VT == MVT::f64 || VT == MVT::i64)
15123           return std::make_pair(0U, &PPC::SPERCRegClass);
15124       } else {
15125         if (VT == MVT::f32 || VT == MVT::i32)
15126           return std::make_pair(0U, &PPC::F4RCRegClass);
15127         if (VT == MVT::f64 || VT == MVT::i64)
15128           return std::make_pair(0U, &PPC::F8RCRegClass);
15129       }
15130       break;
15131     case 'v':
15132       if (Subtarget.hasAltivec())
15133         return std::make_pair(0U, &PPC::VRRCRegClass);
15134       break;
15135     case 'y':   // crrc
15136       return std::make_pair(0U, &PPC::CRRCRegClass);
15137     }
15138   } else if (Constraint == "wc" && Subtarget.useCRBits()) {
15139     // An individual CR bit.
15140     return std::make_pair(0U, &PPC::CRBITRCRegClass);
15141   } else if ((Constraint == "wa" || Constraint == "wd" ||
15142              Constraint == "wf" || Constraint == "wi") &&
15143              Subtarget.hasVSX()) {
15144     return std::make_pair(0U, &PPC::VSRCRegClass);
15145   } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
15146     if (VT == MVT::f32 && Subtarget.hasP8Vector())
15147       return std::make_pair(0U, &PPC::VSSRCRegClass);
15148     else
15149       return std::make_pair(0U, &PPC::VSFRCRegClass);
15150   } else if (Constraint == "lr") {
15151     if (VT == MVT::i64)
15152       return std::make_pair(0U, &PPC::LR8RCRegClass);
15153     else
15154       return std::make_pair(0U, &PPC::LRRCRegClass);
15155   }
15156 
15157   // Handle special cases of physical registers that are not properly handled
15158   // by the base class.
15159   if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
15160     // If we name a VSX register, we can't defer to the base class because it
15161     // will not recognize the correct register (their names will be VSL{0-31}
15162     // and V{0-31} so they won't match). So we match them here.
15163     if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
15164       int VSNum = atoi(Constraint.data() + 3);
15165       assert(VSNum >= 0 && VSNum <= 63 &&
15166              "Attempted to access a vsr out of range");
15167       if (VSNum < 32)
15168         return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
15169       return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
15170     }
15171 
15172     // For float registers, we can't defer to the base class as it will match
15173     // the SPILLTOVSRRC class.
15174     if (Constraint.size() > 3 && Constraint[1] == 'f') {
15175       int RegNum = atoi(Constraint.data() + 2);
15176       if (RegNum > 31 || RegNum < 0)
15177         report_fatal_error("Invalid floating point register number");
15178       if (VT == MVT::f32 || VT == MVT::i32)
15179         return Subtarget.hasSPE()
15180                    ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
15181                    : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
15182       if (VT == MVT::f64 || VT == MVT::i64)
15183         return Subtarget.hasSPE()
15184                    ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
15185                    : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
15186     }
15187   }
15188 
15189   std::pair<unsigned, const TargetRegisterClass *> R =
15190       TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
15191 
15192   // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
15193   // (which we call X[0-9]+). If a 64-bit value has been requested, and a
15194   // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
15195   // register.
15196   // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
15197   // the AsmName field from *RegisterInfo.td, then this would not be necessary.
15198   if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
15199       PPC::GPRCRegClass.contains(R.first))
15200     return std::make_pair(TRI->getMatchingSuperReg(R.first,
15201                             PPC::sub_32, &PPC::G8RCRegClass),
15202                           &PPC::G8RCRegClass);
15203 
15204   // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
15205   if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
15206     R.first = PPC::CR0;
15207     R.second = &PPC::CRRCRegClass;
15208   }
15209 
15210   return R;
15211 }
15212 
15213 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
15214 /// vector.  If it is invalid, don't add anything to Ops.
15215 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
15216                                                      std::string &Constraint,
15217                                                      std::vector<SDValue>&Ops,
15218                                                      SelectionDAG &DAG) const {
15219   SDValue Result;
15220 
15221   // Only support length 1 constraints.
15222   if (Constraint.length() > 1) return;
15223 
15224   char Letter = Constraint[0];
15225   switch (Letter) {
15226   default: break;
15227   case 'I':
15228   case 'J':
15229   case 'K':
15230   case 'L':
15231   case 'M':
15232   case 'N':
15233   case 'O':
15234   case 'P': {
15235     ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
15236     if (!CST) return; // Must be an immediate to match.
15237     SDLoc dl(Op);
15238     int64_t Value = CST->getSExtValue();
15239     EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
15240                          // numbers are printed as such.
15241     switch (Letter) {
15242     default: llvm_unreachable("Unknown constraint letter!");
15243     case 'I':  // "I" is a signed 16-bit constant.
15244       if (isInt<16>(Value))
15245         Result = DAG.getTargetConstant(Value, dl, TCVT);
15246       break;
15247     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
15248       if (isShiftedUInt<16, 16>(Value))
15249         Result = DAG.getTargetConstant(Value, dl, TCVT);
15250       break;
15251     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
15252       if (isShiftedInt<16, 16>(Value))
15253         Result = DAG.getTargetConstant(Value, dl, TCVT);
15254       break;
15255     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
15256       if (isUInt<16>(Value))
15257         Result = DAG.getTargetConstant(Value, dl, TCVT);
15258       break;
15259     case 'M':  // "M" is a constant that is greater than 31.
15260       if (Value > 31)
15261         Result = DAG.getTargetConstant(Value, dl, TCVT);
15262       break;
15263     case 'N':  // "N" is a positive constant that is an exact power of two.
15264       if (Value > 0 && isPowerOf2_64(Value))
15265         Result = DAG.getTargetConstant(Value, dl, TCVT);
15266       break;
15267     case 'O':  // "O" is the constant zero.
15268       if (Value == 0)
15269         Result = DAG.getTargetConstant(Value, dl, TCVT);
15270       break;
15271     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
15272       if (isInt<16>(-Value))
15273         Result = DAG.getTargetConstant(Value, dl, TCVT);
15274       break;
15275     }
15276     break;
15277   }
15278   }
15279 
15280   if (Result.getNode()) {
15281     Ops.push_back(Result);
15282     return;
15283   }
15284 
15285   // Handle standard constraint letters.
15286   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
15287 }
15288 
15289 // isLegalAddressingMode - Return true if the addressing mode represented
15290 // by AM is legal for this target, for a load/store of the specified type.
15291 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
15292                                               const AddrMode &AM, Type *Ty,
15293                                               unsigned AS,
15294                                               Instruction *I) const {
15295   // Vector type r+i form is supported since power9 as DQ form. We don't check
15296   // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
15297   // imm form is preferred and the offset can be adjusted to use imm form later
15298   // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
15299   // max offset to check legal addressing mode, we should be a little aggressive
15300   // to contain other offsets for that LSRUse.
15301   if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
15302     return false;
15303 
15304   // PPC allows a sign-extended 16-bit immediate field.
15305   if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
15306     return false;
15307 
15308   // No global is ever allowed as a base.
15309   if (AM.BaseGV)
15310     return false;
15311 
15312   // PPC only support r+r,
15313   switch (AM.Scale) {
15314   case 0:  // "r+i" or just "i", depending on HasBaseReg.
15315     break;
15316   case 1:
15317     if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
15318       return false;
15319     // Otherwise we have r+r or r+i.
15320     break;
15321   case 2:
15322     if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
15323       return false;
15324     // Allow 2*r as r+r.
15325     break;
15326   default:
15327     // No other scales are supported.
15328     return false;
15329   }
15330 
15331   return true;
15332 }
15333 
15334 SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
15335                                            SelectionDAG &DAG) const {
15336   MachineFunction &MF = DAG.getMachineFunction();
15337   MachineFrameInfo &MFI = MF.getFrameInfo();
15338   MFI.setReturnAddressIsTaken(true);
15339 
15340   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
15341     return SDValue();
15342 
15343   SDLoc dl(Op);
15344   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15345 
15346   // Make sure the function does not optimize away the store of the RA to
15347   // the stack.
15348   PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
15349   FuncInfo->setLRStoreRequired();
15350   bool isPPC64 = Subtarget.isPPC64();
15351   auto PtrVT = getPointerTy(MF.getDataLayout());
15352 
15353   if (Depth > 0) {
15354     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
15355     SDValue Offset =
15356         DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
15357                         isPPC64 ? MVT::i64 : MVT::i32);
15358     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
15359                        DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
15360                        MachinePointerInfo());
15361   }
15362 
15363   // Just load the return address off the stack.
15364   SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
15365   return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
15366                      MachinePointerInfo());
15367 }
15368 
15369 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
15370                                           SelectionDAG &DAG) const {
15371   SDLoc dl(Op);
15372   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
15373 
15374   MachineFunction &MF = DAG.getMachineFunction();
15375   MachineFrameInfo &MFI = MF.getFrameInfo();
15376   MFI.setFrameAddressIsTaken(true);
15377 
15378   EVT PtrVT = getPointerTy(MF.getDataLayout());
15379   bool isPPC64 = PtrVT == MVT::i64;
15380 
15381   // Naked functions never have a frame pointer, and so we use r1. For all
15382   // other functions, this decision must be delayed until during PEI.
15383   unsigned FrameReg;
15384   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
15385     FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
15386   else
15387     FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
15388 
15389   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
15390                                          PtrVT);
15391   while (Depth--)
15392     FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
15393                             FrameAddr, MachinePointerInfo());
15394   return FrameAddr;
15395 }
15396 
15397 // FIXME? Maybe this could be a TableGen attribute on some registers and
15398 // this table could be generated automatically from RegInfo.
15399 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
15400                                               const MachineFunction &MF) const {
15401   bool isPPC64 = Subtarget.isPPC64();
15402 
15403   bool is64Bit = isPPC64 && VT == LLT::scalar(64);
15404   if (!is64Bit && VT != LLT::scalar(32))
15405     report_fatal_error("Invalid register global variable type");
15406 
15407   Register Reg = StringSwitch<Register>(RegName)
15408                      .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
15409                      .Case("r2", isPPC64 ? Register() : PPC::R2)
15410                      .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
15411                      .Default(Register());
15412 
15413   if (Reg)
15414     return Reg;
15415   report_fatal_error("Invalid register name global variable");
15416 }
15417 
15418 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
15419   // 32-bit SVR4 ABI access everything as got-indirect.
15420   if (Subtarget.is32BitELFABI())
15421     return true;
15422 
15423   // AIX accesses everything indirectly through the TOC, which is similar to
15424   // the GOT.
15425   if (Subtarget.isAIXABI())
15426     return true;
15427 
15428   CodeModel::Model CModel = getTargetMachine().getCodeModel();
15429   // If it is small or large code model, module locals are accessed
15430   // indirectly by loading their address from .toc/.got.
15431   if (CModel == CodeModel::Small || CModel == CodeModel::Large)
15432     return true;
15433 
15434   // JumpTable and BlockAddress are accessed as got-indirect.
15435   if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
15436     return true;
15437 
15438   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
15439     return Subtarget.isGVIndirectSymbol(G->getGlobal());
15440 
15441   return false;
15442 }
15443 
15444 bool
15445 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
15446   // The PowerPC target isn't yet aware of offsets.
15447   return false;
15448 }
15449 
15450 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
15451                                            const CallInst &I,
15452                                            MachineFunction &MF,
15453                                            unsigned Intrinsic) const {
15454   switch (Intrinsic) {
15455   case Intrinsic::ppc_altivec_lvx:
15456   case Intrinsic::ppc_altivec_lvxl:
15457   case Intrinsic::ppc_altivec_lvebx:
15458   case Intrinsic::ppc_altivec_lvehx:
15459   case Intrinsic::ppc_altivec_lvewx:
15460   case Intrinsic::ppc_vsx_lxvd2x:
15461   case Intrinsic::ppc_vsx_lxvw4x:
15462   case Intrinsic::ppc_vsx_lxvd2x_be:
15463   case Intrinsic::ppc_vsx_lxvw4x_be:
15464   case Intrinsic::ppc_vsx_lxvl:
15465   case Intrinsic::ppc_vsx_lxvll: {
15466     EVT VT;
15467     switch (Intrinsic) {
15468     case Intrinsic::ppc_altivec_lvebx:
15469       VT = MVT::i8;
15470       break;
15471     case Intrinsic::ppc_altivec_lvehx:
15472       VT = MVT::i16;
15473       break;
15474     case Intrinsic::ppc_altivec_lvewx:
15475       VT = MVT::i32;
15476       break;
15477     case Intrinsic::ppc_vsx_lxvd2x:
15478     case Intrinsic::ppc_vsx_lxvd2x_be:
15479       VT = MVT::v2f64;
15480       break;
15481     default:
15482       VT = MVT::v4i32;
15483       break;
15484     }
15485 
15486     Info.opc = ISD::INTRINSIC_W_CHAIN;
15487     Info.memVT = VT;
15488     Info.ptrVal = I.getArgOperand(0);
15489     Info.offset = -VT.getStoreSize()+1;
15490     Info.size = 2*VT.getStoreSize()-1;
15491     Info.align = Align(1);
15492     Info.flags = MachineMemOperand::MOLoad;
15493     return true;
15494   }
15495   case Intrinsic::ppc_altivec_stvx:
15496   case Intrinsic::ppc_altivec_stvxl:
15497   case Intrinsic::ppc_altivec_stvebx:
15498   case Intrinsic::ppc_altivec_stvehx:
15499   case Intrinsic::ppc_altivec_stvewx:
15500   case Intrinsic::ppc_vsx_stxvd2x:
15501   case Intrinsic::ppc_vsx_stxvw4x:
15502   case Intrinsic::ppc_vsx_stxvd2x_be:
15503   case Intrinsic::ppc_vsx_stxvw4x_be:
15504   case Intrinsic::ppc_vsx_stxvl:
15505   case Intrinsic::ppc_vsx_stxvll: {
15506     EVT VT;
15507     switch (Intrinsic) {
15508     case Intrinsic::ppc_altivec_stvebx:
15509       VT = MVT::i8;
15510       break;
15511     case Intrinsic::ppc_altivec_stvehx:
15512       VT = MVT::i16;
15513       break;
15514     case Intrinsic::ppc_altivec_stvewx:
15515       VT = MVT::i32;
15516       break;
15517     case Intrinsic::ppc_vsx_stxvd2x:
15518     case Intrinsic::ppc_vsx_stxvd2x_be:
15519       VT = MVT::v2f64;
15520       break;
15521     default:
15522       VT = MVT::v4i32;
15523       break;
15524     }
15525 
15526     Info.opc = ISD::INTRINSIC_VOID;
15527     Info.memVT = VT;
15528     Info.ptrVal = I.getArgOperand(1);
15529     Info.offset = -VT.getStoreSize()+1;
15530     Info.size = 2*VT.getStoreSize()-1;
15531     Info.align = Align(1);
15532     Info.flags = MachineMemOperand::MOStore;
15533     return true;
15534   }
15535   default:
15536     break;
15537   }
15538 
15539   return false;
15540 }
15541 
15542 /// It returns EVT::Other if the type should be determined using generic
15543 /// target-independent logic.
15544 EVT PPCTargetLowering::getOptimalMemOpType(
15545     const MemOp &Op, const AttributeList &FuncAttributes) const {
15546   if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
15547     // We should use Altivec/VSX loads and stores when available. For unaligned
15548     // addresses, unaligned VSX loads are only fast starting with the P8.
15549     if (Subtarget.hasAltivec() && Op.size() >= 16 &&
15550         (Op.isAligned(Align(16)) ||
15551          ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
15552       return MVT::v4i32;
15553   }
15554 
15555   if (Subtarget.isPPC64()) {
15556     return MVT::i64;
15557   }
15558 
15559   return MVT::i32;
15560 }
15561 
15562 /// Returns true if it is beneficial to convert a load of a constant
15563 /// to just the constant itself.
15564 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
15565                                                           Type *Ty) const {
15566   assert(Ty->isIntegerTy());
15567 
15568   unsigned BitSize = Ty->getPrimitiveSizeInBits();
15569   return !(BitSize == 0 || BitSize > 64);
15570 }
15571 
15572 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
15573   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
15574     return false;
15575   unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
15576   unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
15577   return NumBits1 == 64 && NumBits2 == 32;
15578 }
15579 
15580 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
15581   if (!VT1.isInteger() || !VT2.isInteger())
15582     return false;
15583   unsigned NumBits1 = VT1.getSizeInBits();
15584   unsigned NumBits2 = VT2.getSizeInBits();
15585   return NumBits1 == 64 && NumBits2 == 32;
15586 }
15587 
15588 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
15589   // Generally speaking, zexts are not free, but they are free when they can be
15590   // folded with other operations.
15591   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
15592     EVT MemVT = LD->getMemoryVT();
15593     if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
15594          (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
15595         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
15596          LD->getExtensionType() == ISD::ZEXTLOAD))
15597       return true;
15598   }
15599 
15600   // FIXME: Add other cases...
15601   //  - 32-bit shifts with a zext to i64
15602   //  - zext after ctlz, bswap, etc.
15603   //  - zext after and by a constant mask
15604 
15605   return TargetLowering::isZExtFree(Val, VT2);
15606 }
15607 
15608 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
15609   assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
15610          "invalid fpext types");
15611   // Extending to float128 is not free.
15612   if (DestVT == MVT::f128)
15613     return false;
15614   return true;
15615 }
15616 
15617 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
15618   return isInt<16>(Imm) || isUInt<16>(Imm);
15619 }
15620 
15621 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
15622   return isInt<16>(Imm) || isUInt<16>(Imm);
15623 }
15624 
15625 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
15626                                                        unsigned,
15627                                                        unsigned,
15628                                                        MachineMemOperand::Flags,
15629                                                        bool *Fast) const {
15630   if (DisablePPCUnaligned)
15631     return false;
15632 
15633   // PowerPC supports unaligned memory access for simple non-vector types.
15634   // Although accessing unaligned addresses is not as efficient as accessing
15635   // aligned addresses, it is generally more efficient than manual expansion,
15636   // and generally only traps for software emulation when crossing page
15637   // boundaries.
15638 
15639   if (!VT.isSimple())
15640     return false;
15641 
15642   if (VT.isFloatingPoint() && !VT.isVector() &&
15643       !Subtarget.allowsUnalignedFPAccess())
15644     return false;
15645 
15646   if (VT.getSimpleVT().isVector()) {
15647     if (Subtarget.hasVSX()) {
15648       if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
15649           VT != MVT::v4f32 && VT != MVT::v4i32)
15650         return false;
15651     } else {
15652       return false;
15653     }
15654   }
15655 
15656   if (VT == MVT::ppcf128)
15657     return false;
15658 
15659   if (Fast)
15660     *Fast = true;
15661 
15662   return true;
15663 }
15664 
15665 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
15666                                                SDValue C) const {
15667   // Check integral scalar types.
15668   if (!VT.isScalarInteger())
15669     return false;
15670   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
15671     if (!ConstNode->getAPIntValue().isSignedIntN(64))
15672       return false;
15673     // This transformation will generate >= 2 operations. But the following
15674     // cases will generate <= 2 instructions during ISEL. So exclude them.
15675     // 1. If the constant multiplier fits 16 bits, it can be handled by one
15676     // HW instruction, ie. MULLI
15677     // 2. If the multiplier after shifted fits 16 bits, an extra shift
15678     // instruction is needed than case 1, ie. MULLI and RLDICR
15679     int64_t Imm = ConstNode->getSExtValue();
15680     unsigned Shift = countTrailingZeros<uint64_t>(Imm);
15681     Imm >>= Shift;
15682     if (isInt<16>(Imm))
15683       return false;
15684     uint64_t UImm = static_cast<uint64_t>(Imm);
15685     if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
15686         isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
15687       return true;
15688   }
15689   return false;
15690 }
15691 
15692 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
15693                                                    EVT VT) const {
15694   return isFMAFasterThanFMulAndFAdd(
15695       MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
15696 }
15697 
15698 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
15699                                                    Type *Ty) const {
15700   switch (Ty->getScalarType()->getTypeID()) {
15701   case Type::FloatTyID:
15702   case Type::DoubleTyID:
15703     return true;
15704   case Type::FP128TyID:
15705     return Subtarget.hasP9Vector();
15706   default:
15707     return false;
15708   }
15709 }
15710 
15711 // FIXME: add more patterns which are not profitable to hoist.
15712 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
15713   if (!I->hasOneUse())
15714     return true;
15715 
15716   Instruction *User = I->user_back();
15717   assert(User && "A single use instruction with no uses.");
15718 
15719   switch (I->getOpcode()) {
15720   case Instruction::FMul: {
15721     // Don't break FMA, PowerPC prefers FMA.
15722     if (User->getOpcode() != Instruction::FSub &&
15723         User->getOpcode() != Instruction::FAdd)
15724       return true;
15725 
15726     const TargetOptions &Options = getTargetMachine().Options;
15727     const Function *F = I->getFunction();
15728     const DataLayout &DL = F->getParent()->getDataLayout();
15729     Type *Ty = User->getOperand(0)->getType();
15730 
15731     return !(
15732         isFMAFasterThanFMulAndFAdd(*F, Ty) &&
15733         isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
15734         (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
15735   }
15736   case Instruction::Load: {
15737     // Don't break "store (load float*)" pattern, this pattern will be combined
15738     // to "store (load int32)" in later InstCombine pass. See function
15739     // combineLoadToOperationType. On PowerPC, loading a float point takes more
15740     // cycles than loading a 32 bit integer.
15741     LoadInst *LI = cast<LoadInst>(I);
15742     // For the loads that combineLoadToOperationType does nothing, like
15743     // ordered load, it should be profitable to hoist them.
15744     // For swifterror load, it can only be used for pointer to pointer type, so
15745     // later type check should get rid of this case.
15746     if (!LI->isUnordered())
15747       return true;
15748 
15749     if (User->getOpcode() != Instruction::Store)
15750       return true;
15751 
15752     if (I->getType()->getTypeID() != Type::FloatTyID)
15753       return true;
15754 
15755     return false;
15756   }
15757   default:
15758     return true;
15759   }
15760   return true;
15761 }
15762 
15763 const MCPhysReg *
15764 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
15765   // LR is a callee-save register, but we must treat it as clobbered by any call
15766   // site. Hence we include LR in the scratch registers, which are in turn added
15767   // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
15768   // to CTR, which is used by any indirect call.
15769   static const MCPhysReg ScratchRegs[] = {
15770     PPC::X12, PPC::LR8, PPC::CTR8, 0
15771   };
15772 
15773   return ScratchRegs;
15774 }
15775 
15776 Register PPCTargetLowering::getExceptionPointerRegister(
15777     const Constant *PersonalityFn) const {
15778   return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
15779 }
15780 
15781 Register PPCTargetLowering::getExceptionSelectorRegister(
15782     const Constant *PersonalityFn) const {
15783   return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
15784 }
15785 
15786 bool
15787 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
15788                      EVT VT , unsigned DefinedValues) const {
15789   if (VT == MVT::v2i64)
15790     return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
15791 
15792   if (Subtarget.hasVSX())
15793     return true;
15794 
15795   return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
15796 }
15797 
15798 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
15799   if (DisableILPPref || Subtarget.enableMachineScheduler())
15800     return TargetLowering::getSchedulingPreference(N);
15801 
15802   return Sched::ILP;
15803 }
15804 
15805 // Create a fast isel object.
15806 FastISel *
15807 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
15808                                   const TargetLibraryInfo *LibInfo) const {
15809   return PPC::createFastISel(FuncInfo, LibInfo);
15810 }
15811 
15812 // 'Inverted' means the FMA opcode after negating one multiplicand.
15813 // For example, (fma -a b c) = (fnmsub a b c)
15814 static unsigned invertFMAOpcode(unsigned Opc) {
15815   switch (Opc) {
15816   default:
15817     llvm_unreachable("Invalid FMA opcode for PowerPC!");
15818   case ISD::FMA:
15819     return PPCISD::FNMSUB;
15820   case PPCISD::FNMSUB:
15821     return ISD::FMA;
15822   }
15823 }
15824 
15825 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
15826                                                 bool LegalOps, bool OptForSize,
15827                                                 NegatibleCost &Cost,
15828                                                 unsigned Depth) const {
15829   if (Depth > SelectionDAG::MaxRecursionDepth)
15830     return SDValue();
15831 
15832   unsigned Opc = Op.getOpcode();
15833   EVT VT = Op.getValueType();
15834   SDNodeFlags Flags = Op.getNode()->getFlags();
15835 
15836   switch (Opc) {
15837   case PPCISD::FNMSUB:
15838     if (!Op.hasOneUse() || !isTypeLegal(VT))
15839       break;
15840 
15841     const TargetOptions &Options = getTargetMachine().Options;
15842     SDValue N0 = Op.getOperand(0);
15843     SDValue N1 = Op.getOperand(1);
15844     SDValue N2 = Op.getOperand(2);
15845     SDLoc Loc(Op);
15846 
15847     NegatibleCost N2Cost = NegatibleCost::Expensive;
15848     SDValue NegN2 =
15849         getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
15850 
15851     if (!NegN2)
15852       return SDValue();
15853 
15854     // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
15855     // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
15856     // These transformations may change sign of zeroes. For example,
15857     // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
15858     if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
15859       // Try and choose the cheaper one to negate.
15860       NegatibleCost N0Cost = NegatibleCost::Expensive;
15861       SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
15862                                            N0Cost, Depth + 1);
15863 
15864       NegatibleCost N1Cost = NegatibleCost::Expensive;
15865       SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
15866                                            N1Cost, Depth + 1);
15867 
15868       if (NegN0 && N0Cost <= N1Cost) {
15869         Cost = std::min(N0Cost, N2Cost);
15870         return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
15871       } else if (NegN1) {
15872         Cost = std::min(N1Cost, N2Cost);
15873         return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
15874       }
15875     }
15876 
15877     // (fneg (fnmsub a b c)) => (fma a b (fneg c))
15878     if (isOperationLegal(ISD::FMA, VT)) {
15879       Cost = N2Cost;
15880       return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
15881     }
15882 
15883     break;
15884   }
15885 
15886   return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
15887                                               Cost, Depth);
15888 }
15889 
15890 // Override to enable LOAD_STACK_GUARD lowering on Linux.
15891 bool PPCTargetLowering::useLoadStackGuardNode() const {
15892   if (!Subtarget.isTargetLinux())
15893     return TargetLowering::useLoadStackGuardNode();
15894   return true;
15895 }
15896 
15897 // Override to disable global variable loading on Linux.
15898 void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
15899   if (!Subtarget.isTargetLinux())
15900     return TargetLowering::insertSSPDeclarations(M);
15901 }
15902 
15903 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
15904                                      bool ForCodeSize) const {
15905   if (!VT.isSimple() || !Subtarget.hasVSX())
15906     return false;
15907 
15908   switch(VT.getSimpleVT().SimpleTy) {
15909   default:
15910     // For FP types that are currently not supported by PPC backend, return
15911     // false. Examples: f16, f80.
15912     return false;
15913   case MVT::f32:
15914   case MVT::f64:
15915     if (Subtarget.hasPrefixInstrs()) {
15916       // With prefixed instructions, we can materialize anything that can be
15917       // represented with a 32-bit immediate, not just positive zero.
15918       APFloat APFloatOfImm = Imm;
15919       return convertToNonDenormSingle(APFloatOfImm);
15920     }
15921     LLVM_FALLTHROUGH;
15922   case MVT::ppcf128:
15923     return Imm.isPosZero();
15924   }
15925 }
15926 
15927 // For vector shift operation op, fold
15928 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
15929 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
15930                                   SelectionDAG &DAG) {
15931   SDValue N0 = N->getOperand(0);
15932   SDValue N1 = N->getOperand(1);
15933   EVT VT = N0.getValueType();
15934   unsigned OpSizeInBits = VT.getScalarSizeInBits();
15935   unsigned Opcode = N->getOpcode();
15936   unsigned TargetOpcode;
15937 
15938   switch (Opcode) {
15939   default:
15940     llvm_unreachable("Unexpected shift operation");
15941   case ISD::SHL:
15942     TargetOpcode = PPCISD::SHL;
15943     break;
15944   case ISD::SRL:
15945     TargetOpcode = PPCISD::SRL;
15946     break;
15947   case ISD::SRA:
15948     TargetOpcode = PPCISD::SRA;
15949     break;
15950   }
15951 
15952   if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
15953       N1->getOpcode() == ISD::AND)
15954     if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
15955       if (Mask->getZExtValue() == OpSizeInBits - 1)
15956         return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
15957 
15958   return SDValue();
15959 }
15960 
15961 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
15962   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15963     return Value;
15964 
15965   SDValue N0 = N->getOperand(0);
15966   ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
15967   if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
15968       N0.getOpcode() != ISD::SIGN_EXTEND ||
15969       N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
15970       N->getValueType(0) != MVT::i64)
15971     return SDValue();
15972 
15973   // We can't save an operation here if the value is already extended, and
15974   // the existing shift is easier to combine.
15975   SDValue ExtsSrc = N0.getOperand(0);
15976   if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
15977       ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
15978     return SDValue();
15979 
15980   SDLoc DL(N0);
15981   SDValue ShiftBy = SDValue(CN1, 0);
15982   // We want the shift amount to be i32 on the extswli, but the shift could
15983   // have an i64.
15984   if (ShiftBy.getValueType() == MVT::i64)
15985     ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
15986 
15987   return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
15988                          ShiftBy);
15989 }
15990 
15991 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
15992   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
15993     return Value;
15994 
15995   return SDValue();
15996 }
15997 
15998 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
15999   if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16000     return Value;
16001 
16002   return SDValue();
16003 }
16004 
16005 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16006 // Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16007 // When C is zero, the equation (addi Z, -C) can be simplified to Z
16008 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16009 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
16010                                  const PPCSubtarget &Subtarget) {
16011   if (!Subtarget.isPPC64())
16012     return SDValue();
16013 
16014   SDValue LHS = N->getOperand(0);
16015   SDValue RHS = N->getOperand(1);
16016 
16017   auto isZextOfCompareWithConstant = [](SDValue Op) {
16018     if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16019         Op.getValueType() != MVT::i64)
16020       return false;
16021 
16022     SDValue Cmp = Op.getOperand(0);
16023     if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16024         Cmp.getOperand(0).getValueType() != MVT::i64)
16025       return false;
16026 
16027     if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16028       int64_t NegConstant = 0 - Constant->getSExtValue();
16029       // Due to the limitations of the addi instruction,
16030       // -C is required to be [-32768, 32767].
16031       return isInt<16>(NegConstant);
16032     }
16033 
16034     return false;
16035   };
16036 
16037   bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16038   bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16039 
16040   // If there is a pattern, canonicalize a zext operand to the RHS.
16041   if (LHSHasPattern && !RHSHasPattern)
16042     std::swap(LHS, RHS);
16043   else if (!LHSHasPattern && !RHSHasPattern)
16044     return SDValue();
16045 
16046   SDLoc DL(N);
16047   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16048   SDValue Cmp = RHS.getOperand(0);
16049   SDValue Z = Cmp.getOperand(0);
16050   auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));
16051 
16052   assert(Constant && "Constant Should not be a null pointer.");
16053   int64_t NegConstant = 0 - Constant->getSExtValue();
16054 
16055   switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16056   default: break;
16057   case ISD::SETNE: {
16058     //                                 when C == 0
16059     //                             --> addze X, (addic Z, -1).carry
16060     //                            /
16061     // add X, (zext(setne Z, C))--
16062     //                            \    when -32768 <= -C <= 32767 && C != 0
16063     //                             --> addze X, (addic (addi Z, -C), -1).carry
16064     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16065                               DAG.getConstant(NegConstant, DL, MVT::i64));
16066     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16067     SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16068                                AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
16069     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16070                        SDValue(Addc.getNode(), 1));
16071     }
16072   case ISD::SETEQ: {
16073     //                                 when C == 0
16074     //                             --> addze X, (subfic Z, 0).carry
16075     //                            /
16076     // add X, (zext(sete  Z, C))--
16077     //                            \    when -32768 <= -C <= 32767 && C != 0
16078     //                             --> addze X, (subfic (addi Z, -C), 0).carry
16079     SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16080                               DAG.getConstant(NegConstant, DL, MVT::i64));
16081     SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16082     SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16083                                DAG.getConstant(0, DL, MVT::i64), AddOrZ);
16084     return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
16085                        SDValue(Subc.getNode(), 1));
16086     }
16087   }
16088 
16089   return SDValue();
16090 }
16091 
16092 // Transform
16093 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
16094 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
16095 // In this case both C1 and C2 must be known constants.
16096 // C1+C2 must fit into a 34 bit signed integer.
16097 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
16098                                           const PPCSubtarget &Subtarget) {
16099   if (!Subtarget.isUsingPCRelativeCalls())
16100     return SDValue();
16101 
16102   // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
16103   // If we find that node try to cast the Global Address and the Constant.
16104   SDValue LHS = N->getOperand(0);
16105   SDValue RHS = N->getOperand(1);
16106 
16107   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16108     std::swap(LHS, RHS);
16109 
16110   if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
16111     return SDValue();
16112 
16113   // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
16114   GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
16115   ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
16116 
16117   // Check that both casts succeeded.
16118   if (!GSDN || !ConstNode)
16119     return SDValue();
16120 
16121   int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
16122   SDLoc DL(GSDN);
16123 
16124   // The signed int offset needs to fit in 34 bits.
16125   if (!isInt<34>(NewOffset))
16126     return SDValue();
16127 
16128   // The new global address is a copy of the old global address except
16129   // that it has the updated Offset.
16130   SDValue GA =
16131       DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
16132                                  NewOffset, GSDN->getTargetFlags());
16133   SDValue MatPCRel =
16134       DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
16135   return MatPCRel;
16136 }
16137 
16138 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
16139   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
16140     return Value;
16141 
16142   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
16143     return Value;
16144 
16145   return SDValue();
16146 }
16147 
16148 // Detect TRUNCATE operations on bitcasts of float128 values.
16149 // What we are looking for here is the situtation where we extract a subset
16150 // of bits from a 128 bit float.
16151 // This can be of two forms:
16152 // 1) BITCAST of f128 feeding TRUNCATE
16153 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
16154 // The reason this is required is because we do not have a legal i128 type
16155 // and so we want to prevent having to store the f128 and then reload part
16156 // of it.
16157 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
16158                                            DAGCombinerInfo &DCI) const {
16159   // If we are using CRBits then try that first.
16160   if (Subtarget.useCRBits()) {
16161     // Check if CRBits did anything and return that if it did.
16162     if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
16163       return CRTruncValue;
16164   }
16165 
16166   SDLoc dl(N);
16167   SDValue Op0 = N->getOperand(0);
16168 
16169   // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
16170   if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
16171     EVT VT = N->getValueType(0);
16172     if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16173       return SDValue();
16174     SDValue Sub = Op0.getOperand(0);
16175     if (Sub.getOpcode() == ISD::SUB) {
16176       SDValue SubOp0 = Sub.getOperand(0);
16177       SDValue SubOp1 = Sub.getOperand(1);
16178       if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
16179           (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
16180         return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
16181                                SubOp1.getOperand(0),
16182                                DCI.DAG.getTargetConstant(0, dl, MVT::i32));
16183       }
16184     }
16185   }
16186 
16187   // Looking for a truncate of i128 to i64.
16188   if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
16189     return SDValue();
16190 
16191   int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
16192 
16193   // SRL feeding TRUNCATE.
16194   if (Op0.getOpcode() == ISD::SRL) {
16195     ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
16196     // The right shift has to be by 64 bits.
16197     if (!ConstNode || ConstNode->getZExtValue() != 64)
16198       return SDValue();
16199 
16200     // Switch the element number to extract.
16201     EltToExtract = EltToExtract ? 0 : 1;
16202     // Update Op0 past the SRL.
16203     Op0 = Op0.getOperand(0);
16204   }
16205 
16206   // BITCAST feeding a TRUNCATE possibly via SRL.
16207   if (Op0.getOpcode() == ISD::BITCAST &&
16208       Op0.getValueType() == MVT::i128 &&
16209       Op0.getOperand(0).getValueType() == MVT::f128) {
16210     SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
16211     return DCI.DAG.getNode(
16212         ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
16213         DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
16214   }
16215   return SDValue();
16216 }
16217 
16218 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
16219   SelectionDAG &DAG = DCI.DAG;
16220 
16221   ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
16222   if (!ConstOpOrElement)
16223     return SDValue();
16224 
16225   // An imul is usually smaller than the alternative sequence for legal type.
16226   if (DAG.getMachineFunction().getFunction().hasMinSize() &&
16227       isOperationLegal(ISD::MUL, N->getValueType(0)))
16228     return SDValue();
16229 
16230   auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
16231     switch (this->Subtarget.getCPUDirective()) {
16232     default:
16233       // TODO: enhance the condition for subtarget before pwr8
16234       return false;
16235     case PPC::DIR_PWR8:
16236       //  type        mul     add    shl
16237       // scalar        4       1      1
16238       // vector        7       2      2
16239       return true;
16240     case PPC::DIR_PWR9:
16241     case PPC::DIR_PWR10:
16242     case PPC::DIR_PWR_FUTURE:
16243       //  type        mul     add    shl
16244       // scalar        5       2      2
16245       // vector        7       2      2
16246 
16247       // The cycle RATIO of related operations are showed as a table above.
16248       // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
16249       // scalar and vector type. For 2 instrs patterns, add/sub + shl
16250       // are 4, it is always profitable; but for 3 instrs patterns
16251       // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
16252       // So we should only do it for vector type.
16253       return IsAddOne && IsNeg ? VT.isVector() : true;
16254     }
16255   };
16256 
16257   EVT VT = N->getValueType(0);
16258   SDLoc DL(N);
16259 
16260   const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
16261   bool IsNeg = MulAmt.isNegative();
16262   APInt MulAmtAbs = MulAmt.abs();
16263 
16264   if ((MulAmtAbs - 1).isPowerOf2()) {
16265     // (mul x, 2^N + 1) => (add (shl x, N), x)
16266     // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
16267 
16268     if (!IsProfitable(IsNeg, true, VT))
16269       return SDValue();
16270 
16271     SDValue Op0 = N->getOperand(0);
16272     SDValue Op1 =
16273         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16274                     DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
16275     SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
16276 
16277     if (!IsNeg)
16278       return Res;
16279 
16280     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
16281   } else if ((MulAmtAbs + 1).isPowerOf2()) {
16282     // (mul x, 2^N - 1) => (sub (shl x, N), x)
16283     // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
16284 
16285     if (!IsProfitable(IsNeg, false, VT))
16286       return SDValue();
16287 
16288     SDValue Op0 = N->getOperand(0);
16289     SDValue Op1 =
16290         DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
16291                     DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
16292 
16293     if (!IsNeg)
16294       return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
16295     else
16296       return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
16297 
16298   } else {
16299     return SDValue();
16300   }
16301 }
16302 
16303 // Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
16304 // in combiner since we need to check SD flags and other subtarget features.
16305 SDValue PPCTargetLowering::combineFMALike(SDNode *N,
16306                                           DAGCombinerInfo &DCI) const {
16307   SDValue N0 = N->getOperand(0);
16308   SDValue N1 = N->getOperand(1);
16309   SDValue N2 = N->getOperand(2);
16310   SDNodeFlags Flags = N->getFlags();
16311   EVT VT = N->getValueType(0);
16312   SelectionDAG &DAG = DCI.DAG;
16313   const TargetOptions &Options = getTargetMachine().Options;
16314   unsigned Opc = N->getOpcode();
16315   bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
16316   bool LegalOps = !DCI.isBeforeLegalizeOps();
16317   SDLoc Loc(N);
16318 
16319   if (!isOperationLegal(ISD::FMA, VT))
16320     return SDValue();
16321 
16322   // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
16323   // since (fnmsub a b c)=-0 while c-ab=+0.
16324   if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
16325     return SDValue();
16326 
16327   // (fma (fneg a) b c) => (fnmsub a b c)
16328   // (fnmsub (fneg a) b c) => (fma a b c)
16329   if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
16330     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
16331 
16332   // (fma a (fneg b) c) => (fnmsub a b c)
16333   // (fnmsub a (fneg b) c) => (fma a b c)
16334   if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
16335     return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
16336 
16337   return SDValue();
16338 }
16339 
16340 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
16341   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
16342   if (!Subtarget.is64BitELFABI())
16343     return false;
16344 
16345   // If not a tail call then no need to proceed.
16346   if (!CI->isTailCall())
16347     return false;
16348 
16349   // If sibling calls have been disabled and tail-calls aren't guaranteed
16350   // there is no reason to duplicate.
16351   auto &TM = getTargetMachine();
16352   if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
16353     return false;
16354 
16355   // Can't tail call a function called indirectly, or if it has variadic args.
16356   const Function *Callee = CI->getCalledFunction();
16357   if (!Callee || Callee->isVarArg())
16358     return false;
16359 
16360   // Make sure the callee and caller calling conventions are eligible for tco.
16361   const Function *Caller = CI->getParent()->getParent();
16362   if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
16363                                            CI->getCallingConv()))
16364       return false;
16365 
16366   // If the function is local then we have a good chance at tail-calling it
16367   return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
16368 }
16369 
16370 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
16371   if (!Subtarget.hasVSX())
16372     return false;
16373   if (Subtarget.hasP9Vector() && VT == MVT::f128)
16374     return true;
16375   return VT == MVT::f32 || VT == MVT::f64 ||
16376     VT == MVT::v4f32 || VT == MVT::v2f64;
16377 }
16378 
16379 bool PPCTargetLowering::
16380 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
16381   const Value *Mask = AndI.getOperand(1);
16382   // If the mask is suitable for andi. or andis. we should sink the and.
16383   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
16384     // Can't handle constants wider than 64-bits.
16385     if (CI->getBitWidth() > 64)
16386       return false;
16387     int64_t ConstVal = CI->getZExtValue();
16388     return isUInt<16>(ConstVal) ||
16389       (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
16390   }
16391 
16392   // For non-constant masks, we can always use the record-form and.
16393   return true;
16394 }
16395 
16396 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
16397 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
16398 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
16399 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
16400 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
16401 SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
16402   assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
16403   assert(Subtarget.hasP9Altivec() &&
16404          "Only combine this when P9 altivec supported!");
16405   EVT VT = N->getValueType(0);
16406   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16407     return SDValue();
16408 
16409   SelectionDAG &DAG = DCI.DAG;
16410   SDLoc dl(N);
16411   if (N->getOperand(0).getOpcode() == ISD::SUB) {
16412     // Even for signed integers, if it's known to be positive (as signed
16413     // integer) due to zero-extended inputs.
16414     unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
16415     unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
16416     if ((SubOpcd0 == ISD::ZERO_EXTEND ||
16417          SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
16418         (SubOpcd1 == ISD::ZERO_EXTEND ||
16419          SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
16420       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16421                          N->getOperand(0)->getOperand(0),
16422                          N->getOperand(0)->getOperand(1),
16423                          DAG.getTargetConstant(0, dl, MVT::i32));
16424     }
16425 
16426     // For type v4i32, it can be optimized with xvnegsp + vabsduw
16427     if (N->getOperand(0).getValueType() == MVT::v4i32 &&
16428         N->getOperand(0).hasOneUse()) {
16429       return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
16430                          N->getOperand(0)->getOperand(0),
16431                          N->getOperand(0)->getOperand(1),
16432                          DAG.getTargetConstant(1, dl, MVT::i32));
16433     }
16434   }
16435 
16436   return SDValue();
16437 }
16438 
16439 // For type v4i32/v8ii16/v16i8, transform
16440 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
16441 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
16442 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
16443 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
16444 SDValue PPCTargetLowering::combineVSelect(SDNode *N,
16445                                           DAGCombinerInfo &DCI) const {
16446   assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
16447   assert(Subtarget.hasP9Altivec() &&
16448          "Only combine this when P9 altivec supported!");
16449 
16450   SelectionDAG &DAG = DCI.DAG;
16451   SDLoc dl(N);
16452   SDValue Cond = N->getOperand(0);
16453   SDValue TrueOpnd = N->getOperand(1);
16454   SDValue FalseOpnd = N->getOperand(2);
16455   EVT VT = N->getOperand(1).getValueType();
16456 
16457   if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
16458       FalseOpnd.getOpcode() != ISD::SUB)
16459     return SDValue();
16460 
16461   // ABSD only available for type v4i32/v8i16/v16i8
16462   if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
16463     return SDValue();
16464 
16465   // At least to save one more dependent computation
16466   if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
16467     return SDValue();
16468 
16469   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
16470 
16471   // Can only handle unsigned comparison here
16472   switch (CC) {
16473   default:
16474     return SDValue();
16475   case ISD::SETUGT:
16476   case ISD::SETUGE:
16477     break;
16478   case ISD::SETULT:
16479   case ISD::SETULE:
16480     std::swap(TrueOpnd, FalseOpnd);
16481     break;
16482   }
16483 
16484   SDValue CmpOpnd1 = Cond.getOperand(0);
16485   SDValue CmpOpnd2 = Cond.getOperand(1);
16486 
16487   // SETCC CmpOpnd1 CmpOpnd2 cond
16488   // TrueOpnd = CmpOpnd1 - CmpOpnd2
16489   // FalseOpnd = CmpOpnd2 - CmpOpnd1
16490   if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
16491       TrueOpnd.getOperand(1) == CmpOpnd2 &&
16492       FalseOpnd.getOperand(0) == CmpOpnd2 &&
16493       FalseOpnd.getOperand(1) == CmpOpnd1) {
16494     return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
16495                        CmpOpnd1, CmpOpnd2,
16496                        DAG.getTargetConstant(0, dl, MVT::i32));
16497   }
16498 
16499   return SDValue();
16500 }
16501