1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that Hexagon uses to lower LLVM code
10 // into a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "HexagonISelLowering.h"
15 #include "Hexagon.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonRegisterInfo.h"
18 #include "HexagonSubtarget.h"
19 #include "HexagonTargetMachine.h"
20 #include "HexagonTargetObjectFile.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringSwitch.h"
25 #include "llvm/CodeGen/CallingConvLower.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineMemOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/RuntimeLibcalls.h"
31 #include "llvm/CodeGen/SelectionDAG.h"
32 #include "llvm/CodeGen/TargetCallingConv.h"
33 #include "llvm/CodeGen/ValueTypes.h"
34 #include "llvm/IR/BasicBlock.h"
35 #include "llvm/IR/CallingConv.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/DiagnosticInfo.h"
39 #include "llvm/IR/DiagnosticPrinter.h"
40 #include "llvm/IR/Function.h"
41 #include "llvm/IR/GlobalValue.h"
42 #include "llvm/IR/InlineAsm.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/IntrinsicsHexagon.h"
47 #include "llvm/IR/IRBuilder.h"
48 #include "llvm/IR/Module.h"
49 #include "llvm/IR/Type.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/MC/MCRegisterInfo.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/CodeGen.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/ErrorHandling.h"
57 #include "llvm/Support/MathExtras.h"
58 #include "llvm/Support/raw_ostream.h"
59 #include "llvm/Target/TargetMachine.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstddef>
63 #include <cstdint>
64 #include <limits>
65 #include <utility>
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "hexagon-lowering"
70 
71 static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
72   cl::init(true), cl::Hidden,
73   cl::desc("Control jump table emission on Hexagon target"));
74 
75 static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
76   cl::Hidden, cl::ZeroOrMore, cl::init(false),
77   cl::desc("Enable Hexagon SDNode scheduling"));
78 
79 static cl::opt<bool> EnableFastMath("ffast-math",
80   cl::Hidden, cl::ZeroOrMore, cl::init(false),
81   cl::desc("Enable Fast Math processing"));
82 
83 static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
84   cl::Hidden, cl::ZeroOrMore, cl::init(5),
85   cl::desc("Set minimum jump tables"));
86 
87 static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
88   cl::Hidden, cl::ZeroOrMore, cl::init(6),
89   cl::desc("Max #stores to inline memcpy"));
90 
91 static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
92   cl::Hidden, cl::ZeroOrMore, cl::init(4),
93   cl::desc("Max #stores to inline memcpy"));
94 
95 static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
96   cl::Hidden, cl::ZeroOrMore, cl::init(6),
97   cl::desc("Max #stores to inline memmove"));
98 
99 static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
100   cl::Hidden, cl::ZeroOrMore, cl::init(4),
101   cl::desc("Max #stores to inline memmove"));
102 
103 static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
104   cl::Hidden, cl::ZeroOrMore, cl::init(8),
105   cl::desc("Max #stores to inline memset"));
106 
107 static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
108   cl::Hidden, cl::ZeroOrMore, cl::init(4),
109   cl::desc("Max #stores to inline memset"));
110 
111 static cl::opt<bool> AlignLoads("hexagon-align-loads",
112   cl::Hidden, cl::init(false),
113   cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
114 
115 static cl::opt<bool>
116     DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
117                             cl::init(false),
118                             cl::desc("Disable minimum alignment of 1 for "
119                                      "arguments passed by value on stack"));
120 
121 namespace {
122 
123   class HexagonCCState : public CCState {
124     unsigned NumNamedVarArgParams = 0;
125 
126   public:
127     HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
128                    SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
129                    unsigned NumNamedArgs)
130         : CCState(CC, IsVarArg, MF, locs, C),
131           NumNamedVarArgParams(NumNamedArgs) {}
132     unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
133   };
134 
135 } // end anonymous namespace
136 
137 
138 // Implement calling convention for Hexagon.
139 
140 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
141                        CCValAssign::LocInfo &LocInfo,
142                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
143   static const MCPhysReg ArgRegs[] = {
144     Hexagon::R0, Hexagon::R1, Hexagon::R2,
145     Hexagon::R3, Hexagon::R4, Hexagon::R5
146   };
147   const unsigned NumArgRegs = array_lengthof(ArgRegs);
148   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
149 
150   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
151   if (RegNum != NumArgRegs && RegNum % 2 == 1)
152     State.AllocateReg(ArgRegs[RegNum]);
153 
154   // Always return false here, as this function only makes sure that the first
155   // unallocated register has an even register number and does not actually
156   // allocate a register for the current argument.
157   return false;
158 }
159 
160 #include "HexagonGenCallingConv.inc"
161 
162 
163 SDValue
164 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
165       const {
166   return SDValue();
167 }
168 
169 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
170 /// by "Src" to address "Dst" of size "Size".  Alignment information is
171 /// specified by the specific parameter attribute. The copy will be passed as
172 /// a byval function parameter.  Sometimes what we are copying is the end of a
173 /// larger object, the part that does not fit in registers.
174 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
175                                          SDValue Chain, ISD::ArgFlagsTy Flags,
176                                          SelectionDAG &DAG, const SDLoc &dl) {
177   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
178   return DAG.getMemcpy(
179       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
180       /*isVolatile=*/false, /*AlwaysInline=*/false,
181       /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
182 }
183 
184 bool
185 HexagonTargetLowering::CanLowerReturn(
186     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
187     const SmallVectorImpl<ISD::OutputArg> &Outs,
188     LLVMContext &Context) const {
189   SmallVector<CCValAssign, 16> RVLocs;
190   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
191 
192   if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
193     return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
194   return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
195 }
196 
197 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
198 // passed by value, the function prototype is modified to return void and
199 // the value is stored in memory pointed by a pointer passed by caller.
200 SDValue
201 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
202                                    bool IsVarArg,
203                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
204                                    const SmallVectorImpl<SDValue> &OutVals,
205                                    const SDLoc &dl, SelectionDAG &DAG) const {
206   // CCValAssign - represent the assignment of the return value to locations.
207   SmallVector<CCValAssign, 16> RVLocs;
208 
209   // CCState - Info about the registers and stack slot.
210   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
211                  *DAG.getContext());
212 
213   // Analyze return values of ISD::RET
214   if (Subtarget.useHVXOps())
215     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
216   else
217     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
218 
219   SDValue Flag;
220   SmallVector<SDValue, 4> RetOps(1, Chain);
221 
222   // Copy the result values into the output registers.
223   for (unsigned i = 0; i != RVLocs.size(); ++i) {
224     CCValAssign &VA = RVLocs[i];
225     SDValue Val = OutVals[i];
226 
227     switch (VA.getLocInfo()) {
228       default:
229         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
230         llvm_unreachable("Unknown loc info!");
231       case CCValAssign::Full:
232         break;
233       case CCValAssign::BCvt:
234         Val = DAG.getBitcast(VA.getLocVT(), Val);
235         break;
236       case CCValAssign::SExt:
237         Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
238         break;
239       case CCValAssign::ZExt:
240         Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
241         break;
242       case CCValAssign::AExt:
243         Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
244         break;
245     }
246 
247     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Flag);
248 
249     // Guarantee that all emitted copies are stuck together with flags.
250     Flag = Chain.getValue(1);
251     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
252   }
253 
254   RetOps[0] = Chain;  // Update chain.
255 
256   // Add the flag if we have it.
257   if (Flag.getNode())
258     RetOps.push_back(Flag);
259 
260   return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
261 }
262 
263 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
264   // If either no tail call or told not to tail call at all, don't.
265   return CI->isTailCall();
266 }
267 
268 Register HexagonTargetLowering::getRegisterByName(
269       const char* RegName, LLT VT, const MachineFunction &) const {
270   // Just support r19, the linux kernel uses it.
271   Register Reg = StringSwitch<Register>(RegName)
272                      .Case("r0", Hexagon::R0)
273                      .Case("r1", Hexagon::R1)
274                      .Case("r2", Hexagon::R2)
275                      .Case("r3", Hexagon::R3)
276                      .Case("r4", Hexagon::R4)
277                      .Case("r5", Hexagon::R5)
278                      .Case("r6", Hexagon::R6)
279                      .Case("r7", Hexagon::R7)
280                      .Case("r8", Hexagon::R8)
281                      .Case("r9", Hexagon::R9)
282                      .Case("r10", Hexagon::R10)
283                      .Case("r11", Hexagon::R11)
284                      .Case("r12", Hexagon::R12)
285                      .Case("r13", Hexagon::R13)
286                      .Case("r14", Hexagon::R14)
287                      .Case("r15", Hexagon::R15)
288                      .Case("r16", Hexagon::R16)
289                      .Case("r17", Hexagon::R17)
290                      .Case("r18", Hexagon::R18)
291                      .Case("r19", Hexagon::R19)
292                      .Case("r20", Hexagon::R20)
293                      .Case("r21", Hexagon::R21)
294                      .Case("r22", Hexagon::R22)
295                      .Case("r23", Hexagon::R23)
296                      .Case("r24", Hexagon::R24)
297                      .Case("r25", Hexagon::R25)
298                      .Case("r26", Hexagon::R26)
299                      .Case("r27", Hexagon::R27)
300                      .Case("r28", Hexagon::R28)
301                      .Case("r29", Hexagon::R29)
302                      .Case("r30", Hexagon::R30)
303                      .Case("r31", Hexagon::R31)
304                      .Case("r1:0", Hexagon::D0)
305                      .Case("r3:2", Hexagon::D1)
306                      .Case("r5:4", Hexagon::D2)
307                      .Case("r7:6", Hexagon::D3)
308                      .Case("r9:8", Hexagon::D4)
309                      .Case("r11:10", Hexagon::D5)
310                      .Case("r13:12", Hexagon::D6)
311                      .Case("r15:14", Hexagon::D7)
312                      .Case("r17:16", Hexagon::D8)
313                      .Case("r19:18", Hexagon::D9)
314                      .Case("r21:20", Hexagon::D10)
315                      .Case("r23:22", Hexagon::D11)
316                      .Case("r25:24", Hexagon::D12)
317                      .Case("r27:26", Hexagon::D13)
318                      .Case("r29:28", Hexagon::D14)
319                      .Case("r31:30", Hexagon::D15)
320                      .Case("sp", Hexagon::R29)
321                      .Case("fp", Hexagon::R30)
322                      .Case("lr", Hexagon::R31)
323                      .Case("p0", Hexagon::P0)
324                      .Case("p1", Hexagon::P1)
325                      .Case("p2", Hexagon::P2)
326                      .Case("p3", Hexagon::P3)
327                      .Case("sa0", Hexagon::SA0)
328                      .Case("lc0", Hexagon::LC0)
329                      .Case("sa1", Hexagon::SA1)
330                      .Case("lc1", Hexagon::LC1)
331                      .Case("m0", Hexagon::M0)
332                      .Case("m1", Hexagon::M1)
333                      .Case("usr", Hexagon::USR)
334                      .Case("ugp", Hexagon::UGP)
335                      .Case("cs0", Hexagon::CS0)
336                      .Case("cs1", Hexagon::CS1)
337                      .Default(Register());
338   if (Reg)
339     return Reg;
340 
341   report_fatal_error("Invalid register name global variable");
342 }
343 
344 /// LowerCallResult - Lower the result values of an ISD::CALL into the
345 /// appropriate copies out of appropriate physical registers.  This assumes that
346 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
347 /// being lowered. Returns a SDNode with the same number of values as the
348 /// ISD::CALL.
349 SDValue HexagonTargetLowering::LowerCallResult(
350     SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
351     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
352     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
353     const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
354   // Assign locations to each value returned by this call.
355   SmallVector<CCValAssign, 16> RVLocs;
356 
357   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
358                  *DAG.getContext());
359 
360   if (Subtarget.useHVXOps())
361     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
362   else
363     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
364 
365   // Copy all of the result registers out of their specified physreg.
366   for (unsigned i = 0; i != RVLocs.size(); ++i) {
367     SDValue RetVal;
368     if (RVLocs[i].getValVT() == MVT::i1) {
369       // Return values of type MVT::i1 require special handling. The reason
370       // is that MVT::i1 is associated with the PredRegs register class, but
371       // values of that type are still returned in R0. Generate an explicit
372       // copy into a predicate register from R0, and treat the value of the
373       // predicate register as the call result.
374       auto &MRI = DAG.getMachineFunction().getRegInfo();
375       SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
376                                        MVT::i32, Glue);
377       // FR0 = (Value, Chain, Glue)
378       Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
379       SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
380                                      FR0.getValue(0), FR0.getValue(2));
381       // TPR = (Chain, Glue)
382       // Don't glue this CopyFromReg, because it copies from a virtual
383       // register. If it is glued to the call, InstrEmitter will add it
384       // as an implicit def to the call (EmitMachineNode).
385       RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
386       Glue = TPR.getValue(1);
387       Chain = TPR.getValue(0);
388     } else {
389       RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
390                                   RVLocs[i].getValVT(), Glue);
391       Glue = RetVal.getValue(2);
392       Chain = RetVal.getValue(1);
393     }
394     InVals.push_back(RetVal.getValue(0));
395   }
396 
397   return Chain;
398 }
399 
400 /// LowerCall - Functions arguments are copied from virtual regs to
401 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
402 SDValue
403 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
404                                  SmallVectorImpl<SDValue> &InVals) const {
405   SelectionDAG &DAG                     = CLI.DAG;
406   SDLoc &dl                             = CLI.DL;
407   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
408   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
409   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
410   SDValue Chain                         = CLI.Chain;
411   SDValue Callee                        = CLI.Callee;
412   CallingConv::ID CallConv              = CLI.CallConv;
413   bool IsVarArg                         = CLI.IsVarArg;
414   bool DoesNotReturn                    = CLI.DoesNotReturn;
415 
416   bool IsStructRet    = Outs.empty() ? false : Outs[0].Flags.isSRet();
417   MachineFunction &MF = DAG.getMachineFunction();
418   MachineFrameInfo &MFI = MF.getFrameInfo();
419   auto PtrVT = getPointerTy(MF.getDataLayout());
420 
421   unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
422   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
423     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
424 
425   // Linux ABI treats var-arg calls the same way as regular ones.
426   bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
427 
428   // Analyze operands of the call, assigning locations to each operand.
429   SmallVector<CCValAssign, 16> ArgLocs;
430   HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
431                         NumParams);
432 
433   if (Subtarget.useHVXOps())
434     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
435   else if (DisableArgsMinAlignment)
436     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
437   else
438     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
439 
440   if (CLI.IsTailCall) {
441     bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
442     CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
443                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
444                         OutVals, Ins, DAG);
445     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
446       CCValAssign &VA = ArgLocs[i];
447       if (VA.isMemLoc()) {
448         CLI.IsTailCall = false;
449         break;
450       }
451     }
452     LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
453                                          : "Argument must be passed on stack. "
454                                            "Not eligible for Tail Call\n"));
455   }
456   // Get a count of how many bytes are to be pushed on the stack.
457   unsigned NumBytes = CCInfo.getNextStackOffset();
458   SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
459   SmallVector<SDValue, 8> MemOpChains;
460 
461   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
462   SDValue StackPtr =
463       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
464 
465   bool NeedsArgAlign = false;
466   Align LargestAlignSeen;
467   // Walk the register/memloc assignments, inserting copies/loads.
468   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
469     CCValAssign &VA = ArgLocs[i];
470     SDValue Arg = OutVals[i];
471     ISD::ArgFlagsTy Flags = Outs[i].Flags;
472     // Record if we need > 8 byte alignment on an argument.
473     bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
474     NeedsArgAlign |= ArgAlign;
475 
476     // Promote the value if needed.
477     switch (VA.getLocInfo()) {
478       default:
479         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
480         llvm_unreachable("Unknown loc info!");
481       case CCValAssign::Full:
482         break;
483       case CCValAssign::BCvt:
484         Arg = DAG.getBitcast(VA.getLocVT(), Arg);
485         break;
486       case CCValAssign::SExt:
487         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
488         break;
489       case CCValAssign::ZExt:
490         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
491         break;
492       case CCValAssign::AExt:
493         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
494         break;
495     }
496 
497     if (VA.isMemLoc()) {
498       unsigned LocMemOffset = VA.getLocMemOffset();
499       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
500                                         StackPtr.getValueType());
501       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
502       if (ArgAlign)
503         LargestAlignSeen = std::max(
504             LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
505       if (Flags.isByVal()) {
506         // The argument is a struct passed by value. According to LLVM, "Arg"
507         // is a pointer.
508         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
509                                                         Flags, DAG, dl));
510       } else {
511         MachinePointerInfo LocPI = MachinePointerInfo::getStack(
512             DAG.getMachineFunction(), LocMemOffset);
513         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
514         MemOpChains.push_back(S);
515       }
516       continue;
517     }
518 
519     // Arguments that can be passed on register must be kept at RegsToPass
520     // vector.
521     if (VA.isRegLoc())
522       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
523   }
524 
525   if (NeedsArgAlign && Subtarget.hasV60Ops()) {
526     LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
527     Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
528     LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
529     MFI.ensureMaxAlignment(LargestAlignSeen);
530   }
531   // Transform all store nodes into one single node because all store
532   // nodes are independent of each other.
533   if (!MemOpChains.empty())
534     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
535 
536   SDValue Glue;
537   if (!CLI.IsTailCall) {
538     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
539     Glue = Chain.getValue(1);
540   }
541 
542   // Build a sequence of copy-to-reg nodes chained together with token
543   // chain and flag operands which copy the outgoing args into registers.
544   // The Glue is necessary since all emitted instructions must be
545   // stuck together.
546   if (!CLI.IsTailCall) {
547     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
548       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
549                                RegsToPass[i].second, Glue);
550       Glue = Chain.getValue(1);
551     }
552   } else {
553     // For tail calls lower the arguments to the 'real' stack slot.
554     //
555     // Force all the incoming stack arguments to be loaded from the stack
556     // before any new outgoing arguments are stored to the stack, because the
557     // outgoing stack slots may alias the incoming argument stack slots, and
558     // the alias isn't otherwise explicit. This is slightly more conservative
559     // than necessary, because it means that each store effectively depends
560     // on every argument instead of just those arguments it would clobber.
561     //
562     // Do not flag preceding copytoreg stuff together with the following stuff.
563     Glue = SDValue();
564     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
565       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
566                                RegsToPass[i].second, Glue);
567       Glue = Chain.getValue(1);
568     }
569     Glue = SDValue();
570   }
571 
572   bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
573   unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
574 
575   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
576   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
577   // node so that legalize doesn't hack it.
578   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
579     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
580   } else if (ExternalSymbolSDNode *S =
581              dyn_cast<ExternalSymbolSDNode>(Callee)) {
582     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
583   }
584 
585   // Returns a chain & a flag for retval copy to use.
586   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
587   SmallVector<SDValue, 8> Ops;
588   Ops.push_back(Chain);
589   Ops.push_back(Callee);
590 
591   // Add argument registers to the end of the list so that they are
592   // known live into the call.
593   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
594     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
595                                   RegsToPass[i].second.getValueType()));
596   }
597 
598   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
599   assert(Mask && "Missing call preserved mask for calling convention");
600   Ops.push_back(DAG.getRegisterMask(Mask));
601 
602   if (Glue.getNode())
603     Ops.push_back(Glue);
604 
605   if (CLI.IsTailCall) {
606     MFI.setHasTailCall();
607     return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
608   }
609 
610   // Set this here because we need to know this for "hasFP" in frame lowering.
611   // The target-independent code calls getFrameRegister before setting it, and
612   // getFrameRegister uses hasFP to determine whether the function has FP.
613   MFI.setHasCalls(true);
614 
615   unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
616   Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
617   Glue = Chain.getValue(1);
618 
619   // Create the CALLSEQ_END node.
620   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
621                              DAG.getIntPtrConstant(0, dl, true), Glue, dl);
622   Glue = Chain.getValue(1);
623 
624   // Handle result values, copying them out of physregs into vregs that we
625   // return.
626   return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
627                          InVals, OutVals, Callee);
628 }
629 
630 /// Returns true by value, base pointer and offset pointer and addressing
631 /// mode by reference if this node can be combined with a load / store to
632 /// form a post-indexed load / store.
633 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
634       SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
635       SelectionDAG &DAG) const {
636   LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
637   if (!LSN)
638     return false;
639   EVT VT = LSN->getMemoryVT();
640   if (!VT.isSimple())
641     return false;
642   bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
643                      VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
644                      VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
645                      VT == MVT::v4i16 || VT == MVT::v8i8 ||
646                      Subtarget.isHVXVectorType(VT.getSimpleVT());
647   if (!IsLegalType)
648     return false;
649 
650   if (Op->getOpcode() != ISD::ADD)
651     return false;
652   Base = Op->getOperand(0);
653   Offset = Op->getOperand(1);
654   if (!isa<ConstantSDNode>(Offset.getNode()))
655     return false;
656   AM = ISD::POST_INC;
657 
658   int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
659   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
660 }
661 
662 SDValue
663 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
664   MachineFunction &MF = DAG.getMachineFunction();
665   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
666   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
667   unsigned LR = HRI.getRARegister();
668 
669   if ((Op.getOpcode() != ISD::INLINEASM &&
670        Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
671     return Op;
672 
673   unsigned NumOps = Op.getNumOperands();
674   if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
675     --NumOps;  // Ignore the flag operand.
676 
677   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
678     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
679     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
680     ++i;  // Skip the ID value.
681 
682     switch (InlineAsm::getKind(Flags)) {
683       default:
684         llvm_unreachable("Bad flags!");
685       case InlineAsm::Kind_RegUse:
686       case InlineAsm::Kind_Imm:
687       case InlineAsm::Kind_Mem:
688         i += NumVals;
689         break;
690       case InlineAsm::Kind_Clobber:
691       case InlineAsm::Kind_RegDef:
692       case InlineAsm::Kind_RegDefEarlyClobber: {
693         for (; NumVals; --NumVals, ++i) {
694           unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
695           if (Reg != LR)
696             continue;
697           HMFI.setHasClobberLR(true);
698           return Op;
699         }
700         break;
701       }
702     }
703   }
704 
705   return Op;
706 }
707 
708 // Need to transform ISD::PREFETCH into something that doesn't inherit
709 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
710 // SDNPMayStore.
711 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
712                                              SelectionDAG &DAG) const {
713   SDValue Chain = Op.getOperand(0);
714   SDValue Addr = Op.getOperand(1);
715   // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
716   // if the "reg" is fed by an "add".
717   SDLoc DL(Op);
718   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
719   return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
720 }
721 
722 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
723 // is marked as having side-effects, while the register read on Hexagon does
724 // not have any. TableGen refuses to accept the direct pattern from that node
725 // to the A4_tfrcpp.
726 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
727                                                      SelectionDAG &DAG) const {
728   SDValue Chain = Op.getOperand(0);
729   SDLoc dl(Op);
730   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
731   return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
732 }
733 
734 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
735       SelectionDAG &DAG) const {
736   SDValue Chain = Op.getOperand(0);
737   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
738   // Lower the hexagon_prefetch builtin to DCFETCH, as above.
739   if (IntNo == Intrinsic::hexagon_prefetch) {
740     SDValue Addr = Op.getOperand(2);
741     SDLoc DL(Op);
742     SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
743     return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
744   }
745   return SDValue();
746 }
747 
748 SDValue
749 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
750                                                SelectionDAG &DAG) const {
751   SDValue Chain = Op.getOperand(0);
752   SDValue Size = Op.getOperand(1);
753   SDValue Align = Op.getOperand(2);
754   SDLoc dl(Op);
755 
756   ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
757   assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
758 
759   unsigned A = AlignConst->getSExtValue();
760   auto &HFI = *Subtarget.getFrameLowering();
761   // "Zero" means natural stack alignment.
762   if (A == 0)
763     A = HFI.getStackAlign().value();
764 
765   LLVM_DEBUG({
766     dbgs () << __func__ << " Align: " << A << " Size: ";
767     Size.getNode()->dump(&DAG);
768     dbgs() << "\n";
769   });
770 
771   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
772   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
773   SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
774 
775   DAG.ReplaceAllUsesOfValueWith(Op, AA);
776   return AA;
777 }
778 
779 SDValue HexagonTargetLowering::LowerFormalArguments(
780     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
781     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
782     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
783   MachineFunction &MF = DAG.getMachineFunction();
784   MachineFrameInfo &MFI = MF.getFrameInfo();
785   MachineRegisterInfo &MRI = MF.getRegInfo();
786 
787   // Linux ABI treats var-arg calls the same way as regular ones.
788   bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
789 
790   // Assign locations to all of the incoming arguments.
791   SmallVector<CCValAssign, 16> ArgLocs;
792   HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
793                         *DAG.getContext(),
794                         MF.getFunction().getFunctionType()->getNumParams());
795 
796   if (Subtarget.useHVXOps())
797     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
798   else if (DisableArgsMinAlignment)
799     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
800   else
801     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
802 
803   // For LLVM, in the case when returning a struct by value (>8byte),
804   // the first argument is a pointer that points to the location on caller's
805   // stack where the return value will be stored. For Hexagon, the location on
806   // caller's stack is passed only when the struct size is smaller than (and
807   // equal to) 8 bytes. If not, no address will be passed into callee and
808   // callee return the result direclty through R0/R1.
809   auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
810     switch (RC.getID()) {
811     case Hexagon::IntRegsRegClassID:
812       return Reg - Hexagon::R0 + 1;
813     case Hexagon::DoubleRegsRegClassID:
814       return (Reg - Hexagon::D0 + 1) * 2;
815     case Hexagon::HvxVRRegClassID:
816       return Reg - Hexagon::V0 + 1;
817     case Hexagon::HvxWRRegClassID:
818       return (Reg - Hexagon::W0 + 1) * 2;
819     }
820     llvm_unreachable("Unexpected register class");
821   };
822 
823   auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
824   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
825   HFL.FirstVarArgSavedReg = 0;
826   HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
827 
828   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
829     CCValAssign &VA = ArgLocs[i];
830     ISD::ArgFlagsTy Flags = Ins[i].Flags;
831     bool ByVal = Flags.isByVal();
832 
833     // Arguments passed in registers:
834     // 1. 32- and 64-bit values and HVX vectors are passed directly,
835     // 2. Large structs are passed via an address, and the address is
836     //    passed in a register.
837     if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
838       llvm_unreachable("ByValSize must be bigger than 8 bytes");
839 
840     bool InReg = VA.isRegLoc() &&
841                  (!ByVal || (ByVal && Flags.getByValSize() > 8));
842 
843     if (InReg) {
844       MVT RegVT = VA.getLocVT();
845       if (VA.getLocInfo() == CCValAssign::BCvt)
846         RegVT = VA.getValVT();
847 
848       const TargetRegisterClass *RC = getRegClassFor(RegVT);
849       Register VReg = MRI.createVirtualRegister(RC);
850       SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
851 
852       // Treat values of type MVT::i1 specially: they are passed in
853       // registers of type i32, but they need to remain as values of
854       // type i1 for consistency of the argument lowering.
855       if (VA.getValVT() == MVT::i1) {
856         assert(RegVT.getSizeInBits() <= 32);
857         SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
858                                 Copy, DAG.getConstant(1, dl, RegVT));
859         Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
860                             ISD::SETNE);
861       } else {
862 #ifndef NDEBUG
863         unsigned RegSize = RegVT.getSizeInBits();
864         assert(RegSize == 32 || RegSize == 64 ||
865                Subtarget.isHVXVectorType(RegVT));
866 #endif
867       }
868       InVals.push_back(Copy);
869       MRI.addLiveIn(VA.getLocReg(), VReg);
870       HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
871     } else {
872       assert(VA.isMemLoc() && "Argument should be passed in memory");
873 
874       // If it's a byval parameter, then we need to compute the
875       // "real" size, not the size of the pointer.
876       unsigned ObjSize = Flags.isByVal()
877                             ? Flags.getByValSize()
878                             : VA.getLocVT().getStoreSizeInBits() / 8;
879 
880       // Create the frame index object for this incoming parameter.
881       int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
882       int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
883       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
884 
885       if (Flags.isByVal()) {
886         // If it's a pass-by-value aggregate, then do not dereference the stack
887         // location. Instead, we should generate a reference to the stack
888         // location.
889         InVals.push_back(FIN);
890       } else {
891         SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
892                                 MachinePointerInfo::getFixedStack(MF, FI, 0));
893         InVals.push_back(L);
894       }
895     }
896   }
897 
898   if (IsVarArg && Subtarget.isEnvironmentMusl()) {
899     for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
900       MRI.addLiveIn(Hexagon::R0+i);
901   }
902 
903   if (IsVarArg && Subtarget.isEnvironmentMusl()) {
904     HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
905     HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
906 
907     // Create Frame index for the start of register saved area.
908     int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
909     bool RequiresPadding = (NumVarArgRegs & 1);
910     int RegSaveAreaSizePlusPadding = RequiresPadding
911                                         ? (NumVarArgRegs + 1) * 4
912                                         : NumVarArgRegs * 4;
913 
914     if (RegSaveAreaSizePlusPadding > 0) {
915       // The offset to saved register area should be 8 byte aligned.
916       int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
917       if (!(RegAreaStart % 8))
918         RegAreaStart = (RegAreaStart + 7) & -8;
919 
920       int RegSaveAreaFrameIndex =
921         MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
922       HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
923 
924       // This will point to the next argument passed via stack.
925       int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
926       int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
927       HMFI.setVarArgsFrameIndex(FI);
928     } else {
929       // This will point to the next argument passed via stack, when
930       // there is no saved register area.
931       int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
932       int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
933       HMFI.setRegSavedAreaStartFrameIndex(FI);
934       HMFI.setVarArgsFrameIndex(FI);
935     }
936   }
937 
938 
939   if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
940     // This will point to the next argument passed via stack.
941     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
942     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
943     HMFI.setVarArgsFrameIndex(FI);
944   }
945 
946   return Chain;
947 }
948 
949 SDValue
950 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
951   // VASTART stores the address of the VarArgsFrameIndex slot into the
952   // memory location argument.
953   MachineFunction &MF = DAG.getMachineFunction();
954   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
955   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
956   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
957 
958   if (!Subtarget.isEnvironmentMusl()) {
959     return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
960                         MachinePointerInfo(SV));
961   }
962   auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
963   auto &HFL = *Subtarget.getFrameLowering();
964   SDLoc DL(Op);
965   SmallVector<SDValue, 8> MemOps;
966 
967   // Get frame index of va_list.
968   SDValue FIN = Op.getOperand(1);
969 
970   // If first Vararg register is odd, add 4 bytes to start of
971   // saved register area to point to the first register location.
972   // This is because the saved register area has to be 8 byte aligned.
973   // Incase of an odd start register, there will be 4 bytes of padding in
974   // the beginning of saved register area. If all registers area used up,
975   // the following condition will handle it correctly.
976   SDValue SavedRegAreaStartFrameIndex =
977     DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
978 
979   auto PtrVT = getPointerTy(DAG.getDataLayout());
980 
981   if (HFL.FirstVarArgSavedReg & 1)
982     SavedRegAreaStartFrameIndex =
983       DAG.getNode(ISD::ADD, DL, PtrVT,
984                   DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
985                                     MVT::i32),
986                   DAG.getIntPtrConstant(4, DL));
987 
988   // Store the saved register area start pointer.
989   SDValue Store =
990     DAG.getStore(Op.getOperand(0), DL,
991                  SavedRegAreaStartFrameIndex,
992                  FIN, MachinePointerInfo(SV));
993   MemOps.push_back(Store);
994 
995   // Store saved register area end pointer.
996   FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
997                     FIN, DAG.getIntPtrConstant(4, DL));
998   Store = DAG.getStore(Op.getOperand(0), DL,
999                        DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
1000                                          PtrVT),
1001                        FIN, MachinePointerInfo(SV, 4));
1002   MemOps.push_back(Store);
1003 
1004   // Store overflow area pointer.
1005   FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
1006                     FIN, DAG.getIntPtrConstant(4, DL));
1007   Store = DAG.getStore(Op.getOperand(0), DL,
1008                        DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
1009                                          PtrVT),
1010                        FIN, MachinePointerInfo(SV, 8));
1011   MemOps.push_back(Store);
1012 
1013   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1014 }
1015 
1016 SDValue
1017 HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
1018   // Assert that the linux ABI is enabled for the current compilation.
1019   assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
1020   SDValue Chain = Op.getOperand(0);
1021   SDValue DestPtr = Op.getOperand(1);
1022   SDValue SrcPtr = Op.getOperand(2);
1023   const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
1024   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
1025   SDLoc DL(Op);
1026   // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
1027   // we need to memcopy 12 bytes from va_list to another similar list.
1028   return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
1029                        DAG.getIntPtrConstant(12, DL), Align(4),
1030                        /*isVolatile*/ false, false, false,
1031                        MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
1032 }
1033 
1034 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1035   const SDLoc &dl(Op);
1036   SDValue LHS = Op.getOperand(0);
1037   SDValue RHS = Op.getOperand(1);
1038   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
1039   MVT ResTy = ty(Op);
1040   MVT OpTy = ty(LHS);
1041 
1042   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1043     MVT ElemTy = OpTy.getVectorElementType();
1044     assert(ElemTy.isScalarInteger());
1045     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
1046                                   OpTy.getVectorNumElements());
1047     return DAG.getSetCC(dl, ResTy,
1048                         DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
1049                         DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
1050   }
1051 
1052   // Treat all other vector types as legal.
1053   if (ResTy.isVector())
1054     return Op;
1055 
1056   // Comparisons of short integers should use sign-extend, not zero-extend,
1057   // since we can represent small negative values in the compare instructions.
1058   // The LLVM default is to use zero-extend arbitrarily in these cases.
1059   auto isSExtFree = [this](SDValue N) {
1060     switch (N.getOpcode()) {
1061       case ISD::TRUNCATE: {
1062         // A sign-extend of a truncate of a sign-extend is free.
1063         SDValue Op = N.getOperand(0);
1064         if (Op.getOpcode() != ISD::AssertSext)
1065           return false;
1066         EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
1067         unsigned ThisBW = ty(N).getSizeInBits();
1068         unsigned OrigBW = OrigTy.getSizeInBits();
1069         // The type that was sign-extended to get the AssertSext must be
1070         // narrower than the type of N (so that N has still the same value
1071         // as the original).
1072         return ThisBW >= OrigBW;
1073       }
1074       case ISD::LOAD:
1075         // We have sign-extended loads.
1076         return true;
1077     }
1078     return false;
1079   };
1080 
1081   if (OpTy == MVT::i8 || OpTy == MVT::i16) {
1082     ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
1083     bool IsNegative = C && C->getAPIntValue().isNegative();
1084     if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
1085       return DAG.getSetCC(dl, ResTy,
1086                           DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
1087                           DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
1088   }
1089 
1090   return SDValue();
1091 }
1092 
1093 SDValue
1094 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
1095   SDValue PredOp = Op.getOperand(0);
1096   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
1097   MVT OpTy = ty(Op1);
1098   const SDLoc &dl(Op);
1099 
1100   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
1101     MVT ElemTy = OpTy.getVectorElementType();
1102     assert(ElemTy.isScalarInteger());
1103     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
1104                                   OpTy.getVectorNumElements());
1105     // Generate (trunc (select (_, sext, sext))).
1106     return DAG.getSExtOrTrunc(
1107               DAG.getSelect(dl, WideTy, PredOp,
1108                             DAG.getSExtOrTrunc(Op1, dl, WideTy),
1109                             DAG.getSExtOrTrunc(Op2, dl, WideTy)),
1110               dl, OpTy);
1111   }
1112 
1113   return SDValue();
1114 }
1115 
1116 SDValue
1117 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
1118   EVT ValTy = Op.getValueType();
1119   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
1120   Constant *CVal = nullptr;
1121   bool isVTi1Type = false;
1122   if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
1123     if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
1124       IRBuilder<> IRB(CV->getContext());
1125       SmallVector<Constant*, 128> NewConst;
1126       unsigned VecLen = CV->getNumOperands();
1127       assert(isPowerOf2_32(VecLen) &&
1128              "conversion only supported for pow2 VectorSize");
1129       for (unsigned i = 0; i < VecLen; ++i)
1130         NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
1131 
1132       CVal = ConstantVector::get(NewConst);
1133       isVTi1Type = true;
1134     }
1135   }
1136   Align Alignment = CPN->getAlign();
1137   bool IsPositionIndependent = isPositionIndependent();
1138   unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
1139 
1140   unsigned Offset = 0;
1141   SDValue T;
1142   if (CPN->isMachineConstantPoolEntry())
1143     T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
1144                                   Offset, TF);
1145   else if (isVTi1Type)
1146     T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
1147   else
1148     T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
1149                                   TF);
1150 
1151   assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
1152          "Inconsistent target flag encountered");
1153 
1154   if (IsPositionIndependent)
1155     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
1156   return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
1157 }
1158 
1159 SDValue
1160 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1161   EVT VT = Op.getValueType();
1162   int Idx = cast<JumpTableSDNode>(Op)->getIndex();
1163   if (isPositionIndependent()) {
1164     SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
1165     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
1166   }
1167 
1168   SDValue T = DAG.getTargetJumpTable(Idx, VT);
1169   return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
1170 }
1171 
1172 SDValue
1173 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
1174   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1175   MachineFunction &MF = DAG.getMachineFunction();
1176   MachineFrameInfo &MFI = MF.getFrameInfo();
1177   MFI.setReturnAddressIsTaken(true);
1178 
1179   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1180     return SDValue();
1181 
1182   EVT VT = Op.getValueType();
1183   SDLoc dl(Op);
1184   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1185   if (Depth) {
1186     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1187     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
1188     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
1189                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
1190                        MachinePointerInfo());
1191   }
1192 
1193   // Return LR, which contains the return address. Mark it an implicit live-in.
1194   unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
1195   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
1196 }
1197 
1198 SDValue
1199 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
1200   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
1201   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1202   MFI.setFrameAddressIsTaken(true);
1203 
1204   EVT VT = Op.getValueType();
1205   SDLoc dl(Op);
1206   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1207   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
1208                                          HRI.getFrameRegister(), VT);
1209   while (Depth--)
1210     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
1211                             MachinePointerInfo());
1212   return FrameAddr;
1213 }
1214 
1215 SDValue
1216 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
1217   SDLoc dl(Op);
1218   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
1219 }
1220 
1221 SDValue
1222 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
1223   SDLoc dl(Op);
1224   auto *GAN = cast<GlobalAddressSDNode>(Op);
1225   auto PtrVT = getPointerTy(DAG.getDataLayout());
1226   auto *GV = GAN->getGlobal();
1227   int64_t Offset = GAN->getOffset();
1228 
1229   auto &HLOF = *HTM.getObjFileLowering();
1230   Reloc::Model RM = HTM.getRelocationModel();
1231 
1232   if (RM == Reloc::Static) {
1233     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
1234     const GlobalObject *GO = GV->getBaseObject();
1235     if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
1236       return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
1237     return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
1238   }
1239 
1240   bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
1241   if (UsePCRel) {
1242     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
1243                                             HexagonII::MO_PCREL);
1244     return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
1245   }
1246 
1247   // Use GOT index.
1248   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
1249   SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
1250   SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
1251   return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
1252 }
1253 
1254 // Specifies that for loads and stores VT can be promoted to PromotedLdStVT.
1255 SDValue
1256 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
1257   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1258   SDLoc dl(Op);
1259   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1260 
1261   Reloc::Model RM = HTM.getRelocationModel();
1262   if (RM == Reloc::Static) {
1263     SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
1264     return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
1265   }
1266 
1267   SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
1268   return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
1269 }
1270 
1271 SDValue
1272 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
1273       const {
1274   EVT PtrVT = getPointerTy(DAG.getDataLayout());
1275   SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
1276                                                HexagonII::MO_PCREL);
1277   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
1278 }
1279 
1280 SDValue
1281 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
1282       GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
1283       unsigned char OperandFlags) const {
1284   MachineFunction &MF = DAG.getMachineFunction();
1285   MachineFrameInfo &MFI = MF.getFrameInfo();
1286   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1287   SDLoc dl(GA);
1288   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
1289                                            GA->getValueType(0),
1290                                            GA->getOffset(),
1291                                            OperandFlags);
1292   // Create Operands for the call.The Operands should have the following:
1293   // 1. Chain SDValue
1294   // 2. Callee which in this case is the Global address value.
1295   // 3. Registers live into the call.In this case its R0, as we
1296   //    have just one argument to be passed.
1297   // 4. Glue.
1298   // Note: The order is important.
1299 
1300   const auto &HRI = *Subtarget.getRegisterInfo();
1301   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
1302   assert(Mask && "Missing call preserved mask for calling convention");
1303   SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
1304                     DAG.getRegisterMask(Mask), Glue };
1305   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
1306 
1307   // Inform MFI that function has calls.
1308   MFI.setAdjustsStack(true);
1309 
1310   Glue = Chain.getValue(1);
1311   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
1312 }
1313 
1314 //
1315 // Lower using the intial executable model for TLS addresses
1316 //
1317 SDValue
1318 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
1319       SelectionDAG &DAG) const {
1320   SDLoc dl(GA);
1321   int64_t Offset = GA->getOffset();
1322   auto PtrVT = getPointerTy(DAG.getDataLayout());
1323 
1324   // Get the thread pointer.
1325   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1326 
1327   bool IsPositionIndependent = isPositionIndependent();
1328   unsigned char TF =
1329       IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
1330 
1331   // First generate the TLS symbol address
1332   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
1333                                            Offset, TF);
1334 
1335   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1336 
1337   if (IsPositionIndependent) {
1338     // Generate the GOT pointer in case of position independent code
1339     SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
1340 
1341     // Add the TLS Symbol address to GOT pointer.This gives
1342     // GOT relative relocation for the symbol.
1343     Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1344   }
1345 
1346   // Load the offset value for TLS symbol.This offset is relative to
1347   // thread pointer.
1348   SDValue LoadOffset =
1349       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
1350 
1351   // Address of the thread local variable is the add of thread
1352   // pointer and the offset of the variable.
1353   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
1354 }
1355 
1356 //
1357 // Lower using the local executable model for TLS addresses
1358 //
1359 SDValue
1360 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
1361       SelectionDAG &DAG) const {
1362   SDLoc dl(GA);
1363   int64_t Offset = GA->getOffset();
1364   auto PtrVT = getPointerTy(DAG.getDataLayout());
1365 
1366   // Get the thread pointer.
1367   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
1368   // Generate the TLS symbol address
1369   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1370                                            HexagonII::MO_TPREL);
1371   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1372 
1373   // Address of the thread local variable is the add of thread
1374   // pointer and the offset of the variable.
1375   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
1376 }
1377 
1378 //
1379 // Lower using the general dynamic model for TLS addresses
1380 //
1381 SDValue
1382 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
1383       SelectionDAG &DAG) const {
1384   SDLoc dl(GA);
1385   int64_t Offset = GA->getOffset();
1386   auto PtrVT = getPointerTy(DAG.getDataLayout());
1387 
1388   // First generate the TLS symbol address
1389   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
1390                                            HexagonII::MO_GDGOT);
1391 
1392   // Then, generate the GOT pointer
1393   SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
1394 
1395   // Add the TLS symbol and the GOT pointer
1396   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
1397   SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
1398 
1399   // Copy over the argument to R0
1400   SDValue InFlag;
1401   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
1402   InFlag = Chain.getValue(1);
1403 
1404   unsigned Flags =
1405       static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
1406           ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
1407           : HexagonII::MO_GDPLT;
1408 
1409   return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
1410                            Hexagon::R0, Flags);
1411 }
1412 
1413 //
1414 // Lower TLS addresses.
1415 //
1416 // For now for dynamic models, we only support the general dynamic model.
1417 //
1418 SDValue
1419 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1420       SelectionDAG &DAG) const {
1421   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
1422 
1423   switch (HTM.getTLSModel(GA->getGlobal())) {
1424     case TLSModel::GeneralDynamic:
1425     case TLSModel::LocalDynamic:
1426       return LowerToTLSGeneralDynamicModel(GA, DAG);
1427     case TLSModel::InitialExec:
1428       return LowerToTLSInitialExecModel(GA, DAG);
1429     case TLSModel::LocalExec:
1430       return LowerToTLSLocalExecModel(GA, DAG);
1431   }
1432   llvm_unreachable("Bogus TLS model");
1433 }
1434 
1435 //===----------------------------------------------------------------------===//
1436 // TargetLowering Implementation
1437 //===----------------------------------------------------------------------===//
1438 
1439 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
1440                                              const HexagonSubtarget &ST)
1441     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
1442       Subtarget(ST) {
1443   auto &HRI = *Subtarget.getRegisterInfo();
1444 
1445   setPrefLoopAlignment(Align(16));
1446   setMinFunctionAlignment(Align(4));
1447   setPrefFunctionAlignment(Align(16));
1448   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
1449   setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
1450   setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
1451 
1452   setMaxAtomicSizeInBitsSupported(64);
1453   setMinCmpXchgSizeInBits(32);
1454 
1455   if (EnableHexSDNodeSched)
1456     setSchedulingPreference(Sched::VLIW);
1457   else
1458     setSchedulingPreference(Sched::Source);
1459 
1460   // Limits for inline expansion of memcpy/memmove
1461   MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
1462   MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
1463   MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
1464   MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
1465   MaxStoresPerMemset = MaxStoresPerMemsetCL;
1466   MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
1467 
1468   //
1469   // Set up register classes.
1470   //
1471 
1472   addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
1473   addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
1474   addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
1475   addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
1476   addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
1477   addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
1478   addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
1479   addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
1480   addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
1481   addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
1482   addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
1483 
1484   addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
1485   addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
1486 
1487   //
1488   // Handling of scalar operations.
1489   //
1490   // All operations default to "legal", except:
1491   // - indexed loads and stores (pre-/post-incremented),
1492   // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
1493   //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
1494   //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
1495   //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
1496   // which default to "expand" for at least one type.
1497 
1498   // Misc operations.
1499   setOperationAction(ISD::ConstantFP,           MVT::f32,   Legal);
1500   setOperationAction(ISD::ConstantFP,           MVT::f64,   Legal);
1501   setOperationAction(ISD::TRAP,                 MVT::Other, Legal);
1502   setOperationAction(ISD::ConstantPool,         MVT::i32,   Custom);
1503   setOperationAction(ISD::JumpTable,            MVT::i32,   Custom);
1504   setOperationAction(ISD::BUILD_PAIR,           MVT::i64,   Expand);
1505   setOperationAction(ISD::SIGN_EXTEND_INREG,    MVT::i1,    Expand);
1506   setOperationAction(ISD::INLINEASM,            MVT::Other, Custom);
1507   setOperationAction(ISD::INLINEASM_BR,         MVT::Other, Custom);
1508   setOperationAction(ISD::PREFETCH,             MVT::Other, Custom);
1509   setOperationAction(ISD::READCYCLECOUNTER,     MVT::i64,   Custom);
1510   setOperationAction(ISD::INTRINSIC_VOID,       MVT::Other, Custom);
1511   setOperationAction(ISD::EH_RETURN,            MVT::Other, Custom);
1512   setOperationAction(ISD::GLOBAL_OFFSET_TABLE,  MVT::i32,   Custom);
1513   setOperationAction(ISD::GlobalTLSAddress,     MVT::i32,   Custom);
1514   setOperationAction(ISD::ATOMIC_FENCE,         MVT::Other, Custom);
1515 
1516   // Custom legalize GlobalAddress nodes into CONST32.
1517   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
1518   setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
1519   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
1520 
1521   // Hexagon needs to optimize cases with negative constants.
1522   setOperationAction(ISD::SETCC, MVT::i8,    Custom);
1523   setOperationAction(ISD::SETCC, MVT::i16,   Custom);
1524   setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
1525   setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
1526 
1527   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
1528   setOperationAction(ISD::VASTART, MVT::Other, Custom);
1529   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
1530   setOperationAction(ISD::VAARG,   MVT::Other, Expand);
1531   if (Subtarget.isEnvironmentMusl())
1532     setOperationAction(ISD::VACOPY, MVT::Other, Custom);
1533   else
1534     setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
1535 
1536   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
1537   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
1538   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
1539 
1540   if (EmitJumpTables)
1541     setMinimumJumpTableEntries(MinimumJumpTables);
1542   else
1543     setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
1544   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1545 
1546   for (unsigned LegalIntOp :
1547        {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
1548     setOperationAction(LegalIntOp, MVT::i32, Legal);
1549     setOperationAction(LegalIntOp, MVT::i64, Legal);
1550   }
1551 
1552   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
1553   // but they only operate on i64.
1554   for (MVT VT : MVT::integer_valuetypes()) {
1555     setOperationAction(ISD::UADDO,    VT, Custom);
1556     setOperationAction(ISD::USUBO,    VT, Custom);
1557     setOperationAction(ISD::SADDO,    VT, Expand);
1558     setOperationAction(ISD::SSUBO,    VT, Expand);
1559     setOperationAction(ISD::ADDCARRY, VT, Expand);
1560     setOperationAction(ISD::SUBCARRY, VT, Expand);
1561   }
1562   setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
1563   setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
1564 
1565   setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
1566   setOperationAction(ISD::CTLZ, MVT::i16, Promote);
1567   setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
1568   setOperationAction(ISD::CTTZ, MVT::i16, Promote);
1569 
1570   // Popcount can count # of 1s in i64 but returns i32.
1571   setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
1572   setOperationAction(ISD::CTPOP, MVT::i16, Promote);
1573   setOperationAction(ISD::CTPOP, MVT::i32, Promote);
1574   setOperationAction(ISD::CTPOP, MVT::i64, Legal);
1575 
1576   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
1577   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
1578   setOperationAction(ISD::BSWAP, MVT::i32, Legal);
1579   setOperationAction(ISD::BSWAP, MVT::i64, Legal);
1580 
1581   setOperationAction(ISD::FSHL, MVT::i32, Legal);
1582   setOperationAction(ISD::FSHL, MVT::i64, Legal);
1583   setOperationAction(ISD::FSHR, MVT::i32, Legal);
1584   setOperationAction(ISD::FSHR, MVT::i64, Legal);
1585 
1586   for (unsigned IntExpOp :
1587        {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
1588         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
1589         ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
1590         ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
1591     for (MVT VT : MVT::integer_valuetypes())
1592       setOperationAction(IntExpOp, VT, Expand);
1593   }
1594 
1595   for (unsigned FPExpOp :
1596        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
1597         ISD::FPOW, ISD::FCOPYSIGN}) {
1598     for (MVT VT : MVT::fp_valuetypes())
1599       setOperationAction(FPExpOp, VT, Expand);
1600   }
1601 
1602   // No extending loads from i32.
1603   for (MVT VT : MVT::integer_valuetypes()) {
1604     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
1605     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
1606     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
1607   }
1608   // Turn FP truncstore into trunc + store.
1609   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1610   // Turn FP extload into load/fpextend.
1611   for (MVT VT : MVT::fp_valuetypes())
1612     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1613 
1614   // Expand BR_CC and SELECT_CC for all integer and fp types.
1615   for (MVT VT : MVT::integer_valuetypes()) {
1616     setOperationAction(ISD::BR_CC,     VT, Expand);
1617     setOperationAction(ISD::SELECT_CC, VT, Expand);
1618   }
1619   for (MVT VT : MVT::fp_valuetypes()) {
1620     setOperationAction(ISD::BR_CC,     VT, Expand);
1621     setOperationAction(ISD::SELECT_CC, VT, Expand);
1622   }
1623   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
1624 
1625   //
1626   // Handling of vector operations.
1627   //
1628 
1629   // Set the action for vector operations to "expand", then override it with
1630   // either "custom" or "legal" for specific cases.
1631   static const unsigned VectExpOps[] = {
1632     // Integer arithmetic:
1633     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
1634     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
1635     ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
1636     // Logical/bit:
1637     ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
1638     ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
1639     // Floating point arithmetic/math functions:
1640     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
1641     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
1642     ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
1643     ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
1644     ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
1645     ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
1646     // Misc:
1647     ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
1648     // Vector:
1649     ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
1650     ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
1651     ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
1652     ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE,
1653     ISD::SPLAT_VECTOR,
1654   };
1655 
1656   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1657     for (unsigned VectExpOp : VectExpOps)
1658       setOperationAction(VectExpOp, VT, Expand);
1659 
1660     // Expand all extending loads and truncating stores:
1661     for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
1662       if (TargetVT == VT)
1663         continue;
1664       setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
1665       setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
1666       setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
1667       setTruncStoreAction(VT, TargetVT, Expand);
1668     }
1669 
1670     // Normalize all inputs to SELECT to be vectors of i32.
1671     if (VT.getVectorElementType() != MVT::i32) {
1672       MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
1673       setOperationAction(ISD::SELECT, VT, Promote);
1674       AddPromotedToType(ISD::SELECT, VT, VT32);
1675     }
1676     setOperationAction(ISD::SRA, VT, Custom);
1677     setOperationAction(ISD::SHL, VT, Custom);
1678     setOperationAction(ISD::SRL, VT, Custom);
1679   }
1680 
1681   // Extending loads from (native) vectors of i8 into (native) vectors of i16
1682   // are legal.
1683   setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
1684   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1685   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
1686   setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
1687   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1688   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
1689 
1690   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
1691   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
1692   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
1693 
1694   // Types natively supported:
1695   for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
1696                        MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1697     setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
1698     setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
1699     setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
1700     setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
1701     setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
1702     setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);
1703 
1704     setOperationAction(ISD::ADD, NativeVT, Legal);
1705     setOperationAction(ISD::SUB, NativeVT, Legal);
1706     setOperationAction(ISD::MUL, NativeVT, Legal);
1707     setOperationAction(ISD::AND, NativeVT, Legal);
1708     setOperationAction(ISD::OR,  NativeVT, Legal);
1709     setOperationAction(ISD::XOR, NativeVT, Legal);
1710 
1711     if (NativeVT.getVectorElementType() != MVT::i1)
1712       setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
1713   }
1714 
1715   for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
1716     setOperationAction(ISD::SMIN, VT, Legal);
1717     setOperationAction(ISD::SMAX, VT, Legal);
1718     setOperationAction(ISD::UMIN, VT, Legal);
1719     setOperationAction(ISD::UMAX, VT, Legal);
1720   }
1721 
1722   // Custom lower unaligned loads.
1723   // Also, for both loads and stores, verify the alignment of the address
1724   // in case it is a compile-time constant. This is a usability feature to
1725   // provide a meaningful error message to users.
1726   for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
1727                  MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
1728     setOperationAction(ISD::LOAD,  VT, Custom);
1729     setOperationAction(ISD::STORE, VT, Custom);
1730   }
1731 
1732   // Custom-lower load/stores of boolean vectors.
1733   for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
1734     setOperationAction(ISD::LOAD,  VT, Custom);
1735     setOperationAction(ISD::STORE, VT, Custom);
1736   }
1737 
1738   for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
1739                  MVT::v2i32}) {
1740     setCondCodeAction(ISD::SETNE,  VT, Expand);
1741     setCondCodeAction(ISD::SETLE,  VT, Expand);
1742     setCondCodeAction(ISD::SETGE,  VT, Expand);
1743     setCondCodeAction(ISD::SETLT,  VT, Expand);
1744     setCondCodeAction(ISD::SETULE, VT, Expand);
1745     setCondCodeAction(ISD::SETUGE, VT, Expand);
1746     setCondCodeAction(ISD::SETULT, VT, Expand);
1747   }
1748 
1749   // Custom-lower bitcasts from i8 to v8i1.
1750   setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
1751   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
1752   setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
1753   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
1754   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
1755   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
1756   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
1757 
1758   // V5+.
1759   setOperationAction(ISD::FMA,  MVT::f64, Expand);
1760   setOperationAction(ISD::FADD, MVT::f64, Expand);
1761   setOperationAction(ISD::FSUB, MVT::f64, Expand);
1762   setOperationAction(ISD::FMUL, MVT::f64, Expand);
1763 
1764   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1765   setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1766 
1767   setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
1768   setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
1769   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
1770   setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
1771   setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
1772   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
1773   setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
1774   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
1775   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
1776   setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
1777   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
1778   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
1779 
1780   // Handling of indexed loads/stores: default is "expand".
1781   //
1782   for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
1783                  MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
1784     setIndexedLoadAction(ISD::POST_INC, VT, Legal);
1785     setIndexedStoreAction(ISD::POST_INC, VT, Legal);
1786   }
1787 
1788   // Subtarget-specific operation actions.
1789   //
1790   if (Subtarget.hasV60Ops()) {
1791     setOperationAction(ISD::ROTL, MVT::i32, Legal);
1792     setOperationAction(ISD::ROTL, MVT::i64, Legal);
1793     setOperationAction(ISD::ROTR, MVT::i32, Legal);
1794     setOperationAction(ISD::ROTR, MVT::i64, Legal);
1795   }
1796   if (Subtarget.hasV66Ops()) {
1797     setOperationAction(ISD::FADD, MVT::f64, Legal);
1798     setOperationAction(ISD::FSUB, MVT::f64, Legal);
1799   }
1800   if (Subtarget.hasV67Ops()) {
1801     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1802     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1803     setOperationAction(ISD::FMUL,    MVT::f64, Legal);
1804   }
1805 
1806   setTargetDAGCombine(ISD::VSELECT);
1807 
1808   if (Subtarget.useHVXOps())
1809     initializeHVXLowering();
1810 
1811   computeRegisterProperties(&HRI);
1812 
1813   //
1814   // Library calls for unsupported operations
1815   //
1816   bool FastMath  = EnableFastMath;
1817 
1818   setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
1819   setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
1820   setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
1821   setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
1822   setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
1823   setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
1824   setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
1825   setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
1826 
1827   setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
1828   setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
1829   setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
1830   setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
1831   setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
1832   setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
1833 
1834   // This is the only fast library function for sqrtd.
1835   if (FastMath)
1836     setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
1837 
1838   // Prefix is: nothing  for "slow-math",
1839   //            "fast2_" for V5+ fast-math double-precision
1840   // (actually, keep fast-math and fast-math2 separate for now)
1841   if (FastMath) {
1842     setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
1843     setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
1844     setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
1845     setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
1846     setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
1847   } else {
1848     setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
1849     setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
1850     setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
1851     setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
1852     setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
1853   }
1854 
1855   if (FastMath)
1856     setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
1857   else
1858     setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
1859 
1860   // These cause problems when the shift amount is non-constant.
1861   setLibcallName(RTLIB::SHL_I128, nullptr);
1862   setLibcallName(RTLIB::SRL_I128, nullptr);
1863   setLibcallName(RTLIB::SRA_I128, nullptr);
1864 }
1865 
1866 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1867   switch ((HexagonISD::NodeType)Opcode) {
1868   case HexagonISD::ADDC:          return "HexagonISD::ADDC";
1869   case HexagonISD::SUBC:          return "HexagonISD::SUBC";
1870   case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
1871   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
1872   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
1873   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
1874   case HexagonISD::CALL:          return "HexagonISD::CALL";
1875   case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
1876   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
1877   case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
1878   case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
1879   case HexagonISD::CONST32:       return "HexagonISD::CONST32";
1880   case HexagonISD::CP:            return "HexagonISD::CP";
1881   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
1882   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
1883   case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
1884   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
1885   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
1886   case HexagonISD::JT:            return "HexagonISD::JT";
1887   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
1888   case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
1889   case HexagonISD::VASL:          return "HexagonISD::VASL";
1890   case HexagonISD::VASR:          return "HexagonISD::VASR";
1891   case HexagonISD::VLSR:          return "HexagonISD::VLSR";
1892   case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
1893   case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
1894   case HexagonISD::VROR:          return "HexagonISD::VROR";
1895   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
1896   case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
1897   case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
1898   case HexagonISD::D2P:           return "HexagonISD::D2P";
1899   case HexagonISD::P2D:           return "HexagonISD::P2D";
1900   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
1901   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
1902   case HexagonISD::QCAT:          return "HexagonISD::QCAT";
1903   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
1904   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
1905   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
1906   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
1907   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
1908   case HexagonISD::VPACKL:        return "HexagonISD::VPACKL";
1909   case HexagonISD::VUNPACK:       return "HexagonISD::VUNPACK";
1910   case HexagonISD::VUNPACKU:      return "HexagonISD::VUNPACKU";
1911   case HexagonISD::ISEL:          return "HexagonISD::ISEL";
1912   case HexagonISD::OP_END:        break;
1913   }
1914   return nullptr;
1915 }
1916 
1917 bool
1918 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
1919       const SDLoc &dl, SelectionDAG &DAG) const {
1920   auto *CA = dyn_cast<ConstantSDNode>(Ptr);
1921   if (!CA)
1922     return true;
1923   unsigned Addr = CA->getZExtValue();
1924   Align HaveAlign =
1925       Addr != 0 ? Align(1ull << countTrailingZeros(Addr)) : NeedAlign;
1926   if (HaveAlign >= NeedAlign)
1927     return true;
1928 
1929   static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
1930 
1931   struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
1932     DiagnosticInfoMisalignedTrap(StringRef M)
1933       : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
1934     void print(DiagnosticPrinter &DP) const override {
1935       DP << Msg;
1936     }
1937     static bool classof(const DiagnosticInfo *DI) {
1938       return DI->getKind() == DK_MisalignedTrap;
1939     }
1940     StringRef Msg;
1941   };
1942 
1943   std::string ErrMsg;
1944   raw_string_ostream O(ErrMsg);
1945   O << "Misaligned constant address: " << format_hex(Addr, 10)
1946     << " has alignment " << HaveAlign.value()
1947     << ", but the memory access requires " << NeedAlign.value();
1948   if (DebugLoc DL = dl.getDebugLoc())
1949     DL.print(O << ", at ");
1950   O << ". The instruction has been replaced with a trap.";
1951 
1952   DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
1953   return false;
1954 }
1955 
1956 SDValue
1957 HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
1958       const {
1959   const SDLoc &dl(Op);
1960   auto *LS = cast<LSBaseSDNode>(Op.getNode());
1961   assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
1962 
1963   SDValue Chain = LS->getChain();
1964   SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
1965   if (LS->getOpcode() == ISD::LOAD)
1966     return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
1967   return Trap;
1968 }
1969 
1970 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
1971 // intrinsic.
1972 static bool isBrevLdIntrinsic(const Value *Inst) {
1973   unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
1974   return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
1975           ID == Intrinsic::hexagon_L2_loadri_pbr ||
1976           ID == Intrinsic::hexagon_L2_loadrh_pbr ||
1977           ID == Intrinsic::hexagon_L2_loadruh_pbr ||
1978           ID == Intrinsic::hexagon_L2_loadrb_pbr ||
1979           ID == Intrinsic::hexagon_L2_loadrub_pbr);
1980 }
1981 
1982 // Bit-reverse Load Intrinsic :Crawl up and figure out the object from previous
1983 // instruction. So far we only handle bitcast, extract value and bit reverse
1984 // load intrinsic instructions. Should we handle CGEP ?
1985 static Value *getBrevLdObject(Value *V) {
1986   if (Operator::getOpcode(V) == Instruction::ExtractValue ||
1987       Operator::getOpcode(V) == Instruction::BitCast)
1988     V = cast<Operator>(V)->getOperand(0);
1989   else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
1990     V = cast<Instruction>(V)->getOperand(0);
1991   return V;
1992 }
1993 
1994 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
1995 // a back edge. If the back edge comes from the intrinsic itself, the incoming
1996 // edge is returned.
1997 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
1998   const BasicBlock *Parent = PN->getParent();
1999   int Idx = -1;
2000   for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
2001     BasicBlock *Blk = PN->getIncomingBlock(i);
2002     // Determine if the back edge is originated from intrinsic.
2003     if (Blk == Parent) {
2004       Value *BackEdgeVal = PN->getIncomingValue(i);
2005       Value *BaseVal;
2006       // Loop over till we return the same Value or we hit the IntrBaseVal.
2007       do {
2008         BaseVal = BackEdgeVal;
2009         BackEdgeVal = getBrevLdObject(BackEdgeVal);
2010       } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
2011       // If the getBrevLdObject returns IntrBaseVal, we should return the
2012       // incoming edge.
2013       if (IntrBaseVal == BackEdgeVal)
2014         continue;
2015       Idx = i;
2016       break;
2017     } else // Set the node to incoming edge.
2018       Idx = i;
2019   }
2020   assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
2021   return PN->getIncomingValue(Idx);
2022 }
2023 
2024 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
2025 // pointer points to, for the bit-reverse load intrinsic. Setting this to
2026 // memoperand might help alias analysis to figure out the dependencies.
2027 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
2028   Value *IntrBaseVal = V;
2029   Value *BaseVal;
2030   // Loop over till we return the same Value, implies we either figure out
2031   // the object or we hit a PHI
2032   do {
2033     BaseVal = V;
2034     V = getBrevLdObject(V);
2035   } while (BaseVal != V);
2036 
2037   // Identify the object from PHINode.
2038   if (const PHINode *PN = dyn_cast<PHINode>(V))
2039     return returnEdge(PN, IntrBaseVal);
2040   // For non PHI nodes, the object is the last value returned by getBrevLdObject
2041   else
2042     return V;
2043 }
2044 
2045 /// Given an intrinsic, checks if on the target the intrinsic will need to map
2046 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
2047 /// true and store the intrinsic information into the IntrinsicInfo that was
2048 /// passed to the function.
2049 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2050                                                const CallInst &I,
2051                                                MachineFunction &MF,
2052                                                unsigned Intrinsic) const {
2053   switch (Intrinsic) {
2054   case Intrinsic::hexagon_L2_loadrd_pbr:
2055   case Intrinsic::hexagon_L2_loadri_pbr:
2056   case Intrinsic::hexagon_L2_loadrh_pbr:
2057   case Intrinsic::hexagon_L2_loadruh_pbr:
2058   case Intrinsic::hexagon_L2_loadrb_pbr:
2059   case Intrinsic::hexagon_L2_loadrub_pbr: {
2060     Info.opc = ISD::INTRINSIC_W_CHAIN;
2061     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
2062     auto &Cont = I.getCalledFunction()->getParent()->getContext();
2063     // The intrinsic function call is of the form { ElTy, i8* }
2064     // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
2065     // should be derived from ElTy.
2066     Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
2067     Info.memVT = MVT::getVT(ElTy);
2068     llvm::Value *BasePtrVal = I.getOperand(0);
2069     Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
2070     // The offset value comes through Modifier register. For now, assume the
2071     // offset is 0.
2072     Info.offset = 0;
2073     Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
2074     Info.flags = MachineMemOperand::MOLoad;
2075     return true;
2076   }
2077   case Intrinsic::hexagon_V6_vgathermw:
2078   case Intrinsic::hexagon_V6_vgathermw_128B:
2079   case Intrinsic::hexagon_V6_vgathermh:
2080   case Intrinsic::hexagon_V6_vgathermh_128B:
2081   case Intrinsic::hexagon_V6_vgathermhw:
2082   case Intrinsic::hexagon_V6_vgathermhw_128B:
2083   case Intrinsic::hexagon_V6_vgathermwq:
2084   case Intrinsic::hexagon_V6_vgathermwq_128B:
2085   case Intrinsic::hexagon_V6_vgathermhq:
2086   case Intrinsic::hexagon_V6_vgathermhq_128B:
2087   case Intrinsic::hexagon_V6_vgathermhwq:
2088   case Intrinsic::hexagon_V6_vgathermhwq_128B: {
2089     const Module &M = *I.getParent()->getParent()->getParent();
2090     Info.opc = ISD::INTRINSIC_W_CHAIN;
2091     Type *VecTy = I.getArgOperand(1)->getType();
2092     Info.memVT = MVT::getVT(VecTy);
2093     Info.ptrVal = I.getArgOperand(0);
2094     Info.offset = 0;
2095     Info.align =
2096         MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
2097     Info.flags = MachineMemOperand::MOLoad |
2098                  MachineMemOperand::MOStore |
2099                  MachineMemOperand::MOVolatile;
2100     return true;
2101   }
2102   default:
2103     break;
2104   }
2105   return false;
2106 }
2107 
2108 bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2109   return X.getValueType().isScalarInteger(); // 'tstbit'
2110 }
2111 
2112 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
2113   return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
2114 }
2115 
2116 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2117   if (!VT1.isSimple() || !VT2.isSimple())
2118     return false;
2119   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
2120 }
2121 
2122 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
2123     const MachineFunction &MF, EVT VT) const {
2124   return isOperationLegalOrCustom(ISD::FMA, VT);
2125 }
2126 
2127 // Should we expand the build vector with shuffles?
2128 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2129       unsigned DefinedValues) const {
2130   return false;
2131 }
2132 
2133 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
2134                                                EVT VT) const {
2135   return true;
2136 }
2137 
2138 TargetLoweringBase::LegalizeTypeAction
2139 HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
2140   unsigned VecLen = VT.getVectorMinNumElements();
2141   MVT ElemTy = VT.getVectorElementType();
2142 
2143   if (VecLen == 1 || VT.isScalableVector())
2144     return TargetLoweringBase::TypeScalarizeVector;
2145 
2146   if (Subtarget.useHVXOps()) {
2147     unsigned Action = getPreferredHvxVectorAction(VT);
2148     if (Action != ~0u)
2149       return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
2150   }
2151 
2152   // Always widen (remaining) vectors of i1.
2153   if (ElemTy == MVT::i1)
2154     return TargetLoweringBase::TypeWidenVector;
2155 
2156   return TargetLoweringBase::TypeSplitVector;
2157 }
2158 
2159 std::pair<SDValue, int>
2160 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
2161   if (Addr.getOpcode() == ISD::ADD) {
2162     SDValue Op1 = Addr.getOperand(1);
2163     if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
2164       return { Addr.getOperand(0), CN->getSExtValue() };
2165   }
2166   return { Addr, 0 };
2167 }
2168 
2169 // Lower a vector shuffle (V1, V2, V3).  V1 and V2 are the two vectors
2170 // to select data from, V3 is the permutation.
2171 SDValue
2172 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
2173       const {
2174   const auto *SVN = cast<ShuffleVectorSDNode>(Op);
2175   ArrayRef<int> AM = SVN->getMask();
2176   assert(AM.size() <= 8 && "Unexpected shuffle mask");
2177   unsigned VecLen = AM.size();
2178 
2179   MVT VecTy = ty(Op);
2180   assert(!Subtarget.isHVXVectorType(VecTy, true) &&
2181          "HVX shuffles should be legal");
2182   assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
2183 
2184   SDValue Op0 = Op.getOperand(0);
2185   SDValue Op1 = Op.getOperand(1);
2186   const SDLoc &dl(Op);
2187 
2188   // If the inputs are not the same as the output, bail. This is not an
2189   // error situation, but complicates the handling and the default expansion
2190   // (into BUILD_VECTOR) should be adequate.
2191   if (ty(Op0) != VecTy || ty(Op1) != VecTy)
2192     return SDValue();
2193 
2194   // Normalize the mask so that the first non-negative index comes from
2195   // the first operand.
2196   SmallVector<int,8> Mask(AM.begin(), AM.end());
2197   unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
2198   if (F == AM.size())
2199     return DAG.getUNDEF(VecTy);
2200   if (AM[F] >= int(VecLen)) {
2201     ShuffleVectorSDNode::commuteMask(Mask);
2202     std::swap(Op0, Op1);
2203   }
2204 
2205   // Express the shuffle mask in terms of bytes.
2206   SmallVector<int,8> ByteMask;
2207   unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
2208   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
2209     int M = Mask[i];
2210     if (M < 0) {
2211       for (unsigned j = 0; j != ElemBytes; ++j)
2212         ByteMask.push_back(-1);
2213     } else {
2214       for (unsigned j = 0; j != ElemBytes; ++j)
2215         ByteMask.push_back(M*ElemBytes + j);
2216     }
2217   }
2218   assert(ByteMask.size() <= 8);
2219 
2220   // All non-undef (non-negative) indexes are well within [0..127], so they
2221   // fit in a single byte. Build two 64-bit words:
2222   // - MaskIdx where each byte is the corresponding index (for non-negative
2223   //   indexes), and 0xFF for negative indexes, and
2224   // - MaskUnd that has 0xFF for each negative index.
2225   uint64_t MaskIdx = 0;
2226   uint64_t MaskUnd = 0;
2227   for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
2228     unsigned S = 8*i;
2229     uint64_t M = ByteMask[i] & 0xFF;
2230     if (M == 0xFF)
2231       MaskUnd |= M << S;
2232     MaskIdx |= M << S;
2233   }
2234 
2235   if (ByteMask.size() == 4) {
2236     // Identity.
2237     if (MaskIdx == (0x03020100 | MaskUnd))
2238       return Op0;
2239     // Byte swap.
2240     if (MaskIdx == (0x00010203 | MaskUnd)) {
2241       SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
2242       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
2243       return DAG.getBitcast(VecTy, T1);
2244     }
2245 
2246     // Byte packs.
2247     SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
2248                                    typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
2249     if (MaskIdx == (0x06040200 | MaskUnd))
2250       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
2251     if (MaskIdx == (0x07050301 | MaskUnd))
2252       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
2253 
2254     SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
2255                                    typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
2256     if (MaskIdx == (0x02000604 | MaskUnd))
2257       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
2258     if (MaskIdx == (0x03010705 | MaskUnd))
2259       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
2260   }
2261 
2262   if (ByteMask.size() == 8) {
2263     // Identity.
2264     if (MaskIdx == (0x0706050403020100ull | MaskUnd))
2265       return Op0;
2266     // Byte swap.
2267     if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
2268       SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
2269       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
2270       return DAG.getBitcast(VecTy, T1);
2271     }
2272 
2273     // Halfword picks.
2274     if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
2275       return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
2276     if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
2277       return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
2278     if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
2279       return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
2280     if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
2281       return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
2282     if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
2283       VectorPair P = opSplit(Op0, dl, DAG);
2284       return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
2285     }
2286 
2287     // Byte packs.
2288     if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
2289       return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
2290     if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
2291       return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
2292   }
2293 
2294   return SDValue();
2295 }
2296 
2297 // Create a Hexagon-specific node for shifting a vector by an integer.
2298 SDValue
2299 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2300       const {
2301   unsigned NewOpc;
2302   switch (Op.getOpcode()) {
2303     case ISD::SHL:
2304       NewOpc = HexagonISD::VASL;
2305       break;
2306     case ISD::SRA:
2307       NewOpc = HexagonISD::VASR;
2308       break;
2309     case ISD::SRL:
2310       NewOpc = HexagonISD::VLSR;
2311       break;
2312     default:
2313       llvm_unreachable("Unexpected shift opcode");
2314   }
2315 
2316   SDValue Op0 = Op.getOperand(0);
2317   SDValue Op1 = Op.getOperand(1);
2318   const SDLoc &dl(Op);
2319 
2320   switch (Op1.getOpcode()) {
2321     case ISD::BUILD_VECTOR:
2322       if (SDValue S = cast<BuildVectorSDNode>(Op1)->getSplatValue())
2323         return DAG.getNode(NewOpc, dl, ty(Op), Op0, S);
2324       break;
2325     case ISD::SPLAT_VECTOR:
2326       return DAG.getNode(NewOpc, dl, ty(Op), Op0, Op1.getOperand(0));
2327   }
2328   return SDValue();
2329 }
2330 
2331 SDValue
2332 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2333   return getVectorShiftByInt(Op, DAG);
2334 }
2335 
2336 SDValue
2337 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2338   if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
2339     return Op;
2340   return SDValue();
2341 }
2342 
2343 SDValue
2344 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2345   MVT ResTy = ty(Op);
2346   SDValue InpV = Op.getOperand(0);
2347   MVT InpTy = ty(InpV);
2348   assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2349   const SDLoc &dl(Op);
2350 
2351   // Handle conversion from i8 to v8i1.
2352   if (InpTy == MVT::i8) {
2353     if (ResTy == MVT::v8i1) {
2354       SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2355       SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2356       return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2357     }
2358     return SDValue();
2359   }
2360 
2361   return Op;
2362 }
2363 
2364 bool
2365 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2366       MVT VecTy, SelectionDAG &DAG,
2367       MutableArrayRef<ConstantInt*> Consts) const {
2368   MVT ElemTy = VecTy.getVectorElementType();
2369   unsigned ElemWidth = ElemTy.getSizeInBits();
2370   IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2371   bool AllConst = true;
2372 
2373   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2374     SDValue V = Values[i];
2375     if (V.isUndef()) {
2376       Consts[i] = ConstantInt::get(IntTy, 0);
2377       continue;
2378     }
2379     // Make sure to always cast to IntTy.
2380     if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2381       const ConstantInt *CI = CN->getConstantIntValue();
2382       Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2383     } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2384       const ConstantFP *CF = CN->getConstantFPValue();
2385       APInt A = CF->getValueAPF().bitcastToAPInt();
2386       Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2387     } else {
2388       AllConst = false;
2389     }
2390   }
2391   return AllConst;
2392 }
2393 
2394 SDValue
2395 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2396                                      MVT VecTy, SelectionDAG &DAG) const {
2397   MVT ElemTy = VecTy.getVectorElementType();
2398   assert(VecTy.getVectorNumElements() == Elem.size());
2399 
2400   SmallVector<ConstantInt*,4> Consts(Elem.size());
2401   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2402 
2403   unsigned First, Num = Elem.size();
2404   for (First = 0; First != Num; ++First) {
2405     if (!isUndef(Elem[First]))
2406       break;
2407   }
2408   if (First == Num)
2409     return DAG.getUNDEF(VecTy);
2410 
2411   if (AllConst &&
2412       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2413     return getZero(dl, VecTy, DAG);
2414 
2415   if (ElemTy == MVT::i16) {
2416     assert(Elem.size() == 2);
2417     if (AllConst) {
2418       uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2419                    Consts[1]->getZExtValue() << 16;
2420       return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
2421     }
2422     SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
2423                          {Elem[1], Elem[0]}, DAG);
2424     return DAG.getBitcast(MVT::v2i16, N);
2425   }
2426 
2427   if (ElemTy == MVT::i8) {
2428     // First try generating a constant.
2429     if (AllConst) {
2430       int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2431                   (Consts[1]->getZExtValue() & 0xFF) << 8 |
2432                   (Consts[1]->getZExtValue() & 0xFF) << 16 |
2433                   Consts[2]->getZExtValue() << 24;
2434       return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2435     }
2436 
2437     // Then try splat.
2438     bool IsSplat = true;
2439     for (unsigned i = First+1; i != Num; ++i) {
2440       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2441         continue;
2442       IsSplat = false;
2443       break;
2444     }
2445     if (IsSplat) {
2446       // Legalize the operand of SPLAT_VECTOR.
2447       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2448       return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2449     }
2450 
2451     // Generate
2452     //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2453     //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2454     assert(Elem.size() == 4);
2455     SDValue Vs[4];
2456     for (unsigned i = 0; i != 4; ++i) {
2457       Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2458       Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2459     }
2460     SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2461     SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2462     SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2463     SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2464     SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2465 
2466     SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2467     return DAG.getBitcast(MVT::v4i8, R);
2468   }
2469 
2470 #ifndef NDEBUG
2471   dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
2472 #endif
2473   llvm_unreachable("Unexpected vector element type");
2474 }
2475 
2476 SDValue
2477 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2478                                      MVT VecTy, SelectionDAG &DAG) const {
2479   MVT ElemTy = VecTy.getVectorElementType();
2480   assert(VecTy.getVectorNumElements() == Elem.size());
2481 
2482   SmallVector<ConstantInt*,8> Consts(Elem.size());
2483   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2484 
2485   unsigned First, Num = Elem.size();
2486   for (First = 0; First != Num; ++First) {
2487     if (!isUndef(Elem[First]))
2488       break;
2489   }
2490   if (First == Num)
2491     return DAG.getUNDEF(VecTy);
2492 
2493   if (AllConst &&
2494       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2495     return getZero(dl, VecTy, DAG);
2496 
2497   // First try splat if possible.
2498   if (ElemTy == MVT::i16) {
2499     bool IsSplat = true;
2500     for (unsigned i = First+1; i != Num; ++i) {
2501       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2502         continue;
2503       IsSplat = false;
2504       break;
2505     }
2506     if (IsSplat) {
2507       // Legalize the operand of SPLAT_VECTOR
2508       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2509       return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2510     }
2511   }
2512 
2513   // Then try constant.
2514   if (AllConst) {
2515     uint64_t Val = 0;
2516     unsigned W = ElemTy.getSizeInBits();
2517     uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
2518                   : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
2519     for (unsigned i = 0; i != Num; ++i)
2520       Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2521     SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2522     return DAG.getBitcast(VecTy, V0);
2523   }
2524 
2525   // Build two 32-bit vectors and concatenate.
2526   MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2527   SDValue L = (ElemTy == MVT::i32)
2528                 ? Elem[0]
2529                 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2530   SDValue H = (ElemTy == MVT::i32)
2531                 ? Elem[1]
2532                 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2533   return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
2534 }
2535 
2536 SDValue
2537 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2538                                      const SDLoc &dl, MVT ValTy, MVT ResTy,
2539                                      SelectionDAG &DAG) const {
2540   MVT VecTy = ty(VecV);
2541   assert(!ValTy.isVector() ||
2542          VecTy.getVectorElementType() == ValTy.getVectorElementType());
2543   unsigned VecWidth = VecTy.getSizeInBits();
2544   unsigned ValWidth = ValTy.getSizeInBits();
2545   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2546   assert((VecWidth % ElemWidth) == 0);
2547   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
2548 
2549   // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2550   // without any coprocessors).
2551   if (ElemWidth == 1) {
2552     assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
2553     assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2554     // Check if this is an extract of the lowest bit.
2555     if (IdxN) {
2556       // Extracting the lowest bit is a no-op, but it changes the type,
2557       // so it must be kept as an operation to avoid errors related to
2558       // type mismatches.
2559       if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
2560         return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2561     }
2562 
2563     // If the value extracted is a single bit, use tstbit.
2564     if (ValWidth == 1) {
2565       SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2566       SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
2567       SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2568       return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2569     }
2570 
2571     // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2572     // a predicate register. The elements of the vector are repeated
2573     // in the register (if necessary) so that the total number is 8.
2574     // The extracted subvector will need to be expanded in such a way.
2575     unsigned Scale = VecWidth / ValWidth;
2576 
2577     // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2578     // position 0.
2579     assert(ty(IdxV) == MVT::i32);
2580     unsigned VecRep = 8 / VecWidth;
2581     SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2582                              DAG.getConstant(8*VecRep, dl, MVT::i32));
2583     SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2584     SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2585     while (Scale > 1) {
2586       // The longest possible subvector is at most 32 bits, so it is always
2587       // contained in the low subregister.
2588       T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
2589       T1 = expandPredicate(T1, dl, DAG);
2590       Scale /= 2;
2591     }
2592 
2593     return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2594   }
2595 
2596   assert(VecWidth == 32 || VecWidth == 64);
2597 
2598   // Cast everything to scalar integer types.
2599   MVT ScalarTy = tyScalar(VecTy);
2600   VecV = DAG.getBitcast(ScalarTy, VecV);
2601 
2602   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2603   SDValue ExtV;
2604 
2605   if (IdxN) {
2606     unsigned Off = IdxN->getZExtValue() * ElemWidth;
2607     if (VecWidth == 64 && ValWidth == 32) {
2608       assert(Off == 0 || Off == 32);
2609       unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
2610       ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
2611     } else if (Off == 0 && (ValWidth % 8) == 0) {
2612       ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2613     } else {
2614       SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2615       // The return type of EXTRACTU must be the same as the type of the
2616       // input vector.
2617       ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2618                          {VecV, WidthV, OffV});
2619     }
2620   } else {
2621     if (ty(IdxV) != MVT::i32)
2622       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2623     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2624                                DAG.getConstant(ElemWidth, dl, MVT::i32));
2625     ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2626                        {VecV, WidthV, OffV});
2627   }
2628 
2629   // Cast ExtV to the requested result type.
2630   ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2631   ExtV = DAG.getBitcast(ResTy, ExtV);
2632   return ExtV;
2633 }
2634 
2635 SDValue
2636 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2637                                     const SDLoc &dl, MVT ValTy,
2638                                     SelectionDAG &DAG) const {
2639   MVT VecTy = ty(VecV);
2640   if (VecTy.getVectorElementType() == MVT::i1) {
2641     MVT ValTy = ty(ValV);
2642     assert(ValTy.getVectorElementType() == MVT::i1);
2643     SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
2644     unsigned VecLen = VecTy.getVectorNumElements();
2645     unsigned Scale = VecLen / ValTy.getVectorNumElements();
2646     assert(Scale > 1);
2647 
2648     for (unsigned R = Scale; R > 1; R /= 2) {
2649       ValR = contractPredicate(ValR, dl, DAG);
2650       ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2651                          DAG.getUNDEF(MVT::i32), ValR);
2652     }
2653     // The longest possible subvector is at most 32 bits, so it is always
2654     // contained in the low subregister.
2655     ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
2656 
2657     unsigned ValBytes = 64 / Scale;
2658     SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
2659     SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2660                               DAG.getConstant(8, dl, MVT::i32));
2661     SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2662     SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2663                               {VecR, ValR, Width, Idx});
2664     return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2665   }
2666 
2667   unsigned VecWidth = VecTy.getSizeInBits();
2668   unsigned ValWidth = ValTy.getSizeInBits();
2669   assert(VecWidth == 32 || VecWidth == 64);
2670   assert((VecWidth % ValWidth) == 0);
2671 
2672   // Cast everything to scalar integer types.
2673   MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2674   // The actual type of ValV may be different than ValTy (which is related
2675   // to the vector type).
2676   unsigned VW = ty(ValV).getSizeInBits();
2677   ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2678   VecV = DAG.getBitcast(ScalarTy, VecV);
2679   if (VW != VecWidth)
2680     ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2681 
2682   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2683   SDValue InsV;
2684 
2685   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2686     unsigned W = C->getZExtValue() * ValWidth;
2687     SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2688     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2689                        {VecV, ValV, WidthV, OffV});
2690   } else {
2691     if (ty(IdxV) != MVT::i32)
2692       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2693     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2694     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2695                        {VecV, ValV, WidthV, OffV});
2696   }
2697 
2698   return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2699 }
2700 
2701 SDValue
2702 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2703                                        SelectionDAG &DAG) const {
2704   assert(ty(Vec32).getSizeInBits() == 32);
2705   if (isUndef(Vec32))
2706     return DAG.getUNDEF(MVT::i64);
2707   return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
2708 }
2709 
2710 SDValue
2711 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2712                                          SelectionDAG &DAG) const {
2713   assert(ty(Vec64).getSizeInBits() == 64);
2714   if (isUndef(Vec64))
2715     return DAG.getUNDEF(MVT::i32);
2716   return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
2717 }
2718 
2719 SDValue
2720 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2721       const {
2722   if (Ty.isVector()) {
2723     assert(Ty.isInteger() && "Only integer vectors are supported here");
2724     unsigned W = Ty.getSizeInBits();
2725     if (W <= 64)
2726       return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2727     return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
2728   }
2729 
2730   if (Ty.isInteger())
2731     return DAG.getConstant(0, dl, Ty);
2732   if (Ty.isFloatingPoint())
2733     return DAG.getConstantFP(0.0, dl, Ty);
2734   llvm_unreachable("Invalid type for zero");
2735 }
2736 
2737 SDValue
2738 HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
2739       const {
2740   MVT ValTy = ty(Val);
2741   assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
2742 
2743   unsigned ValLen = ValTy.getVectorNumElements();
2744   unsigned ResLen = ResTy.getVectorNumElements();
2745   if (ValLen == ResLen)
2746     return Val;
2747 
2748   const SDLoc &dl(Val);
2749   assert(ValLen < ResLen);
2750   assert(ResLen % ValLen == 0);
2751 
2752   SmallVector<SDValue, 4> Concats = {Val};
2753   for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
2754     Concats.push_back(DAG.getUNDEF(ValTy));
2755 
2756   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
2757 }
2758 
2759 SDValue
2760 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2761   MVT VecTy = ty(Op);
2762   unsigned BW = VecTy.getSizeInBits();
2763   const SDLoc &dl(Op);
2764   SmallVector<SDValue,8> Ops;
2765   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2766     Ops.push_back(Op.getOperand(i));
2767 
2768   if (BW == 32)
2769     return buildVector32(Ops, dl, VecTy, DAG);
2770   if (BW == 64)
2771     return buildVector64(Ops, dl, VecTy, DAG);
2772 
2773   if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2774     // Check if this is a special case or all-0 or all-1.
2775     bool All0 = true, All1 = true;
2776     for (SDValue P : Ops) {
2777       auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
2778       if (CN == nullptr) {
2779         All0 = All1 = false;
2780         break;
2781       }
2782       uint32_t C = CN->getZExtValue();
2783       All0 &= (C == 0);
2784       All1 &= (C == 1);
2785     }
2786     if (All0)
2787       return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
2788     if (All1)
2789       return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
2790 
2791     // For each i1 element in the resulting predicate register, put 1
2792     // shifted by the index of the element into a general-purpose register,
2793     // then or them together and transfer it back into a predicate register.
2794     SDValue Rs[8];
2795     SDValue Z = getZero(dl, MVT::i32, DAG);
2796     // Always produce 8 bits, repeat inputs if necessary.
2797     unsigned Rep = 8 / VecTy.getVectorNumElements();
2798     for (unsigned i = 0; i != 8; ++i) {
2799       SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
2800       Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
2801     }
2802     for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
2803       for (unsigned i = 0, e = A.size()/2; i != e; ++i)
2804         Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
2805     }
2806     // Move the value directly to a predicate register.
2807     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
2808   }
2809 
2810   return SDValue();
2811 }
2812 
2813 SDValue
2814 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2815                                            SelectionDAG &DAG) const {
2816   MVT VecTy = ty(Op);
2817   const SDLoc &dl(Op);
2818   if (VecTy.getSizeInBits() == 64) {
2819     assert(Op.getNumOperands() == 2);
2820     return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
2821                        Op.getOperand(0));
2822   }
2823 
2824   MVT ElemTy = VecTy.getVectorElementType();
2825   if (ElemTy == MVT::i1) {
2826     assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
2827     MVT OpTy = ty(Op.getOperand(0));
2828     // Scale is how many times the operands need to be contracted to match
2829     // the representation in the target register.
2830     unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
2831     assert(Scale == Op.getNumOperands() && Scale > 1);
2832 
2833     // First, convert all bool vectors to integers, then generate pairwise
2834     // inserts to form values of doubled length. Up until there are only
2835     // two values left to concatenate, all of these values will fit in a
2836     // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2837     SmallVector<SDValue,4> Words[2];
2838     unsigned IdxW = 0;
2839 
2840     for (SDValue P : Op.getNode()->op_values()) {
2841       SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
2842       for (unsigned R = Scale; R > 1; R /= 2) {
2843         W = contractPredicate(W, dl, DAG);
2844         W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2845                         DAG.getUNDEF(MVT::i32), W);
2846       }
2847       W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
2848       Words[IdxW].push_back(W);
2849     }
2850 
2851     while (Scale > 2) {
2852       SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
2853       Words[IdxW ^ 1].clear();
2854 
2855       for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
2856         SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
2857         // Insert W1 into W0 right next to the significant bits of W0.
2858         SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
2859                                 {W0, W1, WidthV, WidthV});
2860         Words[IdxW ^ 1].push_back(T);
2861       }
2862       IdxW ^= 1;
2863       Scale /= 2;
2864     }
2865 
2866     // Another sanity check. At this point there should only be two words
2867     // left, and Scale should be 2.
2868     assert(Scale == 2 && Words[IdxW].size() == 2);
2869 
2870     SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
2871                              Words[IdxW][1], Words[IdxW][0]);
2872     return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
2873   }
2874 
2875   return SDValue();
2876 }
2877 
2878 SDValue
2879 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
2880                                                SelectionDAG &DAG) const {
2881   SDValue Vec = Op.getOperand(0);
2882   MVT ElemTy = ty(Vec).getVectorElementType();
2883   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
2884 }
2885 
2886 SDValue
2887 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
2888                                               SelectionDAG &DAG) const {
2889   return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
2890                        ty(Op), ty(Op), DAG);
2891 }
2892 
2893 SDValue
2894 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
2895                                               SelectionDAG &DAG) const {
2896   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
2897                       SDLoc(Op), ty(Op).getVectorElementType(), DAG);
2898 }
2899 
2900 SDValue
2901 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
2902                                              SelectionDAG &DAG) const {
2903   SDValue ValV = Op.getOperand(1);
2904   return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
2905                       SDLoc(Op), ty(ValV), DAG);
2906 }
2907 
2908 bool
2909 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
2910   // Assuming the caller does not have either a signext or zeroext modifier, and
2911   // only one value is accepted, any reasonable truncation is allowed.
2912   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
2913     return false;
2914 
2915   // FIXME: in principle up to 64-bit could be made safe, but it would be very
2916   // fragile at the moment: any support for multiple value returns would be
2917   // liable to disallow tail calls involving i64 -> iN truncation in many cases.
2918   return Ty1->getPrimitiveSizeInBits() <= 32;
2919 }
2920 
2921 SDValue
2922 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
2923   MVT Ty = ty(Op);
2924   const SDLoc &dl(Op);
2925   // Lower loads of scalar predicate vectors (v2i1, v4i1, v8i1) to loads of i1
2926   // followed by a TYPECAST.
2927   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2928   bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
2929   if (DoCast) {
2930     SDValue NL = DAG.getLoad(
2931         LN->getAddressingMode(), LN->getExtensionType(), MVT::i1, dl,
2932         LN->getChain(), LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
2933         /*MemoryVT*/ MVT::i1, LN->getAlign(), LN->getMemOperand()->getFlags(),
2934         LN->getAAInfo(), LN->getRanges());
2935     LN = cast<LoadSDNode>(NL.getNode());
2936   }
2937 
2938   Align ClaimAlign = LN->getAlign();
2939   if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
2940     return replaceMemWithUndef(Op, DAG);
2941 
2942   // Call LowerUnalignedLoad for all loads, it recognizes loads that
2943   // don't need extra aligning.
2944   SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
2945   if (DoCast) {
2946     SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, Ty, LU);
2947     SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
2948     return DAG.getMergeValues({TC, Ch}, dl);
2949   }
2950   return LU;
2951 }
2952 
2953 SDValue
2954 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
2955   const SDLoc &dl(Op);
2956   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
2957   SDValue Val = SN->getValue();
2958   MVT Ty = ty(Val);
2959 
2960   bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
2961   if (DoCast) {
2962     SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, Val);
2963     SDValue NS = DAG.getStore(SN->getChain(), dl, TC, SN->getBasePtr(),
2964                               SN->getMemOperand());
2965     if (SN->isIndexed()) {
2966       NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
2967                                SN->getAddressingMode());
2968     }
2969     SN = cast<StoreSDNode>(NS.getNode());
2970   }
2971 
2972   Align ClaimAlign = SN->getAlign();
2973   if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
2974     return replaceMemWithUndef(Op, DAG);
2975 
2976   MVT StoreTy = SN->getMemoryVT().getSimpleVT();
2977   Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
2978   if (ClaimAlign < NeedAlign)
2979     return expandUnalignedStore(SN, DAG);
2980   return SDValue(SN, 0);
2981 }
2982 
2983 SDValue
2984 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
2985       const {
2986   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2987   MVT LoadTy = ty(Op);
2988   unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
2989   unsigned HaveAlign = LN->getAlign().value();
2990   if (HaveAlign >= NeedAlign)
2991     return Op;
2992 
2993   const SDLoc &dl(Op);
2994   const DataLayout &DL = DAG.getDataLayout();
2995   LLVMContext &Ctx = *DAG.getContext();
2996 
2997   // If the load aligning is disabled or the load can be broken up into two
2998   // smaller legal loads, do the default (target-independent) expansion.
2999   bool DoDefault = false;
3000   // Handle it in the default way if this is an indexed load.
3001   if (!LN->isUnindexed())
3002     DoDefault = true;
3003 
3004   if (!AlignLoads) {
3005     if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
3006                                        *LN->getMemOperand()))
3007       return Op;
3008     DoDefault = true;
3009   }
3010   if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
3011     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
3012     MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
3013                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
3014     DoDefault =
3015         allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
3016   }
3017   if (DoDefault) {
3018     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
3019     return DAG.getMergeValues({P.first, P.second}, dl);
3020   }
3021 
3022   // The code below generates two loads, both aligned as NeedAlign, and
3023   // with the distance of NeedAlign between them. For that to cover the
3024   // bits that need to be loaded (and without overlapping), the size of
3025   // the loads should be equal to NeedAlign. This is true for all loadable
3026   // types, but add an assertion in case something changes in the future.
3027   assert(LoadTy.getSizeInBits() == 8*NeedAlign);
3028 
3029   unsigned LoadLen = NeedAlign;
3030   SDValue Base = LN->getBasePtr();
3031   SDValue Chain = LN->getChain();
3032   auto BO = getBaseAndOffset(Base);
3033   unsigned BaseOpc = BO.first.getOpcode();
3034   if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
3035     return Op;
3036 
3037   if (BO.second % LoadLen != 0) {
3038     BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
3039                            DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
3040     BO.second -= BO.second % LoadLen;
3041   }
3042   SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
3043       ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
3044                     DAG.getConstant(NeedAlign, dl, MVT::i32))
3045       : BO.first;
3046   SDValue Base0 =
3047       DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
3048   SDValue Base1 = DAG.getMemBasePlusOffset(
3049       BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);
3050 
3051   MachineMemOperand *WideMMO = nullptr;
3052   if (MachineMemOperand *MMO = LN->getMemOperand()) {
3053     MachineFunction &MF = DAG.getMachineFunction();
3054     WideMMO = MF.getMachineMemOperand(
3055         MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
3056         MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
3057         MMO->getSuccessOrdering(), MMO->getFailureOrdering());
3058   }
3059 
3060   SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
3061   SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
3062 
3063   SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
3064                                 {Load1, Load0, BaseNoOff.getOperand(0)});
3065   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3066                                  Load0.getValue(1), Load1.getValue(1));
3067   SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
3068   return M;
3069 }
3070 
3071 SDValue
3072 HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
3073   SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
3074   auto *CY = dyn_cast<ConstantSDNode>(Y);
3075   if (!CY)
3076     return SDValue();
3077 
3078   const SDLoc &dl(Op);
3079   SDVTList VTs = Op.getNode()->getVTList();
3080   assert(VTs.NumVTs == 2);
3081   assert(VTs.VTs[1] == MVT::i1);
3082   unsigned Opc = Op.getOpcode();
3083 
3084   if (CY) {
3085     uint32_t VY = CY->getZExtValue();
3086     assert(VY != 0 && "This should have been folded");
3087     // X +/- 1
3088     if (VY != 1)
3089       return SDValue();
3090 
3091     if (Opc == ISD::UADDO) {
3092       SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
3093       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
3094                                 ISD::SETEQ);
3095       return DAG.getMergeValues({Op, Ov}, dl);
3096     }
3097     if (Opc == ISD::USUBO) {
3098       SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
3099       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
3100                                 DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
3101       return DAG.getMergeValues({Op, Ov}, dl);
3102     }
3103   }
3104 
3105   return SDValue();
3106 }
3107 
3108 SDValue
3109 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
3110   const SDLoc &dl(Op);
3111   unsigned Opc = Op.getOpcode();
3112   SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
3113 
3114   if (Opc == ISD::ADDCARRY)
3115     return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
3116                        { X, Y, C });
3117 
3118   EVT CarryTy = C.getValueType();
3119   SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
3120                              { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
3121   SDValue Out[] = { SubC.getValue(0),
3122                     DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
3123   return DAG.getMergeValues(Out, dl);
3124 }
3125 
3126 SDValue
3127 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
3128   SDValue Chain     = Op.getOperand(0);
3129   SDValue Offset    = Op.getOperand(1);
3130   SDValue Handler   = Op.getOperand(2);
3131   SDLoc dl(Op);
3132   auto PtrVT = getPointerTy(DAG.getDataLayout());
3133 
3134   // Mark function as containing a call to EH_RETURN.
3135   HexagonMachineFunctionInfo *FuncInfo =
3136     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
3137   FuncInfo->setHasEHReturn();
3138 
3139   unsigned OffsetReg = Hexagon::R28;
3140 
3141   SDValue StoreAddr =
3142       DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
3143                   DAG.getIntPtrConstant(4, dl));
3144   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
3145   Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
3146 
3147   // Not needed we already use it as explict input to EH_RETURN.
3148   // MF.getRegInfo().addLiveOut(OffsetReg);
3149 
3150   return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
3151 }
3152 
3153 SDValue
3154 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3155   unsigned Opc = Op.getOpcode();
3156 
3157   // Handle INLINEASM first.
3158   if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
3159     return LowerINLINEASM(Op, DAG);
3160 
3161   if (isHvxOperation(Op.getNode(), DAG)) {
3162     // If HVX lowering returns nothing, try the default lowering.
3163     if (SDValue V = LowerHvxOperation(Op, DAG))
3164       return V;
3165   }
3166 
3167   switch (Opc) {
3168     default:
3169 #ifndef NDEBUG
3170       Op.getNode()->dumpr(&DAG);
3171       if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
3172         errs() << "Error: check for a non-legal type in this operation\n";
3173 #endif
3174       llvm_unreachable("Should not custom lower this!");
3175     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
3176     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
3177     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
3178     case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
3179     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3180     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
3181     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
3182     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
3183     case ISD::LOAD:                 return LowerLoad(Op, DAG);
3184     case ISD::STORE:                return LowerStore(Op, DAG);
3185     case ISD::UADDO:
3186     case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
3187     case ISD::ADDCARRY:
3188     case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
3189     case ISD::SRA:
3190     case ISD::SHL:
3191     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
3192     case ISD::ROTL:                 return LowerROTL(Op, DAG);
3193     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
3194     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
3195     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
3196     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
3197     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
3198     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
3199     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
3200     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
3201     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
3202     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3203     case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
3204     case ISD::VASTART:              return LowerVASTART(Op, DAG);
3205     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
3206     case ISD::SETCC:                return LowerSETCC(Op, DAG);
3207     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
3208     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3209     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
3210     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
3211     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
3212       break;
3213   }
3214 
3215   return SDValue();
3216 }
3217 
3218 void
3219 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
3220                                              SmallVectorImpl<SDValue> &Results,
3221                                              SelectionDAG &DAG) const {
3222   if (isHvxOperation(N, DAG)) {
3223     LowerHvxOperationWrapper(N, Results, DAG);
3224     if (!Results.empty())
3225       return;
3226   }
3227 
3228   // We are only custom-lowering stores to verify the alignment of the
3229   // address if it is a compile-time constant. Since a store can be modified
3230   // during type-legalization (the value being stored may need legalization),
3231   // return empty Results here to indicate that we don't really make any
3232   // changes in the custom lowering.
3233   if (N->getOpcode() != ISD::STORE)
3234     return TargetLowering::LowerOperationWrapper(N, Results, DAG);
3235 }
3236 
3237 void
3238 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
3239                                           SmallVectorImpl<SDValue> &Results,
3240                                           SelectionDAG &DAG) const {
3241   if (isHvxOperation(N, DAG)) {
3242     ReplaceHvxNodeResults(N, Results, DAG);
3243     if (!Results.empty())
3244       return;
3245   }
3246 
3247   const SDLoc &dl(N);
3248   switch (N->getOpcode()) {
3249     case ISD::SRL:
3250     case ISD::SRA:
3251     case ISD::SHL:
3252       return;
3253     case ISD::BITCAST:
3254       // Handle a bitcast from v8i1 to i8.
3255       if (N->getValueType(0) == MVT::i8) {
3256         if (N->getOperand(0).getValueType() == MVT::v8i1) {
3257           SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
3258                                N->getOperand(0), DAG);
3259           SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
3260           Results.push_back(T);
3261         }
3262       }
3263       break;
3264   }
3265 }
3266 
3267 SDValue
3268 HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3269       const {
3270   if (isHvxOperation(N, DCI.DAG)) {
3271     if (SDValue V = PerformHvxDAGCombine(N, DCI))
3272       return V;
3273     return SDValue();
3274   }
3275 
3276   if (DCI.isBeforeLegalizeOps())
3277     return SDValue();
3278 
3279   SDValue Op(N, 0);
3280   const SDLoc &dl(Op);
3281   unsigned Opc = Op.getOpcode();
3282 
3283   if (Opc == HexagonISD::P2D) {
3284     SDValue P = Op.getOperand(0);
3285     switch (P.getOpcode()) {
3286       case HexagonISD::PTRUE:
3287         return DCI.DAG.getConstant(-1, dl, ty(Op));
3288       case HexagonISD::PFALSE:
3289         return getZero(dl, ty(Op), DCI.DAG);
3290       default:
3291         break;
3292     }
3293   } else if (Opc == ISD::VSELECT) {
3294     // This is pretty much duplicated in HexagonISelLoweringHVX...
3295     //
3296     // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3297     SDValue Cond = Op.getOperand(0);
3298     if (Cond->getOpcode() == ISD::XOR) {
3299       SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3300       if (C1->getOpcode() == HexagonISD::PTRUE) {
3301         SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
3302                                        Op.getOperand(2), Op.getOperand(1));
3303         return VSel;
3304       }
3305     }
3306   }
3307 
3308   return SDValue();
3309 }
3310 
3311 /// Returns relocation base for the given PIC jumptable.
3312 SDValue
3313 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3314                                                 SelectionDAG &DAG) const {
3315   int Idx = cast<JumpTableSDNode>(Table)->getIndex();
3316   EVT VT = Table.getValueType();
3317   SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
3318   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
3319 }
3320 
3321 //===----------------------------------------------------------------------===//
3322 // Inline Assembly Support
3323 //===----------------------------------------------------------------------===//
3324 
3325 TargetLowering::ConstraintType
3326 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3327   if (Constraint.size() == 1) {
3328     switch (Constraint[0]) {
3329       case 'q':
3330       case 'v':
3331         if (Subtarget.useHVXOps())
3332           return C_RegisterClass;
3333         break;
3334       case 'a':
3335         return C_RegisterClass;
3336       default:
3337         break;
3338     }
3339   }
3340   return TargetLowering::getConstraintType(Constraint);
3341 }
3342 
3343 std::pair<unsigned, const TargetRegisterClass*>
3344 HexagonTargetLowering::getRegForInlineAsmConstraint(
3345     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3346 
3347   if (Constraint.size() == 1) {
3348     switch (Constraint[0]) {
3349     case 'r':   // R0-R31
3350       switch (VT.SimpleTy) {
3351       default:
3352         return {0u, nullptr};
3353       case MVT::i1:
3354       case MVT::i8:
3355       case MVT::i16:
3356       case MVT::i32:
3357       case MVT::f32:
3358         return {0u, &Hexagon::IntRegsRegClass};
3359       case MVT::i64:
3360       case MVT::f64:
3361         return {0u, &Hexagon::DoubleRegsRegClass};
3362       }
3363       break;
3364     case 'a': // M0-M1
3365       if (VT != MVT::i32)
3366         return {0u, nullptr};
3367       return {0u, &Hexagon::ModRegsRegClass};
3368     case 'q': // q0-q3
3369       switch (VT.getSizeInBits()) {
3370       default:
3371         return {0u, nullptr};
3372       case 64:
3373       case 128:
3374         return {0u, &Hexagon::HvxQRRegClass};
3375       }
3376       break;
3377     case 'v': // V0-V31
3378       switch (VT.getSizeInBits()) {
3379       default:
3380         return {0u, nullptr};
3381       case 512:
3382         return {0u, &Hexagon::HvxVRRegClass};
3383       case 1024:
3384         if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3385           return {0u, &Hexagon::HvxVRRegClass};
3386         return {0u, &Hexagon::HvxWRRegClass};
3387       case 2048:
3388         return {0u, &Hexagon::HvxWRRegClass};
3389       }
3390       break;
3391     default:
3392       return {0u, nullptr};
3393     }
3394   }
3395 
3396   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3397 }
3398 
3399 /// isFPImmLegal - Returns true if the target can instruction select the
3400 /// specified FP immediate natively. If false, the legalizer will
3401 /// materialize the FP immediate as a load from a constant pool.
3402 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3403                                          bool ForCodeSize) const {
3404   return true;
3405 }
3406 
3407 /// isLegalAddressingMode - Return true if the addressing mode represented by
3408 /// AM is legal for this target, for a load/store of the specified type.
3409 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3410                                                   const AddrMode &AM, Type *Ty,
3411                                                   unsigned AS, Instruction *I) const {
3412   if (Ty->isSized()) {
3413     // When LSR detects uses of the same base address to access different
3414     // types (e.g. unions), it will assume a conservative type for these
3415     // uses:
3416     //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
3417     // The type Ty passed here would then be "void". Skip the alignment
3418     // checks, but do not return false right away, since that confuses
3419     // LSR into crashing.
3420     Align A = DL.getABITypeAlign(Ty);
3421     // The base offset must be a multiple of the alignment.
3422     if (!isAligned(A, AM.BaseOffs))
3423       return false;
3424     // The shifted offset must fit in 11 bits.
3425     if (!isInt<11>(AM.BaseOffs >> Log2(A)))
3426       return false;
3427   }
3428 
3429   // No global is ever allowed as a base.
3430   if (AM.BaseGV)
3431     return false;
3432 
3433   int Scale = AM.Scale;
3434   if (Scale < 0)
3435     Scale = -Scale;
3436   switch (Scale) {
3437   case 0:  // No scale reg, "r+i", "r", or just "i".
3438     break;
3439   default: // No scaled addressing mode.
3440     return false;
3441   }
3442   return true;
3443 }
3444 
3445 /// Return true if folding a constant offset with the given GlobalAddress is
3446 /// legal.  It is frequently not legal in PIC relocation models.
3447 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3448       const {
3449   return HTM.getRelocationModel() == Reloc::Static;
3450 }
3451 
3452 /// isLegalICmpImmediate - Return true if the specified immediate is legal
3453 /// icmp immediate, that is the target has icmp instructions which can compare
3454 /// a register against the immediate without having to materialize the
3455 /// immediate into a register.
3456 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3457   return Imm >= -512 && Imm <= 511;
3458 }
3459 
3460 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3461 /// for tail call optimization. Targets which want to do tail call
3462 /// optimization should implement this function.
3463 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3464                                  SDValue Callee,
3465                                  CallingConv::ID CalleeCC,
3466                                  bool IsVarArg,
3467                                  bool IsCalleeStructRet,
3468                                  bool IsCallerStructRet,
3469                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
3470                                  const SmallVectorImpl<SDValue> &OutVals,
3471                                  const SmallVectorImpl<ISD::InputArg> &Ins,
3472                                  SelectionDAG& DAG) const {
3473   const Function &CallerF = DAG.getMachineFunction().getFunction();
3474   CallingConv::ID CallerCC = CallerF.getCallingConv();
3475   bool CCMatch = CallerCC == CalleeCC;
3476 
3477   // ***************************************************************************
3478   //  Look for obvious safe cases to perform tail call optimization that do not
3479   //  require ABI changes.
3480   // ***************************************************************************
3481 
3482   // If this is a tail call via a function pointer, then don't do it!
3483   if (!isa<GlobalAddressSDNode>(Callee) &&
3484       !isa<ExternalSymbolSDNode>(Callee)) {
3485     return false;
3486   }
3487 
3488   // Do not optimize if the calling conventions do not match and the conventions
3489   // used are not C or Fast.
3490   if (!CCMatch) {
3491     bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3492     bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3493     // If R & E, then ok.
3494     if (!R || !E)
3495       return false;
3496   }
3497 
3498   // Do not tail call optimize vararg calls.
3499   if (IsVarArg)
3500     return false;
3501 
3502   // Also avoid tail call optimization if either caller or callee uses struct
3503   // return semantics.
3504   if (IsCalleeStructRet || IsCallerStructRet)
3505     return false;
3506 
3507   // In addition to the cases above, we also disable Tail Call Optimization if
3508   // the calling convention code that at least one outgoing argument needs to
3509   // go on the stack. We cannot check that here because at this point that
3510   // information is not available.
3511   return true;
3512 }
3513 
3514 /// Returns the target specific optimal type for load and store operations as
3515 /// a result of memset, memcpy, and memmove lowering.
3516 ///
3517 /// If DstAlign is zero that means it's safe to destination alignment can
3518 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
3519 /// a need to check it against alignment requirement, probably because the
3520 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3521 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3522 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3523 /// does not need to be loaded.  It returns EVT::Other if the type should be
3524 /// determined using generic target-independent logic.
3525 EVT HexagonTargetLowering::getOptimalMemOpType(
3526     const MemOp &Op, const AttributeList &FuncAttributes) const {
3527   if (Op.size() >= 8 && Op.isAligned(Align(8)))
3528     return MVT::i64;
3529   if (Op.size() >= 4 && Op.isAligned(Align(4)))
3530     return MVT::i32;
3531   if (Op.size() >= 2 && Op.isAligned(Align(2)))
3532     return MVT::i16;
3533   return MVT::Other;
3534 }
3535 
3536 bool HexagonTargetLowering::allowsMemoryAccess(
3537     LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
3538     Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
3539   MVT SVT = VT.getSimpleVT();
3540   if (Subtarget.isHVXVectorType(SVT, true))
3541     return allowsHvxMemoryAccess(SVT, Flags, Fast);
3542   return TargetLoweringBase::allowsMemoryAccess(
3543               Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
3544 }
3545 
3546 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3547     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3548     bool *Fast) const {
3549   MVT SVT = VT.getSimpleVT();
3550   if (Subtarget.isHVXVectorType(SVT, true))
3551     return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
3552   if (Fast)
3553     *Fast = false;
3554   return false;
3555 }
3556 
3557 std::pair<const TargetRegisterClass*, uint8_t>
3558 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3559       MVT VT) const {
3560   if (Subtarget.isHVXVectorType(VT, true)) {
3561     unsigned BitWidth = VT.getSizeInBits();
3562     unsigned VecWidth = Subtarget.getVectorLength() * 8;
3563 
3564     if (VT.getVectorElementType() == MVT::i1)
3565       return std::make_pair(&Hexagon::HvxQRRegClass, 1);
3566     if (BitWidth == VecWidth)
3567       return std::make_pair(&Hexagon::HvxVRRegClass, 1);
3568     assert(BitWidth == 2 * VecWidth);
3569     return std::make_pair(&Hexagon::HvxWRRegClass, 1);
3570   }
3571 
3572   return TargetLowering::findRepresentativeClass(TRI, VT);
3573 }
3574 
3575 bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
3576       ISD::LoadExtType ExtTy, EVT NewVT) const {
3577   // TODO: This may be worth removing. Check regression tests for diffs.
3578   if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
3579     return false;
3580 
3581   auto *L = cast<LoadSDNode>(Load);
3582   std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
3583   // Small-data object, do not shrink.
3584   if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3585     return false;
3586   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
3587     auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
3588     const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
3589     return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
3590   }
3591   return true;
3592 }
3593 
3594 Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
3595                                              Type *ValueTy, Value *Addr,
3596                                              AtomicOrdering Ord) const {
3597   BasicBlock *BB = Builder.GetInsertBlock();
3598   Module *M = BB->getParent()->getParent();
3599   unsigned SZ = ValueTy->getPrimitiveSizeInBits();
3600   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3601   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3602                                    : Intrinsic::hexagon_L4_loadd_locked;
3603   Function *Fn = Intrinsic::getDeclaration(M, IntID);
3604 
3605   auto PtrTy = cast<PointerType>(Addr->getType());
3606   PointerType *NewPtrTy =
3607       Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
3608   Addr = Builder.CreateBitCast(Addr, NewPtrTy);
3609 
3610   Value *Call = Builder.CreateCall(Fn, Addr, "larx");
3611 
3612   return Builder.CreateBitCast(Call, ValueTy);
3613 }
3614 
3615 /// Perform a store-conditional operation to Addr. Return the status of the
3616 /// store. This should be 0 if the store succeeded, non-zero otherwise.
3617 Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
3618                                                    Value *Val, Value *Addr,
3619                                                    AtomicOrdering Ord) const {
3620   BasicBlock *BB = Builder.GetInsertBlock();
3621   Module *M = BB->getParent()->getParent();
3622   Type *Ty = Val->getType();
3623   unsigned SZ = Ty->getPrimitiveSizeInBits();
3624 
3625   Type *CastTy = Builder.getIntNTy(SZ);
3626   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3627   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3628                                    : Intrinsic::hexagon_S4_stored_locked;
3629   Function *Fn = Intrinsic::getDeclaration(M, IntID);
3630 
3631   unsigned AS = Addr->getType()->getPointerAddressSpace();
3632   Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
3633   Val = Builder.CreateBitCast(Val, CastTy);
3634 
3635   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
3636   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3637   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3638   return Ext;
3639 }
3640 
3641 TargetLowering::AtomicExpansionKind
3642 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3643   // Do not expand loads and stores that don't exceed 64 bits.
3644   return LI->getType()->getPrimitiveSizeInBits() > 64
3645              ? AtomicExpansionKind::LLOnly
3646              : AtomicExpansionKind::None;
3647 }
3648 
3649 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3650   // Do not expand loads and stores that don't exceed 64 bits.
3651   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
3652 }
3653 
3654 TargetLowering::AtomicExpansionKind
3655 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3656     AtomicCmpXchgInst *AI) const {
3657   return AtomicExpansionKind::LLSC;
3658 }
3659