//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
                          CCValAssign::LocInfo LocInfo,
                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
  switch (LocVT.SimpleTy) {
  case MVT::f32: {
    // Allocate stack space as follows:
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Use align=8 for the dummy area to align the beginning of these two
    // areas.
    State.AllocateStack(4, Align(8)); // for empty area
    // Use align=4 for the value to place it just after the dummy area.
    unsigned Offset = State.AllocateStack(4, Align(4)); // for float value area
    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    return true;
  }
  default:
    return false;
  }
}
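
// Worked example (illustrative; derived from allocateFloat above, not from
// the ABI document): the first f32 assigned to the stack lands in the second
// word of its 8-byte slot:
//   State.AllocateStack(4, Align(8)) reserves bytes [0, 4)  (empty area)
//   State.AllocateStack(4, Align(4)) returns offset 4       (float value)
// so the float is accessed at slot offset 4, matching the diagram.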

#include "VEGenCallingConv.inc"

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = RetCC_VE;
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}

SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represents the assignment of return values to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, RetCC_VE);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    default:
      llvm_unreachable("Unknown loc info!");
    }

    assert(!VA.needsCustom() && "Unexpected custom lowering");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
}
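
// A minimal sketch (illustrative; written in the DAG-dump style used in
// LowerToTLSGeneralDynamicModel below) of what LowerReturn above emits for a
// single i64 return value in %s0:
//   t1: ch,glue = CopyToReg t0, Register:i64 $sx0, OutVal
//   t2: ch = VEISD::RET_FLAG t1, Register:i64 $sx0, t1:1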

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offsets computed
  // by CC_VE are now correct.
  CCInfo.AnalyzeFormalArguments(Ins, CC_VE);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      unsigned VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // Get the high bits for i32 struct elements.
      if (VA.getValVT() == MVT::i32 && VA.needsCustom())
        Arg = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Arg,
                          DAG.getConstant(32, DL, MVT::i32));

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp+176.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s7.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate the offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the 176 bytes of register save area.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}
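
// Rough sketch (illustrative; the authoritative layout is defined by the VE
// ABI, not this comment) of the incoming frame implied by the constants in
// LowerFormalArguments above:
//   %fp + 0   .. %fp + 175 : register save / reserved area (176 bytes)
//   %fp + 176 .. %fp + 239 : argument slots shadowing the eight argument
//                            registers (ArgsPreserved = 64 bytes)
//   %fp + 240 ..           : arguments passed on the stack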

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16) // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
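
// This hook backs named-register access such as the llvm.read_register and
// llvm.write_register intrinsics. A hypothetical IR-level use (illustrative
// only):
//   %sp = call i64 @llvm.read_register.i64(metadata !0)
//   ...
//   !0 = !{!"sp"}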

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // The VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = 176;
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offsets computed
  // by CC_VE are now correct.
  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);

  // VE requires both registers and the stack to be used for varargs and
  // unprototyped functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze the operands again when they must be stored to BOTH registers
  // and the stack.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, CC_VE2);

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getNextStackOffset();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 8 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register, so prepare to
  // copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn the GlobalAddress/ExternalSymbol node into a value node containing
  // its address here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
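
  // For a non-PIC direct call, the code above materializes the callee address
  // into %s12 using the lea/and/lea.sl pattern documented in makeAddress
  // below; the call itself then branches through %s12 (illustrative: on VE
  // this is a bsic through %s12).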

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp+176 in the callee frame,
    // %sp+176 in ours.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }
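
  // Worked example (illustrative): with all-i64 arguments, the ninth argument
  // is the first one to overflow the eight argument registers. CC_VE assigns
  // it LocMemOffset 64 (just past the preserved area allocated above), so it
  // is stored at %sp + 64 + 176 = %sp + 240.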

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set the inreg flag manually for codegen-generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    unsigned Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32}', two consecutive i32 members can
    // reside in the high and low bits of the same register. Reuse the
    // previous CopyFromReg node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // Get the high bits for i32 struct elements.
    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
                       DAG.getConstant(32, DL, MVT::i32));

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      unsigned Align,
                                                      MachineMemOperand::Flags,
                                                      bool *Fast) const {
  if (Fast) {
    // Unaligned accesses are always fast on VE.
    *Fast = true;
  }
  return true;
}

bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate values for X and Y in ~X & Y.  Only simm7
  // works for X, and only mimm works for Y on VE.  However, this function is
  // used to check whether an immediate value is OK for an and-not instruction
  // as either X or Y.  Generating an additional instruction to materialize an
  // immediate value is no good, since the purpose of this function is to
  // convert a series of 3 instructions into another series of 3 instructions
  // with better parallelism.  Therefore, we return false for all immediate
  // values for now.
  // FIXME: Change hasAndNot to take two operands so that it works
  //        correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}
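
// Illustrative note for hasAndNot (profitability assumptions, not tested
// output): with both operands in registers, a DAG combine may form
//   (and (xor %x, -1), %y)
// which VE can select as a single and-not style operation. With Y an
// immediate such as 0x3039 (12345), the value fits neither simm7 (as X) nor
// mimm (as Y), so an extra instruction would be needed to materialize it.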

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);

  /// Load & Store {
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      // Turn FP extload into load/fpextend
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);

      // Turn FP truncstore into trunc + store.
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have i1 sign-extending loads.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }
  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to an access with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  /// } Stack

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // Use isel patterns for i32 and i64.
    setOperationAction(ISD::BSWAP, IntVT, Legal);
    setOperationAction(ISD::CTLZ, IntVT, Legal);
    setOperationAction(ISD::CTPOP, IntVT, Legal);

    // Use isel patterns for i64; promote i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 is not supported.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(RET_FLAG)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}
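
// A minimal sketch of what makeHiLoPair typically selects to (mirroring the
// PIC sequences documented in makeAddress below; register choice and
// relocation spelling are illustrative only):
//   lea %s35, %lo(sym)
//   and %s35, %s35, (32)0
//   lea.sl %s35, %hi(sym)(%s35)
// where the lea pair realizes VEISD::Lo/VEISD::Hi and the combining ADD.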

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a GOT load for every variable!
  if (isPositionIndependent()) {
    // GLOBAL_BASE_REG is codegen'ed with a call. Inform MFI that this
    // function has calls.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    MFI.setHasCalls(true);
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
      //     lea %s35, %gotoff_lo(.LCPI0_0)
      //     and %s35, %s35, (32)0
      //     lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35)
      //     adds.l %s35, %s15, %s35                  ; %s15 is GOT
      // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local-linkage PIC code.
    //     lea %s35, %got_lo(.LCPI0_0)
    //     and %s35, %s35, (32)0
    //     lea.sl %s35, %got_hi(.LCPI0_0)(%s35)
    //     adds.l %s35, %s15, %s35                  ; %s15 is GOT
    //     ld     %s35, (,%s35)
    // FIXME: use lea.sl %s35, %gotoff_hi(.LCPI0_0)(%s35, %s15)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::LowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::LowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine ISD will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, dl);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, dl, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, dl, true),
                             DAG.getIntPtrConstant(0, dl, true),
                             Chain.getValue(1), dl);
  Chain = DAG.getCopyFromReg(Chain, dl, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as a call. Inform MFI that this function
  // has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return LowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue VETargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;

  if (VT == MVT::f32) {
    // An f32 value needs special handling as shown below.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList to point to the float value itself.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
                     std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
}
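
// Worked example for LowerVAARG (illustrative): if the current va_list
// pointer is 16, an f32 vaarg is loaded from 16 + 4 = 20 (the second word of
// its 8-byte slot, matching the diagram above) and 16 + 8 = 24 is stored back
// as the next pointer; for i64/f64 the load happens at 16 itself.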

SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate the following code:
  //   (void)__ve_grow_stack(size);
  //   ret = GETSTACKTOP;        // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments.
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  //  Chain = Result.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
                             DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}
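
// Worked example (illustrative): for `alloca i8, align 32` with a stack
// alignment below 32, the callee above is __ve_grow_stack_align with an extra
// argument of ~31, and the returned stack top is rounded up via
// (Result + 31) & ~31 before being handed back as the allocation address.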

SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("Should not custom lower this!");
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return LowerGlobalTLSAddress(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::VAARG:
    return LowerVAARG(Op, DAG);
  }
}
/// } Custom Lower