1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
21 #include "llvm/ADT/Statistic.h"
22 #include "llvm/CodeGen/ISDOpcodes.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/KnownBits.h"
25 
26 using namespace llvm;
27 
28 #define DEBUG_TYPE "loongarch-isel-lowering"
29 
// Hidden command-line flag, off by default. insertDivByZeroTrap returns
// without emitting anything unless this flag is set.
static cl::opt<bool> ZeroDivCheck(
    "loongarch-check-zero-division", cl::Hidden,
    cl::desc("Trap on integer division by zero."),
    cl::init(false));
34 
35 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
36                                                  const LoongArchSubtarget &STI)
37     : TargetLowering(TM), Subtarget(STI) {
38 
39   MVT GRLenVT = Subtarget.getGRLenVT();
40   // Set up the register classes.
41   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
42   if (Subtarget.hasBasicF())
43     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
44   if (Subtarget.hasBasicD())
45     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
46 
47   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
48                    MVT::i1, Promote);
49 
50   // TODO: add necessary setOperationAction calls later.
51   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
52   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
53   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
54   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
55 
56   setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
57 
58   if (Subtarget.is64Bit()) {
59     setOperationAction(ISD::SHL, MVT::i32, Custom);
60     setOperationAction(ISD::SRA, MVT::i32, Custom);
61     setOperationAction(ISD::SRL, MVT::i32, Custom);
62     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
63     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
64     if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
65       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
66   }
67 
68   static const ISD::CondCode FPCCToExpand[] = {ISD::SETOGT, ISD::SETOGE,
69                                                ISD::SETUGT, ISD::SETUGE};
70 
71   if (Subtarget.hasBasicF()) {
72     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
73     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
74   }
75   if (Subtarget.hasBasicD()) {
76     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
77     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
78     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
79     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
80   }
81 
82   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
83   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
84   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
85   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
86   if (!Subtarget.is64Bit())
87     setLibcallName(RTLIB::MUL_I128, nullptr);
88 
89   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
90   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Custom);
91 
92   // Compute derived properties from the register classes.
93   computeRegisterProperties(STI.getRegisterInfo());
94 
95   setStackPointerRegisterToSaveRestore(LoongArch::R3);
96 
97   setBooleanContents(ZeroOrOneBooleanContent);
98 
99   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
100 
101   // Function alignments.
102   const Align FunctionAlignment(4);
103   setMinFunctionAlignment(FunctionAlignment);
104 
105   setTargetDAGCombine(ISD::AND);
106   setTargetDAGCombine(ISD::OR);
107   setTargetDAGCombine(ISD::SRL);
108 }
109 
110 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
111                                                 SelectionDAG &DAG) const {
112   switch (Op.getOpcode()) {
113   default:
114     report_fatal_error("unimplemented operand");
115   case ISD::GlobalAddress:
116     return lowerGlobalAddress(Op, DAG);
117   case ISD::SHL_PARTS:
118     return lowerShiftLeftParts(Op, DAG);
119   case ISD::SRA_PARTS:
120     return lowerShiftRightParts(Op, DAG, true);
121   case ISD::SRL_PARTS:
122     return lowerShiftRightParts(Op, DAG, false);
123   case ISD::SHL:
124   case ISD::SRA:
125   case ISD::SRL:
126     // This can be called for an i32 shift amount that needs to be promoted.
127     assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
128            "Unexpected custom legalisation");
129     return SDValue();
130   case ISD::ConstantPool:
131     return lowerConstantPool(Op, DAG);
132   case ISD::FP_TO_SINT:
133     return lowerFP_TO_SINT(Op, DAG);
134   case ISD::BITCAST:
135     return lowerBITCAST(Op, DAG);
136   case ISD::FP_TO_UINT:
137     return SDValue();
138   case ISD::UINT_TO_FP:
139     return lowerUINT_TO_FP(Op, DAG);
140   }
141 }
142 
143 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
144                                                  SelectionDAG &DAG) const {
145 
146   SDLoc DL(Op);
147   auto &TLI = DAG.getTargetLoweringInfo();
148   SDValue Tmp1, Tmp2;
149   SDValue Op1 = Op.getOperand(0);
150   if (Op1->getOpcode() == ISD::AssertZext ||
151       Op1->getOpcode() == ISD::AssertSext)
152     return Op;
153   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op.getOperand(0));
154   SDValue Res = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Trunc);
155   SDNode *N = Res.getNode();
156   TLI.expandUINT_TO_FP(N, Tmp1, Tmp2, DAG);
157   return Tmp1;
158 }
159 
160 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
161                                               SelectionDAG &DAG) const {
162 
163   SDLoc DL(Op);
164   SDValue Op0 = Op.getOperand(0);
165 
166   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
167       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
168     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
169     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
170   }
171   return Op;
172 }
173 
174 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
175                                                  SelectionDAG &DAG) const {
176 
177   SDLoc DL(Op);
178 
179   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
180       !Subtarget.hasBasicD()) {
181     SDValue Dst =
182         DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
183     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
184   }
185 
186   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
187   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
188   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
189 }
190 
191 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
192                                                    SelectionDAG &DAG) const {
193   SDLoc DL(Op);
194   EVT Ty = Op.getValueType();
195   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
196 
197   // FIXME: Only support PC-relative addressing to access the symbol.
198   // Target flags will be added later.
199   if (!isPositionIndependent()) {
200     SDValue ConstantN = DAG.getTargetConstantPool(
201         N->getConstVal(), Ty, N->getAlign(), N->getOffset());
202     SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, ConstantN),
203                    0);
204     SDValue Addr(DAG.getMachineNode(Subtarget.is64Bit() ? LoongArch::ADDI_D
205                                                         : LoongArch::ADDI_W,
206                                     DL, Ty, AddrHi, ConstantN),
207                  0);
208     return Addr;
209   }
210   report_fatal_error("Unable to lower ConstantPool");
211 }
212 
213 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
214                                                     SelectionDAG &DAG) const {
215   SDLoc DL(Op);
216   EVT Ty = getPointerTy(DAG.getDataLayout());
217   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
218   unsigned ADDIOp = Subtarget.is64Bit() ? LoongArch::ADDI_D : LoongArch::ADDI_W;
219 
220   // TODO: Support dso_preemptable and target flags.
221   if (GV->isDSOLocal()) {
222     SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty);
223     SDValue AddrHi(DAG.getMachineNode(LoongArch::PCALAU12I, DL, Ty, GA), 0);
224     SDValue Addr(DAG.getMachineNode(ADDIOp, DL, Ty, AddrHi, GA), 0);
225     return Addr;
226   }
227   report_fatal_error("Unable to lowerGlobalAddress");
228 }
229 
230 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
231                                                      SelectionDAG &DAG) const {
232   SDLoc DL(Op);
233   SDValue Lo = Op.getOperand(0);
234   SDValue Hi = Op.getOperand(1);
235   SDValue Shamt = Op.getOperand(2);
236   EVT VT = Lo.getValueType();
237 
238   // if Shamt-GRLen < 0: // Shamt < GRLen
239   //   Lo = Lo << Shamt
240   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
241   // else:
242   //   Lo = 0
243   //   Hi = Lo << (Shamt-GRLen)
244 
245   SDValue Zero = DAG.getConstant(0, DL, VT);
246   SDValue One = DAG.getConstant(1, DL, VT);
247   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
248   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
249   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
250   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
251 
252   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
253   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
254   SDValue ShiftRightLo =
255       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
256   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
257   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
258   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
259 
260   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
261 
262   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
263   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
264 
265   SDValue Parts[2] = {Lo, Hi};
266   return DAG.getMergeValues(Parts, DL);
267 }
268 
269 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
270                                                       SelectionDAG &DAG,
271                                                       bool IsSRA) const {
272   SDLoc DL(Op);
273   SDValue Lo = Op.getOperand(0);
274   SDValue Hi = Op.getOperand(1);
275   SDValue Shamt = Op.getOperand(2);
276   EVT VT = Lo.getValueType();
277 
278   // SRA expansion:
279   //   if Shamt-GRLen < 0: // Shamt < GRLen
280   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
281   //     Hi = Hi >>s Shamt
282   //   else:
283   //     Lo = Hi >>s (Shamt-GRLen);
284   //     Hi = Hi >>s (GRLen-1)
285   //
286   // SRL expansion:
287   //   if Shamt-GRLen < 0: // Shamt < GRLen
288   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
289   //     Hi = Hi >>u Shamt
290   //   else:
291   //     Lo = Hi >>u (Shamt-GRLen);
292   //     Hi = 0;
293 
294   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
295 
296   SDValue Zero = DAG.getConstant(0, DL, VT);
297   SDValue One = DAG.getConstant(1, DL, VT);
298   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
299   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
300   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
301   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
302 
303   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
304   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
305   SDValue ShiftLeftHi =
306       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
307   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
308   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
309   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
310   SDValue HiFalse =
311       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
312 
313   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
314 
315   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
316   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
317 
318   SDValue Parts[2] = {Lo, Hi};
319   return DAG.getMergeValues(Parts, DL);
320 }
321 
322 // Returns the opcode of the target-specific SDNode that implements the 32-bit
323 // form of the given Opcode.
324 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
325   switch (Opcode) {
326   default:
327     llvm_unreachable("Unexpected opcode");
328   case ISD::SHL:
329     return LoongArchISD::SLL_W;
330   case ISD::SRA:
331     return LoongArchISD::SRA_W;
332   case ISD::SRL:
333     return LoongArchISD::SRL_W;
334   }
335 }
336 
337 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
338 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
339 // otherwise be promoted to i64, making it difficult to select the
340 // SLL_W/.../*W later one because the fact the operation was originally of
341 // type i8/i16/i32 is lost.
342 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
343                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
344   SDLoc DL(N);
345   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
346   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
347   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
348   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
349   // ReplaceNodeResults requires we maintain the same type for the return value.
350   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
351 }
352 
353 void LoongArchTargetLowering::ReplaceNodeResults(
354     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
355   SDLoc DL(N);
356   switch (N->getOpcode()) {
357   default:
358     llvm_unreachable("Don't know how to legalize this operation");
359   case ISD::SHL:
360   case ISD::SRA:
361   case ISD::SRL:
362     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
363            "Unexpected custom legalisation");
364     if (N->getOperand(1).getOpcode() != ISD::Constant) {
365       Results.push_back(customLegalizeToWOp(N, DAG));
366       break;
367     }
368     break;
369   case ISD::FP_TO_SINT: {
370     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
371            "Unexpected custom legalisation");
372     SDValue Src = N->getOperand(0);
373     EVT VT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
374     SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, VT, Src);
375     Results.push_back(DAG.getNode(ISD::BITCAST, DL, N->getValueType(0), Dst));
376     break;
377   }
378   case ISD::BITCAST: {
379     EVT VT = N->getValueType(0);
380     SDValue Src = N->getOperand(0);
381     EVT SrcVT = Src.getValueType();
382     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
383         Subtarget.hasBasicF()) {
384       SDValue Dst =
385           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
386       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
387     }
388     break;
389   }
390   case ISD::FP_TO_UINT: {
391     assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
392            "Unexpected custom legalisation");
393     auto &TLI = DAG.getTargetLoweringInfo();
394     SDValue Tmp1, Tmp2;
395     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
396     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
397     break;
398   }
399   }
400 }
401 
402 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
403                                  TargetLowering::DAGCombinerInfo &DCI,
404                                  const LoongArchSubtarget &Subtarget) {
405   if (DCI.isBeforeLegalizeOps())
406     return SDValue();
407 
408   SDValue FirstOperand = N->getOperand(0);
409   SDValue SecondOperand = N->getOperand(1);
410   unsigned FirstOperandOpc = FirstOperand.getOpcode();
411   EVT ValTy = N->getValueType(0);
412   SDLoc DL(N);
413   uint64_t lsb, msb;
414   unsigned SMIdx, SMLen;
415   ConstantSDNode *CN;
416   SDValue NewOperand;
417   MVT GRLenVT = Subtarget.getGRLenVT();
418 
419   // Op's second operand must be a shifted mask.
420   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
421       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
422     return SDValue();
423 
424   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
425     // Pattern match BSTRPICK.
426     //  $dst = and ((sra or srl) $src , lsb), (2**len - 1)
427     //  => BSTRPICK $dst, $src, msb, lsb
428     //  where msb = lsb + len - 1
429 
430     // The second operand of the shift must be an immediate.
431     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
432       return SDValue();
433 
434     lsb = CN->getZExtValue();
435 
436     // Return if the shifted mask does not start at bit 0 or the sum of its
437     // length and lsb exceeds the word's size.
438     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
439       return SDValue();
440 
441     NewOperand = FirstOperand.getOperand(0);
442   } else {
443     // Pattern match BSTRPICK.
444     //  $dst = and $src, (2**len- 1) , if len > 12
445     //  => BSTRPICK $dst, $src, msb, lsb
446     //  where lsb = 0 and msb = len - 1
447 
448     // If the mask is <= 0xfff, andi can be used instead.
449     if (CN->getZExtValue() <= 0xfff)
450       return SDValue();
451 
452     // Return if the mask doesn't start at position 0.
453     if (SMIdx)
454       return SDValue();
455 
456     lsb = 0;
457     NewOperand = FirstOperand;
458   }
459   msb = lsb + SMLen - 1;
460   return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
461                      DAG.getConstant(msb, DL, GRLenVT),
462                      DAG.getConstant(lsb, DL, GRLenVT));
463 }
464 
465 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
466                                  TargetLowering::DAGCombinerInfo &DCI,
467                                  const LoongArchSubtarget &Subtarget) {
468   if (DCI.isBeforeLegalizeOps())
469     return SDValue();
470 
471   // $dst = srl (and $src, Mask), Shamt
472   // =>
473   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
474   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
475   //
476 
477   SDValue FirstOperand = N->getOperand(0);
478   ConstantSDNode *CN;
479   EVT ValTy = N->getValueType(0);
480   SDLoc DL(N);
481   MVT GRLenVT = Subtarget.getGRLenVT();
482   unsigned MaskIdx, MaskLen;
483   uint64_t Shamt;
484 
485   // The first operand must be an AND and the second operand of the AND must be
486   // a shifted mask.
487   if (FirstOperand.getOpcode() != ISD::AND ||
488       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
489       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
490     return SDValue();
491 
492   // The second operand (shift amount) must be an immediate.
493   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
494     return SDValue();
495 
496   Shamt = CN->getZExtValue();
497   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
498     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
499                        FirstOperand->getOperand(0),
500                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
501                        DAG.getConstant(Shamt, DL, GRLenVT));
502 
503   return SDValue();
504 }
505 
/// Try to fold an ISD::OR node into a LoongArchISD::BSTRINS node.
///
/// Nothing is done while DCI.isBeforeLegalizeOps() holds, nor for result
/// widths other than 32 or 64 bits. Patterns 1-7 are tried with the operands
/// as given and then once more with N0/N1 swapped. Pattern 8 is tried only
/// after that, again in both operand orders. Returns an empty SDValue when no
/// pattern matches.
///
/// Note that the pattern conditions assign to CN0/CN1/Shamt and fill the
/// MaskIdx*/MaskLen* variables as they are evaluated, so the order of the
/// sub-conditions inside each `if` matters.
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const LoongArchSubtarget &Subtarget) {
  MVT GRLenVT = Subtarget.getGRLenVT();
  EVT ValTy = N->getValueType(0);
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  ConstantSDNode *CN0, *CN1;
  SDLoc DL(N);
  unsigned ValBits = ValTy.getSizeInBits();
  unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
  unsigned Shamt;
  // Tracks whether the current retry loop has already swapped N0 and N1.
  bool SwapAndRetried = false;

  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (ValBits != 32 && ValBits != 64)
    return SDValue();

Retry:
  // 1st pattern to match BSTRINS:
  //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
  //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 2nd pattern to match BSTRINS:
  //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
  //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
  //  =>
  //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
      (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
      MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0).getOperand(0),
                       DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 3rd pattern to match BSTRINS:
  //  R = or (and X, mask0), (and Y, mask1)
  //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
  //  =>
  //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
  //  where msb = lsb + size - 1
  //
  // NOTE(review): unlike patterns 1, 2 and 4, the range check here compares
  // against 64 rather than ValBits, and for 32-bit values the mask length is
  // reduced with `& 31` before msb is computed. Presumably this compensates
  // for ~getSExtValue() producing a mask that runs up to bit 63 -- confirm.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (MaskIdx0 + MaskLen0 <= 64) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
                                   DAG.getConstant(MaskIdx0, DL, GRLenVT)),
                       DAG.getConstant(ValBits == 32
                                           ? (MaskIdx0 + (MaskLen0 & 31) - 1)
                                           : (MaskIdx0 + MaskLen0 - 1),
                                       DL, GRLenVT),
                       DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 4th pattern to match BSTRINS:
  //  R = or (and X, mask), (shl Y, shamt)
  //  where mask = (2**shamt - 1)
  //  =>
  //  R = BSTRINS X, Y, ValBits - 1, shamt
  //  where ValBits = 32 or 64
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
      MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      (Shamt = CN1->getZExtValue()) == MaskLen0 &&
      (MaskIdx0 + MaskLen0 <= ValBits)) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
    return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
                       N1.getOperand(0),
                       DAG.getConstant((ValBits - 1), DL, GRLenVT),
                       DAG.getConstant(Shamt, DL, GRLenVT));
  }

  // 5th pattern to match BSTRINS:
  //  R = or (and X, mask), const
  //  where ~mask = (2**size - 1) << lsb, mask & const = 0
  //  =>
  //  R = BSTRINS X, (const >> lsb), msb, lsb
  //  where msb = lsb + size - 1
  //
  // NOTE(review): this pattern has no check that lsb + size fits in ValBits,
  // unlike patterns 1, 2 and 4 -- confirm that this is intended.
  if (N0.getOpcode() == ISD::AND &&
      (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
      isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
      (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
      (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
    LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
    return DAG.getNode(
        LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
        DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
        DAG.getConstant(MaskIdx0, DL, GRLenVT));
  }

  // 6th pattern.
  // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
  // by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
  //
  // This is the general form of the 2nd pattern (same `shl (and ...)` shape,
  // but relying on known-zero bits of b instead of an explicit `and X,
  // mask0`). The 2nd pattern is tried first because it inserts into X
  // directly and so does not need the `and` feeding it.
  ConstantSDNode *CNMask, *CNShamt;
  unsigned MaskIdx, MaskLen;
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      CNShamt->getZExtValue() + MaskLen <= ValBits) {
    Shamt = CNShamt->getZExtValue();
    // Bit positions of b that the inserted field would overwrite.
    APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         N1.getOperand(0).getOperand(0),
                         DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(Shamt, DL, GRLenVT));
    }
  }

  // 7th pattern.
  // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
  // overwritten by the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
  //
  // Similarly, this is the general form of the 1st pattern (same
  // `and (shl ...)` shape), which is tried first for the same reason.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
      N1.getOperand(0).getOpcode() == ISD::SHL &&
      (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
      CNShamt->getZExtValue() == MaskIdx) {
    // Bit positions of b that the inserted field would overwrite.
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         N1.getOperand(0).getOperand(0),
                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(MaskIdx, DL, GRLenVT));
    }
  }

  // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
  if (!SwapAndRetried) {
    std::swap(N0, N1);
    SwapAndRetried = true;
    goto Retry;
  }

  // Patterns 1-7 failed in both operand orders. N0/N1 are still in swapped
  // order here; pattern 8 is tried in this order first and then swapped back.
  SwapAndRetried = false;
Retry2:
  // 8th pattern.
  // a = b | (c & shifted_mask), where all positions in b to be overwritten by
  // the incoming bits are known to be zero.
  // =>
  // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
  //
  // This pattern needs an extra shift of c, so it is tried only after all of
  // the patterns above have failed, in order to match as many nodes as
  // possible or generate fewer instructions.
  if (N1.getOpcode() == ISD::AND &&
      (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
      isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
    // Bit positions of b that the inserted field would overwrite.
    APInt ShMask(ValBits, CNMask->getZExtValue());
    if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
      LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
      return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
                         DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
                                     N1->getOperand(0),
                                     DAG.getConstant(MaskIdx, DL, GRLenVT)),
                         DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
                         DAG.getConstant(MaskIdx, DL, GRLenVT));
    }
  }
  // Swap N0/N1 and retry.
  if (!SwapAndRetried) {
    std::swap(N0, N1);
    SwapAndRetried = true;
    goto Retry2;
  }

  return SDValue();
}
723 
724 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
725                                                    DAGCombinerInfo &DCI) const {
726   SelectionDAG &DAG = DCI.DAG;
727   switch (N->getOpcode()) {
728   default:
729     break;
730   case ISD::AND:
731     return performANDCombine(N, DAG, DCI, Subtarget);
732   case ISD::OR:
733     return performORCombine(N, DAG, DCI, Subtarget);
734   case ISD::SRL:
735     return performSRLCombine(N, DAG, DCI, Subtarget);
736   }
737   return SDValue();
738 }
739 
740 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
741                                               MachineBasicBlock &MBB,
742                                               const TargetInstrInfo &TII) {
743   if (!ZeroDivCheck)
744     return &MBB;
745 
746   // Build instructions:
747   //   div(or mod)   $dst, $dividend, $divisor
748   //   bnez          $divisor, 8
749   //   break         7
750   //   fallthrough
751   MachineOperand &Divisor = MI.getOperand(2);
752   auto FallThrough = std::next(MI.getIterator());
753 
754   BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BNEZ))
755       .addReg(Divisor.getReg(), getKillRegState(Divisor.isKill()))
756       .addImm(8);
757 
758   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
759   // definition of BRK_DIVZERO.
760   BuildMI(MBB, FallThrough, MI.getDebugLoc(), TII.get(LoongArch::BREAK))
761       .addImm(7/*BRK_DIVZERO*/);
762 
763   // Clear Divisor's kill flag.
764   Divisor.setIsKill(false);
765 
766   return &MBB;
767 }
768 
769 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
770     MachineInstr &MI, MachineBasicBlock *BB) const {
771 
772   switch (MI.getOpcode()) {
773   default:
774     llvm_unreachable("Unexpected instr type to insert");
775   case LoongArch::DIV_W:
776   case LoongArch::DIV_WU:
777   case LoongArch::MOD_W:
778   case LoongArch::MOD_WU:
779   case LoongArch::DIV_D:
780   case LoongArch::DIV_DU:
781   case LoongArch::MOD_D:
782   case LoongArch::MOD_DU:
783     return insertDivByZeroTrap(MI, *BB, *Subtarget.getInstrInfo());
784     break;
785   }
786 }
787 
// Returns the printable name of the LoongArchISD node identified by Opcode, in
// the form "LoongArchISD::<node>". Returns nullptr for
// LoongArchISD::FIRST_NUMBER and for any opcode without a NODE_NAME_CASE entry
// in the switch below.
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((LoongArchISD::NodeType)Opcode) {
  case LoongArchISD::FIRST_NUMBER:
    // Not a real node; fall out of the switch and return nullptr.
    break;

// Expands to a case label that returns the stringified, namespace-qualified
// node name.
#define NODE_NAME_CASE(node)                                                   \
  case LoongArchISD::node:                                                     \
    return "LoongArchISD::" #node;

    // TODO: Add more target-dependent nodes later.
    NODE_NAME_CASE(CALL)
    NODE_NAME_CASE(RET)
    NODE_NAME_CASE(SLL_W)
    NODE_NAME_CASE(SRA_W)
    NODE_NAME_CASE(SRL_W)
    NODE_NAME_CASE(BSTRINS)
    NODE_NAME_CASE(BSTRPICK)
    NODE_NAME_CASE(MOVGR2FR_W_LA64)
    NODE_NAME_CASE(MOVFR2GR_S_LA64)
    NODE_NAME_CASE(FTINT)
  }
// The helper macro is only meaningful inside the switch above.
#undef NODE_NAME_CASE
  return nullptr;
}
812 
813 //===----------------------------------------------------------------------===//
814 //                     Calling Convention Implementation
815 //===----------------------------------------------------------------------===//
816 // FIXME: Now, we only support CallingConv::C with fixed arguments which are
817 // passed with integer or floating-point registers.
// General-purpose registers (R4-R11) that CC_LoongArch allocates from for
// every value that is neither f32 nor f64.
const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
                             LoongArch::R7,  LoongArch::R8, LoongArch::R9,
                             LoongArch::R10, LoongArch::R11};
// Floating-point registers (F0-F7) that CC_LoongArch allocates from for f32
// values.
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
                               LoongArch::F3, LoongArch::F4, LoongArch::F5,
                               LoongArch::F6, LoongArch::F7};
// Floating-point registers (F0_64-F7_64) that CC_LoongArch allocates from for
// f64 values.
const MCPhysReg ArgFPR64s[] = {
    LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
    LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
827 
828 // Implements the LoongArch calling convention. Returns true upon failure.
829 static bool CC_LoongArch(unsigned ValNo, MVT ValVT,
830                          CCValAssign::LocInfo LocInfo, CCState &State) {
831   // Allocate to a register if possible.
832   Register Reg;
833 
834   if (ValVT == MVT::f32)
835     Reg = State.AllocateReg(ArgFPR32s);
836   else if (ValVT == MVT::f64)
837     Reg = State.AllocateReg(ArgFPR64s);
838   else
839     Reg = State.AllocateReg(ArgGPRs);
840   if (Reg) {
841     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, ValVT, LocInfo));
842     return false;
843   }
844 
845   // TODO: Handle arguments passed without register.
846   return true;
847 }
848 
849 void LoongArchTargetLowering::analyzeInputArgs(
850     CCState &CCInfo, const SmallVectorImpl<ISD::InputArg> &Ins,
851     LoongArchCCAssignFn Fn) const {
852   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
853     MVT ArgVT = Ins[i].VT;
854 
855     if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) {
856       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
857                         << EVT(ArgVT).getEVTString() << '\n');
858       llvm_unreachable("");
859     }
860   }
861 }
862 
863 void LoongArchTargetLowering::analyzeOutputArgs(
864     CCState &CCInfo, const SmallVectorImpl<ISD::OutputArg> &Outs,
865     LoongArchCCAssignFn Fn) const {
866   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
867     MVT ArgVT = Outs[i].VT;
868 
869     if (Fn(i, ArgVT, CCValAssign::Full, CCInfo)) {
870       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
871                         << EVT(ArgVT).getEVTString() << "\n");
872       llvm_unreachable("");
873     }
874   }
875 }
876 
877 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
878                                 const CCValAssign &VA, const SDLoc &DL,
879                                 const LoongArchTargetLowering &TLI) {
880   MachineFunction &MF = DAG.getMachineFunction();
881   MachineRegisterInfo &RegInfo = MF.getRegInfo();
882   EVT LocVT = VA.getLocVT();
883   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
884   Register VReg = RegInfo.createVirtualRegister(RC);
885   RegInfo.addLiveIn(VA.getLocReg(), VReg);
886 
887   return DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
888 }
889 
890 // Transform physical registers into virtual registers.
891 SDValue LoongArchTargetLowering::LowerFormalArguments(
892     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
893     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
894     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
895 
896   MachineFunction &MF = DAG.getMachineFunction();
897 
898   switch (CallConv) {
899   default:
900     llvm_unreachable("Unsupported calling convention");
901   case CallingConv::C:
902     break;
903   }
904 
905   // Assign locations to all of the incoming arguments.
906   SmallVector<CCValAssign> ArgLocs;
907   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
908 
909   analyzeInputArgs(CCInfo, Ins, CC_LoongArch);
910 
911   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
912     InVals.push_back(unpackFromRegLoc(DAG, Chain, ArgLocs[i], DL, *this));
913 
914   return Chain;
915 }
916 
917 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
918 // and output parameter nodes.
919 SDValue
920 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
921                                    SmallVectorImpl<SDValue> &InVals) const {
922   SelectionDAG &DAG = CLI.DAG;
923   SDLoc &DL = CLI.DL;
924   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
925   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
926   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
927   SDValue Chain = CLI.Chain;
928   SDValue Callee = CLI.Callee;
929   CallingConv::ID CallConv = CLI.CallConv;
930   bool IsVarArg = CLI.IsVarArg;
931   EVT PtrVT = getPointerTy(DAG.getDataLayout());
932   CLI.IsTailCall = false;
933 
934   if (IsVarArg)
935     report_fatal_error("LowerCall with varargs not implemented");
936 
937   MachineFunction &MF = DAG.getMachineFunction();
938 
939   // Analyze the operands of the call, assigning locations to each operand.
940   SmallVector<CCValAssign> ArgLocs;
941   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
942 
943   analyzeOutputArgs(ArgCCInfo, Outs, CC_LoongArch);
944 
945   // Get a count of how many bytes are to be pushed on the stack.
946   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
947 
948   for (auto &Arg : Outs) {
949     if (!Arg.Flags.isByVal())
950       continue;
951     report_fatal_error("Passing arguments byval not implemented");
952   }
953 
954   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
955 
956   // Copy argument values to their designated locations.
957   SmallVector<std::pair<Register, SDValue>> RegsToPass;
958   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
959     CCValAssign &VA = ArgLocs[i];
960     SDValue ArgValue = OutVals[i];
961 
962     // Promote the value if needed.
963     // For now, only handle fully promoted arguments.
964     if (VA.getLocInfo() != CCValAssign::Full)
965       report_fatal_error("Unknown loc info");
966 
967     if (VA.isRegLoc()) {
968       // Queue up the argument copies and emit them at the end.
969       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
970     } else {
971       report_fatal_error("Passing arguments via the stack not implemented");
972     }
973   }
974 
975   SDValue Glue;
976 
977   // Build a sequence of copy-to-reg nodes, chained and glued together.
978   for (auto &Reg : RegsToPass) {
979     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
980     Glue = Chain.getValue(1);
981   }
982 
983   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
984   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
985   // split it and then direct call can be matched by PseudoCALL.
986   // FIXME: Add target flags for relocation.
987   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee))
988     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT);
989   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
990     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
991 
992   // The first call operand is the chain and the second is the target address.
993   SmallVector<SDValue> Ops;
994   Ops.push_back(Chain);
995   Ops.push_back(Callee);
996 
997   // Add argument registers to the end of the list so that they are
998   // known live into the call.
999   for (auto &Reg : RegsToPass)
1000     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
1001 
1002   // Add a register mask operand representing the call-preserved registers.
1003   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1004   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1005   assert(Mask && "Missing call preserved mask for calling convention");
1006   Ops.push_back(DAG.getRegisterMask(Mask));
1007 
1008   // Glue the call to the argument copies, if any.
1009   if (Glue.getNode())
1010     Ops.push_back(Glue);
1011 
1012   // Emit the call.
1013   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1014 
1015   Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
1016   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1017   Glue = Chain.getValue(1);
1018 
1019   // Mark the end of the call, which is glued to the call itself.
1020   Chain = DAG.getCALLSEQ_END(Chain, DAG.getConstant(NumBytes, DL, PtrVT, true),
1021                              DAG.getConstant(0, DL, PtrVT, true), Glue, DL);
1022   Glue = Chain.getValue(1);
1023 
1024   // Assign locations to each value returned by this call.
1025   SmallVector<CCValAssign> RVLocs;
1026   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
1027   analyzeInputArgs(RetCCInfo, Ins, CC_LoongArch);
1028 
1029   // Copy all of the result registers out of their specified physreg.
1030   for (auto &VA : RVLocs) {
1031     // Copy the value out.
1032     SDValue RetValue =
1033         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
1034     Chain = RetValue.getValue(1);
1035     Glue = RetValue.getValue(2);
1036 
1037     InVals.push_back(Chain.getValue(0));
1038   }
1039 
1040   return Chain;
1041 }
1042 
1043 bool LoongArchTargetLowering::CanLowerReturn(
1044     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1045     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
1046   // Any return value split in to more than two values can't be returned
1047   // directly.
1048   return Outs.size() <= 2;
1049 }
1050 
1051 SDValue LoongArchTargetLowering::LowerReturn(
1052     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1053     const SmallVectorImpl<ISD::OutputArg> &Outs,
1054     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1055     SelectionDAG &DAG) const {
1056   // Stores the assignment of the return value to a location.
1057   SmallVector<CCValAssign> RVLocs;
1058 
1059   // Info about the registers and stack slot.
1060   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
1061                  *DAG.getContext());
1062 
1063   analyzeOutputArgs(CCInfo, Outs, CC_LoongArch);
1064 
1065   SDValue Glue;
1066   SmallVector<SDValue, 4> RetOps(1, Chain);
1067 
1068   // Copy the result values into the output registers.
1069   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
1070     CCValAssign &VA = RVLocs[i];
1071     assert(VA.isRegLoc() && "Can only return in registers!");
1072 
1073     // Handle a 'normal' return.
1074     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
1075 
1076     // Guarantee that all emitted copies are stuck together.
1077     Glue = Chain.getValue(1);
1078     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
1079   }
1080 
1081   RetOps[0] = Chain; // Update chain.
1082 
1083   // Add the glue node if we have it.
1084   if (Glue.getNode())
1085     RetOps.push_back(Glue);
1086 
1087   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
1088 }
1089 
1090 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
1091                                            bool ForCodeSize) const {
1092   assert((VT == MVT::f32 || VT == MVT::f64) && "Unexpected VT");
1093 
1094   if (VT == MVT::f32 && !Subtarget.hasBasicF())
1095     return false;
1096   if (VT == MVT::f64 && !Subtarget.hasBasicD())
1097     return false;
1098   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
1099 }
1100