1 //=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation  ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that LoongArch uses to lower LLVM code into
10 // a selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "LoongArchISelLowering.h"
15 #include "LoongArch.h"
16 #include "LoongArchMachineFunctionInfo.h"
17 #include "LoongArchRegisterInfo.h"
18 #include "LoongArchSubtarget.h"
19 #include "LoongArchTargetMachine.h"
20 #include "MCTargetDesc/LoongArchBaseInfo.h"
21 #include "MCTargetDesc/LoongArchMCTargetDesc.h"
22 #include "llvm/ADT/Statistic.h"
23 #include "llvm/ADT/StringExtras.h"
24 #include "llvm/CodeGen/ISDOpcodes.h"
25 #include "llvm/CodeGen/RuntimeLibcalls.h"
26 #include "llvm/CodeGen/SelectionDAGNodes.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/IntrinsicsLoongArch.h"
29 #include "llvm/Support/CodeGen.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/ErrorHandling.h"
32 #include "llvm/Support/KnownBits.h"
33 #include "llvm/Support/MathExtras.h"
34 
35 using namespace llvm;
36 
37 #define DEBUG_TYPE "loongarch-isel-lowering"
38 
39 STATISTIC(NumTailCalls, "Number of tail calls");
40 
41 static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42                                   cl::desc("Trap on integer division by zero."),
43                                   cl::init(false));
44 
45 LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46                                                  const LoongArchSubtarget &STI)
47     : TargetLowering(TM), Subtarget(STI) {
48 
49   MVT GRLenVT = Subtarget.getGRLenVT();
50 
51   // Set up the register classes.
52 
53   addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54   if (Subtarget.hasBasicF())
55     addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56   if (Subtarget.hasBasicD())
57     addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58 
59   static const MVT::SimpleValueType LSXVTs[] = {
60       MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61   static const MVT::SimpleValueType LASXVTs[] = {
62       MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63 
64   if (Subtarget.hasExtLSX())
65     for (MVT VT : LSXVTs)
66       addRegisterClass(VT, &LoongArch::LSX128RegClass);
67 
68   if (Subtarget.hasExtLASX())
69     for (MVT VT : LASXVTs)
70       addRegisterClass(VT, &LoongArch::LASX256RegClass);
71 
72   // Set operations for LA32 and LA64.
73 
74   setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75                    MVT::i1, Promote);
76 
77   setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78   setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79   setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80   setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81   setOperationAction(ISD::ROTL, GRLenVT, Expand);
82   setOperationAction(ISD::CTPOP, GRLenVT, Expand);
83 
84   setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85                       ISD::JumpTable, ISD::GlobalTLSAddress},
86                      GRLenVT, Custom);
87 
88   setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
89 
90   setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91   setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92   setOperationAction(ISD::VASTART, MVT::Other, Custom);
93   setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
94 
95   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96   setOperationAction(ISD::TRAP, MVT::Other, Legal);
97 
98   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101 
102   // Expand bitreverse.i16 with native-width bitrev and shift for now, until
103   // we know whether sll or revb.2h is faster.
104   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105   setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
106 
107   // LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108   // the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109   // and i32 could still be byte-swapped relatively cheaply.
110   setOperationAction(ISD::BSWAP, MVT::i16, Custom);
111 
112   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113   setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114   setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116   setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
117 
118   setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119   setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
120 
121   // Set operations for LA64 only.
122 
123   if (Subtarget.is64Bit()) {
124     setOperationAction(ISD::SHL, MVT::i32, Custom);
125     setOperationAction(ISD::SRA, MVT::i32, Custom);
126     setOperationAction(ISD::SRL, MVT::i32, Custom);
127     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
128     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
129     setOperationAction(ISD::ROTR, MVT::i32, Custom);
130     setOperationAction(ISD::ROTL, MVT::i32, Custom);
131     setOperationAction(ISD::CTTZ, MVT::i32, Custom);
132     setOperationAction(ISD::CTLZ, MVT::i32, Custom);
133     setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
134     setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
135     setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
136     setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
137     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
138     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
139 
140     setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
141     setOperationAction(ISD::BSWAP, MVT::i32, Custom);
142   }
143 
144   // Set operations for LA32 only.
145 
146   if (!Subtarget.is64Bit()) {
147     setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
148     setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
149     setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
150     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
151     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
152 
153     // Set libcalls.
154     setLibcallName(RTLIB::MUL_I128, nullptr);
155     // The MULO libcall is not part of libgcc, only compiler-rt.
156     setLibcallName(RTLIB::MULO_I64, nullptr);
157   }
158 
159   // The MULO libcall is not part of libgcc, only compiler-rt.
160   setLibcallName(RTLIB::MULO_I128, nullptr);
161 
162   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
163 
164   static const ISD::CondCode FPCCToExpand[] = {
165       ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
166       ISD::SETGE,  ISD::SETNE,  ISD::SETGT};
167 
168   // Set operations for 'F' feature.
169 
170   if (Subtarget.hasBasicF()) {
171     setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
172 
173     setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
174     setOperationAction(ISD::BR_CC, MVT::f32, Expand);
175     setOperationAction(ISD::FMA, MVT::f32, Legal);
176     setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
177     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
178     setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
179     setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
180     setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
181     setOperationAction(ISD::FSIN, MVT::f32, Expand);
182     setOperationAction(ISD::FCOS, MVT::f32, Expand);
183     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
184     setOperationAction(ISD::FPOW, MVT::f32, Expand);
185     setOperationAction(ISD::FREM, MVT::f32, Expand);
186 
187     if (Subtarget.is64Bit())
188       setOperationAction(ISD::FRINT, MVT::f32, Legal);
189 
190     if (!Subtarget.hasBasicD()) {
191       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
192       if (Subtarget.is64Bit()) {
193         setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
194         setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
195       }
196     }
197   }
198 
199   // Set operations for 'D' feature.
200 
201   if (Subtarget.hasBasicD()) {
202     setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
204     setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
205 
206     setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
207     setOperationAction(ISD::BR_CC, MVT::f64, Expand);
208     setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
209     setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
210     setOperationAction(ISD::FMA, MVT::f64, Legal);
211     setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
212     setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
213     setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
214     setOperationAction(ISD::FSIN, MVT::f64, Expand);
215     setOperationAction(ISD::FCOS, MVT::f64, Expand);
216     setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
217     setOperationAction(ISD::FPOW, MVT::f64, Expand);
218     setOperationAction(ISD::FREM, MVT::f64, Expand);
219 
220     if (Subtarget.is64Bit())
221       setOperationAction(ISD::FRINT, MVT::f64, Legal);
222   }
223 
224   // Set operations for 'LSX' feature.
225 
226   if (Subtarget.hasExtLSX()) {
227     for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
228       // Expand all truncating stores and extending loads.
229       for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
230         setTruncStoreAction(VT, InnerVT, Expand);
231         setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
232         setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
233         setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
234       }
235       // By default everything must be expanded. Then we will selectively turn
236       // on ones that can be effectively codegen'd.
237       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
238         setOperationAction(Op, VT, Expand);
239     }
240 
241     for (MVT VT : LSXVTs) {
242       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
243       setOperationAction(ISD::BITCAST, VT, Legal);
244       setOperationAction(ISD::UNDEF, VT, Legal);
245 
246       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
247       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
248       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
249 
250       setOperationAction(ISD::SETCC, VT, Legal);
251       setOperationAction(ISD::VSELECT, VT, Legal);
252     }
253     for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
254       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
255       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
256       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
257                          Legal);
258       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
259                          VT, Legal);
260       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
261       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
262       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
263       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
264       setCondCodeAction(
265           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
266           Expand);
267     }
268     for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
269       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
270       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
271     }
272     for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
273       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
274       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
275       setOperationAction(ISD::FMA, VT, Legal);
276       setOperationAction(ISD::FSQRT, VT, Legal);
277       setOperationAction(ISD::FNEG, VT, Legal);
278       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
279                          ISD::SETUGE, ISD::SETUGT},
280                         VT, Expand);
281     }
282   }
283 
284   // Set operations for 'LASX' feature.
285 
286   if (Subtarget.hasExtLASX()) {
287     for (MVT VT : LASXVTs) {
288       setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
289       setOperationAction(ISD::BITCAST, VT, Legal);
290       setOperationAction(ISD::UNDEF, VT, Legal);
291 
292       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
293       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
294       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
295 
296       setOperationAction(ISD::SETCC, VT, Legal);
297       setOperationAction(ISD::VSELECT, VT, Legal);
298     }
299     for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
300       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
301       setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
302       setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
303                          Legal);
304       setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
305                          VT, Legal);
306       setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
307       setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
308       setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
309       setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
310       setCondCodeAction(
311           {ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
312           Expand);
313     }
314     for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
315       setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
316       setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
317     }
318     for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
319       setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
320       setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
321       setOperationAction(ISD::FMA, VT, Legal);
322       setOperationAction(ISD::FSQRT, VT, Legal);
323       setOperationAction(ISD::FNEG, VT, Legal);
324       setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
325                          ISD::SETUGE, ISD::SETUGT},
326                         VT, Expand);
327     }
328   }
329 
330   // Set DAG combine for LA32 and LA64.
331 
332   setTargetDAGCombine(ISD::AND);
333   setTargetDAGCombine(ISD::OR);
334   setTargetDAGCombine(ISD::SRL);
335 
336   // Set DAG combine for 'LSX' feature.
337 
338   if (Subtarget.hasExtLSX())
339     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
340 
341   // Compute derived properties from the register classes.
342   computeRegisterProperties(Subtarget.getRegisterInfo());
343 
344   setStackPointerRegisterToSaveRestore(LoongArch::R3);
345 
346   setBooleanContents(ZeroOrOneBooleanContent);
347   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
348 
349   setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
350 
351   setMinCmpXchgSizeInBits(32);
352 
353   // Function alignments.
354   setMinFunctionAlignment(Align(4));
355   // Set preferred alignments.
356   setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
357   setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
358   setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
359 }
360 
361 bool LoongArchTargetLowering::isOffsetFoldingLegal(
362     const GlobalAddressSDNode *GA) const {
363   // In order to maximise the opportunity for common subexpression elimination,
364   // keep a separate ADD node for the global address offset instead of folding
365   // it in the global address node. Later peephole optimisations may choose to
366   // fold it back in when profitable.
367   return false;
368 }
369 
370 SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
371                                                 SelectionDAG &DAG) const {
372   switch (Op.getOpcode()) {
373   case ISD::ATOMIC_FENCE:
374     return lowerATOMIC_FENCE(Op, DAG);
375   case ISD::EH_DWARF_CFA:
376     return lowerEH_DWARF_CFA(Op, DAG);
377   case ISD::GlobalAddress:
378     return lowerGlobalAddress(Op, DAG);
379   case ISD::GlobalTLSAddress:
380     return lowerGlobalTLSAddress(Op, DAG);
381   case ISD::INTRINSIC_WO_CHAIN:
382     return lowerINTRINSIC_WO_CHAIN(Op, DAG);
383   case ISD::INTRINSIC_W_CHAIN:
384     return lowerINTRINSIC_W_CHAIN(Op, DAG);
385   case ISD::INTRINSIC_VOID:
386     return lowerINTRINSIC_VOID(Op, DAG);
387   case ISD::BlockAddress:
388     return lowerBlockAddress(Op, DAG);
389   case ISD::JumpTable:
390     return lowerJumpTable(Op, DAG);
391   case ISD::SHL_PARTS:
392     return lowerShiftLeftParts(Op, DAG);
393   case ISD::SRA_PARTS:
394     return lowerShiftRightParts(Op, DAG, true);
395   case ISD::SRL_PARTS:
396     return lowerShiftRightParts(Op, DAG, false);
397   case ISD::ConstantPool:
398     return lowerConstantPool(Op, DAG);
399   case ISD::FP_TO_SINT:
400     return lowerFP_TO_SINT(Op, DAG);
401   case ISD::BITCAST:
402     return lowerBITCAST(Op, DAG);
403   case ISD::UINT_TO_FP:
404     return lowerUINT_TO_FP(Op, DAG);
405   case ISD::SINT_TO_FP:
406     return lowerSINT_TO_FP(Op, DAG);
407   case ISD::VASTART:
408     return lowerVASTART(Op, DAG);
409   case ISD::FRAMEADDR:
410     return lowerFRAMEADDR(Op, DAG);
411   case ISD::RETURNADDR:
412     return lowerRETURNADDR(Op, DAG);
413   case ISD::WRITE_REGISTER:
414     return lowerWRITE_REGISTER(Op, DAG);
415   case ISD::INSERT_VECTOR_ELT:
416     return lowerINSERT_VECTOR_ELT(Op, DAG);
417   case ISD::EXTRACT_VECTOR_ELT:
418     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
419   case ISD::BUILD_VECTOR:
420     return lowerBUILD_VECTOR(Op, DAG);
421   case ISD::VECTOR_SHUFFLE:
422     return lowerVECTOR_SHUFFLE(Op, DAG);
423   }
424   return SDValue();
425 }
426 
427 SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
428                                                      SelectionDAG &DAG) const {
429   // TODO: custom shuffle.
430   return SDValue();
431 }
432 
433 static bool isConstantOrUndef(const SDValue Op) {
434   if (Op->isUndef())
435     return true;
436   if (isa<ConstantSDNode>(Op))
437     return true;
438   if (isa<ConstantFPSDNode>(Op))
439     return true;
440   return false;
441 }
442 
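// Returns true if any operand of the given build_vector is a constant or an
// undef value.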
443 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
444   for (unsigned i = 0; i < Op->getNumOperands(); ++i)
445     if (isConstantOrUndef(Op->getOperand(i)))
446       return true;
447   return false;
448 }
449 
450 SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
451                                                    SelectionDAG &DAG) const {
452   BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
453   EVT ResTy = Op->getValueType(0);
454   SDLoc DL(Op);
455   APInt SplatValue, SplatUndef;
456   unsigned SplatBitSize;
457   bool HasAnyUndefs;
458   bool Is128Vec = ResTy.is128BitVector();
459   bool Is256Vec = ResTy.is256BitVector();
460 
461   if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
462       (!Subtarget.hasExtLASX() || !Is256Vec))
463     return SDValue();
464 
465   if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
466                             /*MinSplatBits=*/8) &&
467       SplatBitSize <= 64) {
468     // We can only cope with 8, 16, 32, or 64-bit elements.
469     if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
470         SplatBitSize != 64)
471       return SDValue();
472 
473     EVT ViaVecTy;
474 
475     switch (SplatBitSize) {
476     default:
477       return SDValue();
478     case 8:
479       ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
480       break;
481     case 16:
482       ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
483       break;
484     case 32:
485       ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
486       break;
487     case 64:
488       ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
489       break;
490     }
491 
492     // SelectionDAG::getConstant will promote SplatValue appropriately.
493     SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
494 
495     // Bitcast to the type we originally wanted.
496     if (ViaVecTy != ResTy)
497       Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
498 
499     return Result;
500   }
501 
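  // Non-constant splats are left untouched here; the assumption is that later
  // instruction selection matches them to a vector replication pattern.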
502   if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
503     return Op;
504 
505   if (!isConstantOrUndefBUILD_VECTOR(Node)) {
506     // Use INSERT_VECTOR_ELT operations rather than expand to stores.
507     // The resulting code is the same length as the expansion, but it doesn't
508     // use memory operations.
509     EVT ResTy = Node->getValueType(0);
510 
511     assert(ResTy.isVector());
512 
513     unsigned NumElts = ResTy.getVectorNumElements();
514     SDValue Vector = DAG.getUNDEF(ResTy);
515     for (unsigned i = 0; i < NumElts; ++i) {
516       Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
517                            Node->getOperand(i),
518                            DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
519     }
520     return Vector;
521   }
522 
523   return SDValue();
524 }
525 
526 SDValue
527 LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
528                                                  SelectionDAG &DAG) const {
529   EVT VecTy = Op->getOperand(0)->getValueType(0);
530   SDValue Idx = Op->getOperand(1);
531   EVT EltTy = VecTy.getVectorElementType();
532   unsigned NumElts = VecTy.getVectorNumElements();
533 
534   if (isa<ConstantSDNode>(Idx) &&
535       (EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
536        EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
537     return Op;
538 
539   return SDValue();
540 }
541 
542 SDValue
543 LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
544                                                 SelectionDAG &DAG) const {
545   if (isa<ConstantSDNode>(Op->getOperand(2)))
546     return Op;
547   return SDValue();
548 }
549 
550 SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
551                                                    SelectionDAG &DAG) const {
552   SDLoc DL(Op);
553   SyncScope::ID FenceSSID =
554       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
555 
556   // singlethread fences only synchronize with signal handlers on the same
557   // thread and thus only need to preserve instruction order, not actually
558   // enforce memory ordering.
559   if (FenceSSID == SyncScope::SingleThread)
560     // MEMBARRIER is a compiler barrier; it codegens to a no-op.
561     return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
562 
563   return Op;
564 }
565 
566 SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
567                                                      SelectionDAG &DAG) const {
568 
569   if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
570     DAG.getContext()->emitError(
571         "On LA64, only 64-bit registers can be written.");
572     return Op.getOperand(0);
573   }
574 
575   if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
576     DAG.getContext()->emitError(
577         "On LA32, only 32-bit registers can be written.");
578     return Op.getOperand(0);
579   }
580 
581   return Op;
582 }
583 
584 SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
585                                                 SelectionDAG &DAG) const {
586   if (!isa<ConstantSDNode>(Op.getOperand(0))) {
587     DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
588                                 "be a constant integer");
589     return SDValue();
590   }
591 
592   MachineFunction &MF = DAG.getMachineFunction();
593   MF.getFrameInfo().setFrameAddressIsTaken(true);
594   Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
595   EVT VT = Op.getValueType();
596   SDLoc DL(Op);
597   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
598   unsigned Depth = Op.getConstantOperandVal(0);
599   int GRLenInBytes = Subtarget.getGRLen() / 8;
600 
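  // Walk up Depth call frames. This assumes the standard prologue layout, in
  // which the caller's frame pointer is stored at fp - 2 * GRLenInBytes.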
601   while (Depth--) {
602     int Offset = -(GRLenInBytes * 2);
603     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
604                               DAG.getIntPtrConstant(Offset, DL));
605     FrameAddr =
606         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
607   }
608   return FrameAddr;
609 }
610 
611 SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
612                                                  SelectionDAG &DAG) const {
613   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
614     return SDValue();
615 
616   // Only lowering the return address for the current frame is supported.
617   if (Op.getConstantOperandVal(0) != 0) {
618     DAG.getContext()->emitError(
619         "return address can only be determined for the current frame");
620     return SDValue();
621   }
622 
623   MachineFunction &MF = DAG.getMachineFunction();
624   MF.getFrameInfo().setReturnAddressIsTaken(true);
625   MVT GRLenVT = Subtarget.getGRLenVT();
626 
627   // Return the value of the return address register, marking it an implicit
628   // live-in.
629   Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
630                               getRegClassFor(GRLenVT));
631   return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
632 }
633 
634 SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
635                                                    SelectionDAG &DAG) const {
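  // The DWARF canonical frame address is the incoming stack pointer, modeled
  // here as a fixed stack object at offset 0.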
636   MachineFunction &MF = DAG.getMachineFunction();
637   auto Size = Subtarget.getGRLen() / 8;
638   auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
639   return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
640 }
641 
642 SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
643                                               SelectionDAG &DAG) const {
644   MachineFunction &MF = DAG.getMachineFunction();
645   auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
646 
647   SDLoc DL(Op);
648   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
649                                  getPointerTy(MF.getDataLayout()));
650 
651   // vastart just stores the address of the VarArgsFrameIndex slot into the
652   // memory location argument.
653   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
654   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
655                       MachinePointerInfo(SV));
656 }
657 
658 SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
659                                                  SelectionDAG &DAG) const {
660   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
661          !Subtarget.hasBasicD() && "unexpected target features");
662 
663   SDLoc DL(Op);
664   SDValue Op0 = Op.getOperand(0);
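  // If the source value is known to be a small zero-extended integer, keep the
  // node so it can be selected directly; otherwise soften the conversion into
  // a library call below.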
665   if (Op0->getOpcode() == ISD::AND) {
666     auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
667     if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
668       return Op;
669   }
670 
671   if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
672       Op0.getConstantOperandVal(1) < UINT64_C(0X1F) &&
673       Op0.getConstantOperandVal(2) == UINT64_C(0))
674     return Op;
675 
676   if (Op0.getOpcode() == ISD::AssertZext &&
677       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
678     return Op;
679 
680   EVT OpVT = Op0.getValueType();
681   EVT RetVT = Op.getValueType();
682   RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
683   MakeLibCallOptions CallOptions;
684   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
685   SDValue Chain = SDValue();
686   SDValue Result;
687   std::tie(Result, Chain) =
688       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
689   return Result;
690 }
691 
692 SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
693                                                  SelectionDAG &DAG) const {
694   assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
695          !Subtarget.hasBasicD() && "unexpected target features");
696 
697   SDLoc DL(Op);
698   SDValue Op0 = Op.getOperand(0);
699 
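  // If the input is already sign-extended from i32 or a narrower type, keep
  // the node; otherwise soften the conversion into a library call below.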
700   if ((Op0.getOpcode() == ISD::AssertSext ||
701        Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
702       dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
703     return Op;
704 
705   EVT OpVT = Op0.getValueType();
706   EVT RetVT = Op.getValueType();
707   RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
708   MakeLibCallOptions CallOptions;
709   CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
710   SDValue Chain = SDValue();
711   SDValue Result;
712   std::tie(Result, Chain) =
713       makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
714   return Result;
715 }
716 
717 SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
718                                               SelectionDAG &DAG) const {
719 
720   SDLoc DL(Op);
721   SDValue Op0 = Op.getOperand(0);
722 
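  // On LA64 with basic F, an i32-to-f32 bitcast is done by any-extending the
  // integer to i64 and moving it directly from a GPR to an FPR.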
723   if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
724       Subtarget.is64Bit() && Subtarget.hasBasicF()) {
725     SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
726     return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
727   }
728   return Op;
729 }
730 
731 SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
732                                                  SelectionDAG &DAG) const {
733 
734   SDLoc DL(Op);
735 
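  // Without basic D, a result wider than 32 bits is computed by truncating in
  // a single-precision register and then moving the bits from the FPR into
  // the GPR.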
736   if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
737       !Subtarget.hasBasicD()) {
738     SDValue Dst =
739         DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
740     return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
741   }
742 
743   EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
744   SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
745   return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
746 }
747 
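// Helper overloads that create the target-specific node for each kind of
// address operand handled by getAddr() below.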
748 static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
749                              SelectionDAG &DAG, unsigned Flags) {
750   return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
751 }
752 
753 static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
754                              SelectionDAG &DAG, unsigned Flags) {
755   return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
756                                    Flags);
757 }
758 
759 static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
760                              SelectionDAG &DAG, unsigned Flags) {
761   return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
762                                    N->getOffset(), Flags);
763 }
764 
765 static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
766                              SelectionDAG &DAG, unsigned Flags) {
767   return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
768 }
769 
770 template <class NodeTy>
771 SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
772                                          CodeModel::Model M,
773                                          bool IsLocal) const {
774   SDLoc DL(N);
775   EVT Ty = getPointerTy(DAG.getDataLayout());
776   SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
777 
778   switch (M) {
779   default:
780     report_fatal_error("Unsupported code model");
781 
782   case CodeModel::Large: {
783     assert(Subtarget.is64Bit() && "Large code model requires LA64");
784 
785     // This is not actually used, but is necessary for successfully matching
786     // the PseudoLA_*_LARGE nodes.
787     SDValue Tmp = DAG.getConstant(0, DL, Ty);
788     if (IsLocal)
789       // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
790       // eventually becomes the desired 5-insn code sequence.
791       return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
792                                         Tmp, Addr),
793                      0);
794 
795     // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
796     // becomes the desired 5-insn code sequence.
797     return SDValue(
798         DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
799         0);
800   }
801 
802   case CodeModel::Small:
803   case CodeModel::Medium:
804     if (IsLocal)
805       // This generates the pattern (PseudoLA_PCREL sym), which expands to
806       // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
807       return SDValue(
808           DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
809 
810     // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
811     // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
812     return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
813                    0);
814   }
815 }
816 
817 SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
818                                                    SelectionDAG &DAG) const {
819   return getAddr(cast<BlockAddressSDNode>(Op), DAG,
820                  DAG.getTarget().getCodeModel());
821 }
822 
823 SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
824                                                 SelectionDAG &DAG) const {
825   return getAddr(cast<JumpTableSDNode>(Op), DAG,
826                  DAG.getTarget().getCodeModel());
827 }
828 
829 SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
830                                                    SelectionDAG &DAG) const {
831   return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
832                  DAG.getTarget().getCodeModel());
833 }
834 
835 SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
836                                                     SelectionDAG &DAG) const {
837   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
838   assert(N->getOffset() == 0 && "unexpected offset in global node");
839   auto CM = DAG.getTarget().getCodeModel();
840   const GlobalValue *GV = N->getGlobal();
841 
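  // A dso-local global variable may carry an explicit code model attribute;
  // if present, honor it instead of the target's code model.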
842   if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
843     if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
844       CM = *GCM;
845   }
846 
847   return getAddr(N, DAG, CM, GV->isDSOLocal());
848 }
849 
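// Materialize a TLS address whose offset can be resolved without a runtime
// call (initial-exec and local-exec models), then add the thread pointer $tp.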
850 SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
851                                                   SelectionDAG &DAG,
852                                                   unsigned Opc,
853                                                   bool Large) const {
854   SDLoc DL(N);
855   EVT Ty = getPointerTy(DAG.getDataLayout());
856   MVT GRLenVT = Subtarget.getGRLenVT();
857 
858   // This is not actually used, but is necessary for successfully matching the
859   // PseudoLA_*_LARGE nodes.
860   SDValue Tmp = DAG.getConstant(0, DL, Ty);
861   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
862   SDValue Offset = Large
863                        ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
864                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
865 
866   // Add the thread pointer.
867   return DAG.getNode(ISD::ADD, DL, Ty, Offset,
868                      DAG.getRegister(LoongArch::R2, GRLenVT));
869 }
870 
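// Materialize a TLS address via a call to __tls_get_addr, as used by the
// general-dynamic and local-dynamic models.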
871 SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
872                                                    SelectionDAG &DAG,
873                                                    unsigned Opc,
874                                                    bool Large) const {
875   SDLoc DL(N);
876   EVT Ty = getPointerTy(DAG.getDataLayout());
877   IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
878 
879   // This is not actually used, but is necessary for successfully matching the
880   // PseudoLA_*_LARGE nodes.
881   SDValue Tmp = DAG.getConstant(0, DL, Ty);
882 
883   // Use a PC-relative addressing mode to access the dynamic GOT address.
884   SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
885   SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
886                        : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
887 
888   // Prepare argument list to generate call.
889   ArgListTy Args;
890   ArgListEntry Entry;
891   Entry.Node = Load;
892   Entry.Ty = CallTy;
893   Args.push_back(Entry);
894 
895   // Setup call to __tls_get_addr.
896   TargetLowering::CallLoweringInfo CLI(DAG);
897   CLI.setDebugLoc(DL)
898       .setChain(DAG.getEntryNode())
899       .setLibCallee(CallingConv::C, CallTy,
900                     DAG.getExternalSymbol("__tls_get_addr", Ty),
901                     std::move(Args));
902 
903   return LowerCallTo(CLI).first;
904 }
905 
906 SDValue
907 LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
908                                                SelectionDAG &DAG) const {
909   if (DAG.getMachineFunction().getFunction().getCallingConv() ==
910       CallingConv::GHC)
911     report_fatal_error("In GHC calling convention TLS is not supported");
912 
913   bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
914   assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
915 
916   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
917   assert(N->getOffset() == 0 && "unexpected offset in global node");
918 
919   SDValue Addr;
920   switch (getTargetMachine().getTLSModel(N->getGlobal())) {
921   case TLSModel::GeneralDynamic:
922     // In this model, application code calls the dynamic linker function
923     // __tls_get_addr to locate TLS offsets into the dynamic thread vector at
924     // runtime.
925     Addr = getDynamicTLSAddr(N, DAG,
926                              Large ? LoongArch::PseudoLA_TLS_GD_LARGE
927                                    : LoongArch::PseudoLA_TLS_GD,
928                              Large);
929     break;
930   case TLSModel::LocalDynamic:
931     // Same as GeneralDynamic, except for assembly modifiers and relocation
932     // records.
933     Addr = getDynamicTLSAddr(N, DAG,
934                              Large ? LoongArch::PseudoLA_TLS_LD_LARGE
935                                    : LoongArch::PseudoLA_TLS_LD,
936                              Large);
937     break;
938   case TLSModel::InitialExec:
939     // This model uses the GOT to resolve TLS offsets.
940     Addr = getStaticTLSAddr(N, DAG,
941                             Large ? LoongArch::PseudoLA_TLS_IE_LARGE
942                                   : LoongArch::PseudoLA_TLS_IE,
943                             Large);
944     break;
945   case TLSModel::LocalExec:
946     // This model is used when static linking as the TLS offsets are resolved
947     // during program linking.
948     //
949     // This node doesn't need an extra argument for the large code model.
950     Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
951     break;
952   }
953 
954   return Addr;
955 }
956 
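// Check that the immediate operand at index ImmOp of the intrinsic fits in N
// bits (signed or unsigned); emit a diagnostic and return UNDEF if it does not.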
957 template <unsigned N>
958 static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
959                                     SelectionDAG &DAG, bool IsSigned = false) {
960   auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
961   // Check the ImmArg.
962   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
963       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
964     DAG.getContext()->emitError(Op->getOperationName(0) +
965                                 ": argument out of range.");
966     return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
967   }
968   return SDValue();
969 }
970 
971 SDValue
972 LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
973                                                  SelectionDAG &DAG) const {
974   SDLoc DL(Op);
975   switch (Op.getConstantOperandVal(0)) {
976   default:
977     return SDValue(); // Don't custom lower most intrinsics.
978   case Intrinsic::thread_pointer: {
979     EVT PtrVT = getPointerTy(DAG.getDataLayout());
980     return DAG.getRegister(LoongArch::R2, PtrVT);
981   }
982   case Intrinsic::loongarch_lsx_vpickve2gr_d:
983   case Intrinsic::loongarch_lsx_vpickve2gr_du:
984   case Intrinsic::loongarch_lsx_vreplvei_d:
985   case Intrinsic::loongarch_lasx_xvrepl128vei_d:
986     return checkIntrinsicImmArg<1>(Op, 2, DAG);
987   case Intrinsic::loongarch_lsx_vreplvei_w:
988   case Intrinsic::loongarch_lasx_xvrepl128vei_w:
989   case Intrinsic::loongarch_lasx_xvpickve2gr_d:
990   case Intrinsic::loongarch_lasx_xvpickve2gr_du:
991   case Intrinsic::loongarch_lasx_xvpickve_d:
992   case Intrinsic::loongarch_lasx_xvpickve_d_f:
993     return checkIntrinsicImmArg<2>(Op, 2, DAG);
994   case Intrinsic::loongarch_lasx_xvinsve0_d:
995     return checkIntrinsicImmArg<2>(Op, 3, DAG);
996   case Intrinsic::loongarch_lsx_vsat_b:
997   case Intrinsic::loongarch_lsx_vsat_bu:
998   case Intrinsic::loongarch_lsx_vrotri_b:
999   case Intrinsic::loongarch_lsx_vsllwil_h_b:
1000   case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1001   case Intrinsic::loongarch_lsx_vsrlri_b:
1002   case Intrinsic::loongarch_lsx_vsrari_b:
1003   case Intrinsic::loongarch_lsx_vreplvei_h:
1004   case Intrinsic::loongarch_lasx_xvsat_b:
1005   case Intrinsic::loongarch_lasx_xvsat_bu:
1006   case Intrinsic::loongarch_lasx_xvrotri_b:
1007   case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1008   case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1009   case Intrinsic::loongarch_lasx_xvsrlri_b:
1010   case Intrinsic::loongarch_lasx_xvsrari_b:
1011   case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1012   case Intrinsic::loongarch_lasx_xvpickve_w:
1013   case Intrinsic::loongarch_lasx_xvpickve_w_f:
1014     return checkIntrinsicImmArg<3>(Op, 2, DAG);
1015   case Intrinsic::loongarch_lasx_xvinsve0_w:
1016     return checkIntrinsicImmArg<3>(Op, 3, DAG);
1017   case Intrinsic::loongarch_lsx_vsat_h:
1018   case Intrinsic::loongarch_lsx_vsat_hu:
1019   case Intrinsic::loongarch_lsx_vrotri_h:
1020   case Intrinsic::loongarch_lsx_vsllwil_w_h:
1021   case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1022   case Intrinsic::loongarch_lsx_vsrlri_h:
1023   case Intrinsic::loongarch_lsx_vsrari_h:
1024   case Intrinsic::loongarch_lsx_vreplvei_b:
1025   case Intrinsic::loongarch_lasx_xvsat_h:
1026   case Intrinsic::loongarch_lasx_xvsat_hu:
1027   case Intrinsic::loongarch_lasx_xvrotri_h:
1028   case Intrinsic::loongarch_lasx_xvsllwil_w_h:
1029   case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
1030   case Intrinsic::loongarch_lasx_xvsrlri_h:
1031   case Intrinsic::loongarch_lasx_xvsrari_h:
1032   case Intrinsic::loongarch_lasx_xvrepl128vei_b:
1033     return checkIntrinsicImmArg<4>(Op, 2, DAG);
1034   case Intrinsic::loongarch_lsx_vsrlni_b_h:
1035   case Intrinsic::loongarch_lsx_vsrani_b_h:
1036   case Intrinsic::loongarch_lsx_vsrlrni_b_h:
1037   case Intrinsic::loongarch_lsx_vsrarni_b_h:
1038   case Intrinsic::loongarch_lsx_vssrlni_b_h:
1039   case Intrinsic::loongarch_lsx_vssrani_b_h:
1040   case Intrinsic::loongarch_lsx_vssrlni_bu_h:
1041   case Intrinsic::loongarch_lsx_vssrani_bu_h:
1042   case Intrinsic::loongarch_lsx_vssrlrni_b_h:
1043   case Intrinsic::loongarch_lsx_vssrarni_b_h:
1044   case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
1045   case Intrinsic::loongarch_lsx_vssrarni_bu_h:
1046   case Intrinsic::loongarch_lasx_xvsrlni_b_h:
1047   case Intrinsic::loongarch_lasx_xvsrani_b_h:
1048   case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
1049   case Intrinsic::loongarch_lasx_xvsrarni_b_h:
1050   case Intrinsic::loongarch_lasx_xvssrlni_b_h:
1051   case Intrinsic::loongarch_lasx_xvssrani_b_h:
1052   case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
1053   case Intrinsic::loongarch_lasx_xvssrani_bu_h:
1054   case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
1055   case Intrinsic::loongarch_lasx_xvssrarni_b_h:
1056   case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
1057   case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
1058     return checkIntrinsicImmArg<4>(Op, 3, DAG);
1059   case Intrinsic::loongarch_lsx_vsat_w:
1060   case Intrinsic::loongarch_lsx_vsat_wu:
1061   case Intrinsic::loongarch_lsx_vrotri_w:
1062   case Intrinsic::loongarch_lsx_vsllwil_d_w:
1063   case Intrinsic::loongarch_lsx_vsllwil_du_wu:
1064   case Intrinsic::loongarch_lsx_vsrlri_w:
1065   case Intrinsic::loongarch_lsx_vsrari_w:
1066   case Intrinsic::loongarch_lsx_vslei_bu:
1067   case Intrinsic::loongarch_lsx_vslei_hu:
1068   case Intrinsic::loongarch_lsx_vslei_wu:
1069   case Intrinsic::loongarch_lsx_vslei_du:
1070   case Intrinsic::loongarch_lsx_vslti_bu:
1071   case Intrinsic::loongarch_lsx_vslti_hu:
1072   case Intrinsic::loongarch_lsx_vslti_wu:
1073   case Intrinsic::loongarch_lsx_vslti_du:
1074   case Intrinsic::loongarch_lsx_vbsll_v:
1075   case Intrinsic::loongarch_lsx_vbsrl_v:
1076   case Intrinsic::loongarch_lasx_xvsat_w:
1077   case Intrinsic::loongarch_lasx_xvsat_wu:
1078   case Intrinsic::loongarch_lasx_xvrotri_w:
1079   case Intrinsic::loongarch_lasx_xvsllwil_d_w:
1080   case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
1081   case Intrinsic::loongarch_lasx_xvsrlri_w:
1082   case Intrinsic::loongarch_lasx_xvsrari_w:
1083   case Intrinsic::loongarch_lasx_xvslei_bu:
1084   case Intrinsic::loongarch_lasx_xvslei_hu:
1085   case Intrinsic::loongarch_lasx_xvslei_wu:
1086   case Intrinsic::loongarch_lasx_xvslei_du:
1087   case Intrinsic::loongarch_lasx_xvslti_bu:
1088   case Intrinsic::loongarch_lasx_xvslti_hu:
1089   case Intrinsic::loongarch_lasx_xvslti_wu:
1090   case Intrinsic::loongarch_lasx_xvslti_du:
1091   case Intrinsic::loongarch_lasx_xvbsll_v:
1092   case Intrinsic::loongarch_lasx_xvbsrl_v:
1093     return checkIntrinsicImmArg<5>(Op, 2, DAG);
1094   case Intrinsic::loongarch_lsx_vseqi_b:
1095   case Intrinsic::loongarch_lsx_vseqi_h:
1096   case Intrinsic::loongarch_lsx_vseqi_w:
1097   case Intrinsic::loongarch_lsx_vseqi_d:
1098   case Intrinsic::loongarch_lsx_vslei_b:
1099   case Intrinsic::loongarch_lsx_vslei_h:
1100   case Intrinsic::loongarch_lsx_vslei_w:
1101   case Intrinsic::loongarch_lsx_vslei_d:
1102   case Intrinsic::loongarch_lsx_vslti_b:
1103   case Intrinsic::loongarch_lsx_vslti_h:
1104   case Intrinsic::loongarch_lsx_vslti_w:
1105   case Intrinsic::loongarch_lsx_vslti_d:
1106   case Intrinsic::loongarch_lasx_xvseqi_b:
1107   case Intrinsic::loongarch_lasx_xvseqi_h:
1108   case Intrinsic::loongarch_lasx_xvseqi_w:
1109   case Intrinsic::loongarch_lasx_xvseqi_d:
1110   case Intrinsic::loongarch_lasx_xvslei_b:
1111   case Intrinsic::loongarch_lasx_xvslei_h:
1112   case Intrinsic::loongarch_lasx_xvslei_w:
1113   case Intrinsic::loongarch_lasx_xvslei_d:
1114   case Intrinsic::loongarch_lasx_xvslti_b:
1115   case Intrinsic::loongarch_lasx_xvslti_h:
1116   case Intrinsic::loongarch_lasx_xvslti_w:
1117   case Intrinsic::loongarch_lasx_xvslti_d:
1118     return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
1119   case Intrinsic::loongarch_lsx_vsrlni_h_w:
1120   case Intrinsic::loongarch_lsx_vsrani_h_w:
1121   case Intrinsic::loongarch_lsx_vsrlrni_h_w:
1122   case Intrinsic::loongarch_lsx_vsrarni_h_w:
1123   case Intrinsic::loongarch_lsx_vssrlni_h_w:
1124   case Intrinsic::loongarch_lsx_vssrani_h_w:
1125   case Intrinsic::loongarch_lsx_vssrlni_hu_w:
1126   case Intrinsic::loongarch_lsx_vssrani_hu_w:
1127   case Intrinsic::loongarch_lsx_vssrlrni_h_w:
1128   case Intrinsic::loongarch_lsx_vssrarni_h_w:
1129   case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
1130   case Intrinsic::loongarch_lsx_vssrarni_hu_w:
1131   case Intrinsic::loongarch_lsx_vfrstpi_b:
1132   case Intrinsic::loongarch_lsx_vfrstpi_h:
1133   case Intrinsic::loongarch_lasx_xvsrlni_h_w:
1134   case Intrinsic::loongarch_lasx_xvsrani_h_w:
1135   case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
1136   case Intrinsic::loongarch_lasx_xvsrarni_h_w:
1137   case Intrinsic::loongarch_lasx_xvssrlni_h_w:
1138   case Intrinsic::loongarch_lasx_xvssrani_h_w:
1139   case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
1140   case Intrinsic::loongarch_lasx_xvssrani_hu_w:
1141   case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
1142   case Intrinsic::loongarch_lasx_xvssrarni_h_w:
1143   case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
1144   case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
1145   case Intrinsic::loongarch_lasx_xvfrstpi_b:
1146   case Intrinsic::loongarch_lasx_xvfrstpi_h:
1147     return checkIntrinsicImmArg<5>(Op, 3, DAG);
1148   case Intrinsic::loongarch_lsx_vsat_d:
1149   case Intrinsic::loongarch_lsx_vsat_du:
1150   case Intrinsic::loongarch_lsx_vrotri_d:
1151   case Intrinsic::loongarch_lsx_vsrlri_d:
1152   case Intrinsic::loongarch_lsx_vsrari_d:
1153   case Intrinsic::loongarch_lasx_xvsat_d:
1154   case Intrinsic::loongarch_lasx_xvsat_du:
1155   case Intrinsic::loongarch_lasx_xvrotri_d:
1156   case Intrinsic::loongarch_lasx_xvsrlri_d:
1157   case Intrinsic::loongarch_lasx_xvsrari_d:
1158     return checkIntrinsicImmArg<6>(Op, 2, DAG);
1159   case Intrinsic::loongarch_lsx_vsrlni_w_d:
1160   case Intrinsic::loongarch_lsx_vsrani_w_d:
1161   case Intrinsic::loongarch_lsx_vsrlrni_w_d:
1162   case Intrinsic::loongarch_lsx_vsrarni_w_d:
1163   case Intrinsic::loongarch_lsx_vssrlni_w_d:
1164   case Intrinsic::loongarch_lsx_vssrani_w_d:
1165   case Intrinsic::loongarch_lsx_vssrlni_wu_d:
1166   case Intrinsic::loongarch_lsx_vssrani_wu_d:
1167   case Intrinsic::loongarch_lsx_vssrlrni_w_d:
1168   case Intrinsic::loongarch_lsx_vssrarni_w_d:
1169   case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
1170   case Intrinsic::loongarch_lsx_vssrarni_wu_d:
1171   case Intrinsic::loongarch_lasx_xvsrlni_w_d:
1172   case Intrinsic::loongarch_lasx_xvsrani_w_d:
1173   case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
1174   case Intrinsic::loongarch_lasx_xvsrarni_w_d:
1175   case Intrinsic::loongarch_lasx_xvssrlni_w_d:
1176   case Intrinsic::loongarch_lasx_xvssrani_w_d:
1177   case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
1178   case Intrinsic::loongarch_lasx_xvssrani_wu_d:
1179   case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
1180   case Intrinsic::loongarch_lasx_xvssrarni_w_d:
1181   case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
1182   case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
1183     return checkIntrinsicImmArg<6>(Op, 3, DAG);
1184   case Intrinsic::loongarch_lsx_vsrlni_d_q:
1185   case Intrinsic::loongarch_lsx_vsrani_d_q:
1186   case Intrinsic::loongarch_lsx_vsrlrni_d_q:
1187   case Intrinsic::loongarch_lsx_vsrarni_d_q:
1188   case Intrinsic::loongarch_lsx_vssrlni_d_q:
1189   case Intrinsic::loongarch_lsx_vssrani_d_q:
1190   case Intrinsic::loongarch_lsx_vssrlni_du_q:
1191   case Intrinsic::loongarch_lsx_vssrani_du_q:
1192   case Intrinsic::loongarch_lsx_vssrlrni_d_q:
1193   case Intrinsic::loongarch_lsx_vssrarni_d_q:
1194   case Intrinsic::loongarch_lsx_vssrlrni_du_q:
1195   case Intrinsic::loongarch_lsx_vssrarni_du_q:
1196   case Intrinsic::loongarch_lasx_xvsrlni_d_q:
1197   case Intrinsic::loongarch_lasx_xvsrani_d_q:
1198   case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
1199   case Intrinsic::loongarch_lasx_xvsrarni_d_q:
1200   case Intrinsic::loongarch_lasx_xvssrlni_d_q:
1201   case Intrinsic::loongarch_lasx_xvssrani_d_q:
1202   case Intrinsic::loongarch_lasx_xvssrlni_du_q:
1203   case Intrinsic::loongarch_lasx_xvssrani_du_q:
1204   case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
1205   case Intrinsic::loongarch_lasx_xvssrarni_d_q:
1206   case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
1207   case Intrinsic::loongarch_lasx_xvssrarni_du_q:
1208     return checkIntrinsicImmArg<7>(Op, 3, DAG);
1209   case Intrinsic::loongarch_lsx_vnori_b:
1210   case Intrinsic::loongarch_lsx_vshuf4i_b:
1211   case Intrinsic::loongarch_lsx_vshuf4i_h:
1212   case Intrinsic::loongarch_lsx_vshuf4i_w:
1213   case Intrinsic::loongarch_lasx_xvnori_b:
1214   case Intrinsic::loongarch_lasx_xvshuf4i_b:
1215   case Intrinsic::loongarch_lasx_xvshuf4i_h:
1216   case Intrinsic::loongarch_lasx_xvshuf4i_w:
1217   case Intrinsic::loongarch_lasx_xvpermi_d:
1218     return checkIntrinsicImmArg<8>(Op, 2, DAG);
1219   case Intrinsic::loongarch_lsx_vshuf4i_d:
1220   case Intrinsic::loongarch_lsx_vpermi_w:
1221   case Intrinsic::loongarch_lsx_vbitseli_b:
1222   case Intrinsic::loongarch_lsx_vextrins_b:
1223   case Intrinsic::loongarch_lsx_vextrins_h:
1224   case Intrinsic::loongarch_lsx_vextrins_w:
1225   case Intrinsic::loongarch_lsx_vextrins_d:
1226   case Intrinsic::loongarch_lasx_xvshuf4i_d:
1227   case Intrinsic::loongarch_lasx_xvpermi_w:
1228   case Intrinsic::loongarch_lasx_xvpermi_q:
1229   case Intrinsic::loongarch_lasx_xvbitseli_b:
1230   case Intrinsic::loongarch_lasx_xvextrins_b:
1231   case Intrinsic::loongarch_lasx_xvextrins_h:
1232   case Intrinsic::loongarch_lasx_xvextrins_w:
1233   case Intrinsic::loongarch_lasx_xvextrins_d:
1234     return checkIntrinsicImmArg<8>(Op, 3, DAG);
1235   case Intrinsic::loongarch_lsx_vrepli_b:
1236   case Intrinsic::loongarch_lsx_vrepli_h:
1237   case Intrinsic::loongarch_lsx_vrepli_w:
1238   case Intrinsic::loongarch_lsx_vrepli_d:
1239   case Intrinsic::loongarch_lasx_xvrepli_b:
1240   case Intrinsic::loongarch_lasx_xvrepli_h:
1241   case Intrinsic::loongarch_lasx_xvrepli_w:
1242   case Intrinsic::loongarch_lasx_xvrepli_d:
1243     return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
1244   case Intrinsic::loongarch_lsx_vldi:
1245   case Intrinsic::loongarch_lasx_xvldi:
1246     return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
1247   }
1248 }
1249 
1250 // Helper function that emits an error message for intrinsics with a chain and
1251 // returns the merged values of an UNDEF and the chain.
1252 static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
1253                                                   StringRef ErrorMsg,
1254                                                   SelectionDAG &DAG) {
1255   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1256   return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
1257                             SDLoc(Op));
1258 }
1259 
1260 SDValue
1261 LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
1262                                                 SelectionDAG &DAG) const {
1263   SDLoc DL(Op);
1264   MVT GRLenVT = Subtarget.getGRLenVT();
1265   EVT VT = Op.getValueType();
1266   SDValue Chain = Op.getOperand(0);
1267   const StringRef ErrorMsgOOR = "argument out of range";
1268   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1269   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1270 
1271   switch (Op.getConstantOperandVal(1)) {
1272   default:
1273     return Op;
1274   case Intrinsic::loongarch_crc_w_b_w:
1275   case Intrinsic::loongarch_crc_w_h_w:
1276   case Intrinsic::loongarch_crc_w_w_w:
1277   case Intrinsic::loongarch_crc_w_d_w:
1278   case Intrinsic::loongarch_crcc_w_b_w:
1279   case Intrinsic::loongarch_crcc_w_h_w:
1280   case Intrinsic::loongarch_crcc_w_w_w:
1281   case Intrinsic::loongarch_crcc_w_d_w:
1282     return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
1283   case Intrinsic::loongarch_csrrd_w:
1284   case Intrinsic::loongarch_csrrd_d: {
1285     unsigned Imm = Op.getConstantOperandVal(2);
1286     return !isUInt<14>(Imm)
1287                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1288                : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
1289                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1290   }
1291   case Intrinsic::loongarch_csrwr_w:
1292   case Intrinsic::loongarch_csrwr_d: {
1293     unsigned Imm = Op.getConstantOperandVal(3);
1294     return !isUInt<14>(Imm)
1295                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1296                : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
1297                              {Chain, Op.getOperand(2),
1298                               DAG.getConstant(Imm, DL, GRLenVT)});
1299   }
1300   case Intrinsic::loongarch_csrxchg_w:
1301   case Intrinsic::loongarch_csrxchg_d: {
1302     unsigned Imm = Op.getConstantOperandVal(4);
1303     return !isUInt<14>(Imm)
1304                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1305                : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
1306                              {Chain, Op.getOperand(2), Op.getOperand(3),
1307                               DAG.getConstant(Imm, DL, GRLenVT)});
1308   }
1309   case Intrinsic::loongarch_iocsrrd_d: {
1310     return DAG.getNode(
1311         LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
1312         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
1313   }
1314 #define IOCSRRD_CASE(NAME, NODE)                                               \
1315   case Intrinsic::loongarch_##NAME: {                                          \
1316     return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other},          \
1317                        {Chain, Op.getOperand(2)});                             \
1318   }
1319     IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
1320     IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
1321     IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
1322 #undef IOCSRRD_CASE
1323   case Intrinsic::loongarch_cpucfg: {
1324     return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
1325                        {Chain, Op.getOperand(2)});
1326   }
1327   case Intrinsic::loongarch_lddir_d: {
1328     unsigned Imm = Op.getConstantOperandVal(3);
1329     return !isUInt<8>(Imm)
1330                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1331                : Op;
1332   }
1333   case Intrinsic::loongarch_movfcsr2gr: {
1334     if (!Subtarget.hasBasicF())
1335       return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
1336     unsigned Imm = Op.getConstantOperandVal(2);
1337     return !isUInt<2>(Imm)
1338                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1339                : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
1340                              {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1341   }
1342   case Intrinsic::loongarch_lsx_vld:
1343   case Intrinsic::loongarch_lsx_vldrepl_b:
1344   case Intrinsic::loongarch_lasx_xvld:
1345   case Intrinsic::loongarch_lasx_xvldrepl_b:
1346     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1347                ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
1348                : SDValue();
1349   case Intrinsic::loongarch_lsx_vldrepl_h:
1350   case Intrinsic::loongarch_lasx_xvldrepl_h:
1351     return !isShiftedInt<11, 1>(
1352                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1353                ? emitIntrinsicWithChainErrorMessage(
1354                      Op, "argument out of range or not a multiple of 2", DAG)
1355                : SDValue();
1356   case Intrinsic::loongarch_lsx_vldrepl_w:
1357   case Intrinsic::loongarch_lasx_xvldrepl_w:
1358     return !isShiftedInt<10, 2>(
1359                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1360                ? emitIntrinsicWithChainErrorMessage(
1361                      Op, "argument out of range or not a multiple of 4", DAG)
1362                : SDValue();
1363   case Intrinsic::loongarch_lsx_vldrepl_d:
1364   case Intrinsic::loongarch_lasx_xvldrepl_d:
1365     return !isShiftedInt<9, 3>(
1366                cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
1367                ? emitIntrinsicWithChainErrorMessage(
1368                      Op, "argument out of range or not a multiple of 8", DAG)
1369                : SDValue();
1370   }
1371 }
1372 
1373 // Helper function that emits an error message for intrinsics with a void return
1374 // value and returns the chain.
1375 static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
1376                                          SelectionDAG &DAG) {
1377 
1378   DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
1379   return Op.getOperand(0);
1380 }
1381 
1382 SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
1383                                                      SelectionDAG &DAG) const {
1384   SDLoc DL(Op);
1385   MVT GRLenVT = Subtarget.getGRLenVT();
1386   SDValue Chain = Op.getOperand(0);
1387   uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
1388   SDValue Op2 = Op.getOperand(2);
1389   const StringRef ErrorMsgOOR = "argument out of range";
1390   const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1391   const StringRef ErrorMsgReqLA32 = "requires loongarch32";
1392   const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1393 
1394   switch (IntrinsicEnum) {
1395   default:
1396     // TODO: Add more Intrinsics.
1397     return SDValue();
1398   case Intrinsic::loongarch_cacop_d:
1399   case Intrinsic::loongarch_cacop_w: {
1400     if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
1401       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
1402     if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
1403       return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
1404     // call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
1405     unsigned Imm1 = Op2->getAsZExtVal();
1406     int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
1407     if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
1408       return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
1409     return Op;
1410   }
1411   case Intrinsic::loongarch_dbar: {
1412     unsigned Imm = Op2->getAsZExtVal();
1413     return !isUInt<15>(Imm)
1414                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1415                : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
1416                              DAG.getConstant(Imm, DL, GRLenVT));
1417   }
1418   case Intrinsic::loongarch_ibar: {
1419     unsigned Imm = Op2->getAsZExtVal();
1420     return !isUInt<15>(Imm)
1421                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1422                : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
1423                              DAG.getConstant(Imm, DL, GRLenVT));
1424   }
1425   case Intrinsic::loongarch_break: {
1426     unsigned Imm = Op2->getAsZExtVal();
1427     return !isUInt<15>(Imm)
1428                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1429                : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
1430                              DAG.getConstant(Imm, DL, GRLenVT));
1431   }
1432   case Intrinsic::loongarch_movgr2fcsr: {
1433     if (!Subtarget.hasBasicF())
1434       return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
1435     unsigned Imm = Op2->getAsZExtVal();
1436     return !isUInt<2>(Imm)
1437                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1438                : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
1439                              DAG.getConstant(Imm, DL, GRLenVT),
1440                              DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
1441                                          Op.getOperand(3)));
1442   }
1443   case Intrinsic::loongarch_syscall: {
1444     unsigned Imm = Op2->getAsZExtVal();
1445     return !isUInt<15>(Imm)
1446                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1447                : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
1448                              DAG.getConstant(Imm, DL, GRLenVT));
1449   }
1450 #define IOCSRWR_CASE(NAME, NODE)                                               \
1451   case Intrinsic::loongarch_##NAME: {                                          \
1452     SDValue Op3 = Op.getOperand(3);                                            \
1453     return Subtarget.is64Bit()                                                 \
1454                ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain,        \
1455                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),  \
1456                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3))  \
1457                : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2,   \
1458                              Op3);                                             \
1459   }
1460     IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
1461     IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
1462     IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
1463 #undef IOCSRWR_CASE
1464   case Intrinsic::loongarch_iocsrwr_d: {
1465     return !Subtarget.is64Bit()
1466                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1467                : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
1468                              Op2,
1469                              DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1470                                          Op.getOperand(3)));
1471   }
1472 #define ASRT_LE_GT_CASE(NAME)                                                  \
1473   case Intrinsic::loongarch_##NAME: {                                          \
1474     return !Subtarget.is64Bit()                                                \
1475                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)           \
1476                : Op;                                                           \
1477   }
1478     ASRT_LE_GT_CASE(asrtle_d)
1479     ASRT_LE_GT_CASE(asrtgt_d)
1480 #undef ASRT_LE_GT_CASE
1481   case Intrinsic::loongarch_ldpte_d: {
1482     unsigned Imm = Op.getConstantOperandVal(3);
1483     return !Subtarget.is64Bit()
1484                ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
1485            : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1486                              : Op;
1487   }
1488   case Intrinsic::loongarch_lsx_vst:
1489   case Intrinsic::loongarch_lasx_xvst:
1490     return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
1491                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1492                : SDValue();
1493   case Intrinsic::loongarch_lasx_xvstelm_b:
1494     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1495             !isUInt<5>(Op.getConstantOperandVal(5)))
1496                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1497                : SDValue();
1498   case Intrinsic::loongarch_lsx_vstelm_b:
1499     return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1500             !isUInt<4>(Op.getConstantOperandVal(5)))
1501                ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
1502                : SDValue();
1503   case Intrinsic::loongarch_lasx_xvstelm_h:
1504     return (!isShiftedInt<8, 1>(
1505                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1506             !isUInt<4>(Op.getConstantOperandVal(5)))
1507                ? emitIntrinsicErrorMessage(
1508                      Op, "argument out of range or not a multiple of 2", DAG)
1509                : SDValue();
1510   case Intrinsic::loongarch_lsx_vstelm_h:
1511     return (!isShiftedInt<8, 1>(
1512                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1513             !isUInt<3>(Op.getConstantOperandVal(5)))
1514                ? emitIntrinsicErrorMessage(
1515                      Op, "argument out of range or not a multiple of 2", DAG)
1516                : SDValue();
1517   case Intrinsic::loongarch_lasx_xvstelm_w:
1518     return (!isShiftedInt<8, 2>(
1519                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1520             !isUInt<3>(Op.getConstantOperandVal(5)))
1521                ? emitIntrinsicErrorMessage(
1522                      Op, "argument out of range or not a multiple of 4", DAG)
1523                : SDValue();
1524   case Intrinsic::loongarch_lsx_vstelm_w:
1525     return (!isShiftedInt<8, 2>(
1526                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1527             !isUInt<2>(Op.getConstantOperandVal(5)))
1528                ? emitIntrinsicErrorMessage(
1529                      Op, "argument out of range or not a multiple of 4", DAG)
1530                : SDValue();
1531   case Intrinsic::loongarch_lasx_xvstelm_d:
1532     return (!isShiftedInt<8, 3>(
1533                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1534             !isUInt<2>(Op.getConstantOperandVal(5)))
1535                ? emitIntrinsicErrorMessage(
1536                      Op, "argument out of range or not a multiple of 8", DAG)
1537                : SDValue();
1538   case Intrinsic::loongarch_lsx_vstelm_d:
1539     return (!isShiftedInt<8, 3>(
1540                 cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
1541             !isUInt<1>(Op.getConstantOperandVal(5)))
1542                ? emitIntrinsicErrorMessage(
1543                      Op, "argument out of range or not a multiple of 8", DAG)
1544                : SDValue();
1545   }
1546 }
1547 
1548 SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
1549                                                      SelectionDAG &DAG) const {
1550   SDLoc DL(Op);
1551   SDValue Lo = Op.getOperand(0);
1552   SDValue Hi = Op.getOperand(1);
1553   SDValue Shamt = Op.getOperand(2);
1554   EVT VT = Lo.getValueType();
1555 
1556   // if Shamt-GRLen < 0: // Shamt < GRLen
1557   //   Lo = Lo << Shamt
1558   //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
1559   // else:
1560   //   Lo = 0
1561   //   Hi = Lo << (Shamt-GRLen)
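  //
  // For example, with GRLen = 32, Shamt = 3 takes the first branch:
  //   Lo = Lo << 3,  Hi = (Hi << 3) | ((Lo >>u 1) >>u 28) = (Hi << 3) | (Lo >>u 29)
  // while Shamt = 40 takes the second: Lo = 0, Hi = Lo << 8.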
1562 
1563   SDValue Zero = DAG.getConstant(0, DL, VT);
1564   SDValue One = DAG.getConstant(1, DL, VT);
1565   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1566   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1567   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1568   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1569 
1570   SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1571   SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1572   SDValue ShiftRightLo =
1573       DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
1574   SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1575   SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1576   SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
1577 
1578   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1579 
1580   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1581   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1582 
1583   SDValue Parts[2] = {Lo, Hi};
1584   return DAG.getMergeValues(Parts, DL);
1585 }
1586 
1587 SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
1588                                                       SelectionDAG &DAG,
1589                                                       bool IsSRA) const {
1590   SDLoc DL(Op);
1591   SDValue Lo = Op.getOperand(0);
1592   SDValue Hi = Op.getOperand(1);
1593   SDValue Shamt = Op.getOperand(2);
1594   EVT VT = Lo.getValueType();
1595 
1596   // SRA expansion:
1597   //   if Shamt-GRLen < 0: // Shamt < GRLen
1598   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1599   //     Hi = Hi >>s Shamt
1600   //   else:
1601   //     Lo = Hi >>s (Shamt-GRLen);
1602   //     Hi = Hi >>s (GRLen-1)
1603   //
1604   // SRL expansion:
1605   //   if Shamt-GRLen < 0: // Shamt < GRLen
1606   //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
1607   //     Hi = Hi >>u Shamt
1608   //   else:
1609   //     Lo = Hi >>u (Shamt-GRLen);
1610   //     Hi = 0;
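  //
  // For example, with GRLen = 32 and Shamt = 3, both expansions compute
  //   Lo = (Lo >>u 3) | ((Hi << 1) << (3 ^ 31)) = (Lo >>u 3) | (Hi << 29),
  // and Hi becomes Hi >>s 3 for SRA or Hi >>u 3 for SRL.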
1611 
1612   unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1613 
1614   SDValue Zero = DAG.getConstant(0, DL, VT);
1615   SDValue One = DAG.getConstant(1, DL, VT);
1616   SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
1617   SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
1618   SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
1619   SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
1620 
1621   SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1622   SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1623   SDValue ShiftLeftHi =
1624       DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
1625   SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1626   SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1627   SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
1628   SDValue HiFalse =
1629       IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
1630 
1631   SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
1632 
1633   Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1634   Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1635 
1636   SDValue Parts[2] = {Lo, Hi};
1637   return DAG.getMergeValues(Parts, DL);
1638 }
1639 
1640 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1641 // form of the given Opcode.
1642 static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
1643   switch (Opcode) {
1644   default:
1645     llvm_unreachable("Unexpected opcode");
1646   case ISD::SHL:
1647     return LoongArchISD::SLL_W;
1648   case ISD::SRA:
1649     return LoongArchISD::SRA_W;
1650   case ISD::SRL:
1651     return LoongArchISD::SRL_W;
1652   case ISD::ROTR:
1653     return LoongArchISD::ROTR_W;
1654   case ISD::ROTL:
1655     return LoongArchISD::ROTL_W;
1656   case ISD::CTTZ:
1657     return LoongArchISD::CTZ_W;
1658   case ISD::CTLZ:
1659     return LoongArchISD::CLZ_W;
1660   }
1661 }
1662 
1663 // Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
1664 // node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
1665 // otherwise be promoted to i64, making it difficult to select the
1666 // SLL_W/.../*W node later on, because the fact that the operation was
1667 // originally of type i8/i16/i32 is lost.
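// For example, on LA64 an i32 (shl x, y) is lowered here to
// (trunc i32 (SLL_W (any_ext i64 x), (any_ext i64 y))), keeping the original
// 32-bit semantics visible to instruction selection.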
1668 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
1669                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
1670   SDLoc DL(N);
1671   LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
1672   SDValue NewOp0, NewRes;
1673 
1674   switch (NumOp) {
1675   default:
1676     llvm_unreachable("Unexpected NumOp");
1677   case 1: {
1678     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1679     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
1680     break;
1681   }
1682   case 2: {
1683     NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1684     SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1685     NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1686     break;
1687   }
1688     // TODO: Handle more NumOp.
1689   }
1690 
1691   // ReplaceNodeResults requires we maintain the same type for the return
1692   // value.
1693   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1694 }
1695 
1696 // Helper function that emits an error message for intrinsics with/without a
1697 // chain and returns an UNDEF, or an UNDEF and the chain, as the results.
1698 static void emitErrorAndReplaceIntrinsicResults(
1699     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
1700     StringRef ErrorMsg, bool WithChain = true) {
1701   DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
1702   Results.push_back(DAG.getUNDEF(N->getValueType(0)));
1703   if (!WithChain)
1704     return;
1705   Results.push_back(N->getOperand(0));
1706 }
1707 
1708 template <unsigned N>
1709 static void
1710 replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
1711                          SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
1712                          unsigned ResOp) {
1713   const StringRef ErrorMsgOOR = "argument out of range";
1714   unsigned Imm = Node->getConstantOperandVal(2);
1715   if (!isUInt<N>(Imm)) {
1716     emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
1717                                         /*WithChain=*/false);
1718     return;
1719   }
1720   SDLoc DL(Node);
1721   SDValue Vec = Node->getOperand(1);
1722 
1723   SDValue PickElt =
1724       DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
1725                   DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
1726                   DAG.getValueType(Vec.getValueType().getVectorElementType()));
1727   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
1728                                 PickElt.getValue(0)));
1729 }
1730 
1731 static void replaceVecCondBranchResults(SDNode *N,
1732                                         SmallVectorImpl<SDValue> &Results,
1733                                         SelectionDAG &DAG,
1734                                         const LoongArchSubtarget &Subtarget,
1735                                         unsigned ResOp) {
1736   SDLoc DL(N);
1737   SDValue Vec = N->getOperand(1);
1738 
1739   SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
1740   Results.push_back(
1741       DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
1742 }
1743 
1744 static void
1745 replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
1746                                  SelectionDAG &DAG,
1747                                  const LoongArchSubtarget &Subtarget) {
1748   switch (N->getConstantOperandVal(0)) {
1749   default:
1750     llvm_unreachable("Unexpected Intrinsic.");
1751   case Intrinsic::loongarch_lsx_vpickve2gr_b:
1752     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1753                                 LoongArchISD::VPICK_SEXT_ELT);
1754     break;
1755   case Intrinsic::loongarch_lsx_vpickve2gr_h:
1756   case Intrinsic::loongarch_lasx_xvpickve2gr_w:
1757     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1758                                 LoongArchISD::VPICK_SEXT_ELT);
1759     break;
1760   case Intrinsic::loongarch_lsx_vpickve2gr_w:
1761     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1762                                 LoongArchISD::VPICK_SEXT_ELT);
1763     break;
1764   case Intrinsic::loongarch_lsx_vpickve2gr_bu:
1765     replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
1766                                 LoongArchISD::VPICK_ZEXT_ELT);
1767     break;
1768   case Intrinsic::loongarch_lsx_vpickve2gr_hu:
1769   case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
1770     replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
1771                                 LoongArchISD::VPICK_ZEXT_ELT);
1772     break;
1773   case Intrinsic::loongarch_lsx_vpickve2gr_wu:
1774     replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
1775                                 LoongArchISD::VPICK_ZEXT_ELT);
1776     break;
1777   case Intrinsic::loongarch_lsx_bz_b:
1778   case Intrinsic::loongarch_lsx_bz_h:
1779   case Intrinsic::loongarch_lsx_bz_w:
1780   case Intrinsic::loongarch_lsx_bz_d:
1781   case Intrinsic::loongarch_lasx_xbz_b:
1782   case Intrinsic::loongarch_lasx_xbz_h:
1783   case Intrinsic::loongarch_lasx_xbz_w:
1784   case Intrinsic::loongarch_lasx_xbz_d:
1785     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1786                                 LoongArchISD::VALL_ZERO);
1787     break;
1788   case Intrinsic::loongarch_lsx_bz_v:
1789   case Intrinsic::loongarch_lasx_xbz_v:
1790     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1791                                 LoongArchISD::VANY_ZERO);
1792     break;
1793   case Intrinsic::loongarch_lsx_bnz_b:
1794   case Intrinsic::loongarch_lsx_bnz_h:
1795   case Intrinsic::loongarch_lsx_bnz_w:
1796   case Intrinsic::loongarch_lsx_bnz_d:
1797   case Intrinsic::loongarch_lasx_xbnz_b:
1798   case Intrinsic::loongarch_lasx_xbnz_h:
1799   case Intrinsic::loongarch_lasx_xbnz_w:
1800   case Intrinsic::loongarch_lasx_xbnz_d:
1801     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1802                                 LoongArchISD::VALL_NONZERO);
1803     break;
1804   case Intrinsic::loongarch_lsx_bnz_v:
1805   case Intrinsic::loongarch_lasx_xbnz_v:
1806     replaceVecCondBranchResults(N, Results, DAG, Subtarget,
1807                                 LoongArchISD::VANY_NONZERO);
1808     break;
1809   }
1810 }
1811 
1812 void LoongArchTargetLowering::ReplaceNodeResults(
1813     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1814   SDLoc DL(N);
1815   EVT VT = N->getValueType(0);
1816   switch (N->getOpcode()) {
1817   default:
1818     llvm_unreachable("Don't know how to legalize this operation");
1819   case ISD::SHL:
1820   case ISD::SRA:
1821   case ISD::SRL:
1822   case ISD::ROTR:
1823     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1824            "Unexpected custom legalisation");
1825     if (N->getOperand(1).getOpcode() != ISD::Constant) {
1826       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1827       break;
1828     }
1829     break;
1830   case ISD::ROTL:
1831     ConstantSDNode *CN;
1832     if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
1833       Results.push_back(customLegalizeToWOp(N, DAG, 2));
1834       break;
1835     }
1836     break;
1837   case ISD::FP_TO_SINT: {
1838     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1839            "Unexpected custom legalisation");
1840     SDValue Src = N->getOperand(0);
1841     EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
1842     if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
1843         TargetLowering::TypeSoftenFloat) {
1844       SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
1845       Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
1846       return;
1847     }
1848     // If the FP type needs to be softened, emit a library call using the 'si'
1849     // version. If we left it to default legalization we'd end up with 'di'.
1850     RTLIB::Libcall LC;
1851     LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
1852     MakeLibCallOptions CallOptions;
1853     EVT OpVT = Src.getValueType();
1854     CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
1855     SDValue Chain = SDValue();
1856     SDValue Result;
1857     std::tie(Result, Chain) =
1858         makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
1859     Results.push_back(Result);
1860     break;
1861   }
1862   case ISD::BITCAST: {
1863     SDValue Src = N->getOperand(0);
1864     EVT SrcVT = Src.getValueType();
1865     if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
1866         Subtarget.hasBasicF()) {
1867       SDValue Dst =
1868           DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
1869       Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
1870     }
1871     break;
1872   }
1873   case ISD::FP_TO_UINT: {
1874     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1875            "Unexpected custom legalisation");
1876     auto &TLI = DAG.getTargetLoweringInfo();
1877     SDValue Tmp1, Tmp2;
1878     TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
1879     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
1880     break;
1881   }
1882   case ISD::BSWAP: {
1883     SDValue Src = N->getOperand(0);
1884     assert((VT == MVT::i16 || VT == MVT::i32) &&
1885            "Unexpected custom legalization");
1886     MVT GRLenVT = Subtarget.getGRLenVT();
1887     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1888     SDValue Tmp;
1889     switch (VT.getSizeInBits()) {
1890     default:
1891       llvm_unreachable("Unexpected operand width");
1892     case 16:
1893       Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
1894       break;
1895     case 32:
1896       // Only LA64 will get to here due to the size mismatch between VT and
1897       // GRLenVT; LA32 lowering is directly defined in LoongArchInstrInfo.
1898       Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
1899       break;
1900     }
1901     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1902     break;
1903   }
1904   case ISD::BITREVERSE: {
1905     SDValue Src = N->getOperand(0);
1906     assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
1907            "Unexpected custom legalization");
1908     MVT GRLenVT = Subtarget.getGRLenVT();
1909     SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
1910     SDValue Tmp;
1911     switch (VT.getSizeInBits()) {
1912     default:
1913       llvm_unreachable("Unexpected operand width");
1914     case 8:
1915       Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
1916       break;
1917     case 32:
1918       Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
1919       break;
1920     }
1921     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
1922     break;
1923   }
1924   case ISD::CTLZ:
1925   case ISD::CTTZ: {
1926     assert(VT == MVT::i32 && Subtarget.is64Bit() &&
1927            "Unexpected custom legalisation");
1928     Results.push_back(customLegalizeToWOp(N, DAG, 1));
1929     break;
1930   }
1931   case ISD::INTRINSIC_W_CHAIN: {
1932     SDValue Chain = N->getOperand(0);
1933     SDValue Op2 = N->getOperand(2);
1934     MVT GRLenVT = Subtarget.getGRLenVT();
1935     const StringRef ErrorMsgOOR = "argument out of range";
1936     const StringRef ErrorMsgReqLA64 = "requires loongarch64";
1937     const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
1938 
1939     switch (N->getConstantOperandVal(1)) {
1940     default:
1941       llvm_unreachable("Unexpected Intrinsic.");
1942     case Intrinsic::loongarch_movfcsr2gr: {
1943       if (!Subtarget.hasBasicF()) {
1944         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
1945         return;
1946       }
1947       unsigned Imm = Op2->getAsZExtVal();
1948       if (!isUInt<2>(Imm)) {
1949         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
1950         return;
1951       }
1952       SDValue MOVFCSR2GRResults = DAG.getNode(
1953           LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
1954           {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
1955       Results.push_back(
1956           DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
1957       Results.push_back(MOVFCSR2GRResults.getValue(1));
1958       break;
1959     }
1960 #define CRC_CASE_EXT_BINARYOP(NAME, NODE)                                      \
1961   case Intrinsic::loongarch_##NAME: {                                          \
1962     SDValue NODE = DAG.getNode(                                                \
1963         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1964         {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),               \
1965          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1966     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1967     Results.push_back(NODE.getValue(1));                                       \
1968     break;                                                                     \
1969   }
1970       CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
1971       CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
1972       CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
1973       CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
1974       CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
1975       CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
1976 #undef CRC_CASE_EXT_BINARYOP
1977 
1978 #define CRC_CASE_EXT_UNARYOP(NAME, NODE)                                       \
1979   case Intrinsic::loongarch_##NAME: {                                          \
1980     SDValue NODE = DAG.getNode(                                                \
1981         LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},                        \
1982         {Chain, Op2,                                                           \
1983          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))});       \
1984     Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0)));   \
1985     Results.push_back(NODE.getValue(1));                                       \
1986     break;                                                                     \
1987   }
1988       CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
1989       CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
1990 #undef CRC_CASE_EXT_UNARYOP
1991 #define CSR_CASE(ID)                                                           \
1992   case Intrinsic::loongarch_##ID: {                                            \
1993     if (!Subtarget.is64Bit())                                                  \
1994       emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);   \
1995     break;                                                                     \
1996   }
1997       CSR_CASE(csrrd_d);
1998       CSR_CASE(csrwr_d);
1999       CSR_CASE(csrxchg_d);
2000       CSR_CASE(iocsrrd_d);
2001 #undef CSR_CASE
2002     case Intrinsic::loongarch_csrrd_w: {
2003       unsigned Imm = Op2->getAsZExtVal();
2004       if (!isUInt<14>(Imm)) {
2005         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2006         return;
2007       }
2008       SDValue CSRRDResults =
2009           DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2010                       {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2011       Results.push_back(
2012           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
2013       Results.push_back(CSRRDResults.getValue(1));
2014       break;
2015     }
2016     case Intrinsic::loongarch_csrwr_w: {
2017       unsigned Imm = N->getConstantOperandVal(3);
2018       if (!isUInt<14>(Imm)) {
2019         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2020         return;
2021       }
2022       SDValue CSRWRResults =
2023           DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2024                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2025                        DAG.getConstant(Imm, DL, GRLenVT)});
2026       Results.push_back(
2027           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
2028       Results.push_back(CSRWRResults.getValue(1));
2029       break;
2030     }
2031     case Intrinsic::loongarch_csrxchg_w: {
2032       unsigned Imm = N->getConstantOperandVal(4);
2033       if (!isUInt<14>(Imm)) {
2034         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2035         return;
2036       }
2037       SDValue CSRXCHGResults = DAG.getNode(
2038           LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2039           {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
2040            DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
2041            DAG.getConstant(Imm, DL, GRLenVT)});
2042       Results.push_back(
2043           DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
2044       Results.push_back(CSRXCHGResults.getValue(1));
2045       break;
2046     }
2047 #define IOCSRRD_CASE(NAME, NODE)                                               \
2048   case Intrinsic::loongarch_##NAME: {                                          \
2049     SDValue IOCSRRDResults =                                                   \
2050         DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other},            \
2051                     {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
2052     Results.push_back(                                                         \
2053         DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0)));       \
2054     Results.push_back(IOCSRRDResults.getValue(1));                             \
2055     break;                                                                     \
2056   }
2057       IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2058       IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2059       IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2060 #undef IOCSRRD_CASE
2061     case Intrinsic::loongarch_cpucfg: {
2062       SDValue CPUCFGResults =
2063           DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2064                       {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
2065       Results.push_back(
2066           DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
2067       Results.push_back(CPUCFGResults.getValue(1));
2068       break;
2069     }
2070     case Intrinsic::loongarch_lddir_d: {
2071       if (!Subtarget.is64Bit()) {
2072         emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
2073         return;
2074       }
2075       break;
2076     }
2077     }
2078     break;
2079   }
2080   case ISD::READ_REGISTER: {
2081     if (Subtarget.is64Bit())
2082       DAG.getContext()->emitError(
2083           "On LA64, only 64-bit registers can be read.");
2084     else
2085       DAG.getContext()->emitError(
2086           "On LA32, only 32-bit registers can be read.");
2087     Results.push_back(DAG.getUNDEF(VT));
2088     Results.push_back(N->getOperand(0));
2089     break;
2090   }
2091   case ISD::INTRINSIC_WO_CHAIN: {
2092     replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
2093     break;
2094   }
2095   }
2096 }
2097 
2098 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
2099                                  TargetLowering::DAGCombinerInfo &DCI,
2100                                  const LoongArchSubtarget &Subtarget) {
2101   if (DCI.isBeforeLegalizeOps())
2102     return SDValue();
2103 
2104   SDValue FirstOperand = N->getOperand(0);
2105   SDValue SecondOperand = N->getOperand(1);
2106   unsigned FirstOperandOpc = FirstOperand.getOpcode();
2107   EVT ValTy = N->getValueType(0);
2108   SDLoc DL(N);
2109   uint64_t lsb, msb;
2110   unsigned SMIdx, SMLen;
2111   ConstantSDNode *CN;
2112   SDValue NewOperand;
2113   MVT GRLenVT = Subtarget.getGRLenVT();
2114 
2115   // Op's second operand must be a shifted mask.
2116   if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
2117       !isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
2118     return SDValue();
2119 
2120   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
2121     // Pattern match BSTRPICK.
2122     //  $dst = and ((sra or srl) $src, lsb), (2**len - 1)
2123     //  => BSTRPICK $dst, $src, msb, lsb
2124     //  where msb = lsb + len - 1
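    //
    // For example, (and (srl $src, 8), 0xff) has lsb = 8 and len = 8, so it
    // becomes BSTRPICK $dst, $src, 15, 8.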
2125 
2126     // The second operand of the shift must be an immediate.
2127     if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
2128       return SDValue();
2129 
2130     lsb = CN->getZExtValue();
2131 
2132     // Return if the shifted mask does not start at bit 0 or the sum of its
2133     // length and lsb exceeds the word's size.
2134     if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
2135       return SDValue();
2136 
2137     NewOperand = FirstOperand.getOperand(0);
2138   } else {
2139     // Pattern match BSTRPICK.
2140     //  $dst = and $src, (2**len - 1), if len > 12
2141     //  => BSTRPICK $dst, $src, msb, lsb
2142     //  where lsb = 0 and msb = len - 1
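    //
    // For example, (and $src, 0xfffff) has len = 20 > 12, so it becomes
    // BSTRPICK $dst, $src, 19, 0.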
2143 
2144     // If the mask is <= 0xfff, andi can be used instead.
2145     if (CN->getZExtValue() <= 0xfff)
2146       return SDValue();
2147 
2148     // Return if the mask's MSB exceeds the word's size.
2149     if (SMIdx + SMLen > ValTy.getSizeInBits())
2150       return SDValue();
2151 
2152     if (SMIdx > 0) {
2153       // Omit if the constant has more than 2 uses. This is a conservative
2154       // decision. Whether it is a win depends on the HW microarchitecture.
2155       // However, it should always be better for 1 and 2 uses.
2156       if (CN->use_size() > 2)
2157         return SDValue();
2158       // Return if the constant can be composed by a single LU12I.W.
2159       if ((CN->getZExtValue() & 0xfff) == 0)
2160         return SDValue();
2161       // Return if the constant can be composed by a single ADDI with
2162       // the zero register.
2163       if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
2164         return SDValue();
2165     }
2166 
2167     lsb = SMIdx;
2168     NewOperand = FirstOperand;
2169   }
2170 
2171   msb = lsb + SMLen - 1;
2172   SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
2173                             DAG.getConstant(msb, DL, GRLenVT),
2174                             DAG.getConstant(lsb, DL, GRLenVT));
2175   if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
2176     return NR0;
2177   // Try to optimize to
2178   //   bstrpick $Rd, $Rs, msb, lsb
2179   //   slli     $Rd, $Rd, lsb
2180   return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
2181                      DAG.getConstant(lsb, DL, GRLenVT));
2182 }
2183 
2184 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
2185                                  TargetLowering::DAGCombinerInfo &DCI,
2186                                  const LoongArchSubtarget &Subtarget) {
2187   if (DCI.isBeforeLegalizeOps())
2188     return SDValue();
2189 
2190   // $dst = srl (and $src, Mask), Shamt
2191   // =>
2192   // BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
2193   // when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
2194   //
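  // For example, (srl (and $src, 0xff00), 8) has MaskIdx = 8, MaskLen = 8 and
  // Shamt = 8, so it becomes BSTRPICK $dst, $src, 15, 8.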
2195 
2196   SDValue FirstOperand = N->getOperand(0);
2197   ConstantSDNode *CN;
2198   EVT ValTy = N->getValueType(0);
2199   SDLoc DL(N);
2200   MVT GRLenVT = Subtarget.getGRLenVT();
2201   unsigned MaskIdx, MaskLen;
2202   uint64_t Shamt;
2203 
2204   // The first operand must be an AND and the second operand of the AND must be
2205   // a shifted mask.
2206   if (FirstOperand.getOpcode() != ISD::AND ||
2207       !(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
2208       !isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
2209     return SDValue();
2210 
2211   // The second operand (shift amount) must be an immediate.
2212   if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
2213     return SDValue();
2214 
2215   Shamt = CN->getZExtValue();
2216   if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
2217     return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
2218                        FirstOperand->getOperand(0),
2219                        DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2220                        DAG.getConstant(Shamt, DL, GRLenVT));
2221 
2222   return SDValue();
2223 }
2224 
2225 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
2226                                 TargetLowering::DAGCombinerInfo &DCI,
2227                                 const LoongArchSubtarget &Subtarget) {
2228   MVT GRLenVT = Subtarget.getGRLenVT();
2229   EVT ValTy = N->getValueType(0);
2230   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
2231   ConstantSDNode *CN0, *CN1;
2232   SDLoc DL(N);
2233   unsigned ValBits = ValTy.getSizeInBits();
2234   unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
2235   unsigned Shamt;
2236   bool SwapAndRetried = false;
2237 
2238   if (DCI.isBeforeLegalizeOps())
2239     return SDValue();
2240 
2241   if (ValBits != 32 && ValBits != 64)
2242     return SDValue();
2243 
2244 Retry:
2245   // 1st pattern to match BSTRINS:
2246   //  R = or (and X, mask0), (and (shl Y, lsb), mask1)
2247   //  where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
2248   //  =>
2249   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
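  //
  // For example, on a 32-bit value with lsb = 8 and size = 8:
  //   R = or (and X, 0xffff00ff), (and (shl Y, 8), 0x0000ff00)
  //   => R = BSTRINS X, Y, 15, 8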
2250   if (N0.getOpcode() == ISD::AND &&
2251       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2252       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2253       N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
2254       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2255       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2256       MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
2257       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2258       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2259       (MaskIdx0 + MaskLen0 <= ValBits)) {
2260     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
2261     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2262                        N1.getOperand(0).getOperand(0),
2263                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2264                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2265   }
2266 
2267   // 2nd pattern to match BSTRINS:
2268   //  R = or (and X, mask0), (shl (and Y, mask1), lsb)
2269   //  where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
2270   //  =>
2271   //  R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
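  //
  // For example, on a 32-bit value with lsb = 8 and size = 8:
  //   R = or (and X, 0xffff00ff), (shl (and Y, 0xff), 8)
  //   => R = BSTRINS X, Y, 15, 8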
2272   if (N0.getOpcode() == ISD::AND &&
2273       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2274       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2275       N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2276       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2277       (Shamt = CN1->getZExtValue()) == MaskIdx0 &&
2278       (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2279       isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
2280       MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
2281       (MaskIdx0 + MaskLen0 <= ValBits)) {
2282     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
2283     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2284                        N1.getOperand(0).getOperand(0),
2285                        DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
2286                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2287   }
2288 
2289   // 3rd pattern to match BSTRINS:
2290   //  R = or (and X, mask0), (and Y, mask1)
2291   //  where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
2292   //  =>
2293   //  R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
2294   //  where msb = lsb + size - 1
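  //
  // For example, on a 32-bit value with lsb = 8 and size = 8 (so ~mask0 = 0x0000ff00):
  //   R = or (and X, 0xffff00ff), (and Y, 0x00003400)
  //   => R = BSTRINS X, (shr (and Y, 0x00003400), 8), 15, 8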
2295   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
2296       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2297       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2298       (MaskIdx0 + MaskLen0 <= 64) &&
2299       (CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
2300       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2301     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
2302     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2303                        DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
2304                                    DAG.getConstant(MaskIdx0, DL, GRLenVT)),
2305                        DAG.getConstant(ValBits == 32
2306                                            ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2307                                            : (MaskIdx0 + MaskLen0 - 1),
2308                                        DL, GRLenVT),
2309                        DAG.getConstant(MaskIdx0, DL, GRLenVT));
2310   }
2311 
2312   // 4th pattern to match BSTRINS:
2313   //  R = or (and X, mask), (shl Y, shamt)
2314   //  where mask = (2**shamt - 1)
2315   //  =>
2316   //  R = BSTRINS X, Y, ValBits - 1, shamt
2317   //  where ValBits = 32 or 64
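  //
  // For example, on a 32-bit value with shamt = 4:
  //   R = or (and X, 0xf), (shl Y, 4)
  //   => R = BSTRINS X, Y, 31, 4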
2318   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
2319       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2320       isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
2321       MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2322       (Shamt = CN1->getZExtValue()) == MaskLen0 &&
2323       (MaskIdx0 + MaskLen0 <= ValBits)) {
2324     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
2325     return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2326                        N1.getOperand(0),
2327                        DAG.getConstant((ValBits - 1), DL, GRLenVT),
2328                        DAG.getConstant(Shamt, DL, GRLenVT));
2329   }
2330 
2331   // 5th pattern to match BSTRINS:
2332   //  R = or (and X, mask), const
2333   //  where ~mask = (2**size - 1) << lsb, mask & const = 0
2334   //  =>
2335   //  R = BSTRINS X, (const >> lsb), msb, lsb
2336   //  where msb = lsb + size - 1
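  //
  // For example, on a 32-bit value with lsb = 8 and size = 8 (so ~mask = 0x0000ff00):
  //   R = or (and X, 0xffff00ff), 0x1200
  //   => R = BSTRINS X, 0x12, 15, 8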
2337   if (N0.getOpcode() == ISD::AND &&
2338       (CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
2339       isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
2340       (CN1 = dyn_cast<ConstantSDNode>(N1)) &&
2341       (CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
2342     LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
2343     return DAG.getNode(
2344         LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
2345         DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
2346         DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
2347                                       : (MaskIdx0 + MaskLen0 - 1),
2348                         DL, GRLenVT),
2349         DAG.getConstant(MaskIdx0, DL, GRLenVT));
2350   }
2351 
2352   // 6th pattern.
2353   // a = b | ((c & mask) << shamt), where all positions in b to be overwritten
2354   // by the incoming bits are known to be zero.
2355   // =>
2356   // a = BSTRINS b, c, shamt + MaskLen - 1, shamt
2357   //
2358   // Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
2359   // pattern is more common than the 1st. So we put the 1st before the 6th in
2360   // order to match as many nodes as possible.
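  //
  // For example, if bits [15:4] of b are known to be zero:
  //   a = b | ((c & 0xfff) << 4)  =>  a = BSTRINS b, c, 15, 4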
2361   ConstantSDNode *CNMask, *CNShamt;
2362   unsigned MaskIdx, MaskLen;
2363   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
2364       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2365       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2366       MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2367       CNShamt->getZExtValue() + MaskLen <= ValBits) {
2368     Shamt = CNShamt->getZExtValue();
2369     APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
2370     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2371       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
2372       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2373                          N1.getOperand(0).getOperand(0),
2374                          DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
2375                          DAG.getConstant(Shamt, DL, GRLenVT));
2376     }
2377   }
2378 
2379   // 7th pattern.
2380   // a = b | ((c << shamt) & shifted_mask), where all positions in b to be
2381   // overwritten by the incoming bits are known to be zero.
2382   // =>
2383   // a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
2384   //
2385   // Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
2386   // before the 7th in order to match as many nodes as possible.
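  //
  // For example, if bits [15:8] of b are known to be zero:
  //   a = b | ((c << 8) & 0x0000ff00)  =>  a = BSTRINS b, c, 15, 8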
2387   if (N1.getOpcode() == ISD::AND &&
2388       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2389       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
2390       N1.getOperand(0).getOpcode() == ISD::SHL &&
2391       (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
2392       CNShamt->getZExtValue() == MaskIdx) {
2393     APInt ShMask(ValBits, CNMask->getZExtValue());
2394     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2395       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
2396       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2397                          N1.getOperand(0).getOperand(0),
2398                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2399                          DAG.getConstant(MaskIdx, DL, GRLenVT));
2400     }
2401   }
2402 
2403   // (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
2404   if (!SwapAndRetried) {
2405     std::swap(N0, N1);
2406     SwapAndRetried = true;
2407     goto Retry;
2408   }
2409 
2410   SwapAndRetried = false;
2411 Retry2:
2412   // 8th pattern.
2413   // a = b | (c & shifted_mask), where all positions in b to be overwritten by
2414   // the incoming bits are known to be zero.
2415   // =>
2416   // a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
2417   //
2418   // Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
2419   // we put it here in order to match as many nodes as possible or generate
2420   // fewer instructions.
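  //
  // For example, if bits [15:8] of b are known to be zero:
  //   a = b | (c & 0x0000ff00)  =>  a = BSTRINS b, c >> 8, 15, 8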
2421   if (N1.getOpcode() == ISD::AND &&
2422       (CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
2423       isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
2424     APInt ShMask(ValBits, CNMask->getZExtValue());
2425     if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
2426       LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
2427       return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
2428                          DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
2429                                      N1->getOperand(0),
2430                                      DAG.getConstant(MaskIdx, DL, GRLenVT)),
2431                          DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
2432                          DAG.getConstant(MaskIdx, DL, GRLenVT));
2433     }
2434   }
2435   // Swap N0/N1 and retry.
2436   if (!SwapAndRetried) {
2437     std::swap(N0, N1);
2438     SwapAndRetried = true;
2439     goto Retry2;
2440   }
2441 
2442   return SDValue();
2443 }
2444 
2445 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
2446 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
2447                                       TargetLowering::DAGCombinerInfo &DCI,
2448                                       const LoongArchSubtarget &Subtarget) {
2449   if (DCI.isBeforeLegalizeOps())
2450     return SDValue();
2451 
2452   SDValue Src = N->getOperand(0);
2453   if (Src.getOpcode() != LoongArchISD::REVB_2W)
2454     return SDValue();
2455 
2456   return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
2457                      Src.getOperand(0));
2458 }
2459 
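     // Check that the immediate operand ImmOp of the intrinsic fits in N bits
     // (signed or unsigned) and return it as a GRLen-wide constant; otherwise
     // emit an "argument out of range" error and return UNDEF.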
2460 template <unsigned N>
2461 static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
2462                                        SelectionDAG &DAG,
2463                                        const LoongArchSubtarget &Subtarget,
2464                                        bool IsSigned = false) {
2465   SDLoc DL(Node);
2466   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2467   // Check the ImmArg.
2468   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2469       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2470     DAG.getContext()->emitError(Node->getOperationName(0) +
2471                                 ": argument out of range.");
2472     return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
2473   }
2474   return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
2475 }
2476 
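     // Check that the immediate operand ImmOp fits in N bits and materialize it
     // as a splat constant of the result vector type; otherwise emit an
     // "argument out of range" error and return UNDEF.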
2477 template <unsigned N>
2478 static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
2479                                    SelectionDAG &DAG, bool IsSigned = false) {
2480   SDLoc DL(Node);
2481   EVT ResTy = Node->getValueType(0);
2482   auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
2483 
2484   // Check the ImmArg.
2485   if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
2486       (!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
2487     DAG.getContext()->emitError(Node->getOperationName(0) +
2488                                 ": argument out of range.");
2489     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2490   }
2491   return DAG.getConstant(
2492       APInt(ResTy.getScalarType().getSizeInBits(),
2493             IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
2494       DL, ResTy);
2495 }
2496 
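     // Mask each element of the shift-amount/bit-index operand (operand 2) down
     // to the element bit width, i.e. compute Vec & (EltBits - 1), so that only
     // the bits relevant to the element width are used.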
2497 static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
2498   SDLoc DL(Node);
2499   EVT ResTy = Node->getValueType(0);
2500   SDValue Vec = Node->getOperand(2);
2501   SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
2502   return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
2503 }
2504 
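     // Lower a vbitclr/xvbitclr intrinsic: clear bit (Op2 % EltBits) in each
     // element of Op1, i.e. Op1 & ~(1 << (Op2 & (EltBits - 1))).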
2505 static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
2506   SDLoc DL(Node);
2507   EVT ResTy = Node->getValueType(0);
2508   SDValue One = DAG.getConstant(1, DL, ResTy);
2509   SDValue Bit =
2510       DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
2511 
2512   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
2513                      DAG.getNOT(DL, Bit, ResTy));
2514 }
2515 
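     // Lower a vbitclri/xvbitclri intrinsic: clear the bit selected by the N-bit
     // immediate in each element, i.e. Op1 & ~(1 << Imm).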
2516 template <unsigned N>
2517 static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
2518   SDLoc DL(Node);
2519   EVT ResTy = Node->getValueType(0);
2520   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2521   // Check the unsigned ImmArg.
2522   if (!isUInt<N>(CImm->getZExtValue())) {
2523     DAG.getContext()->emitError(Node->getOperationName(0) +
2524                                 ": argument out of range.");
2525     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2526   }
2527 
2528   APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2529   SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
2530 
2531   return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
2532 }
2533 
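     // Lower a vbitseti/xvbitseti intrinsic: set the bit selected by the N-bit
     // immediate in each element, i.e. Op1 | (1 << Imm).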
2534 template <unsigned N>
2535 static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
2536   SDLoc DL(Node);
2537   EVT ResTy = Node->getValueType(0);
2538   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2539   // Check the unsigned ImmArg.
2540   if (!isUInt<N>(CImm->getZExtValue())) {
2541     DAG.getContext()->emitError(Node->getOperationName(0) +
2542                                 ": argument out of range.");
2543     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2544   }
2545 
2546   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2547   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2548   return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
2549 }
2550 
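     // Lower a vbitrevi/xvbitrevi intrinsic: flip the bit selected by the N-bit
     // immediate in each element, i.e. Op1 ^ (1 << Imm).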
2551 template <unsigned N>
2552 static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
2553   SDLoc DL(Node);
2554   EVT ResTy = Node->getValueType(0);
2555   auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
2556   // Check the unsigned ImmArg.
2557   if (!isUInt<N>(CImm->getZExtValue())) {
2558     DAG.getContext()->emitError(Node->getOperationName(0) +
2559                                 ": argument out of range.");
2560     return DAG.getNode(ISD::UNDEF, DL, ResTy);
2561   }
2562 
2563   APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
2564   SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
2565   return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
2566 }
2567 
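     // Combine side-effect-free LSX/LASX intrinsics into equivalent generic ISD
     // nodes (or LoongArch-specific nodes) so that the generic DAG combiner and
     // the existing instruction-selection patterns can handle them.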
2568 static SDValue
2569 performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
2570                                  TargetLowering::DAGCombinerInfo &DCI,
2571                                  const LoongArchSubtarget &Subtarget) {
2572   SDLoc DL(N);
2573   switch (N->getConstantOperandVal(0)) {
2574   default:
2575     break;
2576   case Intrinsic::loongarch_lsx_vadd_b:
2577   case Intrinsic::loongarch_lsx_vadd_h:
2578   case Intrinsic::loongarch_lsx_vadd_w:
2579   case Intrinsic::loongarch_lsx_vadd_d:
2580   case Intrinsic::loongarch_lasx_xvadd_b:
2581   case Intrinsic::loongarch_lasx_xvadd_h:
2582   case Intrinsic::loongarch_lasx_xvadd_w:
2583   case Intrinsic::loongarch_lasx_xvadd_d:
2584     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2585                        N->getOperand(2));
2586   case Intrinsic::loongarch_lsx_vaddi_bu:
2587   case Intrinsic::loongarch_lsx_vaddi_hu:
2588   case Intrinsic::loongarch_lsx_vaddi_wu:
2589   case Intrinsic::loongarch_lsx_vaddi_du:
2590   case Intrinsic::loongarch_lasx_xvaddi_bu:
2591   case Intrinsic::loongarch_lasx_xvaddi_hu:
2592   case Intrinsic::loongarch_lasx_xvaddi_wu:
2593   case Intrinsic::loongarch_lasx_xvaddi_du:
2594     return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
2595                        lowerVectorSplatImm<5>(N, 2, DAG));
2596   case Intrinsic::loongarch_lsx_vsub_b:
2597   case Intrinsic::loongarch_lsx_vsub_h:
2598   case Intrinsic::loongarch_lsx_vsub_w:
2599   case Intrinsic::loongarch_lsx_vsub_d:
2600   case Intrinsic::loongarch_lasx_xvsub_b:
2601   case Intrinsic::loongarch_lasx_xvsub_h:
2602   case Intrinsic::loongarch_lasx_xvsub_w:
2603   case Intrinsic::loongarch_lasx_xvsub_d:
2604     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2605                        N->getOperand(2));
2606   case Intrinsic::loongarch_lsx_vsubi_bu:
2607   case Intrinsic::loongarch_lsx_vsubi_hu:
2608   case Intrinsic::loongarch_lsx_vsubi_wu:
2609   case Intrinsic::loongarch_lsx_vsubi_du:
2610   case Intrinsic::loongarch_lasx_xvsubi_bu:
2611   case Intrinsic::loongarch_lasx_xvsubi_hu:
2612   case Intrinsic::loongarch_lasx_xvsubi_wu:
2613   case Intrinsic::loongarch_lasx_xvsubi_du:
2614     return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
2615                        lowerVectorSplatImm<5>(N, 2, DAG));
2616   case Intrinsic::loongarch_lsx_vneg_b:
2617   case Intrinsic::loongarch_lsx_vneg_h:
2618   case Intrinsic::loongarch_lsx_vneg_w:
2619   case Intrinsic::loongarch_lsx_vneg_d:
2620   case Intrinsic::loongarch_lasx_xvneg_b:
2621   case Intrinsic::loongarch_lasx_xvneg_h:
2622   case Intrinsic::loongarch_lasx_xvneg_w:
2623   case Intrinsic::loongarch_lasx_xvneg_d:
2624     return DAG.getNode(
2625         ISD::SUB, DL, N->getValueType(0),
2626         DAG.getConstant(
2627             APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
2628                   /*isSigned=*/true),
2629             SDLoc(N), N->getValueType(0)),
2630         N->getOperand(1));
2631   case Intrinsic::loongarch_lsx_vmax_b:
2632   case Intrinsic::loongarch_lsx_vmax_h:
2633   case Intrinsic::loongarch_lsx_vmax_w:
2634   case Intrinsic::loongarch_lsx_vmax_d:
2635   case Intrinsic::loongarch_lasx_xvmax_b:
2636   case Intrinsic::loongarch_lasx_xvmax_h:
2637   case Intrinsic::loongarch_lasx_xvmax_w:
2638   case Intrinsic::loongarch_lasx_xvmax_d:
2639     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2640                        N->getOperand(2));
2641   case Intrinsic::loongarch_lsx_vmax_bu:
2642   case Intrinsic::loongarch_lsx_vmax_hu:
2643   case Intrinsic::loongarch_lsx_vmax_wu:
2644   case Intrinsic::loongarch_lsx_vmax_du:
2645   case Intrinsic::loongarch_lasx_xvmax_bu:
2646   case Intrinsic::loongarch_lasx_xvmax_hu:
2647   case Intrinsic::loongarch_lasx_xvmax_wu:
2648   case Intrinsic::loongarch_lasx_xvmax_du:
2649     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2650                        N->getOperand(2));
2651   case Intrinsic::loongarch_lsx_vmaxi_b:
2652   case Intrinsic::loongarch_lsx_vmaxi_h:
2653   case Intrinsic::loongarch_lsx_vmaxi_w:
2654   case Intrinsic::loongarch_lsx_vmaxi_d:
2655   case Intrinsic::loongarch_lasx_xvmaxi_b:
2656   case Intrinsic::loongarch_lasx_xvmaxi_h:
2657   case Intrinsic::loongarch_lasx_xvmaxi_w:
2658   case Intrinsic::loongarch_lasx_xvmaxi_d:
2659     return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
2660                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2661   case Intrinsic::loongarch_lsx_vmaxi_bu:
2662   case Intrinsic::loongarch_lsx_vmaxi_hu:
2663   case Intrinsic::loongarch_lsx_vmaxi_wu:
2664   case Intrinsic::loongarch_lsx_vmaxi_du:
2665   case Intrinsic::loongarch_lasx_xvmaxi_bu:
2666   case Intrinsic::loongarch_lasx_xvmaxi_hu:
2667   case Intrinsic::loongarch_lasx_xvmaxi_wu:
2668   case Intrinsic::loongarch_lasx_xvmaxi_du:
2669     return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
2670                        lowerVectorSplatImm<5>(N, 2, DAG));
2671   case Intrinsic::loongarch_lsx_vmin_b:
2672   case Intrinsic::loongarch_lsx_vmin_h:
2673   case Intrinsic::loongarch_lsx_vmin_w:
2674   case Intrinsic::loongarch_lsx_vmin_d:
2675   case Intrinsic::loongarch_lasx_xvmin_b:
2676   case Intrinsic::loongarch_lasx_xvmin_h:
2677   case Intrinsic::loongarch_lasx_xvmin_w:
2678   case Intrinsic::loongarch_lasx_xvmin_d:
2679     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2680                        N->getOperand(2));
2681   case Intrinsic::loongarch_lsx_vmin_bu:
2682   case Intrinsic::loongarch_lsx_vmin_hu:
2683   case Intrinsic::loongarch_lsx_vmin_wu:
2684   case Intrinsic::loongarch_lsx_vmin_du:
2685   case Intrinsic::loongarch_lasx_xvmin_bu:
2686   case Intrinsic::loongarch_lasx_xvmin_hu:
2687   case Intrinsic::loongarch_lasx_xvmin_wu:
2688   case Intrinsic::loongarch_lasx_xvmin_du:
2689     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2690                        N->getOperand(2));
2691   case Intrinsic::loongarch_lsx_vmini_b:
2692   case Intrinsic::loongarch_lsx_vmini_h:
2693   case Intrinsic::loongarch_lsx_vmini_w:
2694   case Intrinsic::loongarch_lsx_vmini_d:
2695   case Intrinsic::loongarch_lasx_xvmini_b:
2696   case Intrinsic::loongarch_lasx_xvmini_h:
2697   case Intrinsic::loongarch_lasx_xvmini_w:
2698   case Intrinsic::loongarch_lasx_xvmini_d:
2699     return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
2700                        lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
2701   case Intrinsic::loongarch_lsx_vmini_bu:
2702   case Intrinsic::loongarch_lsx_vmini_hu:
2703   case Intrinsic::loongarch_lsx_vmini_wu:
2704   case Intrinsic::loongarch_lsx_vmini_du:
2705   case Intrinsic::loongarch_lasx_xvmini_bu:
2706   case Intrinsic::loongarch_lasx_xvmini_hu:
2707   case Intrinsic::loongarch_lasx_xvmini_wu:
2708   case Intrinsic::loongarch_lasx_xvmini_du:
2709     return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
2710                        lowerVectorSplatImm<5>(N, 2, DAG));
2711   case Intrinsic::loongarch_lsx_vmul_b:
2712   case Intrinsic::loongarch_lsx_vmul_h:
2713   case Intrinsic::loongarch_lsx_vmul_w:
2714   case Intrinsic::loongarch_lsx_vmul_d:
2715   case Intrinsic::loongarch_lasx_xvmul_b:
2716   case Intrinsic::loongarch_lasx_xvmul_h:
2717   case Intrinsic::loongarch_lasx_xvmul_w:
2718   case Intrinsic::loongarch_lasx_xvmul_d:
2719     return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
2720                        N->getOperand(2));
2721   case Intrinsic::loongarch_lsx_vmadd_b:
2722   case Intrinsic::loongarch_lsx_vmadd_h:
2723   case Intrinsic::loongarch_lsx_vmadd_w:
2724   case Intrinsic::loongarch_lsx_vmadd_d:
2725   case Intrinsic::loongarch_lasx_xvmadd_b:
2726   case Intrinsic::loongarch_lasx_xvmadd_h:
2727   case Intrinsic::loongarch_lasx_xvmadd_w:
2728   case Intrinsic::loongarch_lasx_xvmadd_d: {
2729     EVT ResTy = N->getValueType(0);
2730     return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
2731                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2732                                    N->getOperand(3)));
2733   }
2734   case Intrinsic::loongarch_lsx_vmsub_b:
2735   case Intrinsic::loongarch_lsx_vmsub_h:
2736   case Intrinsic::loongarch_lsx_vmsub_w:
2737   case Intrinsic::loongarch_lsx_vmsub_d:
2738   case Intrinsic::loongarch_lasx_xvmsub_b:
2739   case Intrinsic::loongarch_lasx_xvmsub_h:
2740   case Intrinsic::loongarch_lasx_xvmsub_w:
2741   case Intrinsic::loongarch_lasx_xvmsub_d: {
2742     EVT ResTy = N->getValueType(0);
2743     return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
2744                        DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
2745                                    N->getOperand(3)));
2746   }
2747   case Intrinsic::loongarch_lsx_vdiv_b:
2748   case Intrinsic::loongarch_lsx_vdiv_h:
2749   case Intrinsic::loongarch_lsx_vdiv_w:
2750   case Intrinsic::loongarch_lsx_vdiv_d:
2751   case Intrinsic::loongarch_lasx_xvdiv_b:
2752   case Intrinsic::loongarch_lasx_xvdiv_h:
2753   case Intrinsic::loongarch_lasx_xvdiv_w:
2754   case Intrinsic::loongarch_lasx_xvdiv_d:
2755     return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
2756                        N->getOperand(2));
2757   case Intrinsic::loongarch_lsx_vdiv_bu:
2758   case Intrinsic::loongarch_lsx_vdiv_hu:
2759   case Intrinsic::loongarch_lsx_vdiv_wu:
2760   case Intrinsic::loongarch_lsx_vdiv_du:
2761   case Intrinsic::loongarch_lasx_xvdiv_bu:
2762   case Intrinsic::loongarch_lasx_xvdiv_hu:
2763   case Intrinsic::loongarch_lasx_xvdiv_wu:
2764   case Intrinsic::loongarch_lasx_xvdiv_du:
2765     return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
2766                        N->getOperand(2));
2767   case Intrinsic::loongarch_lsx_vmod_b:
2768   case Intrinsic::loongarch_lsx_vmod_h:
2769   case Intrinsic::loongarch_lsx_vmod_w:
2770   case Intrinsic::loongarch_lsx_vmod_d:
2771   case Intrinsic::loongarch_lasx_xvmod_b:
2772   case Intrinsic::loongarch_lasx_xvmod_h:
2773   case Intrinsic::loongarch_lasx_xvmod_w:
2774   case Intrinsic::loongarch_lasx_xvmod_d:
2775     return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
2776                        N->getOperand(2));
2777   case Intrinsic::loongarch_lsx_vmod_bu:
2778   case Intrinsic::loongarch_lsx_vmod_hu:
2779   case Intrinsic::loongarch_lsx_vmod_wu:
2780   case Intrinsic::loongarch_lsx_vmod_du:
2781   case Intrinsic::loongarch_lasx_xvmod_bu:
2782   case Intrinsic::loongarch_lasx_xvmod_hu:
2783   case Intrinsic::loongarch_lasx_xvmod_wu:
2784   case Intrinsic::loongarch_lasx_xvmod_du:
2785     return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
2786                        N->getOperand(2));
2787   case Intrinsic::loongarch_lsx_vand_v:
2788   case Intrinsic::loongarch_lasx_xvand_v:
2789     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2790                        N->getOperand(2));
2791   case Intrinsic::loongarch_lsx_vor_v:
2792   case Intrinsic::loongarch_lasx_xvor_v:
2793     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2794                        N->getOperand(2));
2795   case Intrinsic::loongarch_lsx_vxor_v:
2796   case Intrinsic::loongarch_lasx_xvxor_v:
2797     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2798                        N->getOperand(2));
2799   case Intrinsic::loongarch_lsx_vnor_v:
2800   case Intrinsic::loongarch_lasx_xvnor_v: {
2801     SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2802                               N->getOperand(2));
2803     return DAG.getNOT(DL, Res, Res->getValueType(0));
2804   }
2805   case Intrinsic::loongarch_lsx_vandi_b:
2806   case Intrinsic::loongarch_lasx_xvandi_b:
2807     return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
2808                        lowerVectorSplatImm<8>(N, 2, DAG));
2809   case Intrinsic::loongarch_lsx_vori_b:
2810   case Intrinsic::loongarch_lasx_xvori_b:
2811     return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
2812                        lowerVectorSplatImm<8>(N, 2, DAG));
2813   case Intrinsic::loongarch_lsx_vxori_b:
2814   case Intrinsic::loongarch_lasx_xvxori_b:
2815     return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
2816                        lowerVectorSplatImm<8>(N, 2, DAG));
2817   case Intrinsic::loongarch_lsx_vsll_b:
2818   case Intrinsic::loongarch_lsx_vsll_h:
2819   case Intrinsic::loongarch_lsx_vsll_w:
2820   case Intrinsic::loongarch_lsx_vsll_d:
2821   case Intrinsic::loongarch_lasx_xvsll_b:
2822   case Intrinsic::loongarch_lasx_xvsll_h:
2823   case Intrinsic::loongarch_lasx_xvsll_w:
2824   case Intrinsic::loongarch_lasx_xvsll_d:
2825     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2826                        truncateVecElts(N, DAG));
2827   case Intrinsic::loongarch_lsx_vslli_b:
2828   case Intrinsic::loongarch_lasx_xvslli_b:
2829     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2830                        lowerVectorSplatImm<3>(N, 2, DAG));
2831   case Intrinsic::loongarch_lsx_vslli_h:
2832   case Intrinsic::loongarch_lasx_xvslli_h:
2833     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2834                        lowerVectorSplatImm<4>(N, 2, DAG));
2835   case Intrinsic::loongarch_lsx_vslli_w:
2836   case Intrinsic::loongarch_lasx_xvslli_w:
2837     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2838                        lowerVectorSplatImm<5>(N, 2, DAG));
2839   case Intrinsic::loongarch_lsx_vslli_d:
2840   case Intrinsic::loongarch_lasx_xvslli_d:
2841     return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
2842                        lowerVectorSplatImm<6>(N, 2, DAG));
2843   case Intrinsic::loongarch_lsx_vsrl_b:
2844   case Intrinsic::loongarch_lsx_vsrl_h:
2845   case Intrinsic::loongarch_lsx_vsrl_w:
2846   case Intrinsic::loongarch_lsx_vsrl_d:
2847   case Intrinsic::loongarch_lasx_xvsrl_b:
2848   case Intrinsic::loongarch_lasx_xvsrl_h:
2849   case Intrinsic::loongarch_lasx_xvsrl_w:
2850   case Intrinsic::loongarch_lasx_xvsrl_d:
2851     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2852                        truncateVecElts(N, DAG));
2853   case Intrinsic::loongarch_lsx_vsrli_b:
2854   case Intrinsic::loongarch_lasx_xvsrli_b:
2855     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2856                        lowerVectorSplatImm<3>(N, 2, DAG));
2857   case Intrinsic::loongarch_lsx_vsrli_h:
2858   case Intrinsic::loongarch_lasx_xvsrli_h:
2859     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2860                        lowerVectorSplatImm<4>(N, 2, DAG));
2861   case Intrinsic::loongarch_lsx_vsrli_w:
2862   case Intrinsic::loongarch_lasx_xvsrli_w:
2863     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2864                        lowerVectorSplatImm<5>(N, 2, DAG));
2865   case Intrinsic::loongarch_lsx_vsrli_d:
2866   case Intrinsic::loongarch_lasx_xvsrli_d:
2867     return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
2868                        lowerVectorSplatImm<6>(N, 2, DAG));
2869   case Intrinsic::loongarch_lsx_vsra_b:
2870   case Intrinsic::loongarch_lsx_vsra_h:
2871   case Intrinsic::loongarch_lsx_vsra_w:
2872   case Intrinsic::loongarch_lsx_vsra_d:
2873   case Intrinsic::loongarch_lasx_xvsra_b:
2874   case Intrinsic::loongarch_lasx_xvsra_h:
2875   case Intrinsic::loongarch_lasx_xvsra_w:
2876   case Intrinsic::loongarch_lasx_xvsra_d:
2877     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2878                        truncateVecElts(N, DAG));
2879   case Intrinsic::loongarch_lsx_vsrai_b:
2880   case Intrinsic::loongarch_lasx_xvsrai_b:
2881     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2882                        lowerVectorSplatImm<3>(N, 2, DAG));
2883   case Intrinsic::loongarch_lsx_vsrai_h:
2884   case Intrinsic::loongarch_lasx_xvsrai_h:
2885     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2886                        lowerVectorSplatImm<4>(N, 2, DAG));
2887   case Intrinsic::loongarch_lsx_vsrai_w:
2888   case Intrinsic::loongarch_lasx_xvsrai_w:
2889     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2890                        lowerVectorSplatImm<5>(N, 2, DAG));
2891   case Intrinsic::loongarch_lsx_vsrai_d:
2892   case Intrinsic::loongarch_lasx_xvsrai_d:
2893     return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
2894                        lowerVectorSplatImm<6>(N, 2, DAG));
2895   case Intrinsic::loongarch_lsx_vclz_b:
2896   case Intrinsic::loongarch_lsx_vclz_h:
2897   case Intrinsic::loongarch_lsx_vclz_w:
2898   case Intrinsic::loongarch_lsx_vclz_d:
2899   case Intrinsic::loongarch_lasx_xvclz_b:
2900   case Intrinsic::loongarch_lasx_xvclz_h:
2901   case Intrinsic::loongarch_lasx_xvclz_w:
2902   case Intrinsic::loongarch_lasx_xvclz_d:
2903     return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
2904   case Intrinsic::loongarch_lsx_vpcnt_b:
2905   case Intrinsic::loongarch_lsx_vpcnt_h:
2906   case Intrinsic::loongarch_lsx_vpcnt_w:
2907   case Intrinsic::loongarch_lsx_vpcnt_d:
2908   case Intrinsic::loongarch_lasx_xvpcnt_b:
2909   case Intrinsic::loongarch_lasx_xvpcnt_h:
2910   case Intrinsic::loongarch_lasx_xvpcnt_w:
2911   case Intrinsic::loongarch_lasx_xvpcnt_d:
2912     return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
2913   case Intrinsic::loongarch_lsx_vbitclr_b:
2914   case Intrinsic::loongarch_lsx_vbitclr_h:
2915   case Intrinsic::loongarch_lsx_vbitclr_w:
2916   case Intrinsic::loongarch_lsx_vbitclr_d:
2917   case Intrinsic::loongarch_lasx_xvbitclr_b:
2918   case Intrinsic::loongarch_lasx_xvbitclr_h:
2919   case Intrinsic::loongarch_lasx_xvbitclr_w:
2920   case Intrinsic::loongarch_lasx_xvbitclr_d:
2921     return lowerVectorBitClear(N, DAG);
2922   case Intrinsic::loongarch_lsx_vbitclri_b:
2923   case Intrinsic::loongarch_lasx_xvbitclri_b:
2924     return lowerVectorBitClearImm<3>(N, DAG);
2925   case Intrinsic::loongarch_lsx_vbitclri_h:
2926   case Intrinsic::loongarch_lasx_xvbitclri_h:
2927     return lowerVectorBitClearImm<4>(N, DAG);
2928   case Intrinsic::loongarch_lsx_vbitclri_w:
2929   case Intrinsic::loongarch_lasx_xvbitclri_w:
2930     return lowerVectorBitClearImm<5>(N, DAG);
2931   case Intrinsic::loongarch_lsx_vbitclri_d:
2932   case Intrinsic::loongarch_lasx_xvbitclri_d:
2933     return lowerVectorBitClearImm<6>(N, DAG);
2934   case Intrinsic::loongarch_lsx_vbitset_b:
2935   case Intrinsic::loongarch_lsx_vbitset_h:
2936   case Intrinsic::loongarch_lsx_vbitset_w:
2937   case Intrinsic::loongarch_lsx_vbitset_d:
2938   case Intrinsic::loongarch_lasx_xvbitset_b:
2939   case Intrinsic::loongarch_lasx_xvbitset_h:
2940   case Intrinsic::loongarch_lasx_xvbitset_w:
2941   case Intrinsic::loongarch_lasx_xvbitset_d: {
2942     EVT VecTy = N->getValueType(0);
2943     SDValue One = DAG.getConstant(1, DL, VecTy);
2944     return DAG.getNode(
2945         ISD::OR, DL, VecTy, N->getOperand(1),
2946         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2947   }
2948   case Intrinsic::loongarch_lsx_vbitseti_b:
2949   case Intrinsic::loongarch_lasx_xvbitseti_b:
2950     return lowerVectorBitSetImm<3>(N, DAG);
2951   case Intrinsic::loongarch_lsx_vbitseti_h:
2952   case Intrinsic::loongarch_lasx_xvbitseti_h:
2953     return lowerVectorBitSetImm<4>(N, DAG);
2954   case Intrinsic::loongarch_lsx_vbitseti_w:
2955   case Intrinsic::loongarch_lasx_xvbitseti_w:
2956     return lowerVectorBitSetImm<5>(N, DAG);
2957   case Intrinsic::loongarch_lsx_vbitseti_d:
2958   case Intrinsic::loongarch_lasx_xvbitseti_d:
2959     return lowerVectorBitSetImm<6>(N, DAG);
2960   case Intrinsic::loongarch_lsx_vbitrev_b:
2961   case Intrinsic::loongarch_lsx_vbitrev_h:
2962   case Intrinsic::loongarch_lsx_vbitrev_w:
2963   case Intrinsic::loongarch_lsx_vbitrev_d:
2964   case Intrinsic::loongarch_lasx_xvbitrev_b:
2965   case Intrinsic::loongarch_lasx_xvbitrev_h:
2966   case Intrinsic::loongarch_lasx_xvbitrev_w:
2967   case Intrinsic::loongarch_lasx_xvbitrev_d: {
2968     EVT VecTy = N->getValueType(0);
2969     SDValue One = DAG.getConstant(1, DL, VecTy);
2970     return DAG.getNode(
2971         ISD::XOR, DL, VecTy, N->getOperand(1),
2972         DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
2973   }
2974   case Intrinsic::loongarch_lsx_vbitrevi_b:
2975   case Intrinsic::loongarch_lasx_xvbitrevi_b:
2976     return lowerVectorBitRevImm<3>(N, DAG);
2977   case Intrinsic::loongarch_lsx_vbitrevi_h:
2978   case Intrinsic::loongarch_lasx_xvbitrevi_h:
2979     return lowerVectorBitRevImm<4>(N, DAG);
2980   case Intrinsic::loongarch_lsx_vbitrevi_w:
2981   case Intrinsic::loongarch_lasx_xvbitrevi_w:
2982     return lowerVectorBitRevImm<5>(N, DAG);
2983   case Intrinsic::loongarch_lsx_vbitrevi_d:
2984   case Intrinsic::loongarch_lasx_xvbitrevi_d:
2985     return lowerVectorBitRevImm<6>(N, DAG);
2986   case Intrinsic::loongarch_lsx_vfadd_s:
2987   case Intrinsic::loongarch_lsx_vfadd_d:
2988   case Intrinsic::loongarch_lasx_xvfadd_s:
2989   case Intrinsic::loongarch_lasx_xvfadd_d:
2990     return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
2991                        N->getOperand(2));
2992   case Intrinsic::loongarch_lsx_vfsub_s:
2993   case Intrinsic::loongarch_lsx_vfsub_d:
2994   case Intrinsic::loongarch_lasx_xvfsub_s:
2995   case Intrinsic::loongarch_lasx_xvfsub_d:
2996     return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
2997                        N->getOperand(2));
2998   case Intrinsic::loongarch_lsx_vfmul_s:
2999   case Intrinsic::loongarch_lsx_vfmul_d:
3000   case Intrinsic::loongarch_lasx_xvfmul_s:
3001   case Intrinsic::loongarch_lasx_xvfmul_d:
3002     return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
3003                        N->getOperand(2));
3004   case Intrinsic::loongarch_lsx_vfdiv_s:
3005   case Intrinsic::loongarch_lsx_vfdiv_d:
3006   case Intrinsic::loongarch_lasx_xvfdiv_s:
3007   case Intrinsic::loongarch_lasx_xvfdiv_d:
3008     return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
3009                        N->getOperand(2));
3010   case Intrinsic::loongarch_lsx_vfmadd_s:
3011   case Intrinsic::loongarch_lsx_vfmadd_d:
3012   case Intrinsic::loongarch_lasx_xvfmadd_s:
3013   case Intrinsic::loongarch_lasx_xvfmadd_d:
3014     return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
3015                        N->getOperand(2), N->getOperand(3));
3016   case Intrinsic::loongarch_lsx_vinsgr2vr_b:
3017     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3018                        N->getOperand(1), N->getOperand(2),
3019                        legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
3020   case Intrinsic::loongarch_lsx_vinsgr2vr_h:
3021   case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
3022     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3023                        N->getOperand(1), N->getOperand(2),
3024                        legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
3025   case Intrinsic::loongarch_lsx_vinsgr2vr_w:
3026   case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
3027     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3028                        N->getOperand(1), N->getOperand(2),
3029                        legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
3030   case Intrinsic::loongarch_lsx_vinsgr2vr_d:
3031     return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
3032                        N->getOperand(1), N->getOperand(2),
3033                        legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
3034   case Intrinsic::loongarch_lsx_vreplgr2vr_b:
3035   case Intrinsic::loongarch_lsx_vreplgr2vr_h:
3036   case Intrinsic::loongarch_lsx_vreplgr2vr_w:
3037   case Intrinsic::loongarch_lsx_vreplgr2vr_d:
3038   case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
3039   case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
3040   case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
3041   case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
3042     EVT ResTy = N->getValueType(0);
3043     SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
3044     return DAG.getBuildVector(ResTy, DL, Ops);
3045   }
3046   case Intrinsic::loongarch_lsx_vreplve_b:
3047   case Intrinsic::loongarch_lsx_vreplve_h:
3048   case Intrinsic::loongarch_lsx_vreplve_w:
3049   case Intrinsic::loongarch_lsx_vreplve_d:
3050   case Intrinsic::loongarch_lasx_xvreplve_b:
3051   case Intrinsic::loongarch_lasx_xvreplve_h:
3052   case Intrinsic::loongarch_lasx_xvreplve_w:
3053   case Intrinsic::loongarch_lasx_xvreplve_d:
3054     return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
3055                        N->getOperand(1),
3056                        DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
3057                                    N->getOperand(2)));
3058   }
3059   return SDValue();
3060 }
3061 
3062 SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
3063                                                    DAGCombinerInfo &DCI) const {
3064   SelectionDAG &DAG = DCI.DAG;
3065   switch (N->getOpcode()) {
3066   default:
3067     break;
3068   case ISD::AND:
3069     return performANDCombine(N, DAG, DCI, Subtarget);
3070   case ISD::OR:
3071     return performORCombine(N, DAG, DCI, Subtarget);
3072   case ISD::SRL:
3073     return performSRLCombine(N, DAG, DCI, Subtarget);
3074   case LoongArchISD::BITREV_W:
3075     return performBITREV_WCombine(N, DAG, DCI, Subtarget);
3076   case ISD::INTRINSIC_WO_CHAIN:
3077     return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
3078   }
3079   return SDValue();
3080 }
3081 
3082 static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
3083                                               MachineBasicBlock *MBB) {
3084   if (!ZeroDivCheck)
3085     return MBB;
3086 
3087   // Build instructions:
3088   // MBB:
3089   //   div(or mod)   $dst, $dividend, $divisor
3090   //   bnez          $divisor, SinkMBB
3091   // BreakMBB:
3092   //   break         7 // BRK_DIVZERO
3093   // SinkMBB:
3094   //   fallthrough
3095   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
3096   MachineFunction::iterator It = ++MBB->getIterator();
3097   MachineFunction *MF = MBB->getParent();
3098   auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3099   auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
3100   MF->insert(It, BreakMBB);
3101   MF->insert(It, SinkMBB);
3102 
3103   // Transfer the remainder of MBB and its successor edges to SinkMBB.
3104   SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
3105   SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
3106 
3107   const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
3108   DebugLoc DL = MI.getDebugLoc();
3109   MachineOperand &Divisor = MI.getOperand(2);
3110   Register DivisorReg = Divisor.getReg();
3111 
3112   // MBB:
3113   BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
3114       .addReg(DivisorReg, getKillRegState(Divisor.isKill()))
3115       .addMBB(SinkMBB);
3116   MBB->addSuccessor(BreakMBB);
3117   MBB->addSuccessor(SinkMBB);
3118 
3119   // BreakMBB:
3120   // See linux header file arch/loongarch/include/uapi/asm/break.h for the
3121   // definition of BRK_DIVZERO.
3122   BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
3123   BreakMBB->addSuccessor(SinkMBB);
3124 
3125   // Clear Divisor's kill flag.
3126   Divisor.setIsKill(false);
3127 
3128   return SinkMBB;
3129 }
3130 
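     // Expand a vector condition-branch pseudo (PseudoVBZ*/PseudoVBNZ* and the
     // LASX variants) into the corresponding [X]VSET* instruction that writes a
     // CFR register, a conditional branch on that CFR, and a small diamond that
     // materializes 0 or 1 into the destination GPR via a PHI.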
3131 static MachineBasicBlock *
3132 emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
3133                         const LoongArchSubtarget &Subtarget) {
3134   unsigned CondOpc;
3135   switch (MI.getOpcode()) {
3136   default:
3137     llvm_unreachable("Unexpected opcode");
3138   case LoongArch::PseudoVBZ:
3139     CondOpc = LoongArch::VSETEQZ_V;
3140     break;
3141   case LoongArch::PseudoVBZ_B:
3142     CondOpc = LoongArch::VSETANYEQZ_B;
3143     break;
3144   case LoongArch::PseudoVBZ_H:
3145     CondOpc = LoongArch::VSETANYEQZ_H;
3146     break;
3147   case LoongArch::PseudoVBZ_W:
3148     CondOpc = LoongArch::VSETANYEQZ_W;
3149     break;
3150   case LoongArch::PseudoVBZ_D:
3151     CondOpc = LoongArch::VSETANYEQZ_D;
3152     break;
3153   case LoongArch::PseudoVBNZ:
3154     CondOpc = LoongArch::VSETNEZ_V;
3155     break;
3156   case LoongArch::PseudoVBNZ_B:
3157     CondOpc = LoongArch::VSETALLNEZ_B;
3158     break;
3159   case LoongArch::PseudoVBNZ_H:
3160     CondOpc = LoongArch::VSETALLNEZ_H;
3161     break;
3162   case LoongArch::PseudoVBNZ_W:
3163     CondOpc = LoongArch::VSETALLNEZ_W;
3164     break;
3165   case LoongArch::PseudoVBNZ_D:
3166     CondOpc = LoongArch::VSETALLNEZ_D;
3167     break;
3168   case LoongArch::PseudoXVBZ:
3169     CondOpc = LoongArch::XVSETEQZ_V;
3170     break;
3171   case LoongArch::PseudoXVBZ_B:
3172     CondOpc = LoongArch::XVSETANYEQZ_B;
3173     break;
3174   case LoongArch::PseudoXVBZ_H:
3175     CondOpc = LoongArch::XVSETANYEQZ_H;
3176     break;
3177   case LoongArch::PseudoXVBZ_W:
3178     CondOpc = LoongArch::XVSETANYEQZ_W;
3179     break;
3180   case LoongArch::PseudoXVBZ_D:
3181     CondOpc = LoongArch::XVSETANYEQZ_D;
3182     break;
3183   case LoongArch::PseudoXVBNZ:
3184     CondOpc = LoongArch::XVSETNEZ_V;
3185     break;
3186   case LoongArch::PseudoXVBNZ_B:
3187     CondOpc = LoongArch::XVSETALLNEZ_B;
3188     break;
3189   case LoongArch::PseudoXVBNZ_H:
3190     CondOpc = LoongArch::XVSETALLNEZ_H;
3191     break;
3192   case LoongArch::PseudoXVBNZ_W:
3193     CondOpc = LoongArch::XVSETALLNEZ_W;
3194     break;
3195   case LoongArch::PseudoXVBNZ_D:
3196     CondOpc = LoongArch::XVSETALLNEZ_D;
3197     break;
3198   }
3199 
3200   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3201   const BasicBlock *LLVM_BB = BB->getBasicBlock();
3202   DebugLoc DL = MI.getDebugLoc();
3203   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3204   MachineFunction::iterator It = ++BB->getIterator();
3205 
3206   MachineFunction *F = BB->getParent();
3207   MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
3208   MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
3209   MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
3210 
3211   F->insert(It, FalseBB);
3212   F->insert(It, TrueBB);
3213   F->insert(It, SinkBB);
3214 
3215   // Transfer the remainder of BB and its successor edges to SinkBB.
3216   SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
3217   SinkBB->transferSuccessorsAndUpdatePHIs(BB);
3218 
3219   // Insert the real instruction into BB.
3220   Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
3221   BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
3222 
3223   // Insert branch.
3224   BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
3225   BB->addSuccessor(FalseBB);
3226   BB->addSuccessor(TrueBB);
3227 
3228   // FalseBB.
3229   Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3230   BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
3231       .addReg(LoongArch::R0)
3232       .addImm(0);
3233   BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
3234   FalseBB->addSuccessor(SinkBB);
3235 
3236   // TrueBB.
3237   Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
3238   BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
3239       .addReg(LoongArch::R0)
3240       .addImm(1);
3241   TrueBB->addSuccessor(SinkBB);
3242 
3243   // SinkBB: merge the results.
3244   BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
3245           MI.getOperand(0).getReg())
3246       .addReg(RD1)
3247       .addMBB(FalseBB)
3248       .addReg(RD2)
3249       .addMBB(TrueBB);
3250 
3251   // The pseudo instruction is gone now.
3252   MI.eraseFromParent();
3253   return SinkBB;
3254 }
3255 
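     // Expand PseudoXVINSGR2VR_{B,H}: LASX has no direct xvinsgr2vr.{b,h}, so
     // insert the GPR element into the relevant 128-bit half with vinsgr2vr and,
     // when the index addresses the high half, move that half down and merge it
     // back with xvpermi.q.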
3256 static MachineBasicBlock *
3257 emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
3258                      const LoongArchSubtarget &Subtarget) {
3259   unsigned InsOp;
3260   unsigned HalfSize;
3261   switch (MI.getOpcode()) {
3262   default:
3263     llvm_unreachable("Unexpected opcode");
3264   case LoongArch::PseudoXVINSGR2VR_B:
3265     HalfSize = 16;
3266     InsOp = LoongArch::VINSGR2VR_B;
3267     break;
3268   case LoongArch::PseudoXVINSGR2VR_H:
3269     HalfSize = 8;
3270     InsOp = LoongArch::VINSGR2VR_H;
3271     break;
3272   }
3273   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3274   const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
3275   const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
3276   DebugLoc DL = MI.getDebugLoc();
3277   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
3278   // XDst = vector_insert XSrc, Elt, Idx
3279   Register XDst = MI.getOperand(0).getReg();
3280   Register XSrc = MI.getOperand(1).getReg();
3281   Register Elt = MI.getOperand(2).getReg();
3282   unsigned Idx = MI.getOperand(3).getImm();
3283 
3284   Register ScratchReg1 = XSrc;
3285   if (Idx >= HalfSize) {
3286     ScratchReg1 = MRI.createVirtualRegister(RC);
3287     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
3288         .addReg(XSrc)
3289         .addReg(XSrc)
3290         .addImm(1);
3291   }
3292 
3293   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
3294   Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
3295   BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
3296       .addReg(ScratchReg1, 0, LoongArch::sub_128);
3297   BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
3298       .addReg(ScratchSubReg1)
3299       .addReg(Elt)
3300       .addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
3301 
3302   Register ScratchReg2 = XDst;
3303   if (Idx >= HalfSize)
3304     ScratchReg2 = MRI.createVirtualRegister(RC);
3305 
3306   BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
3307       .addImm(0)
3308       .addReg(ScratchSubReg2)
3309       .addImm(LoongArch::sub_128);
3310 
3311   if (Idx >= HalfSize)
3312     BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
3313         .addReg(XSrc)
3314         .addReg(ScratchReg2)
3315         .addImm(2);
3316 
3317   MI.eraseFromParent();
3318   return BB;
3319 }
3320 
3321 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
3322     MachineInstr &MI, MachineBasicBlock *BB) const {
3323   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
3324   DebugLoc DL = MI.getDebugLoc();
3325 
3326   switch (MI.getOpcode()) {
3327   default:
3328     llvm_unreachable("Unexpected instr type to insert");
3329   case LoongArch::DIV_W:
3330   case LoongArch::DIV_WU:
3331   case LoongArch::MOD_W:
3332   case LoongArch::MOD_WU:
3333   case LoongArch::DIV_D:
3334   case LoongArch::DIV_DU:
3335   case LoongArch::MOD_D:
3336   case LoongArch::MOD_DU:
3337     return insertDivByZeroTrap(MI, BB);
3339   case LoongArch::WRFCSR: {
3340     BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
3341             LoongArch::FCSR0 + MI.getOperand(0).getImm())
3342         .addReg(MI.getOperand(1).getReg());
3343     MI.eraseFromParent();
3344     return BB;
3345   }
3346   case LoongArch::RDFCSR: {
3347     MachineInstr *ReadFCSR =
3348         BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
3349                 MI.getOperand(0).getReg())
3350             .addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
3351     ReadFCSR->getOperand(1).setIsUndef();
3352     MI.eraseFromParent();
3353     return BB;
3354   }
3355   case LoongArch::PseudoVBZ:
3356   case LoongArch::PseudoVBZ_B:
3357   case LoongArch::PseudoVBZ_H:
3358   case LoongArch::PseudoVBZ_W:
3359   case LoongArch::PseudoVBZ_D:
3360   case LoongArch::PseudoVBNZ:
3361   case LoongArch::PseudoVBNZ_B:
3362   case LoongArch::PseudoVBNZ_H:
3363   case LoongArch::PseudoVBNZ_W:
3364   case LoongArch::PseudoVBNZ_D:
3365   case LoongArch::PseudoXVBZ:
3366   case LoongArch::PseudoXVBZ_B:
3367   case LoongArch::PseudoXVBZ_H:
3368   case LoongArch::PseudoXVBZ_W:
3369   case LoongArch::PseudoXVBZ_D:
3370   case LoongArch::PseudoXVBNZ:
3371   case LoongArch::PseudoXVBNZ_B:
3372   case LoongArch::PseudoXVBNZ_H:
3373   case LoongArch::PseudoXVBNZ_W:
3374   case LoongArch::PseudoXVBNZ_D:
3375     return emitVecCondBranchPseudo(MI, BB, Subtarget);
3376   case LoongArch::PseudoXVINSGR2VR_B:
3377   case LoongArch::PseudoXVINSGR2VR_H:
3378     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
3379   }
3380 }
3381 
3382 bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
3383     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3384     unsigned *Fast) const {
3385   if (!Subtarget.hasUAL())
3386     return false;
3387 
3388   // TODO: set reasonable speed number.
3389   if (Fast)
3390     *Fast = 1;
3391   return true;
3392 }
3393 
3394 const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
3395   switch ((LoongArchISD::NodeType)Opcode) {
3396   case LoongArchISD::FIRST_NUMBER:
3397     break;
3398 
3399 #define NODE_NAME_CASE(node)                                                   \
3400   case LoongArchISD::node:                                                     \
3401     return "LoongArchISD::" #node;
3402 
3403     // TODO: Add more target-dependent nodes later.
3404     NODE_NAME_CASE(CALL)
3405     NODE_NAME_CASE(CALL_MEDIUM)
3406     NODE_NAME_CASE(CALL_LARGE)
3407     NODE_NAME_CASE(RET)
3408     NODE_NAME_CASE(TAIL)
3409     NODE_NAME_CASE(TAIL_MEDIUM)
3410     NODE_NAME_CASE(TAIL_LARGE)
3411     NODE_NAME_CASE(SLL_W)
3412     NODE_NAME_CASE(SRA_W)
3413     NODE_NAME_CASE(SRL_W)
3414     NODE_NAME_CASE(BSTRINS)
3415     NODE_NAME_CASE(BSTRPICK)
3416     NODE_NAME_CASE(MOVGR2FR_W_LA64)
3417     NODE_NAME_CASE(MOVFR2GR_S_LA64)
3418     NODE_NAME_CASE(FTINT)
3419     NODE_NAME_CASE(REVB_2H)
3420     NODE_NAME_CASE(REVB_2W)
3421     NODE_NAME_CASE(BITREV_4B)
3422     NODE_NAME_CASE(BITREV_W)
3423     NODE_NAME_CASE(ROTR_W)
3424     NODE_NAME_CASE(ROTL_W)
3425     NODE_NAME_CASE(CLZ_W)
3426     NODE_NAME_CASE(CTZ_W)
3427     NODE_NAME_CASE(DBAR)
3428     NODE_NAME_CASE(IBAR)
3429     NODE_NAME_CASE(BREAK)
3430     NODE_NAME_CASE(SYSCALL)
3431     NODE_NAME_CASE(CRC_W_B_W)
3432     NODE_NAME_CASE(CRC_W_H_W)
3433     NODE_NAME_CASE(CRC_W_W_W)
3434     NODE_NAME_CASE(CRC_W_D_W)
3435     NODE_NAME_CASE(CRCC_W_B_W)
3436     NODE_NAME_CASE(CRCC_W_H_W)
3437     NODE_NAME_CASE(CRCC_W_W_W)
3438     NODE_NAME_CASE(CRCC_W_D_W)
3439     NODE_NAME_CASE(CSRRD)
3440     NODE_NAME_CASE(CSRWR)
3441     NODE_NAME_CASE(CSRXCHG)
3442     NODE_NAME_CASE(IOCSRRD_B)
3443     NODE_NAME_CASE(IOCSRRD_H)
3444     NODE_NAME_CASE(IOCSRRD_W)
3445     NODE_NAME_CASE(IOCSRRD_D)
3446     NODE_NAME_CASE(IOCSRWR_B)
3447     NODE_NAME_CASE(IOCSRWR_H)
3448     NODE_NAME_CASE(IOCSRWR_W)
3449     NODE_NAME_CASE(IOCSRWR_D)
3450     NODE_NAME_CASE(CPUCFG)
3451     NODE_NAME_CASE(MOVGR2FCSR)
3452     NODE_NAME_CASE(MOVFCSR2GR)
3453     NODE_NAME_CASE(CACOP_D)
3454     NODE_NAME_CASE(CACOP_W)
3455     NODE_NAME_CASE(VPICK_SEXT_ELT)
3456     NODE_NAME_CASE(VPICK_ZEXT_ELT)
3457     NODE_NAME_CASE(VREPLVE)
3458     NODE_NAME_CASE(VALL_ZERO)
3459     NODE_NAME_CASE(VANY_ZERO)
3460     NODE_NAME_CASE(VALL_NONZERO)
3461     NODE_NAME_CASE(VANY_NONZERO)
3462   }
3463 #undef NODE_NAME_CASE
3464   return nullptr;
3465 }
3466 
3467 //===----------------------------------------------------------------------===//
3468 //                     Calling Convention Implementation
3469 //===----------------------------------------------------------------------===//
3470 
3471 // Eight general-purpose registers a0-a7 are used for passing integer arguments,
3472 // with a0-a1 reused to return values. Generally, the GPRs are used to pass
3473 // fixed-point arguments, and also floating-point arguments when no FPR is
3474 // available or when the soft-float ABI is in use.
3475 const MCPhysReg ArgGPRs[] = {LoongArch::R4,  LoongArch::R5, LoongArch::R6,
3476                              LoongArch::R7,  LoongArch::R8, LoongArch::R9,
3477                              LoongArch::R10, LoongArch::R11};
3478 // Eight floating-point registers fa0-fa7 are used for passing floating-point
3479 // arguments, with fa0-fa1 also used to return values.
3480 const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
3481                                LoongArch::F3, LoongArch::F4, LoongArch::F5,
3482                                LoongArch::F6, LoongArch::F7};
3483 // FPR32 and FPR64 alias each other.
3484 const MCPhysReg ArgFPR64s[] = {
3485     LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
3486     LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
3487 
3488 const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
3489                             LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
3490                             LoongArch::VR6, LoongArch::VR7};
3491 
3492 const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
3493                             LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
3494                             LoongArch::XR6, LoongArch::XR7};
3495 
3496 // Pass a 2*GRLen argument that has been split into two GRLen values through
3497 // registers or the stack as necessary.
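     // For example, on LA64 an i128 value is split into two i64 halves: the first
     // half takes the next free GPR (or both halves go on the stack if none is
     // left), and the second half takes the following GPR or a stack slot.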
3498 static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
3499                                      CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
3500                                      unsigned ValNo2, MVT ValVT2, MVT LocVT2,
3501                                      ISD::ArgFlagsTy ArgFlags2) {
3502   unsigned GRLenInBytes = GRLen / 8;
3503   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3504     // At least one half can be passed via register.
3505     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
3506                                      VA1.getLocVT(), CCValAssign::Full));
3507   } else {
3508     // Both halves must be passed on the stack, with proper alignment.
3509     Align StackAlign =
3510         std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
3511     State.addLoc(
3512         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
3513                             State.AllocateStack(GRLenInBytes, StackAlign),
3514                             VA1.getLocVT(), CCValAssign::Full));
3515     State.addLoc(CCValAssign::getMem(
3516         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3517         LocVT2, CCValAssign::Full));
3518     return false;
3519   }
3520   if (Register Reg = State.AllocateReg(ArgGPRs)) {
3521     // The second half can also be passed via register.
3522     State.addLoc(
3523         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
3524   } else {
3525     // The second half is passed via the stack, without additional alignment.
3526     State.addLoc(CCValAssign::getMem(
3527         ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
3528         LocVT2, CCValAssign::Full));
3529   }
3530   return false;
3531 }
3532 
3533 // Implements the LoongArch calling convention. Returns true upon failure.
3534 static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
3535                          unsigned ValNo, MVT ValVT,
3536                          CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
3537                          CCState &State, bool IsFixed, bool IsRet,
3538                          Type *OrigTy) {
3539   unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
3540   assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
3541   MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
3542   MVT LocVT = ValVT;
3543 
3544   // Any return value split into more than two values can't be returned
3545   // directly.
3546   if (IsRet && ValNo > 1)
3547     return true;
3548 
3549   // Floating-point values are passed in GPRs when the argument is variadic or
       // when no FPR is available.
3550   bool UseGPRForFloat = true;
3551 
3552   switch (ABI) {
3553   default:
3554     llvm_unreachable("Unexpected ABI");
3555   case LoongArchABI::ABI_ILP32S:
3556   case LoongArchABI::ABI_ILP32F:
3557   case LoongArchABI::ABI_LP64F:
3558     report_fatal_error("Unimplemented ABI");
3559     break;
3560   case LoongArchABI::ABI_ILP32D:
3561   case LoongArchABI::ABI_LP64D:
3562     UseGPRForFloat = !IsFixed;
3563     break;
3564   case LoongArchABI::ABI_LP64S:
3565     break;
3566   }
3567 
3568   // FPR32 and FPR64 alias each other.
3569   if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
3570     UseGPRForFloat = true;
3571 
3572   if (UseGPRForFloat && ValVT == MVT::f32) {
3573     LocVT = GRLenVT;
3574     LocInfo = CCValAssign::BCvt;
3575   } else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
3576     LocVT = MVT::i64;
3577     LocInfo = CCValAssign::BCvt;
3578   } else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
3579     // TODO: Handle passing f64 on LA32 with D feature.
3580     report_fatal_error("Passing f64 with GPR on LA32 is undefined");
3581   }
3582 
3583   // If this is a variadic argument, the LoongArch calling convention requires
3584   // that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
3585   // byte alignment. An aligned register should be used regardless of whether
3586   // the original argument was split during legalisation or not. The argument
3587   // will not be passed by registers if the original type is larger than
3588   // 2*GRLen, so the register alignment rule does not apply.
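         // For example, a variadic 16-byte-aligned 2*GRLen argument on LA64 (such as
         // an i128) starts at an even-numbered register in a0-a7; if the next free
         // register index is odd, one register is skipped.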
3589   unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
3590   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
3591       DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
3592     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
3593     // Skip 'odd' register if necessary.
3594     if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
3595       State.AllocateReg(ArgGPRs);
3596   }
3597 
3598   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
3599   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
3600       State.getPendingArgFlags();
3601 
3602   assert(PendingLocs.size() == PendingArgFlags.size() &&
3603          "PendingLocs and PendingArgFlags out of sync");
3604 
3605   // Split arguments might be passed indirectly, so keep track of the pending
3606   // values.
3607   if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
3608     LocVT = GRLenVT;
3609     LocInfo = CCValAssign::Indirect;
3610     PendingLocs.push_back(
3611         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3612     PendingArgFlags.push_back(ArgFlags);
3613     if (!ArgFlags.isSplitEnd()) {
3614       return false;
3615     }
3616   }
3617 
3618   // If the split argument only had two elements, it should be passed directly
3619   // in registers or on the stack.
3620   if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
3621       PendingLocs.size() <= 2) {
3622     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3623     // Apply the normal calling convention rules to the first half of the
3624     // split argument.
3625     CCValAssign VA = PendingLocs[0];
3626     ISD::ArgFlagsTy AF = PendingArgFlags[0];
3627     PendingLocs.clear();
3628     PendingArgFlags.clear();
3629     return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
3630                                     ArgFlags);
3631   }
3632 
3633   // Allocate to a register if possible, or else a stack slot.
3634   Register Reg;
3635   unsigned StoreSizeBytes = GRLen / 8;
3636   Align StackAlign = Align(GRLen / 8);
3637 
3638   if (ValVT == MVT::f32 && !UseGPRForFloat)
3639     Reg = State.AllocateReg(ArgFPR32s);
3640   else if (ValVT == MVT::f64 && !UseGPRForFloat)
3641     Reg = State.AllocateReg(ArgFPR64s);
3642   else if (ValVT.is128BitVector())
3643     Reg = State.AllocateReg(ArgVRs);
3644   else if (ValVT.is256BitVector())
3645     Reg = State.AllocateReg(ArgXRs);
3646   else
3647     Reg = State.AllocateReg(ArgGPRs);
3648 
3649   unsigned StackOffset =
3650       Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
3651 
3652   // If we reach this point and PendingLocs is non-empty, we must be at the
3653   // end of a split argument that must be passed indirectly.
3654   if (!PendingLocs.empty()) {
3655     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3656     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3657     for (auto &It : PendingLocs) {
3658       if (Reg)
3659         It.convertToReg(Reg);
3660       else
3661         It.convertToMem(StackOffset);
3662       State.addLoc(It);
3663     }
3664     PendingLocs.clear();
3665     PendingArgFlags.clear();
3666     return false;
3667   }
3668   assert((!UseGPRForFloat || LocVT == GRLenVT) &&
3669          "Expected a GRLenVT at this stage");
3670 
3671   if (Reg) {
3672     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3673     return false;
3674   }
3675 
3676   // When a floating-point value is passed on the stack, no bit-cast is needed.
3677   if (ValVT.isFloatingPoint()) {
3678     LocVT = ValVT;
3679     LocInfo = CCValAssign::Full;
3680   }
3681 
3682   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3683   return false;
3684 }
3685 
3686 void LoongArchTargetLowering::analyzeInputArgs(
3687     MachineFunction &MF, CCState &CCInfo,
3688     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
3689     LoongArchCCAssignFn Fn) const {
3690   FunctionType *FType = MF.getFunction().getFunctionType();
3691   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
3692     MVT ArgVT = Ins[i].VT;
3693     Type *ArgTy = nullptr;
3694     if (IsRet)
3695       ArgTy = FType->getReturnType();
3696     else if (Ins[i].isOrigArg())
3697       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3698     LoongArchABI::ABI ABI =
3699         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3700     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
3701            CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
3702       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
3703                         << '\n');
3704       llvm_unreachable("");
3705     }
3706   }
3707 }
3708 
3709 void LoongArchTargetLowering::analyzeOutputArgs(
3710     MachineFunction &MF, CCState &CCInfo,
3711     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3712     CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
3713   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3714     MVT ArgVT = Outs[i].VT;
3715     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3716     LoongArchABI::ABI ABI =
3717         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
3718     if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
3719            CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
3720       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
3721                         << "\n");
3722       llvm_unreachable("");
3723     }
3724   }
3725 }
3726 
3727 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3728 // values.
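// For an f32 value that was passed in a 64-bit GPR (LocVT == i64 with BCvt),
// MOVGR2FR_W_LA64 moves the low 32 bits of the GPR into an FPR; all other
// BCvt cases are handled with a plain BITCAST.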
3729 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3730                                    const CCValAssign &VA, const SDLoc &DL) {
3731   switch (VA.getLocInfo()) {
3732   default:
3733     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3734   case CCValAssign::Full:
3735   case CCValAssign::Indirect:
3736     break;
3737   case CCValAssign::BCvt:
3738     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3739       Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
3740     else
3741       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3742     break;
3743   }
3744   return Val;
3745 }
3746 
3747 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3748                                 const CCValAssign &VA, const SDLoc &DL,
3749                                 const LoongArchTargetLowering &TLI) {
3750   MachineFunction &MF = DAG.getMachineFunction();
3751   MachineRegisterInfo &RegInfo = MF.getRegInfo();
3752   EVT LocVT = VA.getLocVT();
3753   SDValue Val;
3754   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3755   Register VReg = RegInfo.createVirtualRegister(RC);
3756   RegInfo.addLiveIn(VA.getLocReg(), VReg);
3757   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3758 
3759   return convertLocVTToValVT(DAG, Val, VA, DL);
3760 }
3761 
3762 // The caller is responsible for loading the full value if the argument is
3763 // passed with CCValAssign::Indirect.
3764 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3765                                 const CCValAssign &VA, const SDLoc &DL) {
3766   MachineFunction &MF = DAG.getMachineFunction();
3767   MachineFrameInfo &MFI = MF.getFrameInfo();
3768   EVT ValVT = VA.getValVT();
3769   int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
3770                                  /*IsImmutable=*/true);
3771   SDValue FIN = DAG.getFrameIndex(
3772       FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
3773 
3774   ISD::LoadExtType ExtType;
3775   switch (VA.getLocInfo()) {
3776   default:
3777     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3778   case CCValAssign::Full:
3779   case CCValAssign::Indirect:
3780   case CCValAssign::BCvt:
3781     ExtType = ISD::NON_EXTLOAD;
3782     break;
3783   }
3784   return DAG.getExtLoad(
3785       ExtType, DL, VA.getLocVT(), Chain, FIN,
3786       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3787 }
3788 
3789 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3790                                    const CCValAssign &VA, const SDLoc &DL) {
3791   EVT LocVT = VA.getLocVT();
3792 
3793   switch (VA.getLocInfo()) {
3794   default:
3795     llvm_unreachable("Unexpected CCValAssign::LocInfo");
3796   case CCValAssign::Full:
3797     break;
3798   case CCValAssign::BCvt:
3799     if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3800       Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
3801     else
3802       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3803     break;
3804   }
3805   return Val;
3806 }
3807 
3808 static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3809                              CCValAssign::LocInfo LocInfo,
3810                              ISD::ArgFlagsTy ArgFlags, CCState &State) {
3811   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3812     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
3813     //                        s0    s1  s2  s3  s4  s5  s6  s7  s8
3814     static const MCPhysReg GPRList[] = {
3815         LoongArch::R23, LoongArch::R24, LoongArch::R25,
3816         LoongArch::R26, LoongArch::R27, LoongArch::R28,
3817         LoongArch::R29, LoongArch::R30, LoongArch::R31};
3818     if (unsigned Reg = State.AllocateReg(GPRList)) {
3819       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3820       return false;
3821     }
3822   }
3823 
3824   if (LocVT == MVT::f32) {
3825     // Pass in STG registers: F1, F2, F3, F4
3826     //                        fs0,fs1,fs2,fs3
3827     static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
3828                                           LoongArch::F26, LoongArch::F27};
3829     if (unsigned Reg = State.AllocateReg(FPR32List)) {
3830       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3831       return false;
3832     }
3833   }
3834 
3835   if (LocVT == MVT::f64) {
3836     // Pass in STG registers: D1, D2, D3, D4
3837     //                        fs4,fs5,fs6,fs7
3838     static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
3839                                           LoongArch::F30_64, LoongArch::F31_64};
3840     if (unsigned Reg = State.AllocateReg(FPR64List)) {
3841       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3842       return false;
3843     }
3844   }
3845 
3846   report_fatal_error("No registers left in GHC calling convention");
3847   return true;
3848 }
3849 
3850 // Transform physical registers into virtual registers.
3851 SDValue LoongArchTargetLowering::LowerFormalArguments(
3852     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3853     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3854     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3855 
3856   MachineFunction &MF = DAG.getMachineFunction();
3857 
3858   switch (CallConv) {
3859   default:
3860     llvm_unreachable("Unsupported calling convention");
3861   case CallingConv::C:
3862   case CallingConv::Fast:
3863     break;
3864   case CallingConv::GHC:
3865     if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
3866         !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
3867       report_fatal_error(
3868           "GHC calling convention requires the F and D extensions");
3869   }
3870 
3871   EVT PtrVT = getPointerTy(DAG.getDataLayout());
3872   MVT GRLenVT = Subtarget.getGRLenVT();
3873   unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
3874   // Used with varargs to accumulate store chains.
3875   std::vector<SDValue> OutChains;
3876 
3877   // Assign locations to all of the incoming arguments.
3878   SmallVector<CCValAssign> ArgLocs;
3879   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3880 
3881   if (CallConv == CallingConv::GHC)
3882     CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
3883   else
3884     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
3885 
3886   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3887     CCValAssign &VA = ArgLocs[i];
3888     SDValue ArgValue;
3889     if (VA.isRegLoc())
3890       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3891     else
3892       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3893     if (VA.getLocInfo() == CCValAssign::Indirect) {
3894       // If the original argument was split and passed by reference, we need to
3895       // load all parts of it here (using the same address).
3896       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3897                                    MachinePointerInfo()));
3898       unsigned ArgIndex = Ins[i].OrigArgIndex;
3899       unsigned ArgPartOffset = Ins[i].PartOffset;
3900       assert(ArgPartOffset == 0);
3901       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3902         CCValAssign &PartVA = ArgLocs[i + 1];
3903         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
3904         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
3905         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
3906         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3907                                      MachinePointerInfo()));
3908         ++i;
3909       }
3910       continue;
3911     }
3912     InVals.push_back(ArgValue);
3913   }
3914 
3915   if (IsVarArg) {
3916     ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
3917     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3918     const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
3919     MachineFrameInfo &MFI = MF.getFrameInfo();
3920     MachineRegisterInfo &RegInfo = MF.getRegInfo();
3921     auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
3922 
3923     // Offset of the first variable argument from stack pointer, and size of
3924     // the vararg save area. For now, the varargs save area is either zero or
3925     // large enough to hold a0-a7.
3926     int VaArgOffset, VarArgsSaveSize;
3927 
3928     // If all registers are allocated, then all varargs must be passed on the
3929     // stack and we don't need to save any argregs.
3930     if (ArgRegs.size() == Idx) {
3931       VaArgOffset = CCInfo.getStackSize();
3932       VarArgsSaveSize = 0;
3933     } else {
3934       VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
3935       VaArgOffset = -VarArgsSaveSize;
3936     }
3937 
3938     // Record the frame index of the first variable argument,
3939     // which is needed by VASTART.
3940     int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3941     LoongArchFI->setVarArgsFrameIndex(FI);
3942 
3943     // If saving an odd number of registers then create an extra stack slot to
3944     // ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
3945     // offsets to even-numbered registers remain 2*GRLen-aligned.
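    // For example, on LA64 with three fixed GPR arguments Idx == 3, so a3-a7
    // (40 bytes) are saved and one extra 8-byte slot keeps the 48-byte save
    // area 16-byte aligned.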
3946     if (Idx % 2) {
3947       MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
3948                             true);
3949       VarArgsSaveSize += GRLenInBytes;
3950     }
3951 
3952     // Copy the integer registers that may have been used for passing varargs
3953     // to the vararg save area.
3954     for (unsigned I = Idx; I < ArgRegs.size();
3955          ++I, VaArgOffset += GRLenInBytes) {
3956       const Register Reg = RegInfo.createVirtualRegister(RC);
3957       RegInfo.addLiveIn(ArgRegs[I], Reg);
3958       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
3959       FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
3960       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3961       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3962                                    MachinePointerInfo::getFixedStack(MF, FI));
3963       cast<StoreSDNode>(Store.getNode())
3964           ->getMemOperand()
3965           ->setValue((Value *)nullptr);
3966       OutChains.push_back(Store);
3967     }
3968     LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
3969   }
3970 
3971   // All stores are grouped in one node to allow the matching between
3972   // the size of Ins and InVals. This only happens for vararg functions.
3973   if (!OutChains.empty()) {
3974     OutChains.push_back(Chain);
3975     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3976   }
3977 
3978   return Chain;
3979 }
3980 
3981 bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3982   return CI->isTailCall();
3983 }
3984 
3985 // Check if the return value is used as only a return value, as otherwise
3986 // we can't perform a tail-call.
3987 bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
3988                                                  SDValue &Chain) const {
3989   if (N->getNumValues() != 1)
3990     return false;
3991   if (!N->hasNUsesOfValue(1, 0))
3992     return false;
3993 
3994   SDNode *Copy = *N->use_begin();
3995   if (Copy->getOpcode() != ISD::CopyToReg)
3996     return false;
3997 
3998   // If the ISD::CopyToReg has a glue operand, we conservatively assume it
3999   // isn't safe to perform a tail call.
4000   if (Copy->getGluedNode())
4001     return false;
4002 
4003   // The copy must be used by a LoongArchISD::RET, and nothing else.
4004   bool HasRet = false;
4005   for (SDNode *Node : Copy->uses()) {
4006     if (Node->getOpcode() != LoongArchISD::RET)
4007       return false;
4008     HasRet = true;
4009   }
4010 
4011   if (!HasRet)
4012     return false;
4013 
4014   Chain = Copy->getOperand(0);
4015   return true;
4016 }
4017 
4018 // Check whether the call is eligible for tail call optimization.
4019 bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
4020     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
4021     const SmallVectorImpl<CCValAssign> &ArgLocs) const {
4022 
4023   auto CalleeCC = CLI.CallConv;
4024   auto &Outs = CLI.Outs;
4025   auto &Caller = MF.getFunction();
4026   auto CallerCC = Caller.getCallingConv();
4027 
4028   // Do not tail call opt if the stack is used to pass parameters.
4029   if (CCInfo.getStackSize() != 0)
4030     return false;
4031 
4032   // Do not tail call opt if any parameters need to be passed indirectly.
4033   for (auto &VA : ArgLocs)
4034     if (VA.getLocInfo() == CCValAssign::Indirect)
4035       return false;
4036 
4037   // Do not tail call opt if either caller or callee uses struct return
4038   // semantics.
4039   auto IsCallerStructRet = Caller.hasStructRetAttr();
4040   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
4041   if (IsCallerStructRet || IsCalleeStructRet)
4042     return false;
4043 
4044   // Do not tail call opt if either the callee or caller has a byval argument.
4045   for (auto &Arg : Outs)
4046     if (Arg.Flags.isByVal())
4047       return false;
4048 
4049   // The callee has to preserve all registers the caller needs to preserve.
4050   const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
4051   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4052   if (CalleeCC != CallerCC) {
4053     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4054     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4055       return false;
4056   }
4057   return true;
4058 }
4059 
4060 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
4061   return DAG.getDataLayout().getPrefTypeAlign(
4062       VT.getTypeForEVT(*DAG.getContext()));
4063 }
4064 
4065 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
4066 // and output parameter nodes.
4067 SDValue
4068 LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
4069                                    SmallVectorImpl<SDValue> &InVals) const {
4070   SelectionDAG &DAG = CLI.DAG;
4071   SDLoc &DL = CLI.DL;
4072   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
4073   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
4074   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
4075   SDValue Chain = CLI.Chain;
4076   SDValue Callee = CLI.Callee;
4077   CallingConv::ID CallConv = CLI.CallConv;
4078   bool IsVarArg = CLI.IsVarArg;
4079   EVT PtrVT = getPointerTy(DAG.getDataLayout());
4080   MVT GRLenVT = Subtarget.getGRLenVT();
4081   bool &IsTailCall = CLI.IsTailCall;
4082 
4083   MachineFunction &MF = DAG.getMachineFunction();
4084 
4085   // Analyze the operands of the call, assigning locations to each operand.
4086   SmallVector<CCValAssign> ArgLocs;
4087   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
4088 
4089   if (CallConv == CallingConv::GHC)
4090     ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
4091   else
4092     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
4093 
4094   // Check if it's really possible to do a tail call.
4095   if (IsTailCall)
4096     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
4097 
4098   if (IsTailCall)
4099     ++NumTailCalls;
4100   else if (CLI.CB && CLI.CB->isMustTailCall())
4101     report_fatal_error("failed to perform tail call elimination on a call "
4102                        "site marked musttail");
4103 
4104   // Get a count of how many bytes are to be pushed on the stack.
4105   unsigned NumBytes = ArgCCInfo.getStackSize();
4106 
4107   // Create local copies for byval args.
4108   SmallVector<SDValue> ByValArgs;
4109   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4110     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4111     if (!Flags.isByVal())
4112       continue;
4113 
4114     SDValue Arg = OutVals[i];
4115     unsigned Size = Flags.getByValSize();
4116     Align Alignment = Flags.getNonZeroByValAlign();
4117 
4118     int FI =
4119         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
4120     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4121     SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
4122 
4123     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
4124                           /*IsVolatile=*/false,
4125                           /*AlwaysInline=*/false, /*isTailCall=*/IsTailCall,
4126                           MachinePointerInfo(), MachinePointerInfo());
4127     ByValArgs.push_back(FIPtr);
4128   }
4129 
4130   if (!IsTailCall)
4131     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
4132 
4133   // Copy argument values to their designated locations.
4134   SmallVector<std::pair<Register, SDValue>> RegsToPass;
4135   SmallVector<SDValue> MemOpChains;
4136   SDValue StackPtr;
4137   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
4138     CCValAssign &VA = ArgLocs[i];
4139     SDValue ArgValue = OutVals[i];
4140     ISD::ArgFlagsTy Flags = Outs[i].Flags;
4141 
4142     // Promote the value if needed.
4143     // For now, only handle fully promoted and indirect arguments.
4144     if (VA.getLocInfo() == CCValAssign::Indirect) {
4145       // Store the argument in a stack slot and pass its address.
4146       Align StackAlign =
4147           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
4148                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
4149       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
4150       // If the original argument was split and passed by reference, we need to
4151       // store the required parts of it here (and pass just one address).
4152       unsigned ArgIndex = Outs[i].OrigArgIndex;
4153       unsigned ArgPartOffset = Outs[i].PartOffset;
4154       assert(ArgPartOffset == 0);
4155       // Calculate the total size to store. We don't have access to what we're
4156       // actually storing other than performing the loop and collecting the
4157       // info.
4158       SmallVector<std::pair<SDValue, SDValue>> Parts;
4159       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
4160         SDValue PartValue = OutVals[i + 1];
4161         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
4162         SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
4163         EVT PartVT = PartValue.getValueType();
4164 
4165         StoredSize += PartVT.getStoreSize();
4166         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
4167         Parts.push_back(std::make_pair(PartValue, Offset));
4168         ++i;
4169       }
4170       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
4171       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4172       MemOpChains.push_back(
4173           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
4174                        MachinePointerInfo::getFixedStack(MF, FI)));
4175       for (const auto &Part : Parts) {
4176         SDValue PartValue = Part.first;
4177         SDValue PartOffset = Part.second;
4178         SDValue Address =
4179             DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
4180         MemOpChains.push_back(
4181             DAG.getStore(Chain, DL, PartValue, Address,
4182                          MachinePointerInfo::getFixedStack(MF, FI)));
4183       }
4184       ArgValue = SpillSlot;
4185     } else {
4186       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
4187     }
4188 
4189     // Use local copy if it is a byval arg.
4190     if (Flags.isByVal())
4191       ArgValue = ByValArgs[j++];
4192 
4193     if (VA.isRegLoc()) {
4194       // Queue up the argument copies and emit them at the end.
4195       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
4196     } else {
4197       assert(VA.isMemLoc() && "Argument not register or memory");
4198       assert(!IsTailCall && "Tail call not allowed if stack is used "
4199                             "for passing parameters");
4200 
4201       // Work out the address of the stack slot.
4202       if (!StackPtr.getNode())
4203         StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
4204       SDValue Address =
4205           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
4206                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
4207 
4208       // Emit the store.
4209       MemOpChains.push_back(
4210           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
4211     }
4212   }
4213 
4214   // Join the stores, which are independent of one another.
4215   if (!MemOpChains.empty())
4216     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4217 
4218   SDValue Glue;
4219 
4220   // Build a sequence of copy-to-reg nodes, chained and glued together.
4221   for (auto &Reg : RegsToPass) {
4222     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
4223     Glue = Chain.getValue(1);
4224   }
4225 
4226   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
4227   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
4228   // split it and then direct call can be matched by PseudoCALL.
4229   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
4230     const GlobalValue *GV = S->getGlobal();
4231     unsigned OpFlags =
4232         getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV)
4233             ? LoongArchII::MO_CALL
4234             : LoongArchII::MO_CALL_PLT;
4235     Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
4236   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4237     unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(
4238                            *MF.getFunction().getParent(), nullptr)
4239                            ? LoongArchII::MO_CALL
4240                            : LoongArchII::MO_CALL_PLT;
4241     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
4242   }
4243 
4244   // The first call operand is the chain and the second is the target address.
4245   SmallVector<SDValue> Ops;
4246   Ops.push_back(Chain);
4247   Ops.push_back(Callee);
4248 
4249   // Add argument registers to the end of the list so that they are
4250   // known live into the call.
4251   for (auto &Reg : RegsToPass)
4252     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
4253 
4254   if (!IsTailCall) {
4255     // Add a register mask operand representing the call-preserved registers.
4256     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4257     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
4258     assert(Mask && "Missing call preserved mask for calling convention");
4259     Ops.push_back(DAG.getRegisterMask(Mask));
4260   }
4261 
4262   // Glue the call to the argument copies, if any.
4263   if (Glue.getNode())
4264     Ops.push_back(Glue);
4265 
4266   // Emit the call.
4267   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4268   unsigned Op;
4269   switch (DAG.getTarget().getCodeModel()) {
4270   default:
4271     report_fatal_error("Unsupported code model");
4272   case CodeModel::Small:
4273     Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
4274     break;
4275   case CodeModel::Medium:
4276     assert(Subtarget.is64Bit() && "Medium code model requires LA64");
4277     Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
4278     break;
4279   case CodeModel::Large:
4280     assert(Subtarget.is64Bit() && "Large code model requires LA64");
4281     Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
4282     break;
4283   }
4284 
4285   if (IsTailCall) {
4286     MF.getFrameInfo().setHasTailCall();
4287     SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
4288     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
4289     return Ret;
4290   }
4291 
4292   Chain = DAG.getNode(Op, DL, NodeTys, Ops);
4293   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4294   Glue = Chain.getValue(1);
4295 
4296   // Mark the end of the call, which is glued to the call itself.
4297   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
4298   Glue = Chain.getValue(1);
4299 
4300   // Assign locations to each value returned by this call.
4301   SmallVector<CCValAssign> RVLocs;
4302   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
4303   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
4304 
4305   // Copy all of the result registers out of their specified physreg.
4306   for (auto &VA : RVLocs) {
4307     // Copy the value out.
4308     SDValue RetValue =
4309         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
4310     // Glue the RetValue to the end of the call sequence.
4311     Chain = RetValue.getValue(1);
4312     Glue = RetValue.getValue(2);
4313 
4314     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
4315 
4316     InVals.push_back(RetValue);
4317   }
4318 
4319   return Chain;
4320 }
4321 
4322 bool LoongArchTargetLowering::CanLowerReturn(
4323     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
4324     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4325   SmallVector<CCValAssign> RVLocs;
4326   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
4327 
4328   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4329     LoongArchABI::ABI ABI =
4330         MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4331     if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
4332                      Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
4333                      nullptr))
4334       return false;
4335   }
4336   return true;
4337 }
4338 
4339 SDValue LoongArchTargetLowering::LowerReturn(
4340     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
4341     const SmallVectorImpl<ISD::OutputArg> &Outs,
4342     const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
4343     SelectionDAG &DAG) const {
4344   // Stores the assignment of the return value to a location.
4345   SmallVector<CCValAssign> RVLocs;
4346 
4347   // Info about the registers and stack slot.
4348   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
4349                  *DAG.getContext());
4350 
4351   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
4352                     nullptr, CC_LoongArch);
4353   if (CallConv == CallingConv::GHC && !RVLocs.empty())
4354     report_fatal_error("GHC functions return void only");
4355   SDValue Glue;
4356   SmallVector<SDValue, 4> RetOps(1, Chain);
4357 
4358   // Copy the result values into the output registers.
4359   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
4360     CCValAssign &VA = RVLocs[i];
4361     assert(VA.isRegLoc() && "Can only return in registers!");
4362 
4363     // Handle a 'normal' return.
4364     SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
4365     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4366 
4367     // Guarantee that all emitted copies are stuck together.
4368     Glue = Chain.getValue(1);
4369     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4370   }
4371 
4372   RetOps[0] = Chain; // Update chain.
4373 
4374   // Add the glue node if we have it.
4375   if (Glue.getNode())
4376     RetOps.push_back(Glue);
4377 
4378   return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
4379 }
4380 
4381 bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
4382                                            bool ForCodeSize) const {
4383   // TODO: Maybe need more checks here after vector extension is supported.
4384   if (VT == MVT::f32 && !Subtarget.hasBasicF())
4385     return false;
4386   if (VT == MVT::f64 && !Subtarget.hasBasicD())
4387     return false;
4388   return (Imm.isZero() || Imm.isExactlyValue(+1.0));
4389 }
4390 
4391 bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
4392   return true;
4393 }
4394 
4395 bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
4396   return true;
4397 }
4398 
4399 bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
4400     const Instruction *I) const {
4401   if (!Subtarget.is64Bit())
4402     return isa<LoadInst>(I) || isa<StoreInst>(I);
4403 
4404   if (isa<LoadInst>(I))
4405     return true;
4406 
4407   // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
4408   // require fences because we can use amswap_db.[w/d].
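  // Sub-word (i8/i16) atomic stores have no amswap_db counterpart, so fences
  // are still required for them.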
4409   if (isa<StoreInst>(I)) {
4410     unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth();
4411     return (Size == 8 || Size == 16);
4412   }
4413 
4414   return false;
4415 }
4416 
4417 EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
4418                                                 LLVMContext &Context,
4419                                                 EVT VT) const {
4420   if (!VT.isVector())
4421     return getPointerTy(DL);
4422   return VT.changeVectorElementTypeToInteger();
4423 }
4424 
4425 bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
4426   // TODO: Support vectors.
4427   return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
4428 }
4429 
4430 bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4431                                                  const CallInst &I,
4432                                                  MachineFunction &MF,
4433                                                  unsigned Intrinsic) const {
4434   switch (Intrinsic) {
4435   default:
4436     return false;
4437   case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
4438   case Intrinsic::loongarch_masked_atomicrmw_add_i32:
4439   case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
4440   case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
4441     Info.opc = ISD::INTRINSIC_W_CHAIN;
4442     Info.memVT = MVT::i32;
4443     Info.ptrVal = I.getArgOperand(0);
4444     Info.offset = 0;
4445     Info.align = Align(4);
4446     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
4447                  MachineMemOperand::MOVolatile;
4448     return true;
4449     // TODO: Add more Intrinsics later.
4450   }
4451 }
4452 
4453 TargetLowering::AtomicExpansionKind
4454 LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
4455   // TODO: Add more AtomicRMWInst that needs to be extended.
4456 
4457   // Since floating-point operation requires a non-trivial set of data
4458   // operations, use CmpXChg to expand.
4459   if (AI->isFloatingPointOperation() ||
4460       AI->getOperation() == AtomicRMWInst::UIncWrap ||
4461       AI->getOperation() == AtomicRMWInst::UDecWrap)
4462     return AtomicExpansionKind::CmpXChg;
4463 
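  // i8/i16 operations have no native atomic memory instruction, so they are
  // expanded to a masked LL/SC loop on the containing aligned word via the
  // loongarch_masked_atomicrmw_* intrinsics.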
4464   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
4465   if (Size == 8 || Size == 16)
4466     return AtomicExpansionKind::MaskedIntrinsic;
4467   return AtomicExpansionKind::None;
4468 }
4469 
4470 static Intrinsic::ID
4471 getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
4472                                     AtomicRMWInst::BinOp BinOp) {
4473   if (GRLen == 64) {
4474     switch (BinOp) {
4475     default:
4476       llvm_unreachable("Unexpected AtomicRMW BinOp");
4477     case AtomicRMWInst::Xchg:
4478       return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
4479     case AtomicRMWInst::Add:
4480       return Intrinsic::loongarch_masked_atomicrmw_add_i64;
4481     case AtomicRMWInst::Sub:
4482       return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
4483     case AtomicRMWInst::Nand:
4484       return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
4485     case AtomicRMWInst::UMax:
4486       return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
4487     case AtomicRMWInst::UMin:
4488       return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
4489     case AtomicRMWInst::Max:
4490       return Intrinsic::loongarch_masked_atomicrmw_max_i64;
4491     case AtomicRMWInst::Min:
4492       return Intrinsic::loongarch_masked_atomicrmw_min_i64;
4493       // TODO: support other AtomicRMWInst.
4494     }
4495   }
4496 
4497   if (GRLen == 32) {
4498     switch (BinOp) {
4499     default:
4500       llvm_unreachable("Unexpected AtomicRMW BinOp");
4501     case AtomicRMWInst::Xchg:
4502       return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
4503     case AtomicRMWInst::Add:
4504       return Intrinsic::loongarch_masked_atomicrmw_add_i32;
4505     case AtomicRMWInst::Sub:
4506       return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
4507     case AtomicRMWInst::Nand:
4508       return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
4509       // TODO: support other AtomicRMWInst.
4510     }
4511   }
4512 
4513   llvm_unreachable("Unexpected GRLen\n");
4514 }
4515 
4516 TargetLowering::AtomicExpansionKind
4517 LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
4518     AtomicCmpXchgInst *CI) const {
4519   unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
4520   if (Size == 8 || Size == 16)
4521     return AtomicExpansionKind::MaskedIntrinsic;
4522   return AtomicExpansionKind::None;
4523 }
4524 
4525 Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
4526     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
4527     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
4528   AtomicOrdering FailOrd = CI->getFailureOrdering();
4529   Value *FailureOrdering =
4530       Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
4531 
4532   // TODO: Support cmpxchg on LA32.
4533   Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
4534   CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
4535   NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
4536   Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4537   Type *Tys[] = {AlignedAddr->getType()};
4538   Function *MaskedCmpXchg =
4539       Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
4540   Value *Result = Builder.CreateCall(
4541       MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
4542   Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4543   return Result;
4544 }
4545 
4546 Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
4547     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
4548     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
4549   // In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
4550   // the atomic instruction with an AtomicRMWInst::And/Or with appropriate
4551   // mask, as this produces better code than the LL/SC loop emitted by
4552   // int_loongarch_masked_atomicrmw_xchg.
4553   if (AI->getOperation() == AtomicRMWInst::Xchg &&
4554       isa<ConstantInt>(AI->getValOperand())) {
4555     ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
4556     if (CVal->isZero())
4557       return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
4558                                      Builder.CreateNot(Mask, "Inv_Mask"),
4559                                      AI->getAlign(), Ord);
4560     if (CVal->isMinusOne())
4561       return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
4562                                      AI->getAlign(), Ord);
4563   }
4564 
4565   unsigned GRLen = Subtarget.getGRLen();
4566   Value *Ordering =
4567       Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
4568   Type *Tys[] = {AlignedAddr->getType()};
4569   Function *LlwOpScwLoop = Intrinsic::getDeclaration(
4570       AI->getModule(),
4571       getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
4572 
4573   if (GRLen == 64) {
4574     Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
4575     Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
4576     ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
4577   }
4578 
4579   Value *Result;
4580 
4581   // Must pass the shift amount needed to sign extend the loaded value prior
4582   // to performing a signed comparison for min/max. ShiftAmt is the number of
4583   // bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
4584   // is the number of bits to left+right shift the value in order to
4585   // sign-extend.
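  // For example, an i8 value at byte offset 1 of its aligned 64-bit word has
  // ValWidth == 8 and ShiftAmt == 8, so the shift amount passed to the
  // intrinsic is 64 - 8 - 8 == 48.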
4586   if (AI->getOperation() == AtomicRMWInst::Min ||
4587       AI->getOperation() == AtomicRMWInst::Max) {
4588     const DataLayout &DL = AI->getModule()->getDataLayout();
4589     unsigned ValWidth =
4590         DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
4591     Value *SextShamt =
4592         Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
4593     Result = Builder.CreateCall(LlwOpScwLoop,
4594                                 {AlignedAddr, Incr, Mask, SextShamt, Ordering});
4595   } else {
4596     Result =
4597         Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
4598   }
4599 
4600   if (GRLen == 64)
4601     Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
4602   return Result;
4603 }
4604 
4605 bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
4606     const MachineFunction &MF, EVT VT) const {
4607   VT = VT.getScalarType();
4608 
4609   if (!VT.isSimple())
4610     return false;
4611 
4612   switch (VT.getSimpleVT().SimpleTy) {
4613   case MVT::f32:
4614   case MVT::f64:
4615     return true;
4616   default:
4617     break;
4618   }
4619 
4620   return false;
4621 }
4622 
4623 Register LoongArchTargetLowering::getExceptionPointerRegister(
4624     const Constant *PersonalityFn) const {
4625   return LoongArch::R4;
4626 }
4627 
4628 Register LoongArchTargetLowering::getExceptionSelectorRegister(
4629     const Constant *PersonalityFn) const {
4630   return LoongArch::R5;
4631 }
4632 
4633 //===----------------------------------------------------------------------===//
4634 //                           LoongArch Inline Assembly Support
4635 //===----------------------------------------------------------------------===//
4636 
4637 LoongArchTargetLowering::ConstraintType
4638 LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
4639   // LoongArch specific constraints in GCC: config/loongarch/constraints.md
4640   //
4641   // 'f':  A floating-point register (if available).
4642   // 'k':  A memory operand whose address is formed by a base register and
4643   //       (optionally scaled) index register.
4644   // 'l':  A signed 16-bit constant.
4645   // 'm':  A memory operand whose address is formed by a base register and
4646   //       offset that is suitable for use in instructions with the same
4647   //       addressing mode as st.w and ld.w.
4648   // 'I':  A signed 12-bit constant (for arithmetic instructions).
4649   // 'J':  Integer zero.
4650   // 'K':  An unsigned 12-bit constant (for logic instructions).
4651   // "ZB": An address that is held in a general-purpose register. The offset is
4652   //       zero.
4653   // "ZC": A memory operand whose address is formed by a base register and
4654   //       offset that is suitable for use in instructions with the same
4655   //       addressing mode as ll.w and sc.w.
4656   if (Constraint.size() == 1) {
4657     switch (Constraint[0]) {
4658     default:
4659       break;
4660     case 'f':
4661       return C_RegisterClass;
4662     case 'l':
4663     case 'I':
4664     case 'J':
4665     case 'K':
4666       return C_Immediate;
4667     case 'k':
4668       return C_Memory;
4669     }
4670   }
4671 
4672   if (Constraint == "ZC" || Constraint == "ZB")
4673     return C_Memory;
4674 
4675   // 'm' is handled here.
4676   return TargetLowering::getConstraintType(Constraint);
4677 }
4678 
4679 InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
4680     StringRef ConstraintCode) const {
4681   return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
4682       .Case("k", InlineAsm::ConstraintCode::k)
4683       .Case("ZB", InlineAsm::ConstraintCode::ZB)
4684       .Case("ZC", InlineAsm::ConstraintCode::ZC)
4685       .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
4686 }
4687 
4688 std::pair<unsigned, const TargetRegisterClass *>
4689 LoongArchTargetLowering::getRegForInlineAsmConstraint(
4690     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
4691   // First, see if this is a constraint that directly corresponds to a LoongArch
4692   // register class.
4693   if (Constraint.size() == 1) {
4694     switch (Constraint[0]) {
4695     case 'r':
4696       // TODO: Support fixed vectors up to GRLen?
4697       if (VT.isVector())
4698         break;
4699       return std::make_pair(0U, &LoongArch::GPRRegClass);
4700     case 'f':
4701       if (Subtarget.hasBasicF() && VT == MVT::f32)
4702         return std::make_pair(0U, &LoongArch::FPR32RegClass);
4703       if (Subtarget.hasBasicD() && VT == MVT::f64)
4704         return std::make_pair(0U, &LoongArch::FPR64RegClass);
4705       if (Subtarget.hasExtLSX() &&
4706           TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
4707         return std::make_pair(0U, &LoongArch::LSX128RegClass);
4708       if (Subtarget.hasExtLASX() &&
4709           TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
4710         return std::make_pair(0U, &LoongArch::LASX256RegClass);
4711       break;
4712     default:
4713       break;
4714     }
4715   }
4716 
4717   // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
4718   // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
4719   // constraints while the official register name is prefixed with a '$'. So we
4720   // clip the '$' from the original constraint string (e.g. {$r0} to {r0}.)
4721   // before it is parsed. And TargetLowering::getRegForInlineAsmConstraint is
4722   // case insensitive, so no need to convert the constraint to upper case here.
4723   //
4724   // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
4725   // decode the usage of register name aliases into their official names. And
4726   // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
4727   // official register names.
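  // For example, "{$f0}" is rewritten to "{f0}" below; if the D extension is
  // available and the operand type is f64 (or MVT::Other), the matched F0 is
  // then widened to F0_64 in FPR64RegClass.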
4728   if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
4729       Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
4730     bool IsFP = Constraint[2] == 'f';
4731     std::pair<StringRef, StringRef> Temp = Constraint.split('$');
4732     std::pair<unsigned, const TargetRegisterClass *> R;
4733     R = TargetLowering::getRegForInlineAsmConstraint(
4734         TRI, join_items("", Temp.first, Temp.second), VT);
4735     // Match those names to the widest floating point register type available.
4736     if (IsFP) {
4737       unsigned RegNo = R.first;
4738       if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
4739         if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
4740           unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
4741           return std::make_pair(DReg, &LoongArch::FPR64RegClass);
4742         }
4743       }
4744     }
4745     return R;
4746   }
4747 
4748   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
4749 }
4750 
4751 void LoongArchTargetLowering::LowerAsmOperandForConstraint(
4752     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
4753     SelectionDAG &DAG) const {
4754   // Currently only support length 1 constraints.
4755   if (Constraint.size() == 1) {
4756     switch (Constraint[0]) {
4757     case 'l':
4758       // Validate & create a 16-bit signed immediate operand.
4759       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4760         uint64_t CVal = C->getSExtValue();
4761         if (isInt<16>(CVal))
4762           Ops.push_back(
4763               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4764       }
4765       return;
4766     case 'I':
4767       // Validate & create a 12-bit signed immediate operand.
4768       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4769         uint64_t CVal = C->getSExtValue();
4770         if (isInt<12>(CVal))
4771           Ops.push_back(
4772               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4773       }
4774       return;
4775     case 'J':
4776       // Validate & create an integer zero operand.
4777       if (auto *C = dyn_cast<ConstantSDNode>(Op))
4778         if (C->getZExtValue() == 0)
4779           Ops.push_back(
4780               DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
4781       return;
4782     case 'K':
4783       // Validate & create a 12-bit unsigned immediate operand.
4784       if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4785         uint64_t CVal = C->getZExtValue();
4786         if (isUInt<12>(CVal))
4787           Ops.push_back(
4788               DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
4789       }
4790       return;
4791     default:
4792       break;
4793     }
4794   }
4795   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
4796 }
4797 
4798 #define GET_REGISTER_MATCHER
4799 #include "LoongArchGenAsmMatcher.inc"
4800 
4801 Register
4802 LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
4803                                            const MachineFunction &MF) const {
4804   std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
4805   std::string NewRegName = Name.second.str();
4806   Register Reg = MatchRegisterAltName(NewRegName);
4807   if (Reg == LoongArch::NoRegister)
4808     Reg = MatchRegisterName(NewRegName);
4809   if (Reg == LoongArch::NoRegister)
4810     report_fatal_error(
4811         Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
4812   BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
4813   if (!ReservedRegs.test(Reg))
4814     report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
4815                              StringRef(RegName) + "\"."));
4816   return Reg;
4817 }
4818 
4819 bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
4820                                                      EVT VT, SDValue C) const {
4821   // TODO: Support vectors.
4822   if (!VT.isScalarInteger())
4823     return false;
4824 
4825   // Omit the optimization if the data size exceeds GRLen.
4826   if (VT.getSizeInBits() > Subtarget.getGRLen())
4827     return false;
4828 
4829   if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
4830     const APInt &Imm = ConstNode->getAPIntValue();
4831     // Break MUL into (SLLI + ADD/SUB) or ALSL.
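    // For example, MUL x, 9 can become ALSL x, x, 3 ((x << 3) + x) and
    // MUL x, 7 can become SUB (SLLI x, 3), x.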
4832     if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
4833         (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
4834       return true;
4835     // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
4836     if (ConstNode->hasOneUse() &&
4837         ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
4838          (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
4839       return true;
4840     // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)),
4841     // in which the immediate has two set bits. Or Break (MUL x, imm)
4842     // into (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate
4843     // equals to (1 << s0) - (1 << s1).
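    // For example, MUL x, 4100 (4100 == (1 << 12) + (1 << 2)) is accepted
    // here and becomes (ADD (SLLI x, 12), (SLLI x, 2)).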
4844     if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
4845       unsigned Shifts = Imm.countr_zero();
4846       // Reject immediates which can be composed via a single LUI.
4847       if (Shifts >= 12)
4848         return false;
4849       // Reject multiplications that can be optimized to
4850       // (SLLI (ALSL x, x, 1/2/3/4), s).
4851       APInt ImmPop = Imm.ashr(Shifts);
4852       if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
4853         return false;
4854       // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
4855       // since it needs one more instruction than the other 3 cases.
4856       APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
4857       if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
4858           (ImmSmall - Imm).isPowerOf2())
4859         return true;
4860     }
4861   }
4862 
4863   return false;
4864 }
4865 
4866 bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
4867                                                     const AddrMode &AM,
4868                                                     Type *Ty, unsigned AS,
4869                                                     Instruction *I) const {
4870   // LoongArch has four basic addressing modes:
4871   //  1. reg
4872   //  2. reg + 12-bit signed offset
4873   //  3. reg + 14-bit signed offset left-shifted by 2
4874   //  4. reg1 + reg2
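  //  For example, ld.w uses mode 2 (si12 offset), ldptr.w uses mode 3 (si14
  //  offset scaled by 4), and ldx.w uses mode 4 (register + register).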
4875   // TODO: Add more checks after the vector extension is supported.
4876 
4877   // No global is ever allowed as a base.
4878   if (AM.BaseGV)
4879     return false;
4880 
4881   // Require a 12-bit signed offset or a 14-bit signed offset left-shifted by 2.
4882   if (!isInt<12>(AM.BaseOffs) && !isShiftedInt<14, 2>(AM.BaseOffs))
4883     return false;
4884 
4885   switch (AM.Scale) {
4886   case 0:
4887     // "i" is not allowed.
4888     if (!AM.HasBaseReg)
4889       return false;
4890     // Otherwise we have "r+i".
4891     break;
4892   case 1:
4893     // "r+r+i" is not allowed.
4894     if (AM.HasBaseReg && AM.BaseOffs != 0)
4895       return false;
4896     // Otherwise we have "r+r" or "r+i".
4897     break;
4898   case 2:
4899     // "2*r+r" or "2*r+i" is not allowed.
4900     if (AM.HasBaseReg || AM.BaseOffs)
4901       return false;
4902     // Otherwise we have "r+r".
4903     break;
4904   default:
4905     return false;
4906   }
4907 
4908   return true;
4909 }
4910 
4911 bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
4912   return isInt<12>(Imm);
4913 }
4914 
4915 bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
4916   return isInt<12>(Imm);
4917 }
4918 
4919 bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
4920   // Zexts are free if they can be combined with a load.
4921   // Don't advertise i32->i64 zextload as being free for LA64. It interacts
4922   // poorly with type legalization of compares preferring sext.
4923   if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
4924     EVT MemVT = LD->getMemoryVT();
4925     if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
4926         (LD->getExtensionType() == ISD::NON_EXTLOAD ||
4927          LD->getExtensionType() == ISD::ZEXTLOAD))
4928       return true;
4929   }
4930 
4931   return TargetLowering::isZExtFree(Val, VT2);
4932 }
4933 
4934 bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
4935   return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
4936 }
4937 
4938 bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
4939   // TODO: Support vectors.
4940   if (Y.getValueType().isVector())
4941     return false;
4942 
4943   return !isa<ConstantSDNode>(Y);
4944 }
4945 
4946 ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
4947   // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
4948   return ISD::SIGN_EXTEND;
4949 }
4950