//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

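  // Scalable vector types are assigned to vector register classes according
  // to their LMUL: fractional and LMUL=1 types live in a single vector
  // register (VR), while LMUL=2/4/8 types use the grouped VRM2/VRM4/VRM8
  // classes.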
  if (Subtarget.hasStdExtV()) {
    addRegisterClass(RISCVVMVTs::vbool64_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool32_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool16_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool8_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vbool1_t, &RISCV::VRRegClass);

    addRegisterClass(RISCVVMVTs::vint8mf8_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8mf4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint8m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint8m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint8m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint16mf4_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint16m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint16m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint16m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint32mf2_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint32m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint32m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint32m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint32m8_t, &RISCV::VRM8RegClass);

    addRegisterClass(RISCVVMVTs::vint64m1_t, &RISCV::VRRegClass);
    addRegisterClass(RISCVVMVTs::vint64m2_t, &RISCV::VRM2RegClass);
    addRegisterClass(RISCVVMVTs::vint64m4_t, &RISCV::VRM4RegClass);
    addRegisterClass(RISCVVMVTs::vint64m8_t, &RISCV::VRM8RegClass);

    if (Subtarget.hasStdExtZfh()) {
      addRegisterClass(RISCVVMVTs::vfloat16mf4_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16mf2_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat16m8_t, &RISCV::VRM8RegClass);
    }

    if (Subtarget.hasStdExtF()) {
      addRegisterClass(RISCVVMVTs::vfloat32mf2_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat32m8_t, &RISCV::VRM8RegClass);
    }

    if (Subtarget.hasStdExtD()) {
      addRegisterClass(RISCVVMVTs::vfloat64m1_t, &RISCV::VRRegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m2_t, &RISCV::VRM2RegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m4_t, &RISCV::VRM4RegClass);
      addRegisterClass(RISCVVMVTs::vfloat64m8_t, &RISCV::VRM8RegClass);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  }

  if (Subtarget.is64Bit() && Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, MVT::i32, Custom);

    setOperationAction(ISD::SDIV, MVT::i8, Custom);
    setOperationAction(ISD::UDIV, MVT::i8, Custom);
    setOperationAction(ISD::UREM, MVT::i8, Custom);
    setOperationAction(ISD::SDIV, MVT::i16, Custom);
    setOperationAction(ISD::UDIV, MVT::i16, Custom);
    setOperationAction(ISD::UREM, MVT::i16, Custom);
    setOperationAction(ISD::SDIV, MVT::i32, Custom);
    setOperationAction(ISD::UDIV, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Legal);
    setOperationAction(ISD::FSHR, XLenVT, Legal);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

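  // The F/D/Zfh extensions provide only the feq/flt/fle comparisons directly;
  // every other FP condition code is expanded, typically by swapping operands
  // and/or inverting the result of a supported comparison.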
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE, ISD::SETNE, ISD::SETO, ISD::SETUO};

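  // Scalar FP operations with no corresponding instruction in F/D/Zfh; these
  // are expanded, typically into library calls.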
  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM,
      ISD::FP16_TO_FP, ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

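  // On RV64, i32 is not a legal type, so fp-to-int conversions producing i32
  // results are custom-legalized; this lets type legalization pick the
  // word-sized fcvt forms and keep the result correctly sign-extended.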
  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

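  // With the A extension, atomics up to XLEN bits are supported natively and
  // narrower cmpxchg/rmw operations are legalized via the masked atomic
  // intrinsics. Without it, all atomic operations are lowered to libcalls.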
  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    }

    for (auto VT : MVT::integer_scalable_vector_valuetypes()) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      if (isTypeLegal(VT)) {
        // Custom-lower extensions and truncations from/to mask types.
        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        // We custom-lower all legally-typed vector truncates:
        // 1. Mask VTs are custom-expanded into a series of standard nodes
        // 2. Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR"
        //    nodes which truncate by one power of two at a time.
        setOperationAction(ISD::TRUNCATE, VT, Custom);

        // Custom-lower insert/extract operations to simplify patterns.
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      }
    }

    // We must custom-lower certain vXi64 operations on RV32 due to the vector
    // element type being illegal.
    if (!Subtarget.is64Bit()) {
      setOperationAction(ISD::SPLAT_VECTOR, MVT::i64, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
    }

    // Expand various CCs to best match the RVV ISA, which natively supports
    // UNE but no other unordered comparisons, and supports all ordered
    // comparisons except ONE. Additionally, we expand GT,OGT,GE,OGE for
    // optimization purposes; they are expanded to their swapped-operand CCs
    // (LT,OLT,LE,OLE), and we pattern-match those back to the "original",
    // swapping operands once more. This way we catch both operations and both
    // "vf" and "fv" forms with fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT, ISD::SETOGT, ISD::SETGE, ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);
    };

    if (Subtarget.hasStdExtZfh()) {
      for (auto VT : {RISCVVMVTs::vfloat16mf4_t, RISCVVMVTs::vfloat16mf2_t,
                      RISCVVMVTs::vfloat16m1_t, RISCVVMVTs::vfloat16m2_t,
                      RISCVVMVTs::vfloat16m4_t, RISCVVMVTs::vfloat16m8_t})
        SetCommonVFPActions(VT);
    }

    if (Subtarget.hasStdExtF()) {
      for (auto VT : {RISCVVMVTs::vfloat32mf2_t, RISCVVMVTs::vfloat32m1_t,
                      RISCVVMVTs::vfloat32m2_t, RISCVVMVTs::vfloat32m4_t,
                      RISCVVMVTs::vfloat32m8_t})
        SetCommonVFPActions(VT);
    }

    if (Subtarget.hasStdExtD()) {
      for (auto VT : {RISCVVMVTs::vfloat64m1_t, RISCVVMVTs::vfloat64m2_t,
                      RISCVVMVTs::vfloat64m4_t, RISCVVMVTs::vfloat64m8_t})
        SetCommonVFPActions(VT);
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::SETCC);
  if (Subtarget.hasStdExtZbp()) {
    setTargetDAGCombine(ISD::OR);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV())
    return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
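  // All of the masked atomic intrinsics operate on an aligned 32-bit word and
  // both read and modify memory, so they share the IntrinsicInfo set up below.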
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly in the RISC-V
// ISA.
static void normaliseSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see normaliseSetCC).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
                                            SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented operand");
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:
    return lowerSELECT(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::SHL_PARTS:
    return lowerShiftLeftParts(Op, DAG);
  case ISD::SRA_PARTS:
    return lowerShiftRightParts(Op, DAG, true);
  case ISD::SRL_PARTS:
    return lowerShiftRightParts(Op, DAG, false);
  case ISD::BITCAST: {
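    // Lower i16->f16 (with Zfh) and, on RV64 with F, i32->f32 bitcasts by
    // any-extending the integer operand to XLEN and moving it with the
    // FMV_H_X / FMV_W_X_RV64 nodes.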
    assert(((Subtarget.is64Bit() && Subtarget.hasStdExtF()) ||
            Subtarget.hasStdExtZfh()) &&
           "Unexpected custom legalisation");
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    if (Op.getValueType() == MVT::f16 && Subtarget.hasStdExtZfh()) {
      if (Op0.getValueType() != MVT::i16)
        return SDValue();
      SDValue NewOp0 =
          DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Op0);
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
      return FPConv;
    } else if (Op.getValueType() == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      if (Op0.getValueType() != MVT::i32)
        return SDValue();
      SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
      return FPConv;
    }
    return SDValue();
  }
  case ISD::INTRINSIC_WO_CHAIN:
    return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return LowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
    assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    // Start with the maximum immediate value which is the bitwidth - 1.
    unsigned Imm = VT.getSizeInBits() - 1;
    // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
    if (Op.getOpcode() == ISD::BSWAP)
      Imm &= ~0x7U;
    return DAG.getNode(RISCVISD::GREVI, DL, VT, Op.getOperand(0),
                       DAG.getTargetConstant(Imm, DL, Subtarget.getXLenVT()));
  }
  case ISD::TRUNCATE: {
    SDLoc DL(Op);
    EVT VT = Op.getValueType();
    // Only custom-lower vector truncates.
    if (!VT.isVector())
      return Op;

    // Truncates to mask types are handled differently.
    if (VT.getVectorElementType() == MVT::i1)
      return lowerVectorMaskTrunc(Op, DAG);

    // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
    // truncates as a series of "RISCVISD::TRUNCATE_VECTOR" nodes which
    // truncate by one power of two at a time.
    EVT DstEltVT = VT.getVectorElementType();

    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT SrcEltVT = SrcVT.getVectorElementType();

    assert(DstEltVT.bitsLT(SrcEltVT) &&
           isPowerOf2_64(DstEltVT.getSizeInBits()) &&
           isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
           "Unexpected vector truncate lowering");

    SDValue Result = Src;
    LLVMContext &Context = *DAG.getContext();
    const ElementCount Count = SrcVT.getVectorElementCount();
    do {
      SrcEltVT = EVT::getIntegerVT(Context, SrcEltVT.getSizeInBits() / 2);
      EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
      Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR, DL, ResultVT, Result);
    } while (SrcEltVT != DstEltVT);

    return Result;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
  case ISD::SIGN_EXTEND:
    return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
  case ISD::SPLAT_VECTOR:
    return lowerSPLATVECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::VSCALE: {
    MVT VT = Op.getSimpleValueType();
    SDLoc DL(Op);
    SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
    // We define our scalable vector types for lmul=1 to use a 64 bit known
    // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
    // vscale as VLENB / 8.
    SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
                                 DAG.getConstant(3, DL, VT));
    return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
  }
  }
}

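// Helper overloads that build the target-specific address node for each kind
// of symbol (global address, block address, constant pool, jump table) with
// the requested operand flags.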
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2GiB range within
    // the address space. This generates the pattern (PseudoLLA sym), which
    // expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
                                            SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
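  // Pick the lowering sequence based on the TLS model: LocalExec builds a
  // tp-relative address directly, InitialExec loads the offset from the GOT
  // and adds tp, and the dynamic models call __tls_get_addr.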
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    normaliseSetCC(LHS, RHS, CCVal);

    SDValue TargetCC = DAG.getConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
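  // Walk up the frame chain: each frame stores the previous frame pointer two
  // XLEN-sized words below the current frame address, so load from
  // FrameAddr - 2*XLenInBytes once per requested depth.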
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
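  // For non-zero depths, the return address of the requested frame is stored
  // one XLEN-sized word below that frame's address, so compute the frame
  // address and load from it.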
1099 if (Depth) {
1100 int Off = -XLenInBytes;
1101 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
1102 SDValue Offset = DAG.getConstant(Off, DL, VT);
1103 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1104 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1105 MachinePointerInfo());
1106 }
1107
1108 // Return the value of the return address register, marking it an implicit
1109 // live-in.
1110 Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
1111 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
1112 }
1113
lowerShiftLeftParts(SDValue Op,SelectionDAG & DAG) const1114 SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
1115 SelectionDAG &DAG) const {
1116 SDLoc DL(Op);
1117 SDValue Lo = Op.getOperand(0);
1118 SDValue Hi = Op.getOperand(1);
1119 SDValue Shamt = Op.getOperand(2);
1120 EVT VT = Lo.getValueType();
1121
1122 // if Shamt-XLEN < 0: // Shamt < XLEN
1123 // Lo = Lo << Shamt
1124 // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
1125 // else:
1126 // Lo = 0
1127 // Hi = Lo << (Shamt-XLEN)
1128
1129 SDValue Zero = DAG.getConstant(0, DL, VT);
1130 SDValue One = DAG.getConstant(1, DL, VT);
1131 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1132 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1133 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1134 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1135
1136 SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
1137 SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
1138 SDValue ShiftRightLo =
1139 DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
1140 SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
1141 SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
1142 SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);
1143
1144 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1145
1146 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
1147 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1148
1149 SDValue Parts[2] = {Lo, Hi};
1150 return DAG.getMergeValues(Parts, DL);
1151 }
1152
lowerShiftRightParts(SDValue Op,SelectionDAG & DAG,bool IsSRA) const1153 SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
1154 bool IsSRA) const {
1155 SDLoc DL(Op);
1156 SDValue Lo = Op.getOperand(0);
1157 SDValue Hi = Op.getOperand(1);
1158 SDValue Shamt = Op.getOperand(2);
1159 EVT VT = Lo.getValueType();
1160
1161 // SRA expansion:
1162 // if Shamt-XLEN < 0: // Shamt < XLEN
1163 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1164 // Hi = Hi >>s Shamt
1165 // else:
1166 // Lo = Hi >>s (Shamt-XLEN);
1167 // Hi = Hi >>s (XLEN-1)
1168 //
1169 // SRL expansion:
1170 // if Shamt-XLEN < 0: // Shamt < XLEN
1171 // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
1172 // Hi = Hi >>u Shamt
1173 // else:
1174 // Lo = Hi >>u (Shamt-XLEN);
1175 // Hi = 0;
1176
1177 unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
1178
1179 SDValue Zero = DAG.getConstant(0, DL, VT);
1180 SDValue One = DAG.getConstant(1, DL, VT);
1181 SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
1182 SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
1183 SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
1184 SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
1185
1186 SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
1187 SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
1188 SDValue ShiftLeftHi =
1189 DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
1190 SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
1191 SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
1192 SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
1193 SDValue HiFalse =
1194 IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;
1195
1196 SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);
1197
1198 Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
1199 Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
1200
1201 SDValue Parts[2] = {Lo, Hi};
1202 return DAG.getMergeValues(Parts, DL);
1203 }
1204
1205 // Custom-lower a SPLAT_VECTOR where XLEN<SEW, as the SEW element type is
1206 // illegal (currently only vXi64 RV32).
1207 // FIXME: We could also catch non-constant sign-extended i32 values and lower
1208 // them to SPLAT_VECTOR_I64
lowerSPLATVECTOR(SDValue Op,SelectionDAG & DAG) const1209 SDValue RISCVTargetLowering::lowerSPLATVECTOR(SDValue Op,
1210 SelectionDAG &DAG) const {
1211 SDLoc DL(Op);
1212 EVT VecVT = Op.getValueType();
1213 assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
1214 "Unexpected SPLAT_VECTOR lowering");
1215 SDValue SplatVal = Op.getOperand(0);
1216
1217 // If we can prove that the value is a sign-extended 32-bit value, lower this
1218 // as a custom node in order to try and match RVV vector/scalar instructions.
1219 if (auto *CVal = dyn_cast<ConstantSDNode>(SplatVal)) {
1220 if (isInt<32>(CVal->getSExtValue()))
1221 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1222 DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32));
1223 }
1224
1225 if (SplatVal.getOpcode() == ISD::SIGN_EXTEND &&
1226 SplatVal.getOperand(0).getValueType() == MVT::i32) {
1227 return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1228 SplatVal.getOperand(0));
1229 }
1230
1231 // Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
1232 // to accidentally sign-extend the 32-bit halves to the e64 SEW:
1233 // vmv.v.x vX, hi
1234 // vsll.vx vX, vX, /*32*/
1235 // vmv.v.x vY, lo
1236 // vsll.vx vY, vY, /*32*/
1237 // vsrl.vx vY, vY, /*32*/
1238 // vor.vv vX, vX, vY
1239 SDValue One = DAG.getConstant(1, DL, MVT::i32);
1240 SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
1241 SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
1242 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, Zero);
1243 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, SplatVal, One);
1244
1245 Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
1246 Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
1247 Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
1248
1249 if (isNullConstant(Hi))
1250 return Lo;
1251
1252 Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
1253 Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
1254
1255 return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
1256 }
1257
1258 // Custom-lower extensions from mask vectors by using a vselect either with 1
1259 // for zero/any-extension or -1 for sign-extension:
1260 // (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
1261 // Note that any-extension is lowered identically to zero-extension.
lowerVectorMaskExt(SDValue Op,SelectionDAG & DAG,int64_t ExtTrueVal) const1262 SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
1263 int64_t ExtTrueVal) const {
1264 SDLoc DL(Op);
1265 EVT VecVT = Op.getValueType();
1266 SDValue Src = Op.getOperand(0);
1267 // Only custom-lower extensions from mask types
1268 if (!Src.getValueType().isVector() ||
1269 Src.getValueType().getVectorElementType() != MVT::i1)
1270 return Op;
1271
1272 // Be careful not to introduce illegal scalar types at this stage, and be
1273 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
1274 // illegal and must be expanded. Since we know that the constants are
1275 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1276 bool IsRV32E64 =
1277 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1278 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1279 SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, Subtarget.getXLenVT());
1280
1281 if (!IsRV32E64) {
1282 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1283 SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
1284 } else {
1285 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1286 SplatTrueVal =
1287 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
1288 }
1289
1290 return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
1291 }
1292
1293 // Custom-lower truncations from vectors to mask vectors by using a mask and a
1294 // setcc operation:
1295 // (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
lowerVectorMaskTrunc(SDValue Op,SelectionDAG & DAG) const1296 SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
1297 SelectionDAG &DAG) const {
1298 SDLoc DL(Op);
1299 EVT MaskVT = Op.getValueType();
1300 // Only expect to custom-lower truncations to mask types
1301 assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
1302 "Unexpected type for vector mask lowering");
1303 SDValue Src = Op.getOperand(0);
1304 EVT VecVT = Src.getValueType();
1305
1306 // Be careful not to introduce illegal scalar types at this stage, and be
1307 // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
1308 // illegal and must be expanded. Since we know that the constants are
1309 // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
1310 bool IsRV32E64 =
1311 !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
1312 SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
1313 SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
1314
1315 if (!IsRV32E64) {
1316 SplatOne = DAG.getSplatVector(VecVT, DL, SplatOne);
1317 SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
1318 } else {
1319 SplatOne = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatOne);
1320 SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
1321 }
1322
1323 SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
1324
1325 return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
1326 }
1327
lowerINSERT_VECTOR_ELT(SDValue Op,SelectionDAG & DAG) const1328 SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1329 SelectionDAG &DAG) const {
1330 SDLoc DL(Op);
1331 EVT VecVT = Op.getValueType();
1332 SDValue Vec = Op.getOperand(0);
1333 SDValue Val = Op.getOperand(1);
1334 SDValue Idx = Op.getOperand(2);
1335
1336 // Custom-legalize INSERT_VECTOR_ELT where XLEN>=SEW, so that the vector is
1337 // first slid down into position, the value is inserted into the first
1338 // position, and the vector is slid back up. We do this to simplify patterns.
1339 // (slideup vec, (insertelt (slidedown impdef, vec, idx), val, 0), idx),
1340 if (Subtarget.is64Bit() || VecVT.getVectorElementType() != MVT::i64) {
1341 if (isNullConstant(Idx))
1342 return Op;
1343 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1344 DAG.getUNDEF(VecVT), Vec, Idx);
1345 SDValue InsertElt0 =
1346 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, Slidedown, Val,
1347 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1348
1349 return DAG.getNode(RISCVISD::VSLIDEUP, DL, VecVT, Vec, InsertElt0, Idx);
1350 }
1351
1352 // Custom-legalize INSERT_VECTOR_ELT where XLEN<SEW, as the SEW element type
1353 // is illegal (currently only vXi64 RV32).
1354 // Since there is no easy way of getting a single element into a vector when
1355 // XLEN<SEW, we lower the operation to the following sequence:
1356 // splat vVal, rVal
1357 // vid.v vVid
1358 // vmseq.vx mMask, vVid, rIdx
1359 // vmerge.vvm vDest, vSrc, vVal, mMask
1360 // This essentially merges the original vector with the inserted element by
1361 // using a mask whose only set bit is that corresponding to the insert
1362 // index.
1363 SDValue SplattedVal = DAG.getSplatVector(VecVT, DL, Val);
1364 SDValue SplattedIdx = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Idx);
1365
1366 SDValue VID = DAG.getNode(RISCVISD::VID, DL, VecVT);
1367 auto SetCCVT =
1368 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VecVT);
1369 SDValue Mask = DAG.getSetCC(DL, SetCCVT, VID, SplattedIdx, ISD::SETEQ);
1370
1371 return DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, SplattedVal, Vec);
1372 }
1373
1374 // Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
1375 // extract the first element: (extractelt (slidedown vec, idx), 0). This is
1376 // done to maintain partity with the legalization of RV32 vXi64 legalization.
1377 SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1378 SelectionDAG &DAG) const {
1379 SDLoc DL(Op);
1380 SDValue Idx = Op.getOperand(1);
1381 if (isNullConstant(Idx))
1382 return Op;
1383
1384 SDValue Vec = Op.getOperand(0);
1385 EVT EltVT = Op.getValueType();
1386 EVT VecVT = Vec.getValueType();
1387 SDValue Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1388 DAG.getUNDEF(VecVT), Vec, Idx);
1389
1390 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Slidedown,
1391 DAG.getConstant(0, DL, Subtarget.getXLenVT()));
1392 }
1393
1394 SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
1395 SelectionDAG &DAG) const {
1396 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1397 SDLoc DL(Op);
1398
1399 if (Subtarget.hasStdExtV()) {
1400 // Some RVV intrinsics may claim that they want an integer operand to be
1401 // extended.
1402 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1403 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1404 if (II->ExtendedOperand) {
1405 assert(II->ExtendedOperand < Op.getNumOperands());
1406 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1407 SDValue &ScalarOp = Operands[II->ExtendedOperand];
1408 EVT OpVT = ScalarOp.getValueType();
1409 if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1410 (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1411 // If the operand is a constant, sign extend to increase our chances
1412 // of being able to use a .vi instruction. ANY_EXTEND would become a
1413 // zero extend and the simm5 check in isel would fail.
1414 // FIXME: Should we ignore the upper bits in isel instead?
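// For example (illustrative), an i8 constant -1 passed to a vadd intrinsic
// is sign-extended to XLenVT so isel still sees -1 and can select vadd.vi;
// an any-extend could materialise it as 255 and fail the simm5 check.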
1415 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1416 : ISD::ANY_EXTEND;
1417 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1418 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
1419 Operands);
1420 }
1421 }
1422 }
1423 }
1424
1425 switch (IntNo) {
1426 default:
1427 return SDValue(); // Don't custom lower most intrinsics.
1428 case Intrinsic::thread_pointer: {
1429 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1430 return DAG.getRegister(RISCV::X4, PtrVT);
1431 }
1432 case Intrinsic::riscv_vmv_x_s:
1433 assert(Op.getValueType() == Subtarget.getXLenVT() && "Unexpected VT!");
1434 return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
1435 Op.getOperand(1));
1436 }
1437 }
1438
1439 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
1440 SelectionDAG &DAG) const {
1441 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1442 SDLoc DL(Op);
1443
1444 if (Subtarget.hasStdExtV()) {
1445 // Some RVV intrinsics may claim that they want an integer operand to be
1446 // extended.
1447 if (const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
1448 RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo)) {
1449 if (II->ExtendedOperand) {
1450 // The operands start from the second argument in INTRINSIC_W_CHAIN.
1451 unsigned ExtendOp = II->ExtendedOperand + 1;
1452 assert(ExtendOp < Op.getNumOperands());
1453 SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
1454 SDValue &ScalarOp = Operands[ExtendOp];
1455 EVT OpVT = ScalarOp.getValueType();
1456 if (OpVT == MVT::i8 || OpVT == MVT::i16 ||
1457 (OpVT == MVT::i32 && Subtarget.is64Bit())) {
1458 // If the operand is a constant, sign extend to increase our chances
1459 // of being able to use a .vi instruction. ANY_EXTEND would become a
1460 // zero extend and the simm5 check in isel would fail.
1461 // FIXME: Should we ignore the upper bits in isel instead?
1462 unsigned ExtOpc = isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND
1463 : ISD::ANY_EXTEND;
1464 ScalarOp = DAG.getNode(ExtOpc, DL, Subtarget.getXLenVT(), ScalarOp);
1465 return DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, Op->getVTList(),
1466 Operands);
1467 }
1468 }
1469 }
1470 }
1471
1472 unsigned NF = 1;
1473 switch (IntNo) {
1474 default:
1475 return SDValue(); // Don't custom lower most intrinsics.
1476 case Intrinsic::riscv_vleff: {
1477 SDLoc DL(Op);
1478 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1479 SDValue Load = DAG.getNode(RISCVISD::VLEFF, DL, VTs, Op.getOperand(0),
1480 Op.getOperand(2), Op.getOperand(3));
1481 VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
1482 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
1483 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1484 }
1485 case Intrinsic::riscv_vleff_mask: {
1486 SDLoc DL(Op);
1487 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other, MVT::Glue);
1488 SDValue Load = DAG.getNode(RISCVISD::VLEFF_MASK, DL, VTs, Op.getOperand(0),
1489 Op.getOperand(2), Op.getOperand(3),
1490 Op.getOperand(4), Op.getOperand(5));
1491 VTs = DAG.getVTList(Op->getValueType(1), MVT::Other);
1492 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs, Load.getValue(2));
1493 return DAG.getMergeValues({Load, ReadVL, Load.getValue(1)}, DL);
1494 }
1495 case Intrinsic::riscv_vlseg8ff:
1496 NF++;
1497 LLVM_FALLTHROUGH;
1498 case Intrinsic::riscv_vlseg7ff:
1499 NF++;
1500 LLVM_FALLTHROUGH;
1501 case Intrinsic::riscv_vlseg6ff:
1502 NF++;
1503 LLVM_FALLTHROUGH;
1504 case Intrinsic::riscv_vlseg5ff:
1505 NF++;
1506 LLVM_FALLTHROUGH;
1507 case Intrinsic::riscv_vlseg4ff:
1508 NF++;
1509 LLVM_FALLTHROUGH;
1510 case Intrinsic::riscv_vlseg3ff:
1511 NF++;
1512 LLVM_FALLTHROUGH;
1513 case Intrinsic::riscv_vlseg2ff: {
1514 NF++;
1515 SDLoc DL(Op);
1516 SmallVector<EVT, 8> EVTs(NF, Op.getValueType());
1517 EVTs.push_back(MVT::Other);
1518 EVTs.push_back(MVT::Glue);
1519 SDVTList VTs = DAG.getVTList(EVTs);
1520 SDValue Load =
1521 DAG.getNode(RISCVISD::VLSEGFF, DL, VTs, Op.getOperand(0),
1522 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
1523 VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other);
1524 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs,
1525 /*Glue*/ Load.getValue(NF + 1));
1526 SmallVector<SDValue, 8> Results;
1527 for (unsigned i = 0; i < NF; ++i)
1528 Results.push_back(Load.getValue(i));
1529 Results.push_back(ReadVL);
1530 Results.push_back(Load.getValue(NF)); // Chain.
1531 return DAG.getMergeValues(Results, DL);
1532 }
1533 case Intrinsic::riscv_vlseg8ff_mask:
1534 NF++;
1535 LLVM_FALLTHROUGH;
1536 case Intrinsic::riscv_vlseg7ff_mask:
1537 NF++;
1538 LLVM_FALLTHROUGH;
1539 case Intrinsic::riscv_vlseg6ff_mask:
1540 NF++;
1541 LLVM_FALLTHROUGH;
1542 case Intrinsic::riscv_vlseg5ff_mask:
1543 NF++;
1544 LLVM_FALLTHROUGH;
1545 case Intrinsic::riscv_vlseg4ff_mask:
1546 NF++;
1547 LLVM_FALLTHROUGH;
1548 case Intrinsic::riscv_vlseg3ff_mask:
1549 NF++;
1550 LLVM_FALLTHROUGH;
1551 case Intrinsic::riscv_vlseg2ff_mask: {
1552 NF++;
1553 SDLoc DL(Op);
1554 SmallVector<EVT, 8> EVTs(NF, Op.getValueType());
1555 EVTs.push_back(MVT::Other);
1556 EVTs.push_back(MVT::Glue);
1557 SDVTList VTs = DAG.getVTList(EVTs);
1558 SmallVector<SDValue, 13> LoadOps;
1559 LoadOps.push_back(Op.getOperand(0)); // Chain.
1560 LoadOps.push_back(Op.getOperand(1)); // Intrinsic ID.
1561 for (unsigned i = 0; i < NF; ++i)
1562 LoadOps.push_back(Op.getOperand(2 + i)); // MaskedOff.
1563 LoadOps.push_back(Op.getOperand(2 + NF)); // Base.
1564 LoadOps.push_back(Op.getOperand(3 + NF)); // Mask.
1565 LoadOps.push_back(Op.getOperand(4 + NF)); // VL.
1566 SDValue Load = DAG.getNode(RISCVISD::VLSEGFF_MASK, DL, VTs, LoadOps);
1567 VTs = DAG.getVTList(Op->getValueType(NF), MVT::Other);
1568 SDValue ReadVL = DAG.getNode(RISCVISD::READ_VL, DL, VTs,
1569 /*Glue*/ Load.getValue(NF + 1));
1570 SmallVector<SDValue, 8> Results;
1571 for (unsigned i = 0; i < NF; ++i)
1572 Results.push_back(Load.getValue(i));
1573 Results.push_back(ReadVL);
1574 Results.push_back(Load.getValue(NF)); // Chain.
1575 return DAG.getMergeValues(Results, DL);
1576 }
1577 }
1578 }
1579
1580 // Returns the opcode of the target-specific SDNode that implements the 32-bit
1581 // form of the given Opcode.
1582 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
1583 switch (Opcode) {
1584 default:
1585 llvm_unreachable("Unexpected opcode");
1586 case ISD::SHL:
1587 return RISCVISD::SLLW;
1588 case ISD::SRA:
1589 return RISCVISD::SRAW;
1590 case ISD::SRL:
1591 return RISCVISD::SRLW;
1592 case ISD::SDIV:
1593 return RISCVISD::DIVW;
1594 case ISD::UDIV:
1595 return RISCVISD::DIVUW;
1596 case ISD::UREM:
1597 return RISCVISD::REMUW;
1598 case ISD::ROTL:
1599 return RISCVISD::ROLW;
1600 case ISD::ROTR:
1601 return RISCVISD::RORW;
1602 case RISCVISD::GREVI:
1603 return RISCVISD::GREVIW;
1604 case RISCVISD::GORCI:
1605 return RISCVISD::GORCIW;
1606 }
1607 }
1608
1609 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
1610 // Because i32 isn't a legal type for RV64, these operations would otherwise
1611 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
1612 // instructions later on, because the fact that the operation was originally
1613 // of type i32 is lost.
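// For example (illustrative), an i32 arithmetic shift right on RV64:
//   (i32 (sra x, y))
// is rewritten here as
//   (trunc (RISCVISD::SRAW (anyext x), (anyext y)))
// which then selects to a single SRAW instruction.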
1614 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
1615 unsigned ExtOpc = ISD::ANY_EXTEND) {
1616 SDLoc DL(N);
1617 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1618 SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
1619 SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
1620 SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
1621 // ReplaceNodeResults requires we maintain the same type for the return value.
1622 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
1623 }
1624
1625 // Converts the given 32-bit operation to an i64 operation with sign-extension
1626 // semantics so that fewer sign-extension instructions are needed.
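// For example (illustrative), (i32 (add x, y)) becomes
//   (trunc (sext_inreg (add (anyext x), (anyext y)), i32))
// which typically selects to ADDW and avoids a separate sign-extension.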
1627 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
1628 SDLoc DL(N);
1629 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1630 SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1631 SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
1632 SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
1633 DAG.getValueType(MVT::i32));
1634 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
1635 }
1636
1637 void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
1638 SmallVectorImpl<SDValue> &Results,
1639 SelectionDAG &DAG) const {
1640 SDLoc DL(N);
1641 switch (N->getOpcode()) {
1642 default:
1643 llvm_unreachable("Don't know how to custom type legalize this operation!");
1644 case ISD::STRICT_FP_TO_SINT:
1645 case ISD::STRICT_FP_TO_UINT:
1646 case ISD::FP_TO_SINT:
1647 case ISD::FP_TO_UINT: {
1648 bool IsStrict = N->isStrictFPOpcode();
1649 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1650 "Unexpected custom legalisation");
1651 SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
1652 // If the FP type needs to be softened, emit a library call using the 'si'
1653 // version. If we left it to default legalization we'd end up with 'di'. If
1654 // the FP type doesn't need to be softened just let generic type
1655 // legalization promote the result type.
1656 if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
1657 TargetLowering::TypeSoftenFloat)
1658 return;
1659 RTLIB::Libcall LC;
1660 if (N->getOpcode() == ISD::FP_TO_SINT ||
1661 N->getOpcode() == ISD::STRICT_FP_TO_SINT)
1662 LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
1663 else
1664 LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
1665 MakeLibCallOptions CallOptions;
1666 EVT OpVT = Op0.getValueType();
1667 CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
1668 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
1669 SDValue Result;
1670 std::tie(Result, Chain) =
1671 makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
1672 Results.push_back(Result);
1673 if (IsStrict)
1674 Results.push_back(Chain);
1675 break;
1676 }
1677 case ISD::READCYCLECOUNTER: {
1678 assert(!Subtarget.is64Bit() &&
1679 "READCYCLECOUNTER only has custom type legalization on riscv32");
1680
1681 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
1682 SDValue RCW =
1683 DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));
1684
1685 Results.push_back(
1686 DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
1687 Results.push_back(RCW.getValue(2));
1688 break;
1689 }
1690 case ISD::ADD:
1691 case ISD::SUB:
1692 case ISD::MUL:
1693 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1694 "Unexpected custom legalisation");
1695 if (N->getOperand(1).getOpcode() == ISD::Constant)
1696 return;
1697 Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
1698 break;
1699 case ISD::SHL:
1700 case ISD::SRA:
1701 case ISD::SRL:
1702 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1703 "Unexpected custom legalisation");
1704 if (N->getOperand(1).getOpcode() == ISD::Constant)
1705 return;
1706 Results.push_back(customLegalizeToWOp(N, DAG));
1707 break;
1708 case ISD::ROTL:
1709 case ISD::ROTR:
1710 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1711 "Unexpected custom legalisation");
1712 Results.push_back(customLegalizeToWOp(N, DAG));
1713 break;
1714 case ISD::SDIV:
1715 case ISD::UDIV:
1716 case ISD::UREM: {
1717 MVT VT = N->getSimpleValueType(0);
1718 assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
1719 Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
1720 "Unexpected custom legalisation");
1721 if (N->getOperand(0).getOpcode() == ISD::Constant ||
1722 N->getOperand(1).getOpcode() == ISD::Constant)
1723 return;
1724
1725 // If the input is i32, use ANY_EXTEND since the W instructions don't read
1726 // the upper 32 bits. For other types we need to sign or zero extend
1727 // based on the opcode.
1728 unsigned ExtOpc = ISD::ANY_EXTEND;
1729 if (VT != MVT::i32)
1730 ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
1731 : ISD::ZERO_EXTEND;
1732
1733 Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
1734 break;
1735 }
1736 case ISD::BITCAST: {
1737 assert(((N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1738 Subtarget.hasStdExtF()) ||
1739 (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh())) &&
1740 "Unexpected custom legalisation");
1741 SDValue Op0 = N->getOperand(0);
1742 if (N->getValueType(0) == MVT::i16 && Subtarget.hasStdExtZfh()) {
1743 if (Op0.getValueType() != MVT::f16)
1744 return;
1745 SDValue FPConv =
1746 DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Op0);
1747 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
1748 } else if (N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1749 Subtarget.hasStdExtF()) {
1750 if (Op0.getValueType() != MVT::f32)
1751 return;
1752 SDValue FPConv =
1753 DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
1754 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
1755 }
1756 break;
1757 }
1758 case RISCVISD::GREVI:
1759 case RISCVISD::GORCI: {
1760 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1761 "Unexpected custom legalisation");
1762 // This is similar to customLegalizeToWOp, except that we pass the second
1763 // operand (a TargetConstant) straight through: it is already of type
1764 // XLenVT.
1765 SDLoc DL(N);
1766 RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
1767 SDValue NewOp0 =
1768 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1769 SDValue NewRes =
1770 DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, N->getOperand(1));
1771 // ReplaceNodeResults requires we maintain the same type for the return
1772 // value.
1773 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
1774 break;
1775 }
1776 case ISD::BSWAP:
1777 case ISD::BITREVERSE: {
1778 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1779 Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
1780 SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
1781 N->getOperand(0));
1782 unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
1783 SDValue GREVIW = DAG.getNode(RISCVISD::GREVIW, DL, MVT::i64, NewOp0,
1784 DAG.getTargetConstant(Imm, DL,
1785 Subtarget.getXLenVT()));
1786 // ReplaceNodeResults requires we maintain the same type for the return
1787 // value.
1788 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
1789 break;
1790 }
1791 case ISD::FSHL:
1792 case ISD::FSHR: {
1793 assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
1794 Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
1795 SDValue NewOp0 =
1796 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
1797 SDValue NewOp1 =
1798 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
1799 SDValue NewOp2 =
1800 DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
1801 // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
1802 // Mask the shift amount to 5 bits.
1803 NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
1804 DAG.getConstant(0x1f, DL, MVT::i64));
1805 unsigned Opc =
1806 N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
1807 SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
1808 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
1809 break;
1810 }
1811 case ISD::EXTRACT_VECTOR_ELT: {
1812 // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
1813 // type is illegal (currently only vXi64 RV32).
1814 // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
1815 // transferred to the destination register. We issue two of these from the
1816 // upper- and lower- halves of the SEW-bit vector element, slid down to the
1817 // first element.
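// An illustrative sequence (register names are placeholders; the shift
// amount of 32 is first splatted, here shown held in a GPR):
//   vslidedown.vx vTmp, vSrc, rIdx
//   vmv.x.s       rLo, vTmp          ; lower 32 bits
//   vsrl.vx       vTmp, vTmp, r32    ; shift the element right by 32
//   vmv.x.s       rHi, vTmp          ; upper 32 bits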
1818 SDLoc DL(N);
1819 SDValue Vec = N->getOperand(0);
1820 SDValue Idx = N->getOperand(1);
1821 EVT VecVT = Vec.getValueType();
1822 assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
1823 VecVT.getVectorElementType() == MVT::i64 &&
1824 "Unexpected EXTRACT_VECTOR_ELT legalization");
1825
1826 SDValue Slidedown = Vec;
1827 // Unless the index is known to be 0, we must slide the vector down to get
1828 // the desired element into index 0.
1829 if (!isNullConstant(Idx))
1830 Slidedown = DAG.getNode(RISCVISD::VSLIDEDOWN, DL, VecVT,
1831 DAG.getUNDEF(VecVT), Vec, Idx);
1832
1833 MVT XLenVT = Subtarget.getXLenVT();
1834 // Extract the lower XLEN bits of the correct vector element.
1835 SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Slidedown, Idx);
1836
1837 // To extract the upper XLEN bits of the vector element, shift the first
1838 // element right by 32 bits and re-extract the lower XLEN bits.
1839 SDValue ThirtyTwoV =
1840 DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT,
1841 DAG.getConstant(32, DL, Subtarget.getXLenVT()));
1842 SDValue LShr32 = DAG.getNode(ISD::SRL, DL, VecVT, Slidedown, ThirtyTwoV);
1843
1844 SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32, Idx);
1845
1846 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
1847 break;
1848 }
1849 case ISD::INTRINSIC_WO_CHAIN: {
1850 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
1851 switch (IntNo) {
1852 default:
1853 llvm_unreachable(
1854 "Don't know how to custom type legalize this intrinsic!");
1855 case Intrinsic::riscv_vmv_x_s: {
1856 EVT VT = N->getValueType(0);
1857 assert((VT == MVT::i8 || VT == MVT::i16 ||
1858 (Subtarget.is64Bit() && VT == MVT::i32)) &&
1859 "Unexpected custom legalisation!");
1860 SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
1861 Subtarget.getXLenVT(), N->getOperand(1));
1862 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
1863 break;
1864 }
1865 }
1866 break;
1867 }
1868 }
1869 }
1870
1871 // A structure to hold one of the bit-manipulation patterns below. Together, a
1872 // SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
1873 // (or (and (shl x, 1), 0xAAAAAAAA),
1874 // (and (srl x, 1), 0x55555555))
1875 struct RISCVBitmanipPat {
1876 SDValue Op;
1877 unsigned ShAmt;
1878 bool IsSHL;
1879
1880 bool formsPairWith(const RISCVBitmanipPat &Other) const {
1881 return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
1882 }
1883 };
1884
1885 // Matches any of the following bit-manipulation patterns:
1886 // (and (shl x, 1), (0x55555555 << 1))
1887 // (and (srl x, 1), 0x55555555)
1888 // (shl (and x, 0x55555555), 1)
1889 // (srl (and x, (0x55555555 << 1)), 1)
1890 // where the shift amount and mask may vary thus:
1891 // [1] = 0x55555555 / 0xAAAAAAAA
1892 // [2] = 0x33333333 / 0xCCCCCCCC
1893 // [4] = 0x0F0F0F0F / 0xF0F0F0F0
1894 // [8] = 0x00FF00FF / 0xFF00FF00
1895 // [16] = 0x0000FFFF / 0xFFFF0000
1896 // [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
1897 static Optional<RISCVBitmanipPat> matchRISCVBitmanipPat(SDValue Op) {
1898 Optional<uint64_t> Mask;
1899 // Optionally consume a mask around the shift operation.
1900 if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
1901 Mask = Op.getConstantOperandVal(1);
1902 Op = Op.getOperand(0);
1903 }
1904 if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
1905 return None;
1906 bool IsSHL = Op.getOpcode() == ISD::SHL;
1907
1908 if (!isa<ConstantSDNode>(Op.getOperand(1)))
1909 return None;
1910 auto ShAmt = Op.getConstantOperandVal(1);
1911
1912 if (!isPowerOf2_64(ShAmt))
1913 return None;
1914
1915 // These are the unshifted masks which we use to match bit-manipulation
1916 // patterns. They may be shifted left in certain circumstances.
1917 static const uint64_t BitmanipMasks[] = {
1918 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
1919 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL,
1920 };
1921
1922 unsigned MaskIdx = Log2_64(ShAmt);
1923 if (MaskIdx >= array_lengthof(BitmanipMasks))
1924 return None;
1925
1926 auto Src = Op.getOperand(0);
1927
1928 unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
1929 auto ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);
1930
1931 // The expected mask is shifted left when the AND is found around SHL
1932 // patterns.
1933 // ((x >> 1) & 0x55555555)
1934 // ((x << 1) & 0xAAAAAAAA)
1935 bool SHLExpMask = IsSHL;
1936
1937 if (!Mask) {
1938 // Sometimes LLVM keeps the mask as an operand of the shift, typically when
1939 // the mask is all ones: consume that now.
1940 if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
1941 Mask = Src.getConstantOperandVal(1);
1942 Src = Src.getOperand(0);
1943 // The expected mask is now in fact shifted left for SRL, so reverse the
1944 // decision.
1945 // ((x & 0xAAAAAAAA) >> 1)
1946 // ((x & 0x55555555) << 1)
1947 SHLExpMask = !SHLExpMask;
1948 } else {
1949 // Use a default shifted mask of all-ones if there's no AND, truncated
1950 // down to the expected width. This simplifies the logic later on.
1951 Mask = maskTrailingOnes<uint64_t>(Width);
1952 *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
1953 }
1954 }
1955
1956 if (SHLExpMask)
1957 ExpMask <<= ShAmt;
1958
1959 if (Mask != ExpMask)
1960 return None;
1961
1962 return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
1963 }
1964
1965 // Match the following pattern as a GREVI(W) operation
1966 // (or (BITMANIP_SHL x), (BITMANIP_SRL x))
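// For example (illustrative), swapping adjacent bits of x:
//   (or (and (shl x, 1), 0xAAAAAAAA), (and (srl x, 1), 0x55555555))
// combines to (RISCVISD::GREVI x, 1), i.e. a single grev-style instruction.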
1967 static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
1968 const RISCVSubtarget &Subtarget) {
1969 EVT VT = Op.getValueType();
1970
1971 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1972 auto LHS = matchRISCVBitmanipPat(Op.getOperand(0));
1973 auto RHS = matchRISCVBitmanipPat(Op.getOperand(1));
1974 if (LHS && RHS && LHS->formsPairWith(*RHS)) {
1975 SDLoc DL(Op);
1976 return DAG.getNode(
1977 RISCVISD::GREVI, DL, VT, LHS->Op,
1978 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
1979 }
1980 }
1981 return SDValue();
1982 }
1983
1984 // Matches any of the following patterns as a GORCI(W) operation
1985 // 1. (or (GREVI x, shamt), x) if shamt is a power of 2
1986 // 2. (or x, (GREVI x, shamt)) if shamt is a power of 2
1987 // 3. (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
1988 // Note that with the variant of 3.,
1989 // (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
1990 // the inner pattern will first be matched as GREVI and then the outer
1991 // pattern will be matched to GORC via the first rule above.
1992 // 4. (or (rotl/rotr x, bitwidth/2), x)
1993 static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
1994 const RISCVSubtarget &Subtarget) {
1995 EVT VT = Op.getValueType();
1996
1997 if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
1998 SDLoc DL(Op);
1999 SDValue Op0 = Op.getOperand(0);
2000 SDValue Op1 = Op.getOperand(1);
2001
2002 auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
2003 if (Reverse.getOpcode() == RISCVISD::GREVI && Reverse.getOperand(0) == X &&
2004 isPowerOf2_32(Reverse.getConstantOperandVal(1)))
2005 return DAG.getNode(RISCVISD::GORCI, DL, VT, X, Reverse.getOperand(1));
2006 // We can also form GORCI from ROTL/ROTR by half the bitwidth.
2007 if ((Reverse.getOpcode() == ISD::ROTL ||
2008 Reverse.getOpcode() == ISD::ROTR) &&
2009 Reverse.getOperand(0) == X &&
2010 isa<ConstantSDNode>(Reverse.getOperand(1))) {
2011 uint64_t RotAmt = Reverse.getConstantOperandVal(1);
2012 if (RotAmt == (VT.getSizeInBits() / 2))
2013 return DAG.getNode(
2014 RISCVISD::GORCI, DL, VT, X,
2015 DAG.getTargetConstant(RotAmt, DL, Subtarget.getXLenVT()));
2016 }
2017 return SDValue();
2018 };
2019
2020 // Check for either commutable permutation of (or (GREVI x, shamt), x)
2021 if (SDValue V = MatchOROfReverse(Op0, Op1))
2022 return V;
2023 if (SDValue V = MatchOROfReverse(Op1, Op0))
2024 return V;
2025
2026 // OR is commutable so canonicalize its OR operand to the left
2027 if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
2028 std::swap(Op0, Op1);
2029 if (Op0.getOpcode() != ISD::OR)
2030 return SDValue();
2031 SDValue OrOp0 = Op0.getOperand(0);
2032 SDValue OrOp1 = Op0.getOperand(1);
2033 auto LHS = matchRISCVBitmanipPat(OrOp0);
2034 // OR is commutable so swap the operands and try again: x might have been
2035 // on the left
2036 if (!LHS) {
2037 std::swap(OrOp0, OrOp1);
2038 LHS = matchRISCVBitmanipPat(OrOp0);
2039 }
2040 auto RHS = matchRISCVBitmanipPat(Op1);
2041 if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
2042 return DAG.getNode(
2043 RISCVISD::GORCI, DL, VT, LHS->Op,
2044 DAG.getTargetConstant(LHS->ShAmt, DL, Subtarget.getXLenVT()));
2045 }
2046 }
2047 return SDValue();
2048 }
2049
2050 // Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
2051 // non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
2052 // Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
2053 // stage does not undo itself, but it is redundant.
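// For example (illustrative):
//   (GREVI (GREVI x, 1), 2) -> (GREVI x, 3)
//   (GREVI (GREVI x, 3), 3) -> x
//   (GORCI (GORCI x, 1), 2) -> (GORCI x, 3)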
2054 static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
2055 unsigned ShAmt1 = N->getConstantOperandVal(1);
2056 SDValue Src = N->getOperand(0);
2057
2058 if (Src.getOpcode() != N->getOpcode())
2059 return SDValue();
2060
2061 unsigned ShAmt2 = Src.getConstantOperandVal(1);
2062 Src = Src.getOperand(0);
2063
2064 unsigned CombinedShAmt;
2065 if (N->getOpcode() == RISCVISD::GORCI || N->getOpcode() == RISCVISD::GORCIW)
2066 CombinedShAmt = ShAmt1 | ShAmt2;
2067 else
2068 CombinedShAmt = ShAmt1 ^ ShAmt2;
2069
2070 if (CombinedShAmt == 0)
2071 return Src;
2072
2073 SDLoc DL(N);
2074 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), Src,
2075 DAG.getTargetConstant(CombinedShAmt, DL,
2076 N->getOperand(1).getValueType()));
2077 }
2078
2079 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
2080 DAGCombinerInfo &DCI) const {
2081 SelectionDAG &DAG = DCI.DAG;
2082
2083 switch (N->getOpcode()) {
2084 default:
2085 break;
2086 case RISCVISD::SplitF64: {
2087 SDValue Op0 = N->getOperand(0);
2088 // If the input to SplitF64 is just BuildPairF64 then the operation is
2089 // redundant. Instead, use BuildPairF64's operands directly.
2090 if (Op0->getOpcode() == RISCVISD::BuildPairF64)
2091 return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
2092
2093 SDLoc DL(N);
2094
2095 // It's cheaper to materialise two 32-bit integers than to load a double
2096 // from the constant pool and transfer it to integer registers through the
2097 // stack.
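// For example (illustrative), splitting the f64 constant 1.0 (bit pattern
// 0x3FF0000000000000) yields the i32 pair lo = 0x00000000, hi = 0x3FF00000,
// which can be materialised directly in integer registers.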
2098 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
2099 APInt V = C->getValueAPF().bitcastToAPInt();
2100 SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
2101 SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
2102 return DCI.CombineTo(N, Lo, Hi);
2103 }
2104
2105 // This is a target-specific version of a DAGCombine performed in
2106 // DAGCombiner::visitBITCAST. It performs the equivalent of:
2107 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2108 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2109 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2110 !Op0.getNode()->hasOneUse())
2111 break;
2112 SDValue NewSplitF64 =
2113 DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
2114 Op0.getOperand(0));
2115 SDValue Lo = NewSplitF64.getValue(0);
2116 SDValue Hi = NewSplitF64.getValue(1);
2117 APInt SignBit = APInt::getSignMask(32);
2118 if (Op0.getOpcode() == ISD::FNEG) {
2119 SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
2120 DAG.getConstant(SignBit, DL, MVT::i32));
2121 return DCI.CombineTo(N, Lo, NewHi);
2122 }
2123 assert(Op0.getOpcode() == ISD::FABS);
2124 SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
2125 DAG.getConstant(~SignBit, DL, MVT::i32));
2126 return DCI.CombineTo(N, Lo, NewHi);
2127 }
2128 case RISCVISD::SLLW:
2129 case RISCVISD::SRAW:
2130 case RISCVISD::SRLW:
2131 case RISCVISD::ROLW:
2132 case RISCVISD::RORW: {
2133 // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
2134 SDValue LHS = N->getOperand(0);
2135 SDValue RHS = N->getOperand(1);
2136 APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
2137 APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
2138 if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
2139 SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
2140 if (N->getOpcode() != ISD::DELETED_NODE)
2141 DCI.AddToWorklist(N);
2142 return SDValue(N, 0);
2143 }
2144 break;
2145 }
2146 case RISCVISD::FSLW:
2147 case RISCVISD::FSRW: {
2148 // Only the lower 32 bits of the values and the lower 6 bits of the shift
2149 // amount are read.
2150 SDValue Op0 = N->getOperand(0);
2151 SDValue Op1 = N->getOperand(1);
2152 SDValue ShAmt = N->getOperand(2);
2153 APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2154 APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
2155 if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
2156 SimplifyDemandedBits(Op1, OpMask, DCI) ||
2157 SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
2158 if (N->getOpcode() != ISD::DELETED_NODE)
2159 DCI.AddToWorklist(N);
2160 return SDValue(N, 0);
2161 }
2162 break;
2163 }
2164 case RISCVISD::GREVIW:
2165 case RISCVISD::GORCIW: {
2166 // Only the lower 32 bits of the first operand are read
2167 SDValue Op0 = N->getOperand(0);
2168 APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
2169 if (SimplifyDemandedBits(Op0, Mask, DCI)) {
2170 if (N->getOpcode() != ISD::DELETED_NODE)
2171 DCI.AddToWorklist(N);
2172 return SDValue(N, 0);
2173 }
2174
2175 return combineGREVI_GORCI(N, DCI.DAG);
2176 }
2177 case RISCVISD::FMV_X_ANYEXTW_RV64: {
2178 SDLoc DL(N);
2179 SDValue Op0 = N->getOperand(0);
2180 // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
2181 // conversion is unnecessary and can be replaced with an ANY_EXTEND
2182 // of the FMV_W_X_RV64 operand.
2183 if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
2184 assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
2185 "Unexpected value type!");
2186 return Op0.getOperand(0);
2187 }
2188
2189 // This is a target-specific version of a DAGCombine performed in
2190 // DAGCombiner::visitBITCAST. It performs the equivalent of:
2191 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
2192 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
2193 if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
2194 !Op0.getNode()->hasOneUse())
2195 break;
2196 SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
2197 Op0.getOperand(0));
2198 APInt SignBit = APInt::getSignMask(32).sext(64);
2199 if (Op0.getOpcode() == ISD::FNEG)
2200 return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
2201 DAG.getConstant(SignBit, DL, MVT::i64));
2202
2203 assert(Op0.getOpcode() == ISD::FABS);
2204 return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
2205 DAG.getConstant(~SignBit, DL, MVT::i64));
2206 }
2207 case RISCVISD::GREVI:
2208 case RISCVISD::GORCI:
2209 return combineGREVI_GORCI(N, DCI.DAG);
2210 case ISD::OR:
2211 if (auto GREV = combineORToGREV(SDValue(N, 0), DCI.DAG, Subtarget))
2212 return GREV;
2213 if (auto GORC = combineORToGORC(SDValue(N, 0), DCI.DAG, Subtarget))
2214 return GORC;
2215 break;
2216 case RISCVISD::SELECT_CC: {
2217 // Transform
2218 // (select_cc (xor X, 1), 0, setne, trueV, falseV) ->
2219 // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
2220 // This can occur when legalizing some floating point comparisons.
2221 SDValue LHS = N->getOperand(0);
2222 SDValue RHS = N->getOperand(1);
2223 auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
2224 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2225 if (ISD::isIntEqualitySetCC(CCVal) && isNullConstant(RHS) &&
2226 LHS.getOpcode() == ISD::XOR && isOneConstant(LHS.getOperand(1)) &&
2227 DAG.MaskedValueIsZero(LHS.getOperand(0), Mask)) {
2228 SDLoc DL(N);
2229 CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
2230 SDValue TargetCC = DAG.getConstant(CCVal, DL, Subtarget.getXLenVT());
2231 return DAG.getNode(RISCVISD::SELECT_CC, DL, N->getValueType(0),
2232 {LHS.getOperand(0), RHS, TargetCC, N->getOperand(3),
2233 N->getOperand(4)});
2234 }
2235 break;
2236 }
2237 case ISD::SETCC: {
2238 // (setcc X, 1, setne) -> (setcc X, 0, seteq) if we can prove X is 0/1.
2239 // Comparing with 0 may allow us to fold into bnez/beqz.
2240 SDValue LHS = N->getOperand(0);
2241 SDValue RHS = N->getOperand(1);
2242 if (LHS.getValueType().isScalableVector())
2243 break;
2244 auto CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
2245 APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
2246 if (isOneConstant(RHS) && ISD::isIntEqualitySetCC(CC) &&
2247 DAG.MaskedValueIsZero(LHS, Mask)) {
2248 SDLoc DL(N);
2249 SDValue Zero = DAG.getConstant(0, DL, LHS.getValueType());
2250 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2251 return DAG.getSetCC(DL, N->getValueType(0), LHS, Zero, CC);
2252 }
2253 break;
2254 }
2255 }
2256
2257 return SDValue();
2258 }
2259
2260 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
2261 const SDNode *N, CombineLevel Level) const {
2262 // The following folds are only desirable if `(OP _, c1 << c2)` can be
2263 // materialised in fewer instructions than `(OP _, c1)`:
2264 //
2265 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
2266 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
2267 SDValue N0 = N->getOperand(0);
2268 EVT Ty = N0.getValueType();
2269 if (Ty.isScalarInteger() &&
2270 (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
2271 auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
2272 auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
2273 if (C1 && C2) {
2274 const APInt &C1Int = C1->getAPIntValue();
2275 APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
2276
2277 // We can materialise `c1 << c2` into an add immediate, so it's "free",
2278 // and the combine should happen, to potentially allow further combines
2279 // later.
2280 if (ShiftedC1Int.getMinSignedBits() <= 64 &&
2281 isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
2282 return true;
2283
2284 // We can materialise `c1` in an add immediate, so it's "free", and the
2285 // combine should be prevented.
2286 if (C1Int.getMinSignedBits() <= 64 &&
2287 isLegalAddImmediate(C1Int.getSExtValue()))
2288 return false;
2289
2290 // Neither constant will fit into an immediate, so find materialisation
2291 // costs.
2292 int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
2293 Subtarget.is64Bit());
2294 int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
2295 ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
2296
2297 // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
2298 // combine should be prevented.
2299 if (C1Cost < ShiftedC1Cost)
2300 return false;
2301 }
2302 }
2303 return true;
2304 }
2305
2306 bool RISCVTargetLowering::targetShrinkDemandedConstant(
2307 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
2308 TargetLoweringOpt &TLO) const {
2309 // Delay this optimization as late as possible.
2310 if (!TLO.LegalOps)
2311 return false;
2312
2313 EVT VT = Op.getValueType();
2314 if (VT.isVector())
2315 return false;
2316
2317 // Only handle AND for now.
2318 if (Op.getOpcode() != ISD::AND)
2319 return false;
2320
2321 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
2322 if (!C)
2323 return false;
2324
2325 const APInt &Mask = C->getAPIntValue();
2326
2327 // Clear all non-demanded bits initially.
2328 APInt ShrunkMask = Mask & DemandedBits;
2329
2330 // If the shrunk mask fits in sign extended 12 bits, let the target
2331 // independent code apply it.
2332 if (ShrunkMask.isSignedIntN(12))
2333 return false;
2334
2335 // Try to make a smaller immediate by setting undemanded bits.
2336
2337 // We need to be able to make a negative number through a combination of mask
2338 // and undemanded bits.
2339 APInt ExpandedMask = Mask | ~DemandedBits;
2340 if (!ExpandedMask.isNegative())
2341 return false;
2342
2343 // Compute the minimum number of bits needed to represent the negative number.
2344 unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
2345
2346 // Try to make a 12 bit negative immediate. If that fails try to make a 32
2347 // bit negative immediate unless the shrunk immediate already fits in 32 bits.
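// For example (illustrative), (and x, 0xffff) where only bits 15:8 are
// demanded has ShrunkMask = 0xff00; setting the undemanded upper bits
// produces -256 (0x...ffffff00), which fits in a 12-bit immediate.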
2348 APInt NewMask = ShrunkMask;
2349 if (MinSignedBits <= 12)
2350 NewMask.setBitsFrom(11);
2351 else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
2352 NewMask.setBitsFrom(31);
2353 else
2354 return false;
2355
2356 // Sanity check that the new mask is a subset of the expanded mask.
2357 assert(NewMask.isSubsetOf(ExpandedMask));
2358
2359 // If we aren't changing the mask, just return true to keep it and prevent
2360 // the caller from optimizing.
2361 if (NewMask == Mask)
2362 return true;
2363
2364 // Replace the constant with the new mask.
2365 SDLoc DL(Op);
2366 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
2367 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
2368 return TLO.CombineTo(Op, NewOp);
2369 }
2370
2371 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
2372 KnownBits &Known,
2373 const APInt &DemandedElts,
2374 const SelectionDAG &DAG,
2375 unsigned Depth) const {
2376 unsigned BitWidth = Known.getBitWidth();
2377 unsigned Opc = Op.getOpcode();
2378 assert((Opc >= ISD::BUILTIN_OP_END ||
2379 Opc == ISD::INTRINSIC_WO_CHAIN ||
2380 Opc == ISD::INTRINSIC_W_CHAIN ||
2381 Opc == ISD::INTRINSIC_VOID) &&
2382 "Should use MaskedValueIsZero if you don't know whether Op"
2383 " is a target node!");
2384
2385 Known.resetAll();
2386 switch (Opc) {
2387 default: break;
2388 case RISCVISD::REMUW: {
2389 KnownBits Known2;
2390 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2391 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2392 // We only care about the lower 32 bits.
2393 Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
2394 // Restore the original width by sign extending.
2395 Known = Known.sext(BitWidth);
2396 break;
2397 }
2398 case RISCVISD::DIVUW: {
2399 KnownBits Known2;
2400 Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
2401 Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
2402 // We only care about the lower 32 bits.
2403 Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
2404 // Restore the original width by sign extending.
2405 Known = Known.sext(BitWidth);
2406 break;
2407 }
2408 case RISCVISD::READ_VLENB:
2409 // We assume VLENB is at least 8 bytes.
2410 // FIXME: The 1.0 draft spec defines minimum VLEN as 128 bits.
2411 Known.Zero.setLowBits(3);
2412 break;
2413 }
2414 }
2415
2416 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
2417 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
2418 unsigned Depth) const {
2419 switch (Op.getOpcode()) {
2420 default:
2421 break;
2422 case RISCVISD::SLLW:
2423 case RISCVISD::SRAW:
2424 case RISCVISD::SRLW:
2425 case RISCVISD::DIVW:
2426 case RISCVISD::DIVUW:
2427 case RISCVISD::REMUW:
2428 case RISCVISD::ROLW:
2429 case RISCVISD::RORW:
2430 case RISCVISD::GREVIW:
2431 case RISCVISD::GORCIW:
2432 case RISCVISD::FSLW:
2433 case RISCVISD::FSRW:
2434 // TODO: As the result is sign-extended, this is conservatively correct. A
2435 // more precise answer could be calculated for SRAW depending on known
2436 // bits in the shift amount.
2437 return 33;
2438 case RISCVISD::VMV_X_S:
2439 // The number of sign bits of the scalar result is computed by obtaining the
2440 // element type of the input vector operand, subtracting its width from the
2441 // XLEN, and then adding one (sign bit within the element type). If the
2442 // element type is wider than XLen, the least-significant XLEN bits are
2443 // taken.
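// For example, extracting from a vector of i8 elements on RV64 (XLEN=64)
// gives 64 - 8 + 1 = 57 known sign bits.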
2444 if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
2445 return 1;
2446 return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
2447 }
2448
2449 return 1;
2450 }
2451
2452 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
2453 MachineBasicBlock *BB) {
2454 assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
2455
2456 // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
2457 // Should the count have wrapped while it was being read, we need to try
2458 // again.
2459 // ...
2460 // read:
2461 // rdcycleh x3 # load high word of cycle
2462 // rdcycle x2 # load low word of cycle
2463 // rdcycleh x4 # load high word of cycle
2464 // bne x3, x4, read # check if high word reads match, otherwise try again
2465 // ...
2466
2467 MachineFunction &MF = *BB->getParent();
2468 const BasicBlock *LLVM_BB = BB->getBasicBlock();
2469 MachineFunction::iterator It = ++BB->getIterator();
2470
2471 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
2472 MF.insert(It, LoopMBB);
2473
2474 MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
2475 MF.insert(It, DoneMBB);
2476
2477 // Transfer the remainder of BB and its successor edges to DoneMBB.
2478 DoneMBB->splice(DoneMBB->begin(), BB,
2479 std::next(MachineBasicBlock::iterator(MI)), BB->end());
2480 DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
2481
2482 BB->addSuccessor(LoopMBB);
2483
2484 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2485 Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
2486 Register LoReg = MI.getOperand(0).getReg();
2487 Register HiReg = MI.getOperand(1).getReg();
2488 DebugLoc DL = MI.getDebugLoc();
2489
2490 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
2491 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
2492 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
2493 .addReg(RISCV::X0);
2494 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
2495 .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
2496 .addReg(RISCV::X0);
2497 BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
2498 .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
2499 .addReg(RISCV::X0);
2500
2501 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
2502 .addReg(HiReg)
2503 .addReg(ReadAgainReg)
2504 .addMBB(LoopMBB);
2505
2506 LoopMBB->addSuccessor(LoopMBB);
2507 LoopMBB->addSuccessor(DoneMBB);
2508
2509 MI.eraseFromParent();
2510
2511 return DoneMBB;
2512 }
2513
2514 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
2515 MachineBasicBlock *BB) {
2516 assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
2517
2518 MachineFunction &MF = *BB->getParent();
2519 DebugLoc DL = MI.getDebugLoc();
2520 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2521 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
2522 Register LoReg = MI.getOperand(0).getReg();
2523 Register HiReg = MI.getOperand(1).getReg();
2524 Register SrcReg = MI.getOperand(2).getReg();
2525 const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
2526 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
2527
2528 TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
2529 RI);
2530 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
2531 MachineMemOperand *MMOLo =
2532 MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
2533 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
2534 MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
2535 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
2536 .addFrameIndex(FI)
2537 .addImm(0)
2538 .addMemOperand(MMOLo);
2539 BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
2540 .addFrameIndex(FI)
2541 .addImm(4)
2542 .addMemOperand(MMOHi);
2543 MI.eraseFromParent(); // The pseudo instruction is gone now.
2544 return BB;
2545 }
2546
2547 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
2548 MachineBasicBlock *BB) {
2549 assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
2550 "Unexpected instruction");
2551
2552 MachineFunction &MF = *BB->getParent();
2553 DebugLoc DL = MI.getDebugLoc();
2554 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2555 const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
2556 Register DstReg = MI.getOperand(0).getReg();
2557 Register LoReg = MI.getOperand(1).getReg();
2558 Register HiReg = MI.getOperand(2).getReg();
2559 const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
2560 int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
2561
2562 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
2563 MachineMemOperand *MMOLo =
2564 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
2565 MachineMemOperand *MMOHi = MF.getMachineMemOperand(
2566 MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
2567 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
2568 .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
2569 .addFrameIndex(FI)
2570 .addImm(0)
2571 .addMemOperand(MMOLo);
2572 BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
2573 .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
2574 .addFrameIndex(FI)
2575 .addImm(4)
2576 .addMemOperand(MMOHi);
2577 TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
2578 MI.eraseFromParent(); // The pseudo instruction is gone now.
2579 return BB;
2580 }
2581
2582 static bool isSelectPseudo(MachineInstr &MI) {
2583 switch (MI.getOpcode()) {
2584 default:
2585 return false;
2586 case RISCV::Select_GPR_Using_CC_GPR:
2587 case RISCV::Select_FPR16_Using_CC_GPR:
2588 case RISCV::Select_FPR32_Using_CC_GPR:
2589 case RISCV::Select_FPR64_Using_CC_GPR:
2590 return true;
2591 }
2592 }
2593
2594 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
2595 MachineBasicBlock *BB) {
2596 // To "insert" Select_* instructions, we actually have to insert the triangle
2597 // control-flow pattern. The incoming instructions know the destination vreg
2598 // to set, the condition code register to branch on, the true/false values to
2599 // select between, and the condcode to use to select the appropriate branch.
2600 //
2601 // We produce the following control flow:
2602 // HeadMBB
2603 // | \
2604 // | IfFalseMBB
2605 // | /
2606 // TailMBB
2607 //
2608 // When we find a sequence of selects we attempt to optimize their emission
2609 // by sharing the control flow. Currently we only handle cases where we have
2610 // multiple selects with the exact same condition (same LHS, RHS and CC).
2611 // The selects may be interleaved with other instructions if the other
2612 // instructions meet some requirements we deem safe:
2613 // - They are debug instructions. Otherwise,
2614 // - They do not have side-effects, do not access memory and their inputs do
2615 // not depend on the results of the select pseudo-instructions.
2616 // The TrueV/FalseV operands of the selects cannot depend on the result of
2617 // previous selects in the sequence.
2618 // These conditions could be further relaxed. See the X86 target for a
2619 // related approach and more information.
2620 Register LHS = MI.getOperand(1).getReg();
2621 Register RHS = MI.getOperand(2).getReg();
2622 auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
2623
2624 SmallVector<MachineInstr *, 4> SelectDebugValues;
2625 SmallSet<Register, 4> SelectDests;
2626 SelectDests.insert(MI.getOperand(0).getReg());
2627
2628 MachineInstr *LastSelectPseudo = &MI;
2629
2630 for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
2631 SequenceMBBI != E; ++SequenceMBBI) {
2632 if (SequenceMBBI->isDebugInstr())
2633 continue;
2634 else if (isSelectPseudo(*SequenceMBBI)) {
2635 if (SequenceMBBI->getOperand(1).getReg() != LHS ||
2636 SequenceMBBI->getOperand(2).getReg() != RHS ||
2637 SequenceMBBI->getOperand(3).getImm() != CC ||
2638 SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
2639 SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
2640 break;
2641 LastSelectPseudo = &*SequenceMBBI;
2642 SequenceMBBI->collectDebugValues(SelectDebugValues);
2643 SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
2644 } else {
2645 if (SequenceMBBI->hasUnmodeledSideEffects() ||
2646 SequenceMBBI->mayLoadOrStore())
2647 break;
2648 if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
2649 return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
2650 }))
2651 break;
2652 }
2653 }
2654
2655 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
2656 const BasicBlock *LLVM_BB = BB->getBasicBlock();
2657 DebugLoc DL = MI.getDebugLoc();
2658 MachineFunction::iterator I = ++BB->getIterator();
2659
2660 MachineBasicBlock *HeadMBB = BB;
2661 MachineFunction *F = BB->getParent();
2662 MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
2663 MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
2664
2665 F->insert(I, IfFalseMBB);
2666 F->insert(I, TailMBB);
2667
2668 // Transfer debug instructions associated with the selects to TailMBB.
2669 for (MachineInstr *DebugInstr : SelectDebugValues) {
2670 TailMBB->push_back(DebugInstr->removeFromParent());
2671 }
2672
2673 // Move all instructions after the sequence to TailMBB.
2674 TailMBB->splice(TailMBB->end(), HeadMBB,
2675 std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
2676 // Update machine-CFG edges by transferring all successors of the current
2677 // block to the new block which will contain the Phi nodes for the selects.
2678 TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
2679 // Set the successors for HeadMBB.
2680 HeadMBB->addSuccessor(IfFalseMBB);
2681 HeadMBB->addSuccessor(TailMBB);
2682
2683 // Insert appropriate branch.
2684 unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
2685
2686 BuildMI(HeadMBB, DL, TII.get(Opcode))
2687 .addReg(LHS)
2688 .addReg(RHS)
2689 .addMBB(TailMBB);
2690
2691 // IfFalseMBB just falls through to TailMBB.
2692 IfFalseMBB->addSuccessor(TailMBB);
2693
2694 // Create PHIs for all of the select pseudo-instructions.
2695 auto SelectMBBI = MI.getIterator();
2696 auto SelectEnd = std::next(LastSelectPseudo->getIterator());
2697 auto InsertionPoint = TailMBB->begin();
2698 while (SelectMBBI != SelectEnd) {
2699 auto Next = std::next(SelectMBBI);
2700 if (isSelectPseudo(*SelectMBBI)) {
2701 // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
2702 BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
2703 TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
2704 .addReg(SelectMBBI->getOperand(4).getReg())
2705 .addMBB(HeadMBB)
2706 .addReg(SelectMBBI->getOperand(5).getReg())
2707 .addMBB(IfFalseMBB);
2708 SelectMBBI->eraseFromParent();
2709 }
2710 SelectMBBI = Next;
2711 }
2712
2713 F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
2714 return TailMBB;
2715 }
2716
2717 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
2718 int VLIndex, unsigned SEWIndex,
2719 RISCVVLMUL VLMul, bool WritesElement0) {
2720 MachineFunction &MF = *BB->getParent();
2721 DebugLoc DL = MI.getDebugLoc();
2722 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2723
2724 unsigned SEW = MI.getOperand(SEWIndex).getImm();
2725 assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
2726 RISCVVSEW ElementWidth = static_cast<RISCVVSEW>(Log2_32(SEW / 8));
2727
2728 MachineRegisterInfo &MRI = MF.getRegInfo();
2729
2730 // VL and VTYPE are alive here.
2731 MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI));
2732
2733 if (VLIndex >= 0) {
2734 // Set VL (rs1 != X0).
2735 Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
2736 MIB.addReg(DestReg, RegState::Define | RegState::Dead)
2737 .addReg(MI.getOperand(VLIndex).getReg());
2738 } else
2739 // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
2740 MIB.addReg(RISCV::X0, RegState::Define | RegState::Dead)
2741 .addReg(RISCV::X0, RegState::Kill);
2742
2743 // Default to tail agnostic unless the destination is tied to a source. In
2744 // that case the user would have some control over the tail values. The tail
2745 // policy is also ignored on instructions that only update element 0 like
2746 // vmv.s.x or reductions so use agnostic there to match the common case.
2747 // FIXME: This is conservatively correct, but we might want to detect that
2748 // the input is undefined.
2749 bool TailAgnostic = true;
2750 unsigned UseOpIdx;
2751 if (MI.isRegTiedToUseOperand(0, &UseOpIdx) && !WritesElement0) {
2752 TailAgnostic = false;
2753 // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
2754 const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
2755 MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
2756 if (UseMI && UseMI->isImplicitDef())
2757 TailAgnostic = true;
2758 }
2759
2760 // For simplicity we reuse the vtype representation here.
2761 MIB.addImm(RISCVVType::encodeVTYPE(VLMul, ElementWidth,
2762 /*TailAgnostic*/ TailAgnostic,
2763 /*MaskAgnostic*/ false));
2764
2765 // Remove (now) redundant operands from pseudo
2766 MI.getOperand(SEWIndex).setImm(-1);
2767 if (VLIndex >= 0) {
2768 MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
2769 MI.getOperand(VLIndex).setIsKill(false);
2770 }
2771
2772 return BB;
2773 }
2774
2775 MachineBasicBlock *
2776 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2777 MachineBasicBlock *BB) const {
2778 uint64_t TSFlags = MI.getDesc().TSFlags;
2779
2780 if (TSFlags & RISCVII::HasSEWOpMask) {
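    // By convention, vector pseudos place the SEW operand last among the
    // explicit operands, with the VL operand (when present) immediately
    // before it; the indices below rely on that layout.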
2781 unsigned NumOperands = MI.getNumExplicitOperands();
2782 int VLIndex = (TSFlags & RISCVII::HasVLOpMask) ? NumOperands - 2 : -1;
2783 unsigned SEWIndex = NumOperands - 1;
2784 bool WritesElement0 = TSFlags & RISCVII::WritesElement0Mask;
2785
2786 RISCVVLMUL VLMul = static_cast<RISCVVLMUL>((TSFlags & RISCVII::VLMulMask) >>
2787 RISCVII::VLMulShift);
2788 return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, WritesElement0);
2789 }
2790
2791 switch (MI.getOpcode()) {
2792 default:
2793 llvm_unreachable("Unexpected instr type to insert");
2794 case RISCV::ReadCycleWide:
2795 assert(!Subtarget.is64Bit() &&
2796 "ReadCycleWrite is only to be used on riscv32");
2797 return emitReadCycleWidePseudo(MI, BB);
2798 case RISCV::Select_GPR_Using_CC_GPR:
2799 case RISCV::Select_FPR16_Using_CC_GPR:
2800 case RISCV::Select_FPR32_Using_CC_GPR:
2801 case RISCV::Select_FPR64_Using_CC_GPR:
2802 return emitSelectPseudo(MI, BB);
2803 case RISCV::BuildPairF64Pseudo:
2804 return emitBuildPairF64Pseudo(MI, BB);
2805 case RISCV::SplitF64Pseudo:
2806 return emitSplitF64Pseudo(MI, BB);
2807 }
2808 }
2809
2810 // Calling Convention Implementation.
2811 // The expectations for frontend ABI lowering vary from target to target.
2812 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
2813 // details, but this is a longer term goal. For now, we simply try to keep the
2814 // role of the frontend as simple and well-defined as possible. The rules can
2815 // be summarised as:
2816 // * Never split up large scalar arguments. We handle them here.
2817 // * If a hardfloat calling convention is being used, and the struct may be
2818 // passed in a pair of registers (fp+fp, int+fp), and both registers are
2819 // available, then pass as two separate arguments. If either the GPRs or FPRs
2820 // are exhausted, then pass according to the rule below.
2821 // * If a struct could never be passed in registers or directly in a stack
2822 // slot (as it is larger than 2*XLEN and the floating point rules don't
2823 // apply), then pass it using a pointer with the byval attribute.
2824 // * If a struct is less than 2*XLEN, then coerce to either a two-element
2825 // word-sized array or a 2*XLEN scalar (depending on alignment).
2826 // * The frontend can determine whether a struct is returned by reference or
2827 // not based on its size and fields. If it will be returned by reference, the
2828 // frontend must modify the prototype so a pointer with the sret annotation is
2829 // passed as the first argument. This is not necessary for large scalar
2830 // returns.
2831 // * Struct return values and varargs should be coerced to structs containing
2832 // register-size fields in the same situations they would be for fixed
2833 // arguments.
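//
// As a rough illustration of the second rule (this is the frontend's
// responsibility, not something performed in this file): on RV32 with the
// ilp32d ABI, a frontend would be expected to pass
//
//   struct S { double d; int i; };
//
// as two separate arguments -- 'd' in an FPR and 'i' in a GPR -- while both
// kinds of registers are available, and to fall back to the integer calling
// convention once either runs out.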
2834
2835 static const MCPhysReg ArgGPRs[] = {
2836 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
2837 RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
2838 };
2839 static const MCPhysReg ArgFPR16s[] = {
2840 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
2841 RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
2842 };
2843 static const MCPhysReg ArgFPR32s[] = {
2844 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
2845 RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
2846 };
2847 static const MCPhysReg ArgFPR64s[] = {
2848 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
2849 RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
2850 };
2851 // This is an interim calling convention and it may be changed in the future.
2852 static const MCPhysReg ArgVRs[] = {
2853 RISCV::V8, RISCV::V9, RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
2854 RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
2855 RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
2856 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2, RISCV::V10M2, RISCV::V12M2,
2857 RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
2858 RISCV::V20M2, RISCV::V22M2};
2859 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
2860 RISCV::V20M4};
2861 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
2862
2863 // Pass a 2*XLEN argument that has been split into two XLEN values through
2864 // registers or the stack as necessary.
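// For example, on RV32 an i64 argument legalised into two i32 halves may end
// up entirely in registers (e.g. a0/a1), split between the last GPR (a7) and a
// stack slot, or entirely on the stack, depending on how many argument GPRs
// remain.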
2865 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
2866 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
2867 MVT ValVT2, MVT LocVT2,
2868 ISD::ArgFlagsTy ArgFlags2) {
2869 unsigned XLenInBytes = XLen / 8;
2870 if (Register Reg = State.AllocateReg(ArgGPRs)) {
2871 // At least one half can be passed via register.
2872 State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
2873 VA1.getLocVT(), CCValAssign::Full));
2874 } else {
2875 // Both halves must be passed on the stack, with proper alignment.
2876 Align StackAlign =
2877 std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
2878 State.addLoc(
2879 CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
2880 State.AllocateStack(XLenInBytes, StackAlign),
2881 VA1.getLocVT(), CCValAssign::Full));
2882 State.addLoc(CCValAssign::getMem(
2883 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2884 LocVT2, CCValAssign::Full));
2885 return false;
2886 }
2887
2888 if (Register Reg = State.AllocateReg(ArgGPRs)) {
2889 // The second half can also be passed via register.
2890 State.addLoc(
2891 CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
2892 } else {
2893 // The second half is passed via the stack, without additional alignment.
2894 State.addLoc(CCValAssign::getMem(
2895 ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
2896 LocVT2, CCValAssign::Full));
2897 }
2898
2899 return false;
2900 }
2901
2902 // Implements the RISC-V calling convention. Returns true upon failure.
2903 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
2904 MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
2905 ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
2906 bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
2907 Optional<unsigned> FirstMaskArgument) {
2908 unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
2909 assert(XLen == 32 || XLen == 64);
2910 MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
2911
2912 // Any return value split into more than two values can't be returned
2913 // directly.
2914 if (IsRet && ValNo > 1)
2915 return true;
2916
2917 // UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing a
2918 // variadic argument, or if no F16/F32 argument registers are available.
2919 bool UseGPRForF16_F32 = true;
2920 // UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if passing a
2921 // variadic argument, or if no F64 argument registers are available.
2922 bool UseGPRForF64 = true;
2923
2924 switch (ABI) {
2925 default:
2926 llvm_unreachable("Unexpected ABI");
2927 case RISCVABI::ABI_ILP32:
2928 case RISCVABI::ABI_LP64:
2929 break;
2930 case RISCVABI::ABI_ILP32F:
2931 case RISCVABI::ABI_LP64F:
2932 UseGPRForF16_F32 = !IsFixed;
2933 break;
2934 case RISCVABI::ABI_ILP32D:
2935 case RISCVABI::ABI_LP64D:
2936 UseGPRForF16_F32 = !IsFixed;
2937 UseGPRForF64 = !IsFixed;
2938 break;
2939 }
2940
2941 // FPR16, FPR32, and FPR64 alias each other.
2942 if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
2943 UseGPRForF16_F32 = true;
2944 UseGPRForF64 = true;
2945 }
2946
2947 // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
2948 // similar local variables rather than directly checking against the target
2949 // ABI.
2950
2951 if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
2952 LocVT = XLenVT;
2953 LocInfo = CCValAssign::BCvt;
2954 } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
2955 LocVT = MVT::i64;
2956 LocInfo = CCValAssign::BCvt;
2957 }
2958
2959 // If this is a variadic argument, the RISC-V calling convention requires
2960 // that it is assigned an 'even' or 'aligned' register if it has 8-byte
2961 // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
2962 // be used regardless of whether the original argument was split during
2963 // legalisation or not. The argument will not be passed by registers if the
2964 // original type is larger than 2*XLEN, so the register alignment rule does
2965 // not apply.
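// For example, a variadic double on RV32 (2*XLEN in both size and alignment)
// with a0-a2 already allocated skips a3 and is passed in the a4/a5 pair.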
2966 unsigned TwoXLenInBytes = (2 * XLen) / 8;
2967 if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
2968 DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
2969 unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
2970 // Skip 'odd' register if necessary.
2971 if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
2972 State.AllocateReg(ArgGPRs);
2973 }
2974
2975 SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
2976 SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
2977 State.getPendingArgFlags();
2978
2979 assert(PendingLocs.size() == PendingArgFlags.size() &&
2980 "PendingLocs and PendingArgFlags out of sync");
2981
2982 // Handle passing f64 on RV32D with a soft float ABI or when floating point
2983 // registers are exhausted.
2984 if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
2985 assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
2986 "Can't lower f64 if it is split");
2987 // Depending on available argument GPRs, f64 may be passed in a pair of
2988 // GPRs, split between a GPR and the stack, or passed completely on the
2989 // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
2990 // cases.
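    // For example, if only a7 is left, the low half of the f64 is passed in a7
    // and the high half goes in the first stack slot (see the RISCV::X17
    // handling in LowerCall/LowerFormalArguments).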
2991 Register Reg = State.AllocateReg(ArgGPRs);
2992 LocVT = MVT::i32;
2993 if (!Reg) {
2994 unsigned StackOffset = State.AllocateStack(8, Align(8));
2995 State.addLoc(
2996 CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
2997 return false;
2998 }
2999 if (!State.AllocateReg(ArgGPRs))
3000 State.AllocateStack(4, Align(4));
3001 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3002 return false;
3003 }
3004
3005 // Split arguments might be passed indirectly, so keep track of the pending
3006 // values.
3007 if (ArgFlags.isSplit() || !PendingLocs.empty()) {
3008 LocVT = XLenVT;
3009 LocInfo = CCValAssign::Indirect;
3010 PendingLocs.push_back(
3011 CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
3012 PendingArgFlags.push_back(ArgFlags);
3013 if (!ArgFlags.isSplitEnd()) {
3014 return false;
3015 }
3016 }
3017
3018 // If the split argument only had two elements, it should be passed directly
3019 // in registers or on the stack.
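  // For example, an i128 on RV64 (or an i64 on RV32) is split into exactly two
  // XLEN-sized parts and handled here, whereas an i128 on RV32 produces four
  // parts and is instead passed indirectly via the PendingLocs handling
  // further down.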
3020 if (ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
3021 assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
3022 // Apply the normal calling convention rules to the first half of the
3023 // split argument.
3024 CCValAssign VA = PendingLocs[0];
3025 ISD::ArgFlagsTy AF = PendingArgFlags[0];
3026 PendingLocs.clear();
3027 PendingArgFlags.clear();
3028 return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
3029 ArgFlags);
3030 }
3031
3032 // Allocate to a register if possible, or else a stack slot.
3033 Register Reg;
3034 if (ValVT == MVT::f16 && !UseGPRForF16_F32)
3035 Reg = State.AllocateReg(ArgFPR16s);
3036 else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
3037 Reg = State.AllocateReg(ArgFPR32s);
3038 else if (ValVT == MVT::f64 && !UseGPRForF64)
3039 Reg = State.AllocateReg(ArgFPR64s);
3040 else if (ValVT.isScalableVector()) {
3041 const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
3042 if (RC == &RISCV::VRRegClass) {
3043 // Assign the first mask argument to V0.
3044 // This is an interim calling convention and it may be changed in the
3045 // future.
3046 if (FirstMaskArgument.hasValue() &&
3047 ValNo == FirstMaskArgument.getValue()) {
3048 Reg = State.AllocateReg(RISCV::V0);
3049 } else {
3050 Reg = State.AllocateReg(ArgVRs);
3051 }
3052 } else if (RC == &RISCV::VRM2RegClass) {
3053 Reg = State.AllocateReg(ArgVRM2s);
3054 } else if (RC == &RISCV::VRM4RegClass) {
3055 Reg = State.AllocateReg(ArgVRM4s);
3056 } else if (RC == &RISCV::VRM8RegClass) {
3057 Reg = State.AllocateReg(ArgVRM8s);
3058 } else {
3059 llvm_unreachable("Unhandled class register for ValueType");
3060 }
3061 if (!Reg) {
3062 LocInfo = CCValAssign::Indirect;
3063 // Try using a GPR to pass the address
3064 Reg = State.AllocateReg(ArgGPRs);
3065 LocVT = XLenVT;
3066 }
3067 } else
3068 Reg = State.AllocateReg(ArgGPRs);
3069 unsigned StackOffset =
3070 Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
3071
3072 // If we reach this point and PendingLocs is non-empty, we must be at the
3073 // end of a split argument that must be passed indirectly.
3074 if (!PendingLocs.empty()) {
3075 assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
3076 assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
3077
3078 for (auto &It : PendingLocs) {
3079 if (Reg)
3080 It.convertToReg(Reg);
3081 else
3082 It.convertToMem(StackOffset);
3083 State.addLoc(It);
3084 }
3085 PendingLocs.clear();
3086 PendingArgFlags.clear();
3087 return false;
3088 }
3089
3090 assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
3091 (TLI.getSubtarget().hasStdExtV() && ValVT.isScalableVector())) &&
3092 "Expected an XLenVT or scalable vector types at this stage");
3093
3094 if (Reg) {
3095 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3096 return false;
3097 }
3098
3099 // When a floating-point value is passed on the stack, no bit-conversion is
3100 // needed.
3101 if (ValVT.isFloatingPoint()) {
3102 LocVT = ValVT;
3103 LocInfo = CCValAssign::Full;
3104 }
3105 State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
3106 return false;
3107 }
3108
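// Return the index of the first scalable vector mask argument (element type
// i1), if any; the result is used by CC_RISCV above to assign that argument
// to V0.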
3109 template <typename ArgTy>
3110 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
3111 for (const auto &ArgIdx : enumerate(Args)) {
3112 MVT ArgVT = ArgIdx.value().VT;
3113 if (ArgVT.isScalableVector() &&
3114 ArgVT.getVectorElementType().SimpleTy == MVT::i1)
3115 return ArgIdx.index();
3116 }
3117 return None;
3118 }
3119
3120 void RISCVTargetLowering::analyzeInputArgs(
3121 MachineFunction &MF, CCState &CCInfo,
3122 const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
3123 unsigned NumArgs = Ins.size();
3124 FunctionType *FType = MF.getFunction().getFunctionType();
3125
3126 Optional<unsigned> FirstMaskArgument;
3127 if (Subtarget.hasStdExtV())
3128 FirstMaskArgument = preAssignMask(Ins);
3129
3130 for (unsigned i = 0; i != NumArgs; ++i) {
3131 MVT ArgVT = Ins[i].VT;
3132 ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
3133
3134 Type *ArgTy = nullptr;
3135 if (IsRet)
3136 ArgTy = FType->getReturnType();
3137 else if (Ins[i].isOrigArg())
3138 ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
3139
3140 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3141 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3142 ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
3143 FirstMaskArgument)) {
3144 LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
3145 << EVT(ArgVT).getEVTString() << '\n');
3146 llvm_unreachable(nullptr);
3147 }
3148 }
3149 }
3150
3151 void RISCVTargetLowering::analyzeOutputArgs(
3152 MachineFunction &MF, CCState &CCInfo,
3153 const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
3154 CallLoweringInfo *CLI) const {
3155 unsigned NumArgs = Outs.size();
3156
3157 Optional<unsigned> FirstMaskArgument;
3158 if (Subtarget.hasStdExtV())
3159 FirstMaskArgument = preAssignMask(Outs);
3160
3161 for (unsigned i = 0; i != NumArgs; i++) {
3162 MVT ArgVT = Outs[i].VT;
3163 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3164 Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
3165
3166 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3167 if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
3168 ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
3169 FirstMaskArgument)) {
3170 LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
3171 << EVT(ArgVT).getEVTString() << "\n");
3172 llvm_unreachable(nullptr);
3173 }
3174 }
3175 }
3176
3177 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
3178 // values.
3179 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
3180 const CCValAssign &VA, const SDLoc &DL) {
3181 switch (VA.getLocInfo()) {
3182 default:
3183 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3184 case CCValAssign::Full:
3185 break;
3186 case CCValAssign::BCvt:
3187 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3188 Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
3189 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3190 Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
3191 else
3192 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3193 break;
3194 }
3195 return Val;
3196 }
3197
3198 // The caller is responsible for loading the full value if the argument is
3199 // passed with CCValAssign::Indirect.
3200 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
3201 const CCValAssign &VA, const SDLoc &DL,
3202 const RISCVTargetLowering &TLI) {
3203 MachineFunction &MF = DAG.getMachineFunction();
3204 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3205 EVT LocVT = VA.getLocVT();
3206 SDValue Val;
3207 const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
3208 Register VReg = RegInfo.createVirtualRegister(RC);
3209 RegInfo.addLiveIn(VA.getLocReg(), VReg);
3210 Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
3211
3212 if (VA.getLocInfo() == CCValAssign::Indirect)
3213 return Val;
3214
3215 return convertLocVTToValVT(DAG, Val, VA, DL);
3216 }
3217
3218 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
3219 const CCValAssign &VA, const SDLoc &DL) {
3220 EVT LocVT = VA.getLocVT();
3221
3222 switch (VA.getLocInfo()) {
3223 default:
3224 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3225 case CCValAssign::Full:
3226 break;
3227 case CCValAssign::BCvt:
3228 if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
3229 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
3230 else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
3231 Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
3232 else
3233 Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
3234 break;
3235 }
3236 return Val;
3237 }
3238
3239 // The caller is responsible for loading the full value if the argument is
3240 // passed with CCValAssign::Indirect.
3241 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
3242 const CCValAssign &VA, const SDLoc &DL) {
3243 MachineFunction &MF = DAG.getMachineFunction();
3244 MachineFrameInfo &MFI = MF.getFrameInfo();
3245 EVT LocVT = VA.getLocVT();
3246 EVT ValVT = VA.getValVT();
3247 EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
3248 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3249 VA.getLocMemOffset(), /*Immutable=*/true);
3250 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3251 SDValue Val;
3252
3253 ISD::LoadExtType ExtType;
3254 switch (VA.getLocInfo()) {
3255 default:
3256 llvm_unreachable("Unexpected CCValAssign::LocInfo");
3257 case CCValAssign::Full:
3258 case CCValAssign::Indirect:
3259 case CCValAssign::BCvt:
3260 ExtType = ISD::NON_EXTLOAD;
3261 break;
3262 }
3263 Val = DAG.getExtLoad(
3264 ExtType, DL, LocVT, Chain, FIN,
3265 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
3266 return Val;
3267 }
3268
3269 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
3270 const CCValAssign &VA, const SDLoc &DL) {
3271 assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
3272 "Unexpected VA");
3273 MachineFunction &MF = DAG.getMachineFunction();
3274 MachineFrameInfo &MFI = MF.getFrameInfo();
3275 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3276
3277 if (VA.isMemLoc()) {
3278 // f64 is passed on the stack.
3279 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
3280 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3281 return DAG.getLoad(MVT::f64, DL, Chain, FIN,
3282 MachinePointerInfo::getFixedStack(MF, FI));
3283 }
3284
3285 assert(VA.isRegLoc() && "Expected register VA assignment");
3286
3287 Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3288 RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
3289 SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
3290 SDValue Hi;
3291 if (VA.getLocReg() == RISCV::X17) {
3292 // Second half of f64 is passed on the stack.
3293 int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
3294 SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
3295 Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
3296 MachinePointerInfo::getFixedStack(MF, FI));
3297 } else {
3298 // Second half of f64 is passed in another GPR.
3299 Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
3300 RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
3301 Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
3302 }
3303 return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
3304 }
3305
3306 // FastCC gives less than a 1% performance improvement on some particular
3307 // benchmarks, but it may theoretically benefit other cases.
3308 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
3309 CCValAssign::LocInfo LocInfo,
3310 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3311
3312 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3313 // X5 and X6 might be used for save-restore libcall.
3314 static const MCPhysReg GPRList[] = {
3315 RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
3316 RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
3317 RISCV::X29, RISCV::X30, RISCV::X31};
3318 if (unsigned Reg = State.AllocateReg(GPRList)) {
3319 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3320 return false;
3321 }
3322 }
3323
3324 if (LocVT == MVT::f16) {
3325 static const MCPhysReg FPR16List[] = {
3326 RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
3327 RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
3328 RISCV::F2_H, RISCV::F3_H, RISCV::F4_H, RISCV::F5_H, RISCV::F6_H,
3329 RISCV::F7_H, RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
3330 if (unsigned Reg = State.AllocateReg(FPR16List)) {
3331 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3332 return false;
3333 }
3334 }
3335
3336 if (LocVT == MVT::f32) {
3337 static const MCPhysReg FPR32List[] = {
3338 RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
3339 RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
3340 RISCV::F2_F, RISCV::F3_F, RISCV::F4_F, RISCV::F5_F, RISCV::F6_F,
3341 RISCV::F7_F, RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
3342 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3343 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3344 return false;
3345 }
3346 }
3347
3348 if (LocVT == MVT::f64) {
3349 static const MCPhysReg FPR64List[] = {
3350 RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
3351 RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
3352 RISCV::F2_D, RISCV::F3_D, RISCV::F4_D, RISCV::F5_D, RISCV::F6_D,
3353 RISCV::F7_D, RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
3354 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3355 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3356 return false;
3357 }
3358 }
3359
3360 if (LocVT == MVT::i32 || LocVT == MVT::f32) {
3361 unsigned Offset4 = State.AllocateStack(4, Align(4));
3362 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
3363 return false;
3364 }
3365
3366 if (LocVT == MVT::i64 || LocVT == MVT::f64) {
3367 unsigned Offset5 = State.AllocateStack(8, Align(8));
3368 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
3369 return false;
3370 }
3371
3372 return true; // CC didn't match.
3373 }
3374
3375 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
3376 CCValAssign::LocInfo LocInfo,
3377 ISD::ArgFlagsTy ArgFlags, CCState &State) {
3378
3379 if (LocVT == MVT::i32 || LocVT == MVT::i64) {
3380 // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
3381 // s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
3382 static const MCPhysReg GPRList[] = {
3383 RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
3384 RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
3385 if (unsigned Reg = State.AllocateReg(GPRList)) {
3386 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3387 return false;
3388 }
3389 }
3390
3391 if (LocVT == MVT::f32) {
3392 // Pass in STG registers: F1, ..., F6
3393 // fs0 ... fs5
3394 static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
3395 RISCV::F18_F, RISCV::F19_F,
3396 RISCV::F20_F, RISCV::F21_F};
3397 if (unsigned Reg = State.AllocateReg(FPR32List)) {
3398 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3399 return false;
3400 }
3401 }
3402
3403 if (LocVT == MVT::f64) {
3404 // Pass in STG registers: D1, ..., D6
3405 // fs6 ... fs11
3406 static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
3407 RISCV::F24_D, RISCV::F25_D,
3408 RISCV::F26_D, RISCV::F27_D};
3409 if (unsigned Reg = State.AllocateReg(FPR64List)) {
3410 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
3411 return false;
3412 }
3413 }
3414
3415 report_fatal_error("No registers left in GHC calling convention");
3416 return true;
3417 }
3418
3419 // Transform physical registers into virtual registers.
3420 SDValue RISCVTargetLowering::LowerFormalArguments(
3421 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3422 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3423 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3424
3425 MachineFunction &MF = DAG.getMachineFunction();
3426
3427 switch (CallConv) {
3428 default:
3429 report_fatal_error("Unsupported calling convention");
3430 case CallingConv::C:
3431 case CallingConv::Fast:
3432 break;
3433 case CallingConv::GHC:
3434 if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
3435 !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
3436 report_fatal_error(
3437 "GHC calling convention requires the F and D instruction set extensions");
3438 }
3439
3440 const Function &Func = MF.getFunction();
3441 if (Func.hasFnAttribute("interrupt")) {
3442 if (!Func.arg_empty())
3443 report_fatal_error(
3444 "Functions with the interrupt attribute cannot have arguments!");
3445
3446 StringRef Kind =
3447 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
3448
3449 if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
3450 report_fatal_error(
3451 "Function interrupt attribute argument not supported!");
3452 }
3453
3454 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3455 MVT XLenVT = Subtarget.getXLenVT();
3456 unsigned XLenInBytes = Subtarget.getXLen() / 8;
3457 // Used with varargs to accumulate store chains.
3458 std::vector<SDValue> OutChains;
3459
3460 // Assign locations to all of the incoming arguments.
3461 SmallVector<CCValAssign, 16> ArgLocs;
3462 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3463
3464 if (CallConv == CallingConv::Fast)
3465 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
3466 else if (CallConv == CallingConv::GHC)
3467 CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
3468 else
3469 analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
3470
3471 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3472 CCValAssign &VA = ArgLocs[i];
3473 SDValue ArgValue;
3474 // Passing f64 on RV32D with a soft float ABI must be handled as a special
3475 // case.
3476 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
3477 ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
3478 else if (VA.isRegLoc())
3479 ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
3480 else
3481 ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
3482
3483 if (VA.getLocInfo() == CCValAssign::Indirect) {
3484 // If the original argument was split and passed by reference (e.g. i128
3485 // on RV32), we need to load all parts of it here (using the same
3486 // address).
3487 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
3488 MachinePointerInfo()));
3489 unsigned ArgIndex = Ins[i].OrigArgIndex;
3490 assert(Ins[i].PartOffset == 0);
3491 while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
3492 CCValAssign &PartVA = ArgLocs[i + 1];
3493 unsigned PartOffset = Ins[i + 1].PartOffset;
3494 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
3495 DAG.getIntPtrConstant(PartOffset, DL));
3496 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
3497 MachinePointerInfo()));
3498 ++i;
3499 }
3500 continue;
3501 }
3502 InVals.push_back(ArgValue);
3503 }
3504
3505 if (IsVarArg) {
3506 ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
3507 unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
3508 const TargetRegisterClass *RC = &RISCV::GPRRegClass;
3509 MachineFrameInfo &MFI = MF.getFrameInfo();
3510 MachineRegisterInfo &RegInfo = MF.getRegInfo();
3511 RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
3512
3513 // Offset of the first variable argument from stack pointer, and size of
3514 // the vararg save area. For now, the varargs save area is either zero or
3515 // large enough to hold a0-a7.
3516 int VaArgOffset, VarArgsSaveSize;
3517
3518 // If all registers are allocated, then all varargs must be passed on the
3519 // stack and we don't need to save any argregs.
3520 if (ArgRegs.size() == Idx) {
3521 VaArgOffset = CCInfo.getNextStackOffset();
3522 VarArgsSaveSize = 0;
3523 } else {
3524 VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
3525 VaArgOffset = -VarArgsSaveSize;
3526 }
3527
3528 // Record the frame index of the first variable argument,
3529 // which is needed by VASTART.
3530 int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3531 RVFI->setVarArgsFrameIndex(FI);
3532
3533 // If saving an odd number of registers, create an extra stack slot to
3534 // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
3535 // offsets to even-numbered registers remain 2*XLEN-aligned.
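    // For example, on RV32 with the first five argument GPRs already used
    // (Idx == 5), a5-a7 are saved (12 bytes) and one extra 4-byte slot is
    // added so the save area stays 8-byte aligned.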
3536 if (Idx % 2) {
3537 MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
3538 VarArgsSaveSize += XLenInBytes;
3539 }
3540
3541 // Copy the integer registers that may have been used for passing varargs
3542 // to the vararg save area.
3543 for (unsigned I = Idx; I < ArgRegs.size();
3544 ++I, VaArgOffset += XLenInBytes) {
3545 const Register Reg = RegInfo.createVirtualRegister(RC);
3546 RegInfo.addLiveIn(ArgRegs[I], Reg);
3547 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
3548 FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
3549 SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3550 SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
3551 MachinePointerInfo::getFixedStack(MF, FI));
3552 cast<StoreSDNode>(Store.getNode())
3553 ->getMemOperand()
3554 ->setValue((Value *)nullptr);
3555 OutChains.push_back(Store);
3556 }
3557 RVFI->setVarArgsSaveSize(VarArgsSaveSize);
3558 }
3559
3560 // All stores are grouped in one node to allow matching between the sizes of
3561 // Ins and InVals. This only happens for vararg functions.
3562 if (!OutChains.empty()) {
3563 OutChains.push_back(Chain);
3564 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
3565 }
3566
3567 return Chain;
3568 }
3569
3570 /// isEligibleForTailCallOptimization - Check whether the call is eligible
3571 /// for tail call optimization.
3572 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
3573 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
3574 CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
3575 const SmallVector<CCValAssign, 16> &ArgLocs) const {
3576
3577 auto &Callee = CLI.Callee;
3578 auto CalleeCC = CLI.CallConv;
3579 auto &Outs = CLI.Outs;
3580 auto &Caller = MF.getFunction();
3581 auto CallerCC = Caller.getCallingConv();
3582
3583 // Exception-handling functions need a special set of instructions to
3584 // indicate a return to the hardware. Tail-calling another function would
3585 // probably break this.
3586 // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
3587 // should be expanded as new function attributes are introduced.
3588 if (Caller.hasFnAttribute("interrupt"))
3589 return false;
3590
3591 // Do not tail call opt if the stack is used to pass parameters.
3592 if (CCInfo.getNextStackOffset() != 0)
3593 return false;
3594
3595 // Do not tail call opt if any parameters need to be passed indirectly.
3596 // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
3597 // passed indirectly. So the address of the value will be passed in a
3598 // register, or if not available, then the address is put on the stack. In
3599 // order to pass indirectly, space on the stack often needs to be allocated to
3600 // store the value. In this case the CCInfo.getNextStackOffset() != 0 check is
3601 // not enough; we also need to check whether any of the CCValAssign ArgLocs
3602 // are passed CCValAssign::Indirect.
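  // For example, an fp128 argument is stored to a slot in the caller's frame
  // and only its address is passed, so tail calling would leave the callee
  // with a pointer into a frame that no longer exists.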
3603 for (auto &VA : ArgLocs)
3604 if (VA.getLocInfo() == CCValAssign::Indirect)
3605 return false;
3606
3607 // Do not tail call opt if either caller or callee uses struct return
3608 // semantics.
3609 auto IsCallerStructRet = Caller.hasStructRetAttr();
3610 auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
3611 if (IsCallerStructRet || IsCalleeStructRet)
3612 return false;
3613
3614 // Externally-defined functions with weak linkage should not be
3615 // tail-called. The behaviour of branch instructions in this situation (as
3616 // used for tail calls) is implementation-defined, so we cannot rely on the
3617 // linker replacing the tail call with a return.
3618 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3619 const GlobalValue *GV = G->getGlobal();
3620 if (GV->hasExternalWeakLinkage())
3621 return false;
3622 }
3623
3624 // The callee has to preserve all registers the caller needs to preserve.
3625 const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
3626 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3627 if (CalleeCC != CallerCC) {
3628 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3629 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3630 return false;
3631 }
3632
3633 // Byval parameters hand the function a pointer directly into the stack area
3634 // we want to reuse during a tail call. Working around this *is* possible
3635 // but less efficient and uglier in LowerCall.
3636 for (auto &Arg : Outs)
3637 if (Arg.Flags.isByVal())
3638 return false;
3639
3640 return true;
3641 }
3642
3643 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
3644 // and output parameter nodes.
3645 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
3646 SmallVectorImpl<SDValue> &InVals) const {
3647 SelectionDAG &DAG = CLI.DAG;
3648 SDLoc &DL = CLI.DL;
3649 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3650 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3651 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3652 SDValue Chain = CLI.Chain;
3653 SDValue Callee = CLI.Callee;
3654 bool &IsTailCall = CLI.IsTailCall;
3655 CallingConv::ID CallConv = CLI.CallConv;
3656 bool IsVarArg = CLI.IsVarArg;
3657 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3658 MVT XLenVT = Subtarget.getXLenVT();
3659
3660 MachineFunction &MF = DAG.getMachineFunction();
3661
3662 // Analyze the operands of the call, assigning locations to each operand.
3663 SmallVector<CCValAssign, 16> ArgLocs;
3664 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3665
3666 if (CallConv == CallingConv::Fast)
3667 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
3668 else if (CallConv == CallingConv::GHC)
3669 ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
3670 else
3671 analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
3672
3673 // Check if it's really possible to do a tail call.
3674 if (IsTailCall)
3675 IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
3676
3677 if (IsTailCall)
3678 ++NumTailCalls;
3679 else if (CLI.CB && CLI.CB->isMustTailCall())
3680 report_fatal_error("failed to perform tail call elimination on a call "
3681 "site marked musttail");
3682
3683 // Get a count of how many bytes are to be pushed on the stack.
3684 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
3685
3686 // Create local copies for byval args
3687 SmallVector<SDValue, 8> ByValArgs;
3688 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3689 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3690 if (!Flags.isByVal())
3691 continue;
3692
3693 SDValue Arg = OutVals[i];
3694 unsigned Size = Flags.getByValSize();
3695 Align Alignment = Flags.getNonZeroByValAlign();
3696
3697 int FI =
3698 MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
3699 SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3700 SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
3701
3702 Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
3703 /*IsVolatile=*/false,
3704 /*AlwaysInline=*/false, IsTailCall,
3705 MachinePointerInfo(), MachinePointerInfo());
3706 ByValArgs.push_back(FIPtr);
3707 }
3708
3709 if (!IsTailCall)
3710 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
3711
3712 // Copy argument values to their designated locations.
3713 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
3714 SmallVector<SDValue, 8> MemOpChains;
3715 SDValue StackPtr;
3716 for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
3717 CCValAssign &VA = ArgLocs[i];
3718 SDValue ArgValue = OutVals[i];
3719 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3720
3721 // Handle passing f64 on RV32D with a soft float ABI as a special case.
3722 bool IsF64OnRV32DSoftABI =
3723 VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
3724 if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
3725 SDValue SplitF64 = DAG.getNode(
3726 RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
3727 SDValue Lo = SplitF64.getValue(0);
3728 SDValue Hi = SplitF64.getValue(1);
3729
3730 Register RegLo = VA.getLocReg();
3731 RegsToPass.push_back(std::make_pair(RegLo, Lo));
3732
3733 if (RegLo == RISCV::X17) {
3734 // Second half of f64 is passed on the stack.
3735 // Work out the address of the stack slot.
3736 if (!StackPtr.getNode())
3737 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
3738 // Emit the store.
3739 MemOpChains.push_back(
3740 DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
3741 } else {
3742 // Second half of f64 is passed in another GPR.
3743 assert(RegLo < RISCV::X31 && "Invalid register pair");
3744 Register RegHigh = RegLo + 1;
3745 RegsToPass.push_back(std::make_pair(RegHigh, Hi));
3746 }
3747 continue;
3748 }
3749
3750 // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
3751 // as any other MemLoc.
3752
3753 // Promote the value if needed.
3754 // For now, only handle fully promoted and indirect arguments.
3755 if (VA.getLocInfo() == CCValAssign::Indirect) {
3756 // Store the argument in a stack slot and pass its address.
3757 SDValue SpillSlot = DAG.CreateStackTemporary(Outs[i].ArgVT);
3758 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3759 MemOpChains.push_back(
3760 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
3761 MachinePointerInfo::getFixedStack(MF, FI)));
3762 // If the original argument was split (e.g. i128), we need
3763 // to store all parts of it here (and pass just one address).
3764 unsigned ArgIndex = Outs[i].OrigArgIndex;
3765 assert(Outs[i].PartOffset == 0);
3766 while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
3767 SDValue PartValue = OutVals[i + 1];
3768 unsigned PartOffset = Outs[i + 1].PartOffset;
3769 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
3770 DAG.getIntPtrConstant(PartOffset, DL));
3771 MemOpChains.push_back(
3772 DAG.getStore(Chain, DL, PartValue, Address,
3773 MachinePointerInfo::getFixedStack(MF, FI)));
3774 ++i;
3775 }
3776 ArgValue = SpillSlot;
3777 } else {
3778 ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
3779 }
3780
3781 // Use local copy if it is a byval arg.
3782 if (Flags.isByVal())
3783 ArgValue = ByValArgs[j++];
3784
3785 if (VA.isRegLoc()) {
3786 // Queue up the argument copies and emit them at the end.
3787 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
3788 } else {
3789 assert(VA.isMemLoc() && "Argument not register or memory");
3790 assert(!IsTailCall && "Tail call not allowed if stack is used "
3791 "for passing parameters");
3792
3793 // Work out the address of the stack slot.
3794 if (!StackPtr.getNode())
3795 StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
3796 SDValue Address =
3797 DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
3798 DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
3799
3800 // Emit the store.
3801 MemOpChains.push_back(
3802 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
3803 }
3804 }
3805
3806 // Join the stores, which are independent of one another.
3807 if (!MemOpChains.empty())
3808 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3809
3810 SDValue Glue;
3811
3812 // Build a sequence of copy-to-reg nodes, chained and glued together.
3813 for (auto &Reg : RegsToPass) {
3814 Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
3815 Glue = Chain.getValue(1);
3816 }
3817
3818 // Validate that none of the argument registers have been marked as
3819 // reserved; if any have, report an error. Do the same for the return address
3820 // if this is not a tail call.
3821 validateCCReservedRegs(RegsToPass, MF);
3822 if (!IsTailCall &&
3823 MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
3824 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3825 MF.getFunction(),
3826 "Return address register required, but has been reserved."});
3827
3828 // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
3829 // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
3830 // split it and then direct call can be matched by PseudoCALL.
3831 if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
3832 const GlobalValue *GV = S->getGlobal();
3833
3834 unsigned OpFlags = RISCVII::MO_CALL;
3835 if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
3836 OpFlags = RISCVII::MO_PLT;
3837
3838 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
3839 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3840 unsigned OpFlags = RISCVII::MO_CALL;
3841
3842 if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
3843 nullptr))
3844 OpFlags = RISCVII::MO_PLT;
3845
3846 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
3847 }
3848
3849 // The first call operand is the chain and the second is the target address.
3850 SmallVector<SDValue, 8> Ops;
3851 Ops.push_back(Chain);
3852 Ops.push_back(Callee);
3853
3854 // Add argument registers to the end of the list so that they are
3855 // known live into the call.
3856 for (auto &Reg : RegsToPass)
3857 Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
3858
3859 if (!IsTailCall) {
3860 // Add a register mask operand representing the call-preserved registers.
3861 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3862 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
3863 assert(Mask && "Missing call preserved mask for calling convention");
3864 Ops.push_back(DAG.getRegisterMask(Mask));
3865 }
3866
3867 // Glue the call to the argument copies, if any.
3868 if (Glue.getNode())
3869 Ops.push_back(Glue);
3870
3871 // Emit the call.
3872 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3873
3874 if (IsTailCall) {
3875 MF.getFrameInfo().setHasTailCall();
3876 return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
3877 }
3878
3879 Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
3880 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
3881 Glue = Chain.getValue(1);
3882
3883 // Mark the end of the call, which is glued to the call itself.
3884 Chain = DAG.getCALLSEQ_END(Chain,
3885 DAG.getConstant(NumBytes, DL, PtrVT, true),
3886 DAG.getConstant(0, DL, PtrVT, true),
3887 Glue, DL);
3888 Glue = Chain.getValue(1);
3889
3890 // Assign locations to each value returned by this call.
3891 SmallVector<CCValAssign, 16> RVLocs;
3892 CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
3893 analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
3894
3895 // Copy all of the result registers out of their specified physreg.
3896 for (auto &VA : RVLocs) {
3897 // Copy the value out
3898 SDValue RetValue =
3899 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
3900 // Glue the RetValue to the end of the call sequence
3901 Chain = RetValue.getValue(1);
3902 Glue = RetValue.getValue(2);
3903
3904 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
3905 assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
3906 SDValue RetValue2 =
3907 DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
3908 Chain = RetValue2.getValue(1);
3909 Glue = RetValue2.getValue(2);
3910 RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
3911 RetValue2);
3912 }
3913
3914 RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
3915
3916 InVals.push_back(RetValue);
3917 }
3918
3919 return Chain;
3920 }
3921
3922 bool RISCVTargetLowering::CanLowerReturn(
3923 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
3924 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3925 SmallVector<CCValAssign, 16> RVLocs;
3926 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
3927
3928 Optional<unsigned> FirstMaskArgument;
3929 if (Subtarget.hasStdExtV())
3930 FirstMaskArgument = preAssignMask(Outs);
3931
3932 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
3933 MVT VT = Outs[i].VT;
3934 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3935 RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
3936 if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
3937 ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
3938 *this, FirstMaskArgument))
3939 return false;
3940 }
3941 return true;
3942 }
3943
3944 SDValue
3945 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3946 bool IsVarArg,
3947 const SmallVectorImpl<ISD::OutputArg> &Outs,
3948 const SmallVectorImpl<SDValue> &OutVals,
3949 const SDLoc &DL, SelectionDAG &DAG) const {
3950 const MachineFunction &MF = DAG.getMachineFunction();
3951 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
3952
3953 // Stores the assignment of the return value to a location.
3954 SmallVector<CCValAssign, 16> RVLocs;
3955
3956 // Info about the registers and stack slot.
3957 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
3958 *DAG.getContext());
3959
3960 analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
3961 nullptr);
3962
3963 if (CallConv == CallingConv::GHC && !RVLocs.empty())
3964 report_fatal_error("GHC functions return void only");
3965
3966 SDValue Glue;
3967 SmallVector<SDValue, 4> RetOps(1, Chain);
3968
3969 // Copy the result values into the output registers.
3970 for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
3971 SDValue Val = OutVals[i];
3972 CCValAssign &VA = RVLocs[i];
3973 assert(VA.isRegLoc() && "Can only return in registers!");
3974
3975 if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
3976 // Handle returning f64 on RV32D with a soft float ABI.
3977 assert(VA.isRegLoc() && "Expected return via registers");
3978 SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
3979 DAG.getVTList(MVT::i32, MVT::i32), Val);
3980 SDValue Lo = SplitF64.getValue(0);
3981 SDValue Hi = SplitF64.getValue(1);
3982 Register RegLo = VA.getLocReg();
3983 assert(RegLo < RISCV::X31 && "Invalid register pair");
3984 Register RegHi = RegLo + 1;
3985
3986 if (STI.isRegisterReservedByUser(RegLo) ||
3987 STI.isRegisterReservedByUser(RegHi))
3988 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
3989 MF.getFunction(),
3990 "Return value register required, but has been reserved."});
3991
3992 Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
3993 Glue = Chain.getValue(1);
3994 RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
3995 Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
3996 Glue = Chain.getValue(1);
3997 RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
3998 } else {
3999 // Handle a 'normal' return.
4000 Val = convertValVTToLocVT(DAG, Val, VA, DL);
4001 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
4002
4003 if (STI.isRegisterReservedByUser(VA.getLocReg()))
4004 MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
4005 MF.getFunction(),
4006 "Return value register required, but has been reserved."});
4007
4008 // Guarantee that all emitted copies are stuck together.
4009 Glue = Chain.getValue(1);
4010 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4011 }
4012 }
4013
4014 RetOps[0] = Chain; // Update chain.
4015
4016 // Add the glue node if we have it.
4017 if (Glue.getNode()) {
4018 RetOps.push_back(Glue);
4019 }
4020
4021 // Interrupt service routines use different return instructions.
4022 const Function &Func = DAG.getMachineFunction().getFunction();
4023 if (Func.hasFnAttribute("interrupt")) {
4024 if (!Func.getReturnType()->isVoidTy())
4025 report_fatal_error(
4026 "Functions with the interrupt attribute must have void return type!");
4027
4028 MachineFunction &MF = DAG.getMachineFunction();
4029 StringRef Kind =
4030 MF.getFunction().getFnAttribute("interrupt").getValueAsString();
4031
4032 unsigned RetOpc;
4033 if (Kind == "user")
4034 RetOpc = RISCVISD::URET_FLAG;
4035 else if (Kind == "supervisor")
4036 RetOpc = RISCVISD::SRET_FLAG;
4037 else
4038 RetOpc = RISCVISD::MRET_FLAG;
4039
4040 return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
4041 }
4042
4043 return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
4044 }
4045
4046 void RISCVTargetLowering::validateCCReservedRegs(
4047 const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
4048 MachineFunction &MF) const {
4049 const Function &F = MF.getFunction();
4050 const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
4051
4052 if (llvm::any_of(Regs, [&STI](auto Reg) {
4053 return STI.isRegisterReservedByUser(Reg.first);
4054 }))
4055 F.getContext().diagnose(DiagnosticInfoUnsupported{
4056 F, "Argument register required, but has been reserved."});
4057 }
4058
4059 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
4060 return CI->isTailCall();
4061 }
4062
4063 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
4064 #define NODE_NAME_CASE(NODE) \
4065 case RISCVISD::NODE: \
4066 return "RISCVISD::" #NODE;
4067 // clang-format off
4068 switch ((RISCVISD::NodeType)Opcode) {
4069 case RISCVISD::FIRST_NUMBER:
4070 break;
4071 NODE_NAME_CASE(RET_FLAG)
4072 NODE_NAME_CASE(URET_FLAG)
4073 NODE_NAME_CASE(SRET_FLAG)
4074 NODE_NAME_CASE(MRET_FLAG)
4075 NODE_NAME_CASE(CALL)
4076 NODE_NAME_CASE(SELECT_CC)
4077 NODE_NAME_CASE(BuildPairF64)
4078 NODE_NAME_CASE(SplitF64)
4079 NODE_NAME_CASE(TAIL)
4080 NODE_NAME_CASE(SLLW)
4081 NODE_NAME_CASE(SRAW)
4082 NODE_NAME_CASE(SRLW)
4083 NODE_NAME_CASE(DIVW)
4084 NODE_NAME_CASE(DIVUW)
4085 NODE_NAME_CASE(REMUW)
4086 NODE_NAME_CASE(ROLW)
4087 NODE_NAME_CASE(RORW)
4088 NODE_NAME_CASE(FSLW)
4089 NODE_NAME_CASE(FSRW)
4090 NODE_NAME_CASE(FMV_H_X)
4091 NODE_NAME_CASE(FMV_X_ANYEXTH)
4092 NODE_NAME_CASE(FMV_W_X_RV64)
4093 NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
4094 NODE_NAME_CASE(READ_CYCLE_WIDE)
4095 NODE_NAME_CASE(GREVI)
4096 NODE_NAME_CASE(GREVIW)
4097 NODE_NAME_CASE(GORCI)
4098 NODE_NAME_CASE(GORCIW)
4099 NODE_NAME_CASE(VMV_X_S)
4100 NODE_NAME_CASE(SPLAT_VECTOR_I64)
4101 NODE_NAME_CASE(READ_VLENB)
4102 NODE_NAME_CASE(TRUNCATE_VECTOR)
4103 NODE_NAME_CASE(VLEFF)
4104 NODE_NAME_CASE(VLEFF_MASK)
4105 NODE_NAME_CASE(VLSEGFF)
4106 NODE_NAME_CASE(VLSEGFF_MASK)
4107 NODE_NAME_CASE(READ_VL)
4108 NODE_NAME_CASE(VSLIDEUP)
4109 NODE_NAME_CASE(VSLIDEDOWN)
4110 NODE_NAME_CASE(VID)
4111 }
4112 // clang-format on
4113 return nullptr;
4114 #undef NODE_NAME_CASE
4115 }
4116
4117 /// getConstraintType - Given a constraint letter, return the type of
4118 /// constraint it is for this target.
4119 RISCVTargetLowering::ConstraintType
4120 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
4121 if (Constraint.size() == 1) {
4122 switch (Constraint[0]) {
4123 default:
4124 break;
4125 case 'f':
4126 return C_RegisterClass;
4127 case 'I':
4128 case 'J':
4129 case 'K':
4130 return C_Immediate;
4131 case 'A':
4132 return C_Memory;
4133 }
4134 }
4135 return TargetLowering::getConstraintType(Constraint);
4136 }
4137
4138 std::pair<unsigned, const TargetRegisterClass *>
4139 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4140 StringRef Constraint,
4141 MVT VT) const {
4142 // First, see if this is a constraint that directly corresponds to a
4143 // RISCV register class.
4144 if (Constraint.size() == 1) {
4145 switch (Constraint[0]) {
4146 case 'r':
4147 return std::make_pair(0U, &RISCV::GPRRegClass);
4148 case 'f':
4149 if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
4150 return std::make_pair(0U, &RISCV::FPR16RegClass);
4151 if (Subtarget.hasStdExtF() && VT == MVT::f32)
4152 return std::make_pair(0U, &RISCV::FPR32RegClass);
4153 if (Subtarget.hasStdExtD() && VT == MVT::f64)
4154 return std::make_pair(0U, &RISCV::FPR64RegClass);
4155 break;
4156 default:
4157 break;
4158 }
4159 }
4160
4161 // Clang will correctly decode the usage of register name aliases into their
4162 // official names. However, other frontends like `rustc` do not. This allows
4163 // users of these frontends to use the ABI names for registers in LLVM-style
4164 // register constraints.
4165 unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
4166 .Case("{zero}", RISCV::X0)
4167 .Case("{ra}", RISCV::X1)
4168 .Case("{sp}", RISCV::X2)
4169 .Case("{gp}", RISCV::X3)
4170 .Case("{tp}", RISCV::X4)
4171 .Case("{t0}", RISCV::X5)
4172 .Case("{t1}", RISCV::X6)
4173 .Case("{t2}", RISCV::X7)
4174 .Cases("{s0}", "{fp}", RISCV::X8)
4175 .Case("{s1}", RISCV::X9)
4176 .Case("{a0}", RISCV::X10)
4177 .Case("{a1}", RISCV::X11)
4178 .Case("{a2}", RISCV::X12)
4179 .Case("{a3}", RISCV::X13)
4180 .Case("{a4}", RISCV::X14)
4181 .Case("{a5}", RISCV::X15)
4182 .Case("{a6}", RISCV::X16)
4183 .Case("{a7}", RISCV::X17)
4184 .Case("{s2}", RISCV::X18)
4185 .Case("{s3}", RISCV::X19)
4186 .Case("{s4}", RISCV::X20)
4187 .Case("{s5}", RISCV::X21)
4188 .Case("{s6}", RISCV::X22)
4189 .Case("{s7}", RISCV::X23)
4190 .Case("{s8}", RISCV::X24)
4191 .Case("{s9}", RISCV::X25)
4192 .Case("{s10}", RISCV::X26)
4193 .Case("{s11}", RISCV::X27)
4194 .Case("{t3}", RISCV::X28)
4195 .Case("{t4}", RISCV::X29)
4196 .Case("{t5}", RISCV::X30)
4197 .Case("{t6}", RISCV::X31)
4198 .Default(RISCV::NoRegister);
4199 if (XRegFromAlias != RISCV::NoRegister)
4200 return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
4201
  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record rather than the AsmName when choosing registers for InlineAsm
  // constraints, and we also want to match those names to the widest
  // floating-point register type available, so manually select floating-point
  // registers here.
  //
  // The second name in each entry below is the ABI name of the register, so
  // that frontends can also use the ABI names in register constraint lists.
  if (Subtarget.hasStdExtF()) {
    unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
                        .Cases("{f0}", "{ft0}", RISCV::F0_F)
                        .Cases("{f1}", "{ft1}", RISCV::F1_F)
                        .Cases("{f2}", "{ft2}", RISCV::F2_F)
                        .Cases("{f3}", "{ft3}", RISCV::F3_F)
                        .Cases("{f4}", "{ft4}", RISCV::F4_F)
                        .Cases("{f5}", "{ft5}", RISCV::F5_F)
                        .Cases("{f6}", "{ft6}", RISCV::F6_F)
                        .Cases("{f7}", "{ft7}", RISCV::F7_F)
                        .Cases("{f8}", "{fs0}", RISCV::F8_F)
                        .Cases("{f9}", "{fs1}", RISCV::F9_F)
                        .Cases("{f10}", "{fa0}", RISCV::F10_F)
                        .Cases("{f11}", "{fa1}", RISCV::F11_F)
                        .Cases("{f12}", "{fa2}", RISCV::F12_F)
                        .Cases("{f13}", "{fa3}", RISCV::F13_F)
                        .Cases("{f14}", "{fa4}", RISCV::F14_F)
                        .Cases("{f15}", "{fa5}", RISCV::F15_F)
                        .Cases("{f16}", "{fa6}", RISCV::F16_F)
                        .Cases("{f17}", "{fa7}", RISCV::F17_F)
                        .Cases("{f18}", "{fs2}", RISCV::F18_F)
                        .Cases("{f19}", "{fs3}", RISCV::F19_F)
                        .Cases("{f20}", "{fs4}", RISCV::F20_F)
                        .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}

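// Turn immediate inline-asm operands into target constants, accepting only
// values that satisfy the 'I' (simm12), 'J' (zero), or 'K' (uimm5)
// constraints.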
void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

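// Emit the leading fence for an atomic load or store where the ordering cannot
// be expressed by the access itself: a seq_cst load gets a leading seq_cst
// fence, and a release (or stronger) store gets a leading release fence. Other
// accesses need no leading fence.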
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

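// An acquire (or stronger) load needs a trailing acquire fence; stores need no
// trailing fence.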
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}

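// 8- and 16-bit atomicrmw operations have no native AMO instruction, so they
// are lowered to a masked, word-sized LR/SC sequence via the
// riscv_masked_atomicrmw intrinsics emitted below; floating-point operations
// fall back to a compare-exchange loop.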
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

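// Map an atomicrmw binary operation to the corresponding
// riscv_masked_atomicrmw intrinsic for the given XLEN.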
static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen\n");
}

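// Emit a call to the masked atomicrmw intrinsic selected above. On RV64 the
// i32 operands are sign-extended to 64 bits before the call and the result is
// truncated back to i32 afterwards.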
Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

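// As with atomicrmw, sub-word (8- and 16-bit) cmpxchg is expanded to a masked,
// word-sized LR/SC sequence via intrinsic.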
TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

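// Emit a call to the riscv_masked_cmpxchg intrinsic, sign-extending the i32
// operands on RV64 and truncating the result, mirroring the atomicrmw path
// above.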
Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}

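// Prefer fused multiply-add over separate multiply and add for any scalar
// floating-point type with hardware support (Zfh, F, and D provide fused
// multiply-add for f16, f32, and f64 respectively).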
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}

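// The exception pointer and selector are passed in a0 (X10) and a1 (X11), the
// first two integer argument registers.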
Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress unnecessary extensions when a libcall argument or
  // return value is of f32 type under the LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

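// On RV64, 32-bit integer libcall arguments are passed sign-extended to 64
// bits regardless of their signedness, matching the LP64 calling conventions.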
bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

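// Decide whether a multiply by the constant C is better decomposed into shifts
// plus an add/sub than left as a single MUL.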
bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL into a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL into two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

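// Resolve a register by its architectural or ABI name (as used by
// named-register intrinsics such as llvm.read_register). Only registers that
// are reserved, either by the target or by the user, may be obtained this way.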
Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

namespace RISCVZvlssegTable {

#define GET_RISCVZvlssegTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVZvlssegTable
} // namespace llvm