1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
15 #include "SystemZConstantPoolValue.h"
16 #include "SystemZMachineFunctionInfo.h"
17 #include "SystemZTargetMachine.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 #include <optional>
29
30 using namespace llvm;
31
32 #define DEBUG_TYPE "systemz-lower"
33
34 namespace {
35 // Represents information about a comparison.
36 struct Comparison {
Comparison__anon6742e6340111::Comparison37 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
38 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
39 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
40
41 // The operands to the comparison.
42 SDValue Op0, Op1;
43
44 // Chain if this is a strict floating-point comparison.
45 SDValue Chain;
46
47 // The opcode that should be used to compare Op0 and Op1.
48 unsigned Opcode;
49
50 // A SystemZICMP value. Only used for integer comparisons.
51 unsigned ICmpType;
52
53 // The mask of CC values that Opcode can produce.
54 unsigned CCValid;
55
56 // The mask of CC values for which the original condition is true.
57 unsigned CCMask;
58 };
59 } // end anonymous namespace
60
61 // Classify VT as either 32 or 64 bit.
is32Bit(EVT VT)62 static bool is32Bit(EVT VT) {
63 switch (VT.getSimpleVT().SimpleTy) {
64 case MVT::i32:
65 return true;
66 case MVT::i64:
67 return false;
68 default:
69 llvm_unreachable("Unsupported type");
70 }
71 }
72
73 // Return a version of MachineOperand that can be safely used before the
74 // final use.
earlyUseOperand(MachineOperand Op)75 static MachineOperand earlyUseOperand(MachineOperand Op) {
76 if (Op.isReg())
77 Op.setIsKill(false);
78 return Op;
79 }
80
SystemZTargetLowering(const TargetMachine & TM,const SystemZSubtarget & STI)81 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
82 const SystemZSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
85
86 auto *Regs = STI.getSpecialRegisters();
87
88 // Set up the register classes.
89 if (Subtarget.hasHighWord())
90 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
91 else
92 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
93 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
94 if (!useSoftFloat()) {
95 if (Subtarget.hasVector()) {
96 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
97 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
98 } else {
99 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
101 }
102 if (Subtarget.hasVectorEnhancements1())
103 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
104 else
105 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
106
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
114 }
115 }
116
117 // Compute derived properties from the register classes
118 computeRegisterProperties(Subtarget.getRegisterInfo());
119
120 // Set up special registers.
121 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
122
123 // TODO: It may be better to default to latency-oriented scheduling, however
124 // LLVM's current latency-oriented scheduler can't handle physreg definitions
125 // such as SystemZ has with CC, so set this to the register-pressure
126 // scheduler, because it can.
127 setSchedulingPreference(Sched::RegPressure);
128
129 setBooleanContents(ZeroOrOneBooleanContent);
130 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
131
132 // Instructions are strings of 2-byte aligned 2-byte values.
133 setMinFunctionAlignment(Align(2));
134 // For performance reasons we prefer 16-byte alignment.
135 setPrefFunctionAlignment(Align(16));
136
137 // Handle operations that are handled in a similar way for all types.
138 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
139 I <= MVT::LAST_FP_VALUETYPE;
140 ++I) {
141 MVT VT = MVT::SimpleValueType(I);
142 if (isTypeLegal(VT)) {
143 // Lower SET_CC into an IPM-based sequence.
144 setOperationAction(ISD::SETCC, VT, Custom);
145 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
146 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
147
148 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
149 setOperationAction(ISD::SELECT, VT, Expand);
150
151 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
152 setOperationAction(ISD::SELECT_CC, VT, Custom);
153 setOperationAction(ISD::BR_CC, VT, Custom);
154 }
155 }
156
157 // Expand jump table branches as address arithmetic followed by an
158 // indirect jump.
159 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
160
161 // Expand BRCOND into a BR_CC (see above).
162 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
163
164 // Handle integer types.
165 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
166 I <= MVT::LAST_INTEGER_VALUETYPE;
167 ++I) {
168 MVT VT = MVT::SimpleValueType(I);
169 if (isTypeLegal(VT)) {
170 setOperationAction(ISD::ABS, VT, Legal);
171
172 // Expand individual DIV and REMs into DIVREMs.
173 setOperationAction(ISD::SDIV, VT, Expand);
174 setOperationAction(ISD::UDIV, VT, Expand);
175 setOperationAction(ISD::SREM, VT, Expand);
176 setOperationAction(ISD::UREM, VT, Expand);
177 setOperationAction(ISD::SDIVREM, VT, Custom);
178 setOperationAction(ISD::UDIVREM, VT, Custom);
179
180 // Support addition/subtraction with overflow.
181 setOperationAction(ISD::SADDO, VT, Custom);
182 setOperationAction(ISD::SSUBO, VT, Custom);
183
184 // Support addition/subtraction with carry.
185 setOperationAction(ISD::UADDO, VT, Custom);
186 setOperationAction(ISD::USUBO, VT, Custom);
187
188 // Support carry in as value rather than glue.
189 setOperationAction(ISD::ADDCARRY, VT, Custom);
190 setOperationAction(ISD::SUBCARRY, VT, Custom);
191
192 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
193 // stores, putting a serialization instruction after the stores.
194 setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
195 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
196
197 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
198 // available, or if the operand is constant.
199 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
200
201 // Use POPCNT on z196 and above.
202 if (Subtarget.hasPopulationCount())
203 setOperationAction(ISD::CTPOP, VT, Custom);
204 else
205 setOperationAction(ISD::CTPOP, VT, Expand);
206
207 // No special instructions for these.
208 setOperationAction(ISD::CTTZ, VT, Expand);
209 setOperationAction(ISD::ROTR, VT, Expand);
210
211 // Use *MUL_LOHI where possible instead of MULH*.
212 setOperationAction(ISD::MULHS, VT, Expand);
213 setOperationAction(ISD::MULHU, VT, Expand);
214 setOperationAction(ISD::SMUL_LOHI, VT, Custom);
215 setOperationAction(ISD::UMUL_LOHI, VT, Custom);
216
217 // Only z196 and above have native support for conversions to unsigned.
218 // On z10, promoting to i64 doesn't generate an inexact condition for
219 // values that are outside the i32 range but in the i64 range, so use
220 // the default expansion.
221 if (!Subtarget.hasFPExtension())
222 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
223
224 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
225 // default to Expand, so need to be modified to Legal where appropriate.
226 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
227 if (Subtarget.hasFPExtension())
228 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
229
230 // And similarly for STRICT_[SU]INT_TO_FP.
231 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
232 if (Subtarget.hasFPExtension())
233 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
234 }
235 }
236
237 // Type legalization will convert 8- and 16-bit atomic operations into
238 // forms that operate on i32s (but still keeping the original memory VT).
239 // Lower them into full i32 operations.
240 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
241 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
242 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
243 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
244 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
245 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
246 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
247 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
248 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
249 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
250 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
251
252 // Even though i128 is not a legal type, we still need to custom lower
253 // the atomic operations in order to exploit SystemZ instructions.
254 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
255 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
256
257 // We can use the CC result of compare-and-swap to implement
258 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
259 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
260 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
261 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
262
263 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
264
265 // Traps are legal, as we will convert them to "j .+2".
266 setOperationAction(ISD::TRAP, MVT::Other, Legal);
267
268 // z10 has instructions for signed but not unsigned FP conversion.
269 // Handle unsigned 32-bit types as signed 64-bit types.
270 if (!Subtarget.hasFPExtension()) {
271 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
272 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
273 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
274 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
275 }
276
277 // We have native support for a 64-bit CTLZ, via FLOGR.
278 setOperationAction(ISD::CTLZ, MVT::i32, Promote);
279 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
280 setOperationAction(ISD::CTLZ, MVT::i64, Legal);
281
282 // On z15 we have native support for a 64-bit CTPOP.
283 if (Subtarget.hasMiscellaneousExtensions3()) {
284 setOperationAction(ISD::CTPOP, MVT::i32, Promote);
285 setOperationAction(ISD::CTPOP, MVT::i64, Legal);
286 }
287
288 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
289 setOperationAction(ISD::OR, MVT::i64, Custom);
290
291 // Expand 128 bit shifts without using a libcall.
292 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
293 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
294 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
295 setLibcallName(RTLIB::SRL_I128, nullptr);
296 setLibcallName(RTLIB::SHL_I128, nullptr);
297 setLibcallName(RTLIB::SRA_I128, nullptr);
298
299 // Handle bitcast from fp128 to i128.
300 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
301
302 // We have native instructions for i8, i16 and i32 extensions, but not i1.
303 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
304 for (MVT VT : MVT::integer_valuetypes()) {
305 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
306 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
307 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
308 }
309
310 // Handle the various types of symbolic address.
311 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
312 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
313 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
314 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
315 setOperationAction(ISD::JumpTable, PtrVT, Custom);
316
317 // We need to handle dynamic allocations specially because of the
318 // 160-byte area at the bottom of the stack.
319 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
320 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
321
322 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
323 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
324
325 // Handle prefetches with PFD or PFDRL.
326 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
327
328 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
329 // Assume by default that all vector operations need to be expanded.
330 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
331 if (getOperationAction(Opcode, VT) == Legal)
332 setOperationAction(Opcode, VT, Expand);
333
334 // Likewise all truncating stores and extending loads.
335 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
336 setTruncStoreAction(VT, InnerVT, Expand);
337 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
338 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
339 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
340 }
341
342 if (isTypeLegal(VT)) {
343 // These operations are legal for anything that can be stored in a
344 // vector register, even if there is no native support for the format
345 // as such. In particular, we can do these for v4f32 even though there
346 // are no specific instructions for that format.
347 setOperationAction(ISD::LOAD, VT, Legal);
348 setOperationAction(ISD::STORE, VT, Legal);
349 setOperationAction(ISD::VSELECT, VT, Legal);
350 setOperationAction(ISD::BITCAST, VT, Legal);
351 setOperationAction(ISD::UNDEF, VT, Legal);
352
353 // Likewise, except that we need to replace the nodes with something
354 // more specific.
355 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
356 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
357 }
358 }
359
360 // Handle integer vector types.
361 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
362 if (isTypeLegal(VT)) {
363 // These operations have direct equivalents.
364 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
365 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
366 setOperationAction(ISD::ADD, VT, Legal);
367 setOperationAction(ISD::SUB, VT, Legal);
368 if (VT != MVT::v2i64)
369 setOperationAction(ISD::MUL, VT, Legal);
370 setOperationAction(ISD::ABS, VT, Legal);
371 setOperationAction(ISD::AND, VT, Legal);
372 setOperationAction(ISD::OR, VT, Legal);
373 setOperationAction(ISD::XOR, VT, Legal);
374 if (Subtarget.hasVectorEnhancements1())
375 setOperationAction(ISD::CTPOP, VT, Legal);
376 else
377 setOperationAction(ISD::CTPOP, VT, Custom);
378 setOperationAction(ISD::CTTZ, VT, Legal);
379 setOperationAction(ISD::CTLZ, VT, Legal);
380
381 // Convert a GPR scalar to a vector by inserting it into element 0.
382 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
383
384 // Use a series of unpacks for extensions.
385 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
386 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
387
388 // Detect shifts by a scalar amount and convert them into
389 // V*_BY_SCALAR.
390 setOperationAction(ISD::SHL, VT, Custom);
391 setOperationAction(ISD::SRA, VT, Custom);
392 setOperationAction(ISD::SRL, VT, Custom);
393
394 // At present ROTL isn't matched by DAGCombiner. ROTR should be
395 // converted into ROTL.
396 setOperationAction(ISD::ROTL, VT, Expand);
397 setOperationAction(ISD::ROTR, VT, Expand);
398
399 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
400 // and inverting the result as necessary.
401 setOperationAction(ISD::SETCC, VT, Custom);
402 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
403 if (Subtarget.hasVectorEnhancements1())
404 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
405 }
406 }
407
408 if (Subtarget.hasVector()) {
409 // There should be no need to check for float types other than v2f64
410 // since <2 x f32> isn't a legal type.
411 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
412 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
413 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
414 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
415 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
416 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
417 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
418 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
419
420 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
421 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
422 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
423 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
424 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
425 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
426 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
427 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
428 }
429
430 if (Subtarget.hasVectorEnhancements2()) {
431 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
432 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
433 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
434 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
435 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
436 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
437 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
438 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
439
440 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
441 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
442 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
443 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
444 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
445 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
446 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
447 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
448 }
449
450 // Handle floating-point types.
451 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
452 I <= MVT::LAST_FP_VALUETYPE;
453 ++I) {
454 MVT VT = MVT::SimpleValueType(I);
455 if (isTypeLegal(VT)) {
456 // We can use FI for FRINT.
457 setOperationAction(ISD::FRINT, VT, Legal);
458
459 // We can use the extended form of FI for other rounding operations.
460 if (Subtarget.hasFPExtension()) {
461 setOperationAction(ISD::FNEARBYINT, VT, Legal);
462 setOperationAction(ISD::FFLOOR, VT, Legal);
463 setOperationAction(ISD::FCEIL, VT, Legal);
464 setOperationAction(ISD::FTRUNC, VT, Legal);
465 setOperationAction(ISD::FROUND, VT, Legal);
466 }
467
468 // No special instructions for these.
469 setOperationAction(ISD::FSIN, VT, Expand);
470 setOperationAction(ISD::FCOS, VT, Expand);
471 setOperationAction(ISD::FSINCOS, VT, Expand);
472 setOperationAction(ISD::FREM, VT, Expand);
473 setOperationAction(ISD::FPOW, VT, Expand);
474
475 // Special treatment.
476 setOperationAction(ISD::IS_FPCLASS, VT, Custom);
477
478 // Handle constrained floating-point operations.
479 setOperationAction(ISD::STRICT_FADD, VT, Legal);
480 setOperationAction(ISD::STRICT_FSUB, VT, Legal);
481 setOperationAction(ISD::STRICT_FMUL, VT, Legal);
482 setOperationAction(ISD::STRICT_FDIV, VT, Legal);
483 setOperationAction(ISD::STRICT_FMA, VT, Legal);
484 setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
485 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
486 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
487 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
488 if (Subtarget.hasFPExtension()) {
489 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
490 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
491 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
492 setOperationAction(ISD::STRICT_FROUND, VT, Legal);
493 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
494 }
495 }
496 }
497
498 // Handle floating-point vector types.
499 if (Subtarget.hasVector()) {
500 // Scalar-to-vector conversion is just a subreg.
501 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
502 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
503
504 // Some insertions and extractions can be done directly but others
505 // need to go via integers.
506 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
507 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
508 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
509 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
510
511 // These operations have direct equivalents.
512 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
513 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
514 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
515 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
516 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
517 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
518 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
519 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
520 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
521 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
522 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
523 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
524 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
525 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
526
527 // Handle constrained floating-point operations.
528 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
529 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
530 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
531 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
532 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
533 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
534 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
535 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
536 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
537 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
538 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
539 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
540 }
541
542 // The vector enhancements facility 1 has instructions for these.
543 if (Subtarget.hasVectorEnhancements1()) {
544 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
545 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
546 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
547 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
548 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
549 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
550 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
551 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
552 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
553 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
554 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
555 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
556 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
557 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
558
559 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
560 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
561 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
562 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
563
564 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
565 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
566 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
567 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
568
569 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
570 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
571 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
572 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
573
574 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
575 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
576 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
577 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
578
579 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
580 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
581 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
582 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
583
584 // Handle constrained floating-point operations.
585 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
586 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
587 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
588 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
589 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
590 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
591 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
592 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
593 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
594 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
595 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
596 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
597 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
598 MVT::v4f32, MVT::v2f64 }) {
599 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
600 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
601 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
602 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
603 }
604 }
605
606 // We only have fused f128 multiply-addition on vector registers.
607 if (!Subtarget.hasVectorEnhancements1()) {
608 setOperationAction(ISD::FMA, MVT::f128, Expand);
609 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
610 }
611
612 // We don't have a copysign instruction on vector registers.
613 if (Subtarget.hasVectorEnhancements1())
614 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
615
616 // Needed so that we don't try to implement f128 constant loads using
617 // a load-and-extend of a f80 constant (in cases where the constant
618 // would fit in an f80).
619 for (MVT VT : MVT::fp_valuetypes())
620 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
621
622 // We don't have extending load instruction on vector registers.
623 if (Subtarget.hasVectorEnhancements1()) {
624 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
625 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
626 }
627
628 // Floating-point truncation and stores need to be done separately.
629 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
630 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
631 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
632
633 // We have 64-bit FPR<->GPR moves, but need special handling for
634 // 32-bit forms.
635 if (!Subtarget.hasVector()) {
636 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
637 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
638 }
639
640 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
641 // structure, but VAEND is a no-op.
642 setOperationAction(ISD::VASTART, MVT::Other, Custom);
643 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
644 setOperationAction(ISD::VAEND, MVT::Other, Expand);
645
646 setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
647
648 // Codes for which we want to perform some z-specific combinations.
649 setTargetDAGCombine({ISD::ZERO_EXTEND,
650 ISD::SIGN_EXTEND,
651 ISD::SIGN_EXTEND_INREG,
652 ISD::LOAD,
653 ISD::STORE,
654 ISD::VECTOR_SHUFFLE,
655 ISD::EXTRACT_VECTOR_ELT,
656 ISD::FP_ROUND,
657 ISD::STRICT_FP_ROUND,
658 ISD::FP_EXTEND,
659 ISD::SINT_TO_FP,
660 ISD::UINT_TO_FP,
661 ISD::STRICT_FP_EXTEND,
662 ISD::BSWAP,
663 ISD::SDIV,
664 ISD::UDIV,
665 ISD::SREM,
666 ISD::UREM,
667 ISD::INTRINSIC_VOID,
668 ISD::INTRINSIC_W_CHAIN});
669
670 // Handle intrinsics.
671 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
672 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
673
674 // We want to use MVC in preference to even a single load/store pair.
675 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
676 MaxStoresPerMemcpyOptSize = 0;
677
678 // The main memset sequence is a byte store followed by an MVC.
679 // Two STC or MV..I stores win over that, but the kind of fused stores
680 // generated by target-independent code don't when the byte value is
681 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
682 // than "STC;MVC". Handle the choice in target-specific code instead.
683 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
684 MaxStoresPerMemsetOptSize = 0;
685
686 // Default to having -disable-strictnode-mutation on
687 IsStrictFPEnabled = true;
688 }
689
useSoftFloat() const690 bool SystemZTargetLowering::useSoftFloat() const {
691 return Subtarget.hasSoftFloat();
692 }
693
getSetCCResultType(const DataLayout & DL,LLVMContext &,EVT VT) const694 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
695 LLVMContext &, EVT VT) const {
696 if (!VT.isVector())
697 return MVT::i32;
698 return VT.changeVectorElementTypeToInteger();
699 }
700
isFMAFasterThanFMulAndFAdd(const MachineFunction & MF,EVT VT) const701 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
702 const MachineFunction &MF, EVT VT) const {
703 VT = VT.getScalarType();
704
705 if (!VT.isSimple())
706 return false;
707
708 switch (VT.getSimpleVT().SimpleTy) {
709 case MVT::f32:
710 case MVT::f64:
711 return true;
712 case MVT::f128:
713 return Subtarget.hasVectorEnhancements1();
714 default:
715 break;
716 }
717
718 return false;
719 }
720
721 // Return true if the constant can be generated with a vector instruction,
722 // such as VGM, VGMB or VREPI.
isVectorConstantLegal(const SystemZSubtarget & Subtarget)723 bool SystemZVectorConstantInfo::isVectorConstantLegal(
724 const SystemZSubtarget &Subtarget) {
725 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
726 if (!Subtarget.hasVector() ||
727 (isFP128 && !Subtarget.hasVectorEnhancements1()))
728 return false;
729
730 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
731 // preferred way of creating all-zero and all-one vectors so give it
732 // priority over other methods below.
733 unsigned Mask = 0;
734 unsigned I = 0;
735 for (; I < SystemZ::VectorBytes; ++I) {
736 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
737 if (Byte == 0xff)
738 Mask |= 1ULL << I;
739 else if (Byte != 0)
740 break;
741 }
742 if (I == SystemZ::VectorBytes) {
743 Opcode = SystemZISD::BYTE_MASK;
744 OpVals.push_back(Mask);
745 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
746 return true;
747 }
748
749 if (SplatBitSize > 64)
750 return false;
751
752 auto tryValue = [&](uint64_t Value) -> bool {
753 // Try VECTOR REPLICATE IMMEDIATE
754 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
755 if (isInt<16>(SignedValue)) {
756 OpVals.push_back(((unsigned) SignedValue));
757 Opcode = SystemZISD::REPLICATE;
758 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
759 SystemZ::VectorBits / SplatBitSize);
760 return true;
761 }
762 // Try VECTOR GENERATE MASK
763 unsigned Start, End;
764 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
765 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
766 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
767 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
768 OpVals.push_back(Start - (64 - SplatBitSize));
769 OpVals.push_back(End - (64 - SplatBitSize));
770 Opcode = SystemZISD::ROTATE_MASK;
771 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
772 SystemZ::VectorBits / SplatBitSize);
773 return true;
774 }
775 return false;
776 };
777
778 // First try assuming that any undefined bits above the highest set bit
779 // and below the lowest set bit are 1s. This increases the likelihood of
780 // being able to use a sign-extended element value in VECTOR REPLICATE
781 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
782 uint64_t SplatBitsZ = SplatBits.getZExtValue();
783 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
784 uint64_t Lower =
785 (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
786 uint64_t Upper =
787 (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
788 if (tryValue(SplatBitsZ | Upper | Lower))
789 return true;
790
791 // Now try assuming that any undefined bits between the first and
792 // last defined set bits are set. This increases the chances of
793 // using a non-wraparound mask.
794 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
795 return tryValue(SplatBitsZ | Middle);
796 }
797
SystemZVectorConstantInfo(APInt IntImm)798 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
799 if (IntImm.isSingleWord()) {
800 IntBits = APInt(128, IntImm.getZExtValue());
801 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
802 } else
803 IntBits = IntImm;
804 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
805
806 // Find the smallest splat.
807 SplatBits = IntImm;
808 unsigned Width = SplatBits.getBitWidth();
809 while (Width > 8) {
810 unsigned HalfSize = Width / 2;
811 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
812 APInt LowValue = SplatBits.trunc(HalfSize);
813
814 // If the two halves do not match, stop here.
815 if (HighValue != LowValue || 8 > HalfSize)
816 break;
817
818 SplatBits = HighValue;
819 Width = HalfSize;
820 }
821 SplatUndef = 0;
822 SplatBitSize = Width;
823 }
824
SystemZVectorConstantInfo(BuildVectorSDNode * BVN)825 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
826 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
827 bool HasAnyUndefs;
828
829 // Get IntBits by finding the 128 bit splat.
830 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
831 true);
832
833 // Get SplatBits by finding the 8 bit or greater splat.
834 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
835 true);
836 }
837
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const838 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
839 bool ForCodeSize) const {
840 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
841 if (Imm.isZero() || Imm.isNegZero())
842 return true;
843
844 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
845 }
846
847 /// Returns true if stack probing through inline assembly is requested.
hasInlineStackProbe(const MachineFunction & MF) const848 bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
849 // If the function specifically requests inline stack probes, emit them.
850 if (MF.getFunction().hasFnAttribute("probe-stack"))
851 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
852 "inline-asm";
853 return false;
854 }
855
isLegalICmpImmediate(int64_t Imm) const856 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
857 // We can use CGFI or CLGFI.
858 return isInt<32>(Imm) || isUInt<32>(Imm);
859 }
860
isLegalAddImmediate(int64_t Imm) const861 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
862 // We can use ALGFI or SLGFI.
863 return isUInt<32>(Imm) || isUInt<32>(-Imm);
864 }
865
allowsMisalignedMemoryAccesses(EVT VT,unsigned,Align,MachineMemOperand::Flags,unsigned * Fast) const866 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
867 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
868 // Unaligned accesses should never be slower than the expanded version.
869 // We check specifically for aligned accesses in the few cases where
870 // they are required.
871 if (Fast)
872 *Fast = 1;
873 return true;
874 }
875
876 // Information about the addressing mode for a memory access.
877 struct AddressingMode {
878 // True if a long displacement is supported.
879 bool LongDisplacement;
880
881 // True if use of index register is supported.
882 bool IndexReg;
883
AddressingModeAddressingMode884 AddressingMode(bool LongDispl, bool IdxReg) :
885 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
886 };
887
888 // Return the desired addressing mode for a Load which has only one use (in
889 // the same block) which is a Store.
getLoadStoreAddrMode(bool HasVector,Type * Ty)890 static AddressingMode getLoadStoreAddrMode(bool HasVector,
891 Type *Ty) {
892 // With vector support a Load->Store combination may be combined to either
893 // an MVC or vector operations and it seems to work best to allow the
894 // vector addressing mode.
895 if (HasVector)
896 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
897
898 // Otherwise only the MVC case is special.
899 bool MVC = Ty->isIntegerTy(8);
900 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
901 }
902
903 // Return the addressing mode which seems most desirable given an LLVM
904 // Instruction pointer.
905 static AddressingMode
supportedAddressingMode(Instruction * I,bool HasVector)906 supportedAddressingMode(Instruction *I, bool HasVector) {
907 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
908 switch (II->getIntrinsicID()) {
909 default: break;
910 case Intrinsic::memset:
911 case Intrinsic::memmove:
912 case Intrinsic::memcpy:
913 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
914 }
915 }
916
917 if (isa<LoadInst>(I) && I->hasOneUse()) {
918 auto *SingleUser = cast<Instruction>(*I->user_begin());
919 if (SingleUser->getParent() == I->getParent()) {
920 if (isa<ICmpInst>(SingleUser)) {
921 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
922 if (C->getBitWidth() <= 64 &&
923 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
924 // Comparison of memory with 16 bit signed / unsigned immediate
925 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
926 } else if (isa<StoreInst>(SingleUser))
927 // Load->Store
928 return getLoadStoreAddrMode(HasVector, I->getType());
929 }
930 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
931 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
932 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
933 // Load->Store
934 return getLoadStoreAddrMode(HasVector, LoadI->getType());
935 }
936
937 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
938
939 // * Use LDE instead of LE/LEY for z13 to avoid partial register
940 // dependencies (LDE only supports small offsets).
941 // * Utilize the vector registers to hold floating point
942 // values (vector load / store instructions only support small
943 // offsets).
944
945 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
946 I->getOperand(0)->getType());
947 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
948 bool IsVectorAccess = MemAccessTy->isVectorTy();
949
950 // A store of an extracted vector element will be combined into a VSTE type
951 // instruction.
952 if (!IsVectorAccess && isa<StoreInst>(I)) {
953 Value *DataOp = I->getOperand(0);
954 if (isa<ExtractElementInst>(DataOp))
955 IsVectorAccess = true;
956 }
957
958 // A load which gets inserted into a vector element will be combined into a
959 // VLE type instruction.
960 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
961 User *LoadUser = *I->user_begin();
962 if (isa<InsertElementInst>(LoadUser))
963 IsVectorAccess = true;
964 }
965
966 if (IsFPAccess || IsVectorAccess)
967 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
968 }
969
970 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
971 }
972
isLegalAddressingMode(const DataLayout & DL,const AddrMode & AM,Type * Ty,unsigned AS,Instruction * I) const973 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
974 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
975 // Punt on globals for now, although they can be used in limited
976 // RELATIVE LONG cases.
977 if (AM.BaseGV)
978 return false;
979
980 // Require a 20-bit signed offset.
981 if (!isInt<20>(AM.BaseOffs))
982 return false;
983
984 bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
985 AddressingMode SupportedAM(!RequireD12, true);
986 if (I != nullptr)
987 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
988
989 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
990 return false;
991
992 if (!SupportedAM.IndexReg)
993 // No indexing allowed.
994 return AM.Scale == 0;
995 else
996 // Indexing is OK but no scale factor can be applied.
997 return AM.Scale == 0 || AM.Scale == 1;
998 }
999
findOptimalMemOpLowering(std::vector<EVT> & MemOps,unsigned Limit,const MemOp & Op,unsigned DstAS,unsigned SrcAS,const AttributeList & FuncAttributes) const1000 bool SystemZTargetLowering::findOptimalMemOpLowering(
1001 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1002 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1003 const int MVCFastLen = 16;
1004
1005 if (Limit != ~unsigned(0)) {
1006 // Don't expand Op into scalar loads/stores in these cases:
1007 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1008 return false; // Small memcpy: Use MVC
1009 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1010 return false; // Small memset (first byte with STC/MVI): Use MVC
1011 if (Op.isZeroMemset())
1012 return false; // Memset zero: Use XC
1013 }
1014
1015 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1016 SrcAS, FuncAttributes);
1017 }
1018
getOptimalMemOpType(const MemOp & Op,const AttributeList & FuncAttributes) const1019 EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
1020 const AttributeList &FuncAttributes) const {
1021 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1022 }
1023
isTruncateFree(Type * FromType,Type * ToType) const1024 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1025 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1026 return false;
1027 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1028 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1029 return FromBits > ToBits;
1030 }
1031
isTruncateFree(EVT FromVT,EVT ToVT) const1032 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1033 if (!FromVT.isInteger() || !ToVT.isInteger())
1034 return false;
1035 unsigned FromBits = FromVT.getFixedSizeInBits();
1036 unsigned ToBits = ToVT.getFixedSizeInBits();
1037 return FromBits > ToBits;
1038 }
1039
1040 //===----------------------------------------------------------------------===//
1041 // Inline asm support
1042 //===----------------------------------------------------------------------===//
1043
1044 TargetLowering::ConstraintType
getConstraintType(StringRef Constraint) const1045 SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1046 if (Constraint.size() == 1) {
1047 switch (Constraint[0]) {
1048 case 'a': // Address register
1049 case 'd': // Data register (equivalent to 'r')
1050 case 'f': // Floating-point register
1051 case 'h': // High-part register
1052 case 'r': // General-purpose register
1053 case 'v': // Vector register
1054 return C_RegisterClass;
1055
1056 case 'Q': // Memory with base and unsigned 12-bit displacement
1057 case 'R': // Likewise, plus an index
1058 case 'S': // Memory with base and signed 20-bit displacement
1059 case 'T': // Likewise, plus an index
1060 case 'm': // Equivalent to 'T'.
1061 return C_Memory;
1062
1063 case 'I': // Unsigned 8-bit constant
1064 case 'J': // Unsigned 12-bit constant
1065 case 'K': // Signed 16-bit constant
1066 case 'L': // Signed 20-bit displacement (on all targets we support)
1067 case 'M': // 0x7fffffff
1068 return C_Immediate;
1069
1070 default:
1071 break;
1072 }
1073 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1074 switch (Constraint[1]) {
1075 case 'Q': // Address with base and unsigned 12-bit displacement
1076 case 'R': // Likewise, plus an index
1077 case 'S': // Address with base and signed 20-bit displacement
1078 case 'T': // Likewise, plus an index
1079 return C_Address;
1080
1081 default:
1082 break;
1083 }
1084 }
1085 return TargetLowering::getConstraintType(Constraint);
1086 }
1087
1088 TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo & info,const char * constraint) const1089 getSingleConstraintMatchWeight(AsmOperandInfo &info,
1090 const char *constraint) const {
1091 ConstraintWeight weight = CW_Invalid;
1092 Value *CallOperandVal = info.CallOperandVal;
1093 // If we don't have a value, we can't do a match,
1094 // but allow it at the lowest weight.
1095 if (!CallOperandVal)
1096 return CW_Default;
1097 Type *type = CallOperandVal->getType();
1098 // Look at the constraint type.
1099 switch (*constraint) {
1100 default:
1101 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1102 break;
1103
1104 case 'a': // Address register
1105 case 'd': // Data register (equivalent to 'r')
1106 case 'h': // High-part register
1107 case 'r': // General-purpose register
1108 if (CallOperandVal->getType()->isIntegerTy())
1109 weight = CW_Register;
1110 break;
1111
1112 case 'f': // Floating-point register
1113 if (type->isFloatingPointTy())
1114 weight = CW_Register;
1115 break;
1116
1117 case 'v': // Vector register
1118 if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1119 Subtarget.hasVector())
1120 weight = CW_Register;
1121 break;
1122
1123 case 'I': // Unsigned 8-bit constant
1124 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1125 if (isUInt<8>(C->getZExtValue()))
1126 weight = CW_Constant;
1127 break;
1128
1129 case 'J': // Unsigned 12-bit constant
1130 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1131 if (isUInt<12>(C->getZExtValue()))
1132 weight = CW_Constant;
1133 break;
1134
1135 case 'K': // Signed 16-bit constant
1136 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1137 if (isInt<16>(C->getSExtValue()))
1138 weight = CW_Constant;
1139 break;
1140
1141 case 'L': // Signed 20-bit displacement (on all targets we support)
1142 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1143 if (isInt<20>(C->getSExtValue()))
1144 weight = CW_Constant;
1145 break;
1146
1147 case 'M': // 0x7fffffff
1148 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1149 if (C->getZExtValue() == 0x7fffffff)
1150 weight = CW_Constant;
1151 break;
1152 }
1153 return weight;
1154 }
1155
1156 // Parse a "{tNNN}" register constraint for which the register type "t"
1157 // has already been verified. MC is the class associated with "t" and
1158 // Map maps 0-based register numbers to LLVM register numbers.
1159 static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint,const TargetRegisterClass * RC,const unsigned * Map,unsigned Size)1160 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1161 const unsigned *Map, unsigned Size) {
1162 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1163 if (isdigit(Constraint[2])) {
1164 unsigned Index;
1165 bool Failed =
1166 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1167 if (!Failed && Index < Size && Map[Index])
1168 return std::make_pair(Map[Index], RC);
1169 }
1170 return std::make_pair(0U, nullptr);
1171 }
1172
1173 std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo * TRI,StringRef Constraint,MVT VT) const1174 SystemZTargetLowering::getRegForInlineAsmConstraint(
1175 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1176 if (Constraint.size() == 1) {
1177 // GCC Constraint Letters
1178 switch (Constraint[0]) {
1179 default: break;
1180 case 'd': // Data register (equivalent to 'r')
1181 case 'r': // General-purpose register
1182 if (VT == MVT::i64)
1183 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1184 else if (VT == MVT::i128)
1185 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1186 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1187
1188 case 'a': // Address register
1189 if (VT == MVT::i64)
1190 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1191 else if (VT == MVT::i128)
1192 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1193 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1194
1195 case 'h': // High-part register (an LLVM extension)
1196 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1197
1198 case 'f': // Floating-point register
1199 if (!useSoftFloat()) {
1200 if (VT == MVT::f64)
1201 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1202 else if (VT == MVT::f128)
1203 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1204 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1205 }
1206 break;
1207 case 'v': // Vector register
1208 if (Subtarget.hasVector()) {
1209 if (VT == MVT::f32)
1210 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1211 if (VT == MVT::f64)
1212 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1213 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1214 }
1215 break;
1216 }
1217 }
1218 if (Constraint.size() > 0 && Constraint[0] == '{') {
1219 // We need to override the default register parsing for GPRs and FPRs
1220 // because the interpretation depends on VT. The internal names of
1221 // the registers are also different from the external names
1222 // (F0D and F0S instead of F0, etc.).
1223 if (Constraint[1] == 'r') {
1224 if (VT == MVT::i32)
1225 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1226 SystemZMC::GR32Regs, 16);
1227 if (VT == MVT::i128)
1228 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1229 SystemZMC::GR128Regs, 16);
1230 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1231 SystemZMC::GR64Regs, 16);
1232 }
1233 if (Constraint[1] == 'f') {
1234 if (useSoftFloat())
1235 return std::make_pair(
1236 0u, static_cast<const TargetRegisterClass *>(nullptr));
1237 if (VT == MVT::f32)
1238 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1239 SystemZMC::FP32Regs, 16);
1240 if (VT == MVT::f128)
1241 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1242 SystemZMC::FP128Regs, 16);
1243 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1244 SystemZMC::FP64Regs, 16);
1245 }
1246 if (Constraint[1] == 'v') {
1247 if (!Subtarget.hasVector())
1248 return std::make_pair(
1249 0u, static_cast<const TargetRegisterClass *>(nullptr));
1250 if (VT == MVT::f32)
1251 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1252 SystemZMC::VR32Regs, 32);
1253 if (VT == MVT::f64)
1254 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1255 SystemZMC::VR64Regs, 32);
1256 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1257 SystemZMC::VR128Regs, 32);
1258 }
1259 }
1260 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1261 }
1262
1263 // FIXME? Maybe this could be a TableGen attribute on some registers and
1264 // this table could be generated automatically from RegInfo.
1265 Register
getRegisterByName(const char * RegName,LLT VT,const MachineFunction & MF) const1266 SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1267 const MachineFunction &MF) const {
1268 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
1269
1270 Register Reg =
1271 StringSwitch<Register>(RegName)
1272 .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
1273 .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
1274 .Default(0);
1275
1276 if (Reg)
1277 return Reg;
1278 report_fatal_error("Invalid register name global variable");
1279 }
1280
1281 void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op,std::string & Constraint,std::vector<SDValue> & Ops,SelectionDAG & DAG) const1282 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1283 std::vector<SDValue> &Ops,
1284 SelectionDAG &DAG) const {
1285 // Only support length 1 constraints for now.
1286 if (Constraint.length() == 1) {
1287 switch (Constraint[0]) {
1288 case 'I': // Unsigned 8-bit constant
1289 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1290 if (isUInt<8>(C->getZExtValue()))
1291 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1292 Op.getValueType()));
1293 return;
1294
1295 case 'J': // Unsigned 12-bit constant
1296 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1297 if (isUInt<12>(C->getZExtValue()))
1298 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1299 Op.getValueType()));
1300 return;
1301
1302 case 'K': // Signed 16-bit constant
1303 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1304 if (isInt<16>(C->getSExtValue()))
1305 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1306 Op.getValueType()));
1307 return;
1308
1309 case 'L': // Signed 20-bit displacement (on all targets we support)
1310 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1311 if (isInt<20>(C->getSExtValue()))
1312 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1313 Op.getValueType()));
1314 return;
1315
1316 case 'M': // 0x7fffffff
1317 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1318 if (C->getZExtValue() == 0x7fffffff)
1319 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1320 Op.getValueType()));
1321 return;
1322 }
1323 }
1324 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1325 }
1326
1327 //===----------------------------------------------------------------------===//
1328 // Calling conventions
1329 //===----------------------------------------------------------------------===//
1330
1331 #include "SystemZGenCallingConv.inc"
1332
getScratchRegisters(CallingConv::ID) const1333 const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1334 CallingConv::ID) const {
1335 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1336 SystemZ::R14D, 0 };
1337 return ScratchRegs;
1338 }
1339
allowTruncateForTailCall(Type * FromType,Type * ToType) const1340 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1341 Type *ToType) const {
1342 return isTruncateFree(FromType, ToType);
1343 }
1344
mayBeEmittedAsTailCall(const CallInst * CI) const1345 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1346 return CI->isTailCall();
1347 }
1348
1349 // We do not yet support 128-bit single-element vector types. If the user
1350 // attempts to use such types as function argument or return type, prefer
1351 // to error out instead of emitting code violating the ABI.
VerifyVectorType(MVT VT,EVT ArgVT)1352 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1353 if (ArgVT.isVector() && !VT.isVector())
1354 report_fatal_error("Unsupported vector argument or return type");
1355 }
1356
VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> & Ins)1357 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1358 for (unsigned i = 0; i < Ins.size(); ++i)
1359 VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1360 }
1361
VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> & Outs)1362 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1363 for (unsigned i = 0; i < Outs.size(); ++i)
1364 VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1365 }
1366
1367 // Value is a value that has been passed to us in the location described by VA
1368 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1369 // any loads onto Chain.
convertLocVTToValVT(SelectionDAG & DAG,const SDLoc & DL,CCValAssign & VA,SDValue Chain,SDValue Value)1370 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1371 CCValAssign &VA, SDValue Chain,
1372 SDValue Value) {
1373 // If the argument has been promoted from a smaller type, insert an
1374 // assertion to capture this.
1375 if (VA.getLocInfo() == CCValAssign::SExt)
1376 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1377 DAG.getValueType(VA.getValVT()));
1378 else if (VA.getLocInfo() == CCValAssign::ZExt)
1379 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1380 DAG.getValueType(VA.getValVT()));
1381
1382 if (VA.isExtInLoc())
1383 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1384 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1385 // If this is a short vector argument loaded from the stack,
1386 // extend from i64 to full vector size and then bitcast.
1387 assert(VA.getLocVT() == MVT::i64);
1388 assert(VA.getValVT().isVector());
1389 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1390 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1391 } else
1392 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1393 return Value;
1394 }
1395
1396 // Value is a value of type VA.getValVT() that we need to copy into
1397 // the location described by VA. Return a copy of Value converted to
1398 // VA.getLocVT(). The caller is responsible for handling indirect values.
1399 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1400 CCValAssign &VA, SDValue Value) {
1401 switch (VA.getLocInfo()) {
1402 case CCValAssign::SExt:
1403 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1404 case CCValAssign::ZExt:
1405 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1406 case CCValAssign::AExt:
1407 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1408 case CCValAssign::BCvt: {
1409 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1410 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1411 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1412 // For an f32 vararg we need to first promote it to an f64 and then
1413 // bitcast it to an i64.
1414 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1415 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1416 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1417 ? MVT::v2i64
1418 : VA.getLocVT();
1419 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1420 // For ELF, this is a short vector argument to be stored to the stack,
1421 // bitcast to v2i64 and then extract first element.
1422 if (BitCastToType == MVT::v2i64)
1423 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1424 DAG.getConstant(0, DL, MVT::i32));
1425 return Value;
1426 }
1427 case CCValAssign::Full:
1428 return Value;
1429 default:
1430 llvm_unreachable("Unhandled getLocInfo()");
1431 }
1432 }
1433
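// Split an i128 value into 64-bit halves and combine them into the untyped
// GR128 register pair (high half first) used when an i128 must live in a
// general-register pair, e.g. for inline-asm operands.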
1434 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1435 SDLoc DL(In);
1436 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1437 DAG.getIntPtrConstant(0, DL));
1438 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1439 DAG.getIntPtrConstant(1, DL));
1440 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1441 MVT::Untyped, Hi, Lo);
1442 return SDValue(Pair, 0);
1443 }
1444
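// Inverse of lowerI128ToGR128: extract the high and low 64-bit subregisters
// of a GR128 pair and rebuild the i128 value from them.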
1445 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1446 SDLoc DL(In);
1447 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1448 DL, MVT::i64, In);
1449 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1450 DL, MVT::i64, In);
1451 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1452 }
1453
1454 bool SystemZTargetLowering::splitValueIntoRegisterParts(
1455 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1456 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1457 EVT ValueVT = Val.getValueType();
1458 assert((ValueVT != MVT::i128 ||
1459 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1460 (NumParts == 2 && PartVT == MVT::i64))) &&
1461 "Unknown handling of i128 value.");
1462 if (ValueVT == MVT::i128 && NumParts == 1) {
1463 // Inline assembly operand.
1464 Parts[0] = lowerI128ToGR128(DAG, Val);
1465 return true;
1466 }
1467 return false;
1468 }
1469
1470 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1471 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1472 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1473 assert((ValueVT != MVT::i128 ||
1474 ((NumParts == 1 && PartVT == MVT::Untyped) ||
1475 (NumParts == 2 && PartVT == MVT::i64))) &&
1476 "Unknown handling of i128 value.");
1477 if (ValueVT == MVT::i128 && NumParts == 1)
1478 // Inline assembly operand.
1479 return lowerGR128ToI128(DAG, Parts[0]);
1480 return SDValue();
1481 }
1482
1483 SDValue SystemZTargetLowering::LowerFormalArguments(
1484 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1485 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1486 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1487 MachineFunction &MF = DAG.getMachineFunction();
1488 MachineFrameInfo &MFI = MF.getFrameInfo();
1489 MachineRegisterInfo &MRI = MF.getRegInfo();
1490 SystemZMachineFunctionInfo *FuncInfo =
1491 MF.getInfo<SystemZMachineFunctionInfo>();
1492 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1493 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1494
1495 // Detect unsupported vector argument types.
1496 if (Subtarget.hasVector())
1497 VerifyVectorTypes(Ins);
1498
1499 // Assign locations to all of the incoming arguments.
1500 SmallVector<CCValAssign, 16> ArgLocs;
1501 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1502 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1503
1504 unsigned NumFixedGPRs = 0;
1505 unsigned NumFixedFPRs = 0;
1506 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1507 SDValue ArgValue;
1508 CCValAssign &VA = ArgLocs[I];
1509 EVT LocVT = VA.getLocVT();
1510 if (VA.isRegLoc()) {
1511 // Arguments passed in registers
1512 const TargetRegisterClass *RC;
1513 switch (LocVT.getSimpleVT().SimpleTy) {
1514 default:
1515 // Integers smaller than i64 should be promoted to i64.
1516 llvm_unreachable("Unexpected argument type");
1517 case MVT::i32:
1518 NumFixedGPRs += 1;
1519 RC = &SystemZ::GR32BitRegClass;
1520 break;
1521 case MVT::i64:
1522 NumFixedGPRs += 1;
1523 RC = &SystemZ::GR64BitRegClass;
1524 break;
1525 case MVT::f32:
1526 NumFixedFPRs += 1;
1527 RC = &SystemZ::FP32BitRegClass;
1528 break;
1529 case MVT::f64:
1530 NumFixedFPRs += 1;
1531 RC = &SystemZ::FP64BitRegClass;
1532 break;
1533 case MVT::f128:
1534 NumFixedFPRs += 2;
1535 RC = &SystemZ::FP128BitRegClass;
1536 break;
1537 case MVT::v16i8:
1538 case MVT::v8i16:
1539 case MVT::v4i32:
1540 case MVT::v2i64:
1541 case MVT::v4f32:
1542 case MVT::v2f64:
1543 RC = &SystemZ::VR128BitRegClass;
1544 break;
1545 }
1546
1547 Register VReg = MRI.createVirtualRegister(RC);
1548 MRI.addLiveIn(VA.getLocReg(), VReg);
1549 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1550 } else {
1551 assert(VA.isMemLoc() && "Argument not register or memory");
1552
1553 // Create the frame index object for this incoming parameter.
1554 // FIXME: Pre-include call frame size in the offset, should not
1555 // need to manually add it here.
1556 int64_t ArgSPOffset = VA.getLocMemOffset();
1557 if (Subtarget.isTargetXPLINK64()) {
1558 auto &XPRegs =
1559 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
1560 ArgSPOffset += XPRegs.getCallFrameSize();
1561 }
1562 int FI =
1563 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1564
1565 // Create the SelectionDAG nodes corresponding to a load
1566 // from this parameter. Unpromoted ints and floats are
1567 // passed as right-justified 8-byte values.
1568 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
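      // SystemZ is big-endian, so a right-justified 32-bit value occupies the
      // high-addressed half of its 8-byte slot; step past the first 4 bytes.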
1569 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1570 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1571 DAG.getIntPtrConstant(4, DL));
1572 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1573 MachinePointerInfo::getFixedStack(MF, FI));
1574 }
1575
1576 // Convert the value of the argument register into the value that's
1577 // being passed.
1578 if (VA.getLocInfo() == CCValAssign::Indirect) {
1579 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1580 MachinePointerInfo()));
1581 // If the original argument was split (e.g. i128), we need
1582 // to load all parts of it here (using the same address).
1583 unsigned ArgIndex = Ins[I].OrigArgIndex;
1584 assert (Ins[I].PartOffset == 0);
1585 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1586 CCValAssign &PartVA = ArgLocs[I + 1];
1587 unsigned PartOffset = Ins[I + 1].PartOffset;
1588 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1589 DAG.getIntPtrConstant(PartOffset, DL));
1590 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1591 MachinePointerInfo()));
1592 ++I;
1593 }
1594 } else
1595 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1596 }
1597
1598 // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1599 if (IsVarArg && Subtarget.isTargetELF()) {
1600 // Save the number of non-varargs registers for later use by va_start, etc.
1601 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1602 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1603
1604 // Likewise the address (in the form of a frame index) of where the
1605 // first stack vararg would be. The 1-byte size here is arbitrary.
1606 int64_t StackSize = CCInfo.getNextStackOffset();
1607 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1608
1609 // ...and a similar frame index for the caller-allocated save area
1610 // that will be used to store the incoming registers.
1611 int64_t RegSaveOffset =
1612 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1613 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1614 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1615
1616 // Store the FPR varargs in the reserved frame slots. (We store the
1617 // GPRs as part of the prologue.)
1618 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1619 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1620 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1621 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1622 int FI =
1623 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1624 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1625 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1626 &SystemZ::FP64BitRegClass);
1627 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1628 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1629 MachinePointerInfo::getFixedStack(MF, FI));
1630 }
1631 // Join the stores, which are independent of one another.
1632 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1633 ArrayRef(&MemOps[NumFixedFPRs],
1634 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1635 }
1636 }
1637
1638   // FIXME: For XPLINK64, add in support for handling the incoming "ADA"
1639   // special register (R5).
1640 return Chain;
1641 }
1642
1643 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1644 SmallVectorImpl<CCValAssign> &ArgLocs,
1645 SmallVectorImpl<ISD::OutputArg> &Outs) {
1646 // Punt if there are any indirect or stack arguments, or if the call
1647 // needs the callee-saved argument register R6, or if the call uses
1648 // the callee-saved register arguments SwiftSelf and SwiftError.
1649 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1650 CCValAssign &VA = ArgLocs[I];
1651 if (VA.getLocInfo() == CCValAssign::Indirect)
1652 return false;
1653 if (!VA.isRegLoc())
1654 return false;
1655 Register Reg = VA.getLocReg();
1656 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1657 return false;
1658 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1659 return false;
1660 }
1661 return true;
1662 }
1663
1664 SDValue
1665 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1666 SmallVectorImpl<SDValue> &InVals) const {
1667 SelectionDAG &DAG = CLI.DAG;
1668 SDLoc &DL = CLI.DL;
1669 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1670 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1671 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1672 SDValue Chain = CLI.Chain;
1673 SDValue Callee = CLI.Callee;
1674 bool &IsTailCall = CLI.IsTailCall;
1675 CallingConv::ID CallConv = CLI.CallConv;
1676 bool IsVarArg = CLI.IsVarArg;
1677 MachineFunction &MF = DAG.getMachineFunction();
1678 EVT PtrVT = getPointerTy(MF.getDataLayout());
1679 LLVMContext &Ctx = *DAG.getContext();
1680 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
1681
1682   // FIXME: z/OS support to be added later.
1683 if (Subtarget.isTargetXPLINK64())
1684 IsTailCall = false;
1685
1686 // Detect unsupported vector argument and return types.
1687 if (Subtarget.hasVector()) {
1688 VerifyVectorTypes(Outs);
1689 VerifyVectorTypes(Ins);
1690 }
1691
1692 // Analyze the operands of the call, assigning locations to each operand.
1693 SmallVector<CCValAssign, 16> ArgLocs;
1694 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1695 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1696
1697 // We don't support GuaranteedTailCallOpt, only automatically-detected
1698 // sibling calls.
1699 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1700 IsTailCall = false;
1701
1702 // Get a count of how many bytes are to be pushed on the stack.
1703 unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1704
1705 if (Subtarget.isTargetXPLINK64())
1706     // Although the XPLINK specifications for AMODE64 state that the minimum
1707     // size of the param area is 32 bytes and no rounding is otherwise
1708     // specified, we round this area up in 64-byte increments to be compatible
1709     // with existing compilers.
1710 NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1711
1712 // Mark the start of the call.
1713 if (!IsTailCall)
1714 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1715
1716 // Copy argument values to their designated locations.
1717 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1718 SmallVector<SDValue, 8> MemOpChains;
1719 SDValue StackPtr;
1720 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1721 CCValAssign &VA = ArgLocs[I];
1722 SDValue ArgValue = OutVals[I];
1723
1724 if (VA.getLocInfo() == CCValAssign::Indirect) {
1725 // Store the argument in a stack slot and pass its address.
1726 unsigned ArgIndex = Outs[I].OrigArgIndex;
1727 EVT SlotVT;
1728 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1729 // Allocate the full stack space for a promoted (and split) argument.
1730 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1731 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1732 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1733 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1734 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1735 } else {
1736 SlotVT = Outs[I].ArgVT;
1737 }
1738 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1739 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1740 MemOpChains.push_back(
1741 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1742 MachinePointerInfo::getFixedStack(MF, FI)));
1743 // If the original argument was split (e.g. i128), we need
1744 // to store all parts of it here (and pass just one address).
1745 assert (Outs[I].PartOffset == 0);
1746 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1747 SDValue PartValue = OutVals[I + 1];
1748 unsigned PartOffset = Outs[I + 1].PartOffset;
1749 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1750 DAG.getIntPtrConstant(PartOffset, DL));
1751 MemOpChains.push_back(
1752 DAG.getStore(Chain, DL, PartValue, Address,
1753 MachinePointerInfo::getFixedStack(MF, FI)));
1754 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1755 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1756 ++I;
1757 }
1758 ArgValue = SpillSlot;
1759 } else
1760 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1761
1762 if (VA.isRegLoc()) {
1763       // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to an
1764       // MVT::i128 type. We decompose the 128-bit type into a pair of its high
1765       // and low values.
1766 if (VA.getLocVT() == MVT::i128)
1767 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1768 // Queue up the argument copies and emit them at the end.
1769 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1770 } else {
1771 assert(VA.isMemLoc() && "Argument not register or memory");
1772
1773 // Work out the address of the stack slot. Unpromoted ints and
1774 // floats are passed as right-justified 8-byte values.
1775 if (!StackPtr.getNode())
1776 StackPtr = DAG.getCopyFromReg(Chain, DL,
1777 Regs->getStackPointerRegister(), PtrVT);
1778 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1779 VA.getLocMemOffset();
1780 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1781 Offset += 4;
1782 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1783 DAG.getIntPtrConstant(Offset, DL));
1784
1785 // Emit the store.
1786 MemOpChains.push_back(
1787 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1788
1789 // Although long doubles or vectors are passed through the stack when
1790 // they are vararg (non-fixed arguments), if a long double or vector
1791       // occupies the third and fourth slots of the argument list, GPR3 should
1792 // still shadow the third slot of the argument list.
1793 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1794 SDValue ShadowArgValue =
1795 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1796 DAG.getIntPtrConstant(1, DL));
1797 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1798 }
1799 }
1800 }
1801
1802 // Join the stores, which are independent of one another.
1803 if (!MemOpChains.empty())
1804 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1805
1806 // Accept direct calls by converting symbolic call addresses to the
1807 // associated Target* opcodes. Force %r1 to be used for indirect
1808 // tail calls.
1809 SDValue Glue;
1810 // FIXME: Add support for XPLINK using the ADA register.
1811 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1812 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1813 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1814 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1815 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1816 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1817 } else if (IsTailCall) {
1818 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1819 Glue = Chain.getValue(1);
1820 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1821 }
1822
1823 // Build a sequence of copy-to-reg nodes, chained and glued together.
1824 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1825 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1826 RegsToPass[I].second, Glue);
1827 Glue = Chain.getValue(1);
1828 }
1829
1830 // The first call operand is the chain and the second is the target address.
1831 SmallVector<SDValue, 8> Ops;
1832 Ops.push_back(Chain);
1833 Ops.push_back(Callee);
1834
1835 // Add argument registers to the end of the list so that they are
1836 // known live into the call.
1837 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1838 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1839 RegsToPass[I].second.getValueType()));
1840
1841 // Add a register mask operand representing the call-preserved registers.
1842 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1843 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1844 assert(Mask && "Missing call preserved mask for calling convention");
1845 Ops.push_back(DAG.getRegisterMask(Mask));
1846
1847 // Glue the call to the argument copies, if any.
1848 if (Glue.getNode())
1849 Ops.push_back(Glue);
1850
1851 // Emit the call.
1852 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1853 if (IsTailCall)
1854 return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1855 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1856 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1857 Glue = Chain.getValue(1);
1858
1859 // Mark the end of the call, which is glued to the call itself.
1860 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
1861 Glue = Chain.getValue(1);
1862
1863 // Assign locations to each value returned by this call.
1864 SmallVector<CCValAssign, 16> RetLocs;
1865 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1866 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1867
1868 // Copy all of the result registers out of their specified physreg.
1869 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1870 CCValAssign &VA = RetLocs[I];
1871
1872 // Copy the value out, gluing the copy to the end of the call sequence.
1873 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1874 VA.getLocVT(), Glue);
1875 Chain = RetValue.getValue(1);
1876 Glue = RetValue.getValue(2);
1877
1878 // Convert the value of the return register into the value that's
1879 // being returned.
1880 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1881 }
1882
1883 return Chain;
1884 }
1885
1886 // Generate a call taking the given operands as arguments and returning a
1887 // result of type RetVT.
1888 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1889 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1890 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1891 bool DoesNotReturn, bool IsReturnValueUsed) const {
1892 TargetLowering::ArgListTy Args;
1893 Args.reserve(Ops.size());
1894
1895 TargetLowering::ArgListEntry Entry;
1896 for (SDValue Op : Ops) {
1897 Entry.Node = Op;
1898 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1899 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1900 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1901 Args.push_back(Entry);
1902 }
1903
1904 SDValue Callee =
1905 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1906
1907 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1908 TargetLowering::CallLoweringInfo CLI(DAG);
1909 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1910 CLI.setDebugLoc(DL)
1911 .setChain(Chain)
1912 .setCallee(CallConv, RetTy, Callee, std::move(Args))
1913 .setNoReturn(DoesNotReturn)
1914 .setDiscardResult(!IsReturnValueUsed)
1915 .setSExtResult(SignExtend)
1916 .setZExtResult(!SignExtend);
1917 return LowerCallTo(CLI);
1918 }
1919
1920 bool SystemZTargetLowering::
1921 CanLowerReturn(CallingConv::ID CallConv,
1922 MachineFunction &MF, bool isVarArg,
1923 const SmallVectorImpl<ISD::OutputArg> &Outs,
1924 LLVMContext &Context) const {
1925 // Detect unsupported vector return types.
1926 if (Subtarget.hasVector())
1927 VerifyVectorTypes(Outs);
1928
1929 // Special case that we cannot easily detect in RetCC_SystemZ since
1930 // i128 is not a legal type.
1931 for (auto &Out : Outs)
1932 if (Out.ArgVT == MVT::i128)
1933 return false;
1934
1935 SmallVector<CCValAssign, 16> RetLocs;
1936 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1937 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1938 }
1939
1940 SDValue
1941 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1942 bool IsVarArg,
1943 const SmallVectorImpl<ISD::OutputArg> &Outs,
1944 const SmallVectorImpl<SDValue> &OutVals,
1945 const SDLoc &DL, SelectionDAG &DAG) const {
1946 MachineFunction &MF = DAG.getMachineFunction();
1947
1948 // Detect unsupported vector return types.
1949 if (Subtarget.hasVector())
1950 VerifyVectorTypes(Outs);
1951
1952 // Assign locations to each returned value.
1953 SmallVector<CCValAssign, 16> RetLocs;
1954 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1955 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1956
1957 // Quick exit for void returns
1958 if (RetLocs.empty())
1959 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1960
1961 if (CallConv == CallingConv::GHC)
1962 report_fatal_error("GHC functions return void only");
1963
1964 // Copy the result values into the output registers.
1965 SDValue Glue;
1966 SmallVector<SDValue, 4> RetOps;
1967 RetOps.push_back(Chain);
1968 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1969 CCValAssign &VA = RetLocs[I];
1970 SDValue RetValue = OutVals[I];
1971
1972 // Make the return register live on exit.
1973 assert(VA.isRegLoc() && "Can only return in registers!");
1974
1975 // Promote the value as required.
1976 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1977
1978 // Chain and glue the copies together.
1979 Register Reg = VA.getLocReg();
1980 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1981 Glue = Chain.getValue(1);
1982 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1983 }
1984
1985 // Update chain and glue.
1986 RetOps[0] = Chain;
1987 if (Glue.getNode())
1988 RetOps.push_back(Glue);
1989
1990 return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1991 }
1992
1993 // Return true if Op is an intrinsic node with chain that returns the CC value
1994 // as its only (other) result. Provide the associated SystemZISD opcode and
1995 // the mask of valid CC values if so.
1996 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1997 unsigned &CCValid) {
1998 unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1999 switch (Id) {
2000 case Intrinsic::s390_tbegin:
2001 Opcode = SystemZISD::TBEGIN;
2002 CCValid = SystemZ::CCMASK_TBEGIN;
2003 return true;
2004
2005 case Intrinsic::s390_tbegin_nofloat:
2006 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2007 CCValid = SystemZ::CCMASK_TBEGIN;
2008 return true;
2009
2010 case Intrinsic::s390_tend:
2011 Opcode = SystemZISD::TEND;
2012 CCValid = SystemZ::CCMASK_TEND;
2013 return true;
2014
2015 default:
2016 return false;
2017 }
2018 }
2019
2020 // Return true if Op is an intrinsic node without chain that returns the
2021 // CC value as its final result. Provide the associated SystemZISD
2022 // opcode and the mask of valid CC values if so.
2023 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2024 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2025 switch (Id) {
2026 case Intrinsic::s390_vpkshs:
2027 case Intrinsic::s390_vpksfs:
2028 case Intrinsic::s390_vpksgs:
2029 Opcode = SystemZISD::PACKS_CC;
2030 CCValid = SystemZ::CCMASK_VCMP;
2031 return true;
2032
2033 case Intrinsic::s390_vpklshs:
2034 case Intrinsic::s390_vpklsfs:
2035 case Intrinsic::s390_vpklsgs:
2036 Opcode = SystemZISD::PACKLS_CC;
2037 CCValid = SystemZ::CCMASK_VCMP;
2038 return true;
2039
2040 case Intrinsic::s390_vceqbs:
2041 case Intrinsic::s390_vceqhs:
2042 case Intrinsic::s390_vceqfs:
2043 case Intrinsic::s390_vceqgs:
2044 Opcode = SystemZISD::VICMPES;
2045 CCValid = SystemZ::CCMASK_VCMP;
2046 return true;
2047
2048 case Intrinsic::s390_vchbs:
2049 case Intrinsic::s390_vchhs:
2050 case Intrinsic::s390_vchfs:
2051 case Intrinsic::s390_vchgs:
2052 Opcode = SystemZISD::VICMPHS;
2053 CCValid = SystemZ::CCMASK_VCMP;
2054 return true;
2055
2056 case Intrinsic::s390_vchlbs:
2057 case Intrinsic::s390_vchlhs:
2058 case Intrinsic::s390_vchlfs:
2059 case Intrinsic::s390_vchlgs:
2060 Opcode = SystemZISD::VICMPHLS;
2061 CCValid = SystemZ::CCMASK_VCMP;
2062 return true;
2063
2064 case Intrinsic::s390_vtm:
2065 Opcode = SystemZISD::VTM;
2066 CCValid = SystemZ::CCMASK_VCMP;
2067 return true;
2068
2069 case Intrinsic::s390_vfaebs:
2070 case Intrinsic::s390_vfaehs:
2071 case Intrinsic::s390_vfaefs:
2072 Opcode = SystemZISD::VFAE_CC;
2073 CCValid = SystemZ::CCMASK_ANY;
2074 return true;
2075
2076 case Intrinsic::s390_vfaezbs:
2077 case Intrinsic::s390_vfaezhs:
2078 case Intrinsic::s390_vfaezfs:
2079 Opcode = SystemZISD::VFAEZ_CC;
2080 CCValid = SystemZ::CCMASK_ANY;
2081 return true;
2082
2083 case Intrinsic::s390_vfeebs:
2084 case Intrinsic::s390_vfeehs:
2085 case Intrinsic::s390_vfeefs:
2086 Opcode = SystemZISD::VFEE_CC;
2087 CCValid = SystemZ::CCMASK_ANY;
2088 return true;
2089
2090 case Intrinsic::s390_vfeezbs:
2091 case Intrinsic::s390_vfeezhs:
2092 case Intrinsic::s390_vfeezfs:
2093 Opcode = SystemZISD::VFEEZ_CC;
2094 CCValid = SystemZ::CCMASK_ANY;
2095 return true;
2096
2097 case Intrinsic::s390_vfenebs:
2098 case Intrinsic::s390_vfenehs:
2099 case Intrinsic::s390_vfenefs:
2100 Opcode = SystemZISD::VFENE_CC;
2101 CCValid = SystemZ::CCMASK_ANY;
2102 return true;
2103
2104 case Intrinsic::s390_vfenezbs:
2105 case Intrinsic::s390_vfenezhs:
2106 case Intrinsic::s390_vfenezfs:
2107 Opcode = SystemZISD::VFENEZ_CC;
2108 CCValid = SystemZ::CCMASK_ANY;
2109 return true;
2110
2111 case Intrinsic::s390_vistrbs:
2112 case Intrinsic::s390_vistrhs:
2113 case Intrinsic::s390_vistrfs:
2114 Opcode = SystemZISD::VISTR_CC;
2115 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2116 return true;
2117
2118 case Intrinsic::s390_vstrcbs:
2119 case Intrinsic::s390_vstrchs:
2120 case Intrinsic::s390_vstrcfs:
2121 Opcode = SystemZISD::VSTRC_CC;
2122 CCValid = SystemZ::CCMASK_ANY;
2123 return true;
2124
2125 case Intrinsic::s390_vstrczbs:
2126 case Intrinsic::s390_vstrczhs:
2127 case Intrinsic::s390_vstrczfs:
2128 Opcode = SystemZISD::VSTRCZ_CC;
2129 CCValid = SystemZ::CCMASK_ANY;
2130 return true;
2131
2132 case Intrinsic::s390_vstrsb:
2133 case Intrinsic::s390_vstrsh:
2134 case Intrinsic::s390_vstrsf:
2135 Opcode = SystemZISD::VSTRS_CC;
2136 CCValid = SystemZ::CCMASK_ANY;
2137 return true;
2138
2139 case Intrinsic::s390_vstrszb:
2140 case Intrinsic::s390_vstrszh:
2141 case Intrinsic::s390_vstrszf:
2142 Opcode = SystemZISD::VSTRSZ_CC;
2143 CCValid = SystemZ::CCMASK_ANY;
2144 return true;
2145
2146 case Intrinsic::s390_vfcedbs:
2147 case Intrinsic::s390_vfcesbs:
2148 Opcode = SystemZISD::VFCMPES;
2149 CCValid = SystemZ::CCMASK_VCMP;
2150 return true;
2151
2152 case Intrinsic::s390_vfchdbs:
2153 case Intrinsic::s390_vfchsbs:
2154 Opcode = SystemZISD::VFCMPHS;
2155 CCValid = SystemZ::CCMASK_VCMP;
2156 return true;
2157
2158 case Intrinsic::s390_vfchedbs:
2159 case Intrinsic::s390_vfchesbs:
2160 Opcode = SystemZISD::VFCMPHES;
2161 CCValid = SystemZ::CCMASK_VCMP;
2162 return true;
2163
2164 case Intrinsic::s390_vftcidb:
2165 case Intrinsic::s390_vftcisb:
2166 Opcode = SystemZISD::VFTCI;
2167 CCValid = SystemZ::CCMASK_VCMP;
2168 return true;
2169
2170 case Intrinsic::s390_tdc:
2171 Opcode = SystemZISD::TDC;
2172 CCValid = SystemZ::CCMASK_TDC;
2173 return true;
2174
2175 default:
2176 return false;
2177 }
2178 }
2179
2180 // Emit an intrinsic with chain and an explicit CC register result.
2181 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2182 unsigned Opcode) {
2183 // Copy all operands except the intrinsic ID.
2184 unsigned NumOps = Op.getNumOperands();
2185 SmallVector<SDValue, 6> Ops;
2186 Ops.reserve(NumOps - 1);
2187 Ops.push_back(Op.getOperand(0));
2188 for (unsigned I = 2; I < NumOps; ++I)
2189 Ops.push_back(Op.getOperand(I));
2190
2191 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2192 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2193 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
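  // Rewire users of the old intrinsic's chain result to the chain of the new
  // CC-producing node before the original node is removed.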
2194 SDValue OldChain = SDValue(Op.getNode(), 1);
2195 SDValue NewChain = SDValue(Intr.getNode(), 1);
2196 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2197 return Intr.getNode();
2198 }
2199
2200 // Emit an intrinsic with an explicit CC register result.
2201 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2202 unsigned Opcode) {
2203 // Copy all operands except the intrinsic ID.
2204 unsigned NumOps = Op.getNumOperands();
2205 SmallVector<SDValue, 6> Ops;
2206 Ops.reserve(NumOps - 1);
2207 for (unsigned I = 1; I < NumOps; ++I)
2208 Ops.push_back(Op.getOperand(I));
2209
2210 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2211 return Intr.getNode();
2212 }
2213
2214 // CC is a comparison that will be implemented using an integer or
2215 // floating-point comparison. Return the condition code mask for
2216 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2217 // unsigned comparisons and clear for signed ones. In the floating-point
2218 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
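// For example, ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT, while
// ISD::SETGT and ISD::SETOGT both map to CCMASK_CMP_GT alone.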
2219 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2220 #define CONV(X) \
2221 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2222 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2223 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2224
2225 switch (CC) {
2226 default:
2227 llvm_unreachable("Invalid integer condition!");
2228
2229 CONV(EQ);
2230 CONV(NE);
2231 CONV(GT);
2232 CONV(GE);
2233 CONV(LT);
2234 CONV(LE);
2235
2236 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2237 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2238 }
2239 #undef CONV
2240 }
2241
2242 // If C can be converted to a comparison against zero, adjust the operands
2243 // as necessary.
2244 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2245 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2246 return;
2247
2248 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2249 if (!ConstOp1)
2250 return;
2251
2252 int64_t Value = ConstOp1->getSExtValue();
2253 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2254 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2255 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2256 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2257 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2258 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2259 }
2260 }
2261
2262 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2263 // adjust the operands as necessary.
2264 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2265 Comparison &C) {
2266   // For us to make any changes, it must be a comparison between a single-use
2267 // load and a constant.
2268 if (!C.Op0.hasOneUse() ||
2269 C.Op0.getOpcode() != ISD::LOAD ||
2270 C.Op1.getOpcode() != ISD::Constant)
2271 return;
2272
2273 // We must have an 8- or 16-bit load.
2274 auto *Load = cast<LoadSDNode>(C.Op0);
2275 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2276 if ((NumBits != 8 && NumBits != 16) ||
2277 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2278 return;
2279
2280 // The load must be an extending one and the constant must be within the
2281 // range of the unextended value.
2282 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2283 uint64_t Value = ConstOp1->getZExtValue();
2284 uint64_t Mask = (1 << NumBits) - 1;
2285 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2286 // Make sure that ConstOp1 is in range of C.Op0.
2287 int64_t SignedValue = ConstOp1->getSExtValue();
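    // Adding 2^(NumBits-1) maps the signed range [-2^(NumBits-1), 2^(NumBits-1)-1]
    // onto [0, Mask], so any result above Mask is outside the range of the
    // unextended value.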
2288 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2289 return;
2290 if (C.ICmpType != SystemZICMP::SignedOnly) {
2291 // Unsigned comparison between two sign-extended values is equivalent
2292 // to unsigned comparison between two zero-extended values.
2293 Value &= Mask;
2294 } else if (NumBits == 8) {
2295 // Try to treat the comparison as unsigned, so that we can use CLI.
2296 // Adjust CCMask and Value as necessary.
2297 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2298 // Test whether the high bit of the byte is set.
2299 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2300 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2301 // Test whether the high bit of the byte is clear.
2302 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2303 else
2304 // No instruction exists for this combination.
2305 return;
2306 C.ICmpType = SystemZICMP::UnsignedOnly;
2307 }
2308 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2309 if (Value > Mask)
2310 return;
2311 // If the constant is in range, we can use any comparison.
2312 C.ICmpType = SystemZICMP::Any;
2313 } else
2314 return;
2315
2316 // Make sure that the first operand is an i32 of the right extension type.
2317 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2318 ISD::SEXTLOAD :
2319 ISD::ZEXTLOAD);
2320 if (C.Op0.getValueType() != MVT::i32 ||
2321 Load->getExtensionType() != ExtType) {
2322 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2323 Load->getBasePtr(), Load->getPointerInfo(),
2324 Load->getMemoryVT(), Load->getAlign(),
2325 Load->getMemOperand()->getFlags());
2326 // Update the chain uses.
2327 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2328 }
2329
2330 // Make sure that the second operand is an i32 with the right value.
2331 if (C.Op1.getValueType() != MVT::i32 ||
2332 Value != ConstOp1->getZExtValue())
2333 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2334 }
2335
2336 // Return true if Op is either an unextended load, or a load suitable
2337 // for integer register-memory comparisons of type ICmpType.
2338 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2339 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2340 if (Load) {
2341 // There are no instructions to compare a register with a memory byte.
2342 if (Load->getMemoryVT() == MVT::i8)
2343 return false;
2344 // Otherwise decide on extension type.
2345 switch (Load->getExtensionType()) {
2346 case ISD::NON_EXTLOAD:
2347 return true;
2348 case ISD::SEXTLOAD:
2349 return ICmpType != SystemZICMP::UnsignedOnly;
2350 case ISD::ZEXTLOAD:
2351 return ICmpType != SystemZICMP::SignedOnly;
2352 default:
2353 break;
2354 }
2355 }
2356 return false;
2357 }
2358
2359 // Return true if it is better to swap the operands of C.
2360 static bool shouldSwapCmpOperands(const Comparison &C) {
2361 // Leave f128 comparisons alone, since they have no memory forms.
2362 if (C.Op0.getValueType() == MVT::f128)
2363 return false;
2364
2365 // Always keep a floating-point constant second, since comparisons with
2366 // zero can use LOAD TEST and comparisons with other constants make a
2367 // natural memory operand.
2368 if (isa<ConstantFPSDNode>(C.Op1))
2369 return false;
2370
2371 // Never swap comparisons with zero since there are many ways to optimize
2372 // those later.
2373 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2374 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2375 return false;
2376
2377 // Also keep natural memory operands second if the loaded value is
2378 // only used here. Several comparisons have memory forms.
2379 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2380 return false;
2381
2382 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2383 // In that case we generally prefer the memory to be second.
2384 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2385 // The only exceptions are when the second operand is a constant and
2386 // we can use things like CHHSI.
2387 if (!ConstOp1)
2388 return true;
2389 // The unsigned memory-immediate instructions can handle 16-bit
2390 // unsigned integers.
2391 if (C.ICmpType != SystemZICMP::SignedOnly &&
2392 isUInt<16>(ConstOp1->getZExtValue()))
2393 return false;
2394 // The signed memory-immediate instructions can handle 16-bit
2395 // signed integers.
2396 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2397 isInt<16>(ConstOp1->getSExtValue()))
2398 return false;
2399 return true;
2400 }
2401
2402 // Try to promote the use of CGFR and CLGFR.
2403 unsigned Opcode0 = C.Op0.getOpcode();
2404 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2405 return true;
2406 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2407 return true;
2408 if (C.ICmpType != SystemZICMP::SignedOnly &&
2409 Opcode0 == ISD::AND &&
2410 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2411 cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2412 return true;
2413
2414 return false;
2415 }
2416
2417 // Check whether C tests for equality between X and Y and whether X - Y
2418 // or Y - X is also computed. In that case it's better to compare the
2419 // result of the subtraction against zero.
2420 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2421 Comparison &C) {
2422 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2423 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2424 for (SDNode *N : C.Op0->uses()) {
2425 if (N->getOpcode() == ISD::SUB &&
2426 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2427 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2428 C.Op0 = SDValue(N, 0);
2429 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2430 return;
2431 }
2432 }
2433 }
2434 }
2435
2436 // Check whether C compares a floating-point value with zero and if that
2437 // floating-point value is also negated. In this case we can use the
2438 // negation to set CC, thus avoiding separate LOAD AND TEST and
2439 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2440 static void adjustForFNeg(Comparison &C) {
2441 // This optimization is invalid for strict comparisons, since FNEG
2442 // does not raise any exceptions.
2443 if (C.Chain)
2444 return;
2445 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2446 if (C1 && C1->isZero()) {
2447 for (SDNode *N : C.Op0->uses()) {
2448 if (N->getOpcode() == ISD::FNEG) {
2449 C.Op0 = SDValue(N, 0);
2450 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2451 return;
2452 }
2453 }
2454 }
2455 }
2456
2457 // Check whether C compares (shl X, 32) with 0 and whether X is
2458 // also sign-extended. In that case it is better to test the result
2459 // of the sign extension using LTGFR.
2460 //
2461 // This case is important because InstCombine transforms a comparison
2462 // with (sext (trunc X)) into a comparison with (shl X, 32).
2463 static void adjustForLTGFR(Comparison &C) {
2464 // Check for a comparison between (shl X, 32) and 0.
2465 if (C.Op0.getOpcode() == ISD::SHL &&
2466 C.Op0.getValueType() == MVT::i64 &&
2467 C.Op1.getOpcode() == ISD::Constant &&
2468 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2469 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2470 if (C1 && C1->getZExtValue() == 32) {
2471 SDValue ShlOp0 = C.Op0.getOperand(0);
2472 // See whether X has any SIGN_EXTEND_INREG uses.
2473 for (SDNode *N : ShlOp0->uses()) {
2474 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2475 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2476 C.Op0 = SDValue(N, 0);
2477 return;
2478 }
2479 }
2480 }
2481 }
2482 }
2483
2484 // If C compares the truncation of an extending load, try to compare
2485 // the untruncated value instead. This exposes more opportunities to
2486 // reuse CC.
2487 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2488 Comparison &C) {
2489 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2490 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2491 C.Op1.getOpcode() == ISD::Constant &&
2492 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2493 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2494 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2495 C.Op0.getValueSizeInBits().getFixedValue()) {
2496 unsigned Type = L->getExtensionType();
2497 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2498 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2499 C.Op0 = C.Op0.getOperand(0);
2500 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2501 }
2502 }
2503 }
2504 }
2505
2506 // Return true if shift operation N has an in-range constant shift value.
2507 // Store it in ShiftVal if so.
2508 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2509 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2510 if (!Shift)
2511 return false;
2512
2513 uint64_t Amount = Shift->getZExtValue();
2514 if (Amount >= N.getValueSizeInBits())
2515 return false;
2516
2517 ShiftVal = Amount;
2518 return true;
2519 }
2520
2521 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2522 // instruction and whether the CC value is descriptive enough to handle
2523 // a comparison of type Opcode between the AND result and CmpVal.
2524 // CCMask says which comparison result is being tested and BitSize is
2525 // the number of bits in the operands. If TEST UNDER MASK can be used,
2526 // return the corresponding CC mask, otherwise return 0.
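// For example, a comparison (X & 0x8000) != 0 is suitable and yields
// CCMASK_TM_SOME_1 for mask 0x8000.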
2527 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2528 uint64_t Mask, uint64_t CmpVal,
2529 unsigned ICmpType) {
2530 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2531
2532 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2533 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2534 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2535 return 0;
2536
2537 // Work out the masks for the lowest and highest bits.
2538 unsigned HighShift = 63 - countLeadingZeros(Mask);
2539 uint64_t High = uint64_t(1) << HighShift;
2540 uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2541
2542 // Signed ordered comparisons are effectively unsigned if the sign
2543 // bit is dropped.
2544 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2545
2546 // Check for equality comparisons with 0, or the equivalent.
2547 if (CmpVal == 0) {
2548 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2549 return SystemZ::CCMASK_TM_ALL_0;
2550 if (CCMask == SystemZ::CCMASK_CMP_NE)
2551 return SystemZ::CCMASK_TM_SOME_1;
2552 }
2553 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2554 if (CCMask == SystemZ::CCMASK_CMP_LT)
2555 return SystemZ::CCMASK_TM_ALL_0;
2556 if (CCMask == SystemZ::CCMASK_CMP_GE)
2557 return SystemZ::CCMASK_TM_SOME_1;
2558 }
2559 if (EffectivelyUnsigned && CmpVal < Low) {
2560 if (CCMask == SystemZ::CCMASK_CMP_LE)
2561 return SystemZ::CCMASK_TM_ALL_0;
2562 if (CCMask == SystemZ::CCMASK_CMP_GT)
2563 return SystemZ::CCMASK_TM_SOME_1;
2564 }
2565
2566 // Check for equality comparisons with the mask, or the equivalent.
2567 if (CmpVal == Mask) {
2568 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2569 return SystemZ::CCMASK_TM_ALL_1;
2570 if (CCMask == SystemZ::CCMASK_CMP_NE)
2571 return SystemZ::CCMASK_TM_SOME_0;
2572 }
2573 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2574 if (CCMask == SystemZ::CCMASK_CMP_GT)
2575 return SystemZ::CCMASK_TM_ALL_1;
2576 if (CCMask == SystemZ::CCMASK_CMP_LE)
2577 return SystemZ::CCMASK_TM_SOME_0;
2578 }
2579 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2580 if (CCMask == SystemZ::CCMASK_CMP_GE)
2581 return SystemZ::CCMASK_TM_ALL_1;
2582 if (CCMask == SystemZ::CCMASK_CMP_LT)
2583 return SystemZ::CCMASK_TM_SOME_0;
2584 }
2585
2586 // Check for ordered comparisons with the top bit.
2587 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2588 if (CCMask == SystemZ::CCMASK_CMP_LE)
2589 return SystemZ::CCMASK_TM_MSB_0;
2590 if (CCMask == SystemZ::CCMASK_CMP_GT)
2591 return SystemZ::CCMASK_TM_MSB_1;
2592 }
2593 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2594 if (CCMask == SystemZ::CCMASK_CMP_LT)
2595 return SystemZ::CCMASK_TM_MSB_0;
2596 if (CCMask == SystemZ::CCMASK_CMP_GE)
2597 return SystemZ::CCMASK_TM_MSB_1;
2598 }
2599
2600 // If there are just two bits, we can do equality checks for Low and High
2601 // as well.
2602 if (Mask == Low + High) {
2603 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2604 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2605 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2606 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2607 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2608 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2609 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2610 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2611 }
2612
2613 // Looks like we've exhausted our options.
2614 return 0;
2615 }
2616
2617 // See whether C can be implemented as a TEST UNDER MASK instruction.
2618 // Update the arguments with the TM version if so.
2619 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2620 Comparison &C) {
2621 // Check that we have a comparison with a constant.
2622 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2623 if (!ConstOp1)
2624 return;
2625 uint64_t CmpVal = ConstOp1->getZExtValue();
2626
2627 // Check whether the nonconstant input is an AND with a constant mask.
2628 Comparison NewC(C);
2629 uint64_t MaskVal;
2630 ConstantSDNode *Mask = nullptr;
2631 if (C.Op0.getOpcode() == ISD::AND) {
2632 NewC.Op0 = C.Op0.getOperand(0);
2633 NewC.Op1 = C.Op0.getOperand(1);
2634 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2635 if (!Mask)
2636 return;
2637 MaskVal = Mask->getZExtValue();
2638 } else {
2639 // There is no instruction to compare with a 64-bit immediate
2640 // so use TMHH instead if possible. We need an unsigned ordered
2641 // comparison with an i64 immediate.
2642 if (NewC.Op0.getValueType() != MVT::i64 ||
2643 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2644 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2645 NewC.ICmpType == SystemZICMP::SignedOnly)
2646 return;
2647 // Convert LE and GT comparisons into LT and GE.
2648 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2649 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2650 if (CmpVal == uint64_t(-1))
2651 return;
2652 CmpVal += 1;
2653 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2654 }
2655     // If the low N bits of Op1 are zero then the low N bits of Op0 can
2656 // be masked off without changing the result.
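    // (CmpVal & -CmpVal) isolates the lowest set bit of CmpVal; negating it
    // produces a mask covering that bit and everything above it.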
2657 MaskVal = -(CmpVal & -CmpVal);
2658 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2659 }
2660 if (!MaskVal)
2661 return;
2662
2663 // Check whether the combination of mask, comparison value and comparison
2664 // type are suitable.
2665 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2666 unsigned NewCCMask, ShiftVal;
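  // If the AND input is itself a constant shift, try to fold the shift into
  // the mask and comparison value so that TM can test the unshifted value.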
2667 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2668 NewC.Op0.getOpcode() == ISD::SHL &&
2669 isSimpleShift(NewC.Op0, ShiftVal) &&
2670 (MaskVal >> ShiftVal != 0) &&
2671 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2672 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2673 MaskVal >> ShiftVal,
2674 CmpVal >> ShiftVal,
2675 SystemZICMP::Any))) {
2676 NewC.Op0 = NewC.Op0.getOperand(0);
2677 MaskVal >>= ShiftVal;
2678 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2679 NewC.Op0.getOpcode() == ISD::SRL &&
2680 isSimpleShift(NewC.Op0, ShiftVal) &&
2681 (MaskVal << ShiftVal != 0) &&
2682 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2683 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2684 MaskVal << ShiftVal,
2685 CmpVal << ShiftVal,
2686 SystemZICMP::UnsignedOnly))) {
2687 NewC.Op0 = NewC.Op0.getOperand(0);
2688 MaskVal <<= ShiftVal;
2689 } else {
2690 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2691 NewC.ICmpType);
2692 if (!NewCCMask)
2693 return;
2694 }
2695
2696 // Go ahead and make the change.
2697 C.Opcode = SystemZISD::TM;
2698 C.Op0 = NewC.Op0;
2699 if (Mask && Mask->getZExtValue() == MaskVal)
2700 C.Op1 = SDValue(Mask, 0);
2701 else
2702 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2703 C.CCValid = SystemZ::CCMASK_TM;
2704 C.CCMask = NewCCMask;
2705 }
2706
2707 // See whether the comparison argument contains a redundant AND
2708 // and remove it if so. This sometimes happens due to the generic
2709 // BRCOND expansion.
2710 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2711 Comparison &C) {
2712 if (C.Op0.getOpcode() != ISD::AND)
2713 return;
2714 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2715 if (!Mask)
2716 return;
2717 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
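  // If every bit that might be nonzero in the AND input is kept by the mask,
  // the AND cannot change the value and the input can be compared directly.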
2718 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2719 return;
2720
2721 C.Op0 = C.Op0.getOperand(0);
2722 }
2723
2724 // Return a Comparison that tests the condition-code result of intrinsic
2725 // node Call against constant integer CC using comparison code Cond.
2726 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2727 // and CCValid is the set of possible condition-code results.
2728 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2729 SDValue Call, unsigned CCValid, uint64_t CC,
2730 ISD::CondCode Cond) {
2731 Comparison C(Call, SDValue(), SDValue());
2732 C.Opcode = Opcode;
2733 C.CCValid = CCValid;
2734 if (Cond == ISD::SETEQ)
2735 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2736 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2737 else if (Cond == ISD::SETNE)
2738 // ...and the inverse of that.
2739 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2740 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2741 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2742 // always true for CC>3.
2743 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2744 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2745 // ...and the inverse of that.
2746 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2747 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2748 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2749 // always true for CC>3.
2750 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2751 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2752 // ...and the inverse of that.
2753 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2754 else
2755 llvm_unreachable("Unexpected integer comparison type");
2756 C.CCMask &= CCValid;
2757 return C;
2758 }
2759
2760 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2761 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2762 ISD::CondCode Cond, const SDLoc &DL,
2763 SDValue Chain = SDValue(),
2764 bool IsSignaling = false) {
2765 if (CmpOp1.getOpcode() == ISD::Constant) {
2766 assert(!Chain);
2767 uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2768 unsigned Opcode, CCValid;
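    // A comparison of a CC-returning intrinsic against a constant can be
    // implemented by testing the intrinsic's CC result directly.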
2769 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2770 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2771 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2772 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2773 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2774 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2775 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2776 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2777 }
2778 Comparison C(CmpOp0, CmpOp1, Chain);
2779 C.CCMask = CCMaskForCondCode(Cond);
2780 if (C.Op0.getValueType().isFloatingPoint()) {
2781 C.CCValid = SystemZ::CCMASK_FCMP;
2782 if (!C.Chain)
2783 C.Opcode = SystemZISD::FCMP;
2784 else if (!IsSignaling)
2785 C.Opcode = SystemZISD::STRICT_FCMP;
2786 else
2787 C.Opcode = SystemZISD::STRICT_FCMPS;
2788 adjustForFNeg(C);
2789 } else {
2790 assert(!C.Chain);
2791 C.CCValid = SystemZ::CCMASK_ICMP;
2792 C.Opcode = SystemZISD::ICMP;
2793 // Choose the type of comparison. Equality and inequality tests can
2794 // use either signed or unsigned comparisons. The choice also doesn't
2795 // matter if both sign bits are known to be clear. In those cases we
2796 // want to give the main isel code the freedom to choose whichever
2797 // form fits best.
2798 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2799 C.CCMask == SystemZ::CCMASK_CMP_NE ||
2800 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2801 C.ICmpType = SystemZICMP::Any;
2802 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2803 C.ICmpType = SystemZICMP::UnsignedOnly;
2804 else
2805 C.ICmpType = SystemZICMP::SignedOnly;
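    // Integer comparisons can never be unordered, so clear the UO bit;
    // it was only needed above to recognize the unsigned condition codes.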
2806 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2807 adjustForRedundantAnd(DAG, DL, C);
2808 adjustZeroCmp(DAG, DL, C);
2809 adjustSubwordCmp(DAG, DL, C);
2810 adjustForSubtraction(DAG, DL, C);
2811 adjustForLTGFR(C);
2812 adjustICmpTruncate(DAG, DL, C);
2813 }
2814
2815 if (shouldSwapCmpOperands(C)) {
2816 std::swap(C.Op0, C.Op1);
2817 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2818 }
2819
2820 adjustForTestUnderMask(DAG, DL, C);
2821 return C;
2822 }
2823
2824 // Emit the comparison instruction described by C.
2825 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2826 if (!C.Op1.getNode()) {
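    // A null second operand means we are testing an intrinsic's CC result;
    // re-emit the intrinsic with its CC-setting SystemZISD opcode.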
2827 SDNode *Node;
2828 switch (C.Op0.getOpcode()) {
2829 case ISD::INTRINSIC_W_CHAIN:
2830 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2831 return SDValue(Node, 0);
2832 case ISD::INTRINSIC_WO_CHAIN:
2833 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2834 return SDValue(Node, Node->getNumValues() - 1);
2835 default:
2836 llvm_unreachable("Invalid comparison operands");
2837 }
2838 }
2839 if (C.Opcode == SystemZISD::ICMP)
2840 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2841 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2842 if (C.Opcode == SystemZISD::TM) {
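    // Only the register forms of TEST UNDER MASK can distinguish the two
    // "mixed" CC values, so require them when the mask separates those cases.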
2843 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2844 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2845 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2846 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2847 }
2848 if (C.Chain) {
2849 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2850 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2851 }
2852 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2853 }
2854
2855 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2856 // 64 bits. Extend is the extension type to use. Store the high part
2857 // in Hi and the low part in Lo.
2858 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2859 SDValue Op0, SDValue Op1, SDValue &Hi,
2860 SDValue &Lo) {
2861 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2862 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2863 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2864 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2865 DAG.getConstant(32, DL, MVT::i64));
2866 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2867 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2868 }
2869
2870 // Lower a binary operation that produces two VT results, one in each
2871 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2872 // and Opcode performs the GR128 operation. Store the even register result
2873 // in Even and the odd register result in Odd.
2874 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2875 unsigned Opcode, SDValue Op0, SDValue Op1,
2876 SDValue &Even, SDValue &Odd) {
2877 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2878 bool Is32Bit = is32Bit(VT);
2879 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2880 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2881 }
2882
2883 // Return an i32 value that is 1 if the CC value produced by CCReg is
2884 // in the mask CCMask and 0 otherwise. CC is known to have a value
2885 // in CCValid, so other values can be ignored.
2886 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2887 unsigned CCValid, unsigned CCMask) {
2888 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2889 DAG.getConstant(0, DL, MVT::i32),
2890 DAG.getTargetConstant(CCValid, DL, MVT::i32),
2891 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2892 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2893 }
2894
2895 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2896 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2897 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2898 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2899 // floating-point comparisons.
2900 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2901 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2902 switch (CC) {
2903 case ISD::SETOEQ:
2904 case ISD::SETEQ:
2905 switch (Mode) {
2906 case CmpMode::Int: return SystemZISD::VICMPE;
2907 case CmpMode::FP: return SystemZISD::VFCMPE;
2908 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2909 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2910 }
2911 llvm_unreachable("Bad mode");
2912
2913 case ISD::SETOGE:
2914 case ISD::SETGE:
2915 switch (Mode) {
2916 case CmpMode::Int: return 0;
2917 case CmpMode::FP: return SystemZISD::VFCMPHE;
2918 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2919 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2920 }
2921 llvm_unreachable("Bad mode");
2922
2923 case ISD::SETOGT:
2924 case ISD::SETGT:
2925 switch (Mode) {
2926 case CmpMode::Int: return SystemZISD::VICMPH;
2927 case CmpMode::FP: return SystemZISD::VFCMPH;
2928 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2929 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2930 }
2931 llvm_unreachable("Bad mode");
2932
2933 case ISD::SETUGT:
2934 switch (Mode) {
2935 case CmpMode::Int: return SystemZISD::VICMPHL;
2936 case CmpMode::FP: return 0;
2937 case CmpMode::StrictFP: return 0;
2938 case CmpMode::SignalingFP: return 0;
2939 }
2940 llvm_unreachable("Bad mode");
2941
2942 default:
2943 return 0;
2944 }
2945 }
2946
2947 // Return the SystemZISD vector comparison operation for CC or its inverse,
2948 // or 0 if neither can be done directly. Indicate in Invert whether the
2949 // result is for the inverse of CC. Mode is as above.
2950 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2951 bool &Invert) {
2952 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2953 Invert = false;
2954 return Opcode;
2955 }
2956
2957 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2958 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2959 Invert = true;
2960 return Opcode;
2961 }
2962
2963 return 0;
2964 }
2965
2966 // Return a v2f64 that contains the extended form of elements Start and Start+1
2967 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2968 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2969 SDValue Op, SDValue Chain) {
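  // Move the requested elements into lanes 0 and 2, the lanes that
  // (STRICT_)VEXTEND widens to f64.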
2970 int Mask[] = { Start, -1, Start + 1, -1 };
2971 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2972 if (Chain) {
2973 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2974 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2975 }
2976 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2977 }
2978
2979 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2980 // producing a result of type VT. If Chain is nonnull, return the strict form.
2981 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2982 const SDLoc &DL, EVT VT,
2983 SDValue CmpOp0,
2984 SDValue CmpOp1,
2985 SDValue Chain) const {
2986 // There is no hardware support for v4f32 (unless we have the vector
2987 // enhancements facility 1), so extend the vector into two v2f64s
2988 // and compare those.
2989 if (CmpOp0.getValueType() == MVT::v4f32 &&
2990 !Subtarget.hasVectorEnhancements1()) {
2991 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2992 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2993 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2994 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2995 if (Chain) {
2996 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2997 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2998 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2999 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3000 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3001 H1.getValue(1), L1.getValue(1),
3002 HRes.getValue(1), LRes.getValue(1) };
3003 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3004 SDValue Ops[2] = { Res, NewChain };
3005 return DAG.getMergeValues(Ops, DL);
3006 }
3007 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3008 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3009 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3010 }
3011 if (Chain) {
3012 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3013 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3014 }
3015 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3016 }
3017
3018 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3019 // an integer mask of type VT. If Chain is nonnull, we have a strict
3020 // floating-point comparison. If in addition IsSignaling is true, we have
3021 // a strict signaling floating-point comparison.
3022 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3023 const SDLoc &DL, EVT VT,
3024 ISD::CondCode CC,
3025 SDValue CmpOp0,
3026 SDValue CmpOp1,
3027 SDValue Chain,
3028 bool IsSignaling) const {
3029 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3030 assert (!Chain || IsFP);
3031 assert (!IsSignaling || Chain);
3032 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3033 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3034 bool Invert = false;
3035 SDValue Cmp;
3036 switch (CC) {
3037 // Handle tests for order using (or (ogt y x) (oge x y)).
3038 case ISD::SETUO:
3039 Invert = true;
3040 [[fallthrough]];
3041 case ISD::SETO: {
3042 assert(IsFP && "Unexpected integer comparison");
3043 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3044 DL, VT, CmpOp1, CmpOp0, Chain);
3045 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3046 DL, VT, CmpOp0, CmpOp1, Chain);
3047 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3048 if (Chain)
3049 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3050 LT.getValue(1), GE.getValue(1));
3051 break;
3052 }
3053
3054 // Handle <> tests using (or (ogt y x) (ogt x y)).
3055 case ISD::SETUEQ:
3056 Invert = true;
3057 [[fallthrough]];
3058 case ISD::SETONE: {
3059 assert(IsFP && "Unexpected integer comparison");
3060 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3061 DL, VT, CmpOp1, CmpOp0, Chain);
3062 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3063 DL, VT, CmpOp0, CmpOp1, Chain);
3064 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3065 if (Chain)
3066 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3067 LT.getValue(1), GT.getValue(1));
3068 break;
3069 }
3070
3071 // Otherwise a single comparison is enough. It doesn't really
3072 // matter whether we try the inversion or the swap first, since
3073 // there are no cases where both work.
3074 default:
3075 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3076 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3077 else {
3078 CC = ISD::getSetCCSwappedOperands(CC);
3079 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3080 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3081 else
3082 llvm_unreachable("Unhandled comparison");
3083 }
3084 if (Chain)
3085 Chain = Cmp.getValue(1);
3086 break;
3087 }
3088 if (Invert) {
3089 SDValue Mask =
3090 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3091 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3092 }
3093 if (Chain && Chain.getNode() != Cmp.getNode()) {
3094 SDValue Ops[2] = { Cmp, Chain };
3095 Cmp = DAG.getMergeValues(Ops, DL);
3096 }
3097 return Cmp;
3098 }
3099
3100 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3101 SelectionDAG &DAG) const {
3102 SDValue CmpOp0 = Op.getOperand(0);
3103 SDValue CmpOp1 = Op.getOperand(1);
3104 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3105 SDLoc DL(Op);
3106 EVT VT = Op.getValueType();
3107 if (VT.isVector())
3108 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3109
3110 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3111 SDValue CCReg = emitCmp(DAG, DL, C);
3112 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3113 }
3114
3115 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3116 SelectionDAG &DAG,
3117 bool IsSignaling) const {
3118 SDValue Chain = Op.getOperand(0);
3119 SDValue CmpOp0 = Op.getOperand(1);
3120 SDValue CmpOp1 = Op.getOperand(2);
3121 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3122 SDLoc DL(Op);
3123 EVT VT = Op.getNode()->getValueType(0);
3124 if (VT.isVector()) {
3125 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3126 Chain, IsSignaling);
3127 return Res.getValue(Op.getResNo());
3128 }
3129
3130 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3131 SDValue CCReg = emitCmp(DAG, DL, C);
3132 CCReg->setFlags(Op->getFlags());
3133 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3134 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3135 return DAG.getMergeValues(Ops, DL);
3136 }
3137
3138 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3139 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3140 SDValue CmpOp0 = Op.getOperand(2);
3141 SDValue CmpOp1 = Op.getOperand(3);
3142 SDValue Dest = Op.getOperand(4);
3143 SDLoc DL(Op);
3144
3145 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3146 SDValue CCReg = emitCmp(DAG, DL, C);
3147 return DAG.getNode(
3148 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3149 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3150 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3151 }
3152
3153 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3154 // allowing Pos and Neg to be wider than CmpOp.
3155 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3156 return (Neg.getOpcode() == ISD::SUB &&
3157 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3158 cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3159 Neg.getOperand(1) == Pos &&
3160 (Pos == CmpOp ||
3161 (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3162 Pos.getOperand(0) == CmpOp)));
3163 }
3164
3165 // Return the absolute or negative absolute of Op; IsNegative decides which.
3166 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3167 bool IsNegative) {
3168 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3169 if (IsNegative)
3170 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3171 DAG.getConstant(0, DL, Op.getValueType()), Op);
3172 return Op;
3173 }
3174
3175 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3176 SelectionDAG &DAG) const {
3177 SDValue CmpOp0 = Op.getOperand(0);
3178 SDValue CmpOp1 = Op.getOperand(1);
3179 SDValue TrueOp = Op.getOperand(2);
3180 SDValue FalseOp = Op.getOperand(3);
3181 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3182 SDLoc DL(Op);
3183
3184 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3185
3186 // Check for absolute and negative-absolute selections, including those
3187 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3188 // This check supplements the one in DAGCombiner.
3189 if (C.Opcode == SystemZISD::ICMP &&
3190 C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3191 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3192 C.Op1.getOpcode() == ISD::Constant &&
3193 cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3194 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3195 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3196 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3197 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3198 }
3199
3200 SDValue CCReg = emitCmp(DAG, DL, C);
3201 SDValue Ops[] = {TrueOp, FalseOp,
3202 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3203 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3204
3205 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3206 }
3207
3208 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3209 SelectionDAG &DAG) const {
3210 SDLoc DL(Node);
3211 const GlobalValue *GV = Node->getGlobal();
3212 int64_t Offset = Node->getOffset();
3213 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3214 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3215
3216 SDValue Result;
3217 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3218 if (isInt<32>(Offset)) {
3219 // Assign anchors at 1<<12 byte boundaries.
3220 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3221 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3222 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3223
3224 // The offset can be folded into the address if it is aligned to a
3225 // halfword.
3226 Offset -= Anchor;
3227 if (Offset != 0 && (Offset & 1) == 0) {
3228 SDValue Full =
3229 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3230 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3231 Offset = 0;
3232 }
3233 } else {
3234 // Conservatively load a constant offset greater than 32 bits into a
3235 // register below.
3236 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3237 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3238 }
3239 } else {
3240 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3241 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3242 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3243 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3244 }
3245
3246 // If there was a non-zero offset that we didn't fold, create an explicit
3247 // addition for it.
3248 if (Offset != 0)
3249 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3250 DAG.getConstant(Offset, DL, PtrVT));
3251
3252 return Result;
3253 }
3254
3255 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3256 SelectionDAG &DAG,
3257 unsigned Opcode,
3258 SDValue GOTOffset) const {
3259 SDLoc DL(Node);
3260 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3261 SDValue Chain = DAG.getEntryNode();
3262 SDValue Glue;
3263
3264 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3265 CallingConv::GHC)
3266 report_fatal_error("In GHC calling convention TLS is not supported");
3267
3268 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3269 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3270 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3271 Glue = Chain.getValue(1);
3272 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3273 Glue = Chain.getValue(1);
3274
3275 // The first call operand is the chain and the second is the TLS symbol.
3276 SmallVector<SDValue, 8> Ops;
3277 Ops.push_back(Chain);
3278 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3279 Node->getValueType(0),
3280 0, 0));
3281
3282 // Add argument registers to the end of the list so that they are
3283 // known live into the call.
3284 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3285 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3286
3287 // Add a register mask operand representing the call-preserved registers.
3288 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3289 const uint32_t *Mask =
3290 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3291 assert(Mask && "Missing call preserved mask for calling convention");
3292 Ops.push_back(DAG.getRegisterMask(Mask));
3293
3294 // Glue the call to the argument copies.
3295 Ops.push_back(Glue);
3296
3297 // Emit the call.
3298 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3299 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3300 Glue = Chain.getValue(1);
3301
3302 // Copy the return value from %r2.
3303 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3304 }
3305
3306 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3307 SelectionDAG &DAG) const {
3308 SDValue Chain = DAG.getEntryNode();
3309 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3310
3311 // The high part of the thread pointer is in access register 0.
3312 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3313 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3314
3315 // The low part of the thread pointer is in access register 1.
3316 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3317 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3318
3319 // Merge them into a single 64-bit address.
3320 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3321 DAG.getConstant(32, DL, PtrVT));
3322 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3323 }
3324
3325 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3326 SelectionDAG &DAG) const {
3327 if (DAG.getTarget().useEmulatedTLS())
3328 return LowerToTLSEmulatedModel(Node, DAG);
3329 SDLoc DL(Node);
3330 const GlobalValue *GV = Node->getGlobal();
3331 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3332 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3333
3334 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3335 CallingConv::GHC)
3336 report_fatal_error("In GHC calling convention TLS is not supported");
3337
3338 SDValue TP = lowerThreadPointer(DL, DAG);
3339
3340 // Get the offset of GA from the thread pointer, based on the TLS model.
3341 SDValue Offset;
3342 switch (model) {
3343 case TLSModel::GeneralDynamic: {
3344 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3345 SystemZConstantPoolValue *CPV =
3346 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3347
3348 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3349 Offset = DAG.getLoad(
3350 PtrVT, DL, DAG.getEntryNode(), Offset,
3351 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3352
3353 // Call __tls_get_offset to retrieve the offset.
3354 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3355 break;
3356 }
3357
3358 case TLSModel::LocalDynamic: {
3359 // Load the GOT offset of the module ID.
3360 SystemZConstantPoolValue *CPV =
3361 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3362
3363 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3364 Offset = DAG.getLoad(
3365 PtrVT, DL, DAG.getEntryNode(), Offset,
3366 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3367
3368 // Call __tls_get_offset to retrieve the module base offset.
3369 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3370
3371 // Note: The SystemZLDCleanupPass will remove redundant computations
3372 // of the module base offset. Count total number of local-dynamic
3373 // accesses to trigger execution of that pass.
3374 SystemZMachineFunctionInfo* MFI =
3375 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3376 MFI->incNumLocalDynamicTLSAccesses();
3377
3378 // Add the per-symbol offset.
3379 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3380
3381 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3382 DTPOffset = DAG.getLoad(
3383 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3384 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3385
3386 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3387 break;
3388 }
3389
3390 case TLSModel::InitialExec: {
3391 // Load the offset from the GOT.
3392 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3393 SystemZII::MO_INDNTPOFF);
3394 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3395 Offset =
3396 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3397 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3398 break;
3399 }
3400
3401 case TLSModel::LocalExec: {
3402 // Force the offset into the constant pool and load it from there.
3403 SystemZConstantPoolValue *CPV =
3404 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3405
3406 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3407 Offset = DAG.getLoad(
3408 PtrVT, DL, DAG.getEntryNode(), Offset,
3409 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3410 break;
3411 }
3412 }
3413
3414 // Add the base and offset together.
3415 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3416 }
3417
3418 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3419 SelectionDAG &DAG) const {
3420 SDLoc DL(Node);
3421 const BlockAddress *BA = Node->getBlockAddress();
3422 int64_t Offset = Node->getOffset();
3423 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3424
3425 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3426 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3427 return Result;
3428 }
3429
3430 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3431 SelectionDAG &DAG) const {
3432 SDLoc DL(JT);
3433 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3434 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3435
3436 // Use LARL to load the address of the table.
3437 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3438 }
3439
3440 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3441 SelectionDAG &DAG) const {
3442 SDLoc DL(CP);
3443 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3444
3445 SDValue Result;
3446 if (CP->isMachineConstantPoolEntry())
3447 Result =
3448 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3449 else
3450 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3451 CP->getOffset());
3452
3453 // Use LARL to load the address of the constant pool entry.
3454 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3455 }
3456
3457 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3458 SelectionDAG &DAG) const {
3459 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3460 MachineFunction &MF = DAG.getMachineFunction();
3461 MachineFrameInfo &MFI = MF.getFrameInfo();
3462 MFI.setFrameAddressIsTaken(true);
3463
3464 SDLoc DL(Op);
3465 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3466 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3467
3468 // By definition, the frame address is the address of the back chain. (In
3469 // the case of packed stack without backchain, return the address where the
3470 // backchain would have been stored. This will either be an unused space or
3471 // contain a saved register).
3472 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3473 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3474
3475 // FIXME The frontend should detect this case.
3476 if (Depth > 0) {
3477 report_fatal_error("Unsupported stack frame traversal count");
3478 }
3479
3480 return BackChain;
3481 }
3482
3483 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3484 SelectionDAG &DAG) const {
3485 MachineFunction &MF = DAG.getMachineFunction();
3486 MachineFrameInfo &MFI = MF.getFrameInfo();
3487 MFI.setReturnAddressIsTaken(true);
3488
3489 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3490 return SDValue();
3491
3492 SDLoc DL(Op);
3493 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3494 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3495
3496 // FIXME The frontend should detect this case.
3497 if (Depth > 0) {
3498 report_fatal_error("Unsupported stack frame traversal count");
3499 }
3500
3501 // Return R14D, which has the return address. Mark it an implicit live-in.
3502 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3503 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3504 }
3505
3506 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3507 SelectionDAG &DAG) const {
3508 SDLoc DL(Op);
3509 SDValue In = Op.getOperand(0);
3510 EVT InVT = In.getValueType();
3511 EVT ResVT = Op.getValueType();
3512
3513 // Convert loads directly. This is normally done by DAGCombiner,
3514 // but we need this case for bitcasts that are created during lowering
3515 // and which are then lowered themselves.
3516 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3517 if (ISD::isNormalLoad(LoadN)) {
3518 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3519 LoadN->getBasePtr(), LoadN->getMemOperand());
3520 // Update the chain uses.
3521 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3522 return NewLoad;
3523 }
3524
3525 if (InVT == MVT::i32 && ResVT == MVT::f32) {
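    // A 32-bit FP value occupies the high 32 bits of a 64-bit FP register,
    // so insert the i32 into the high half of an i64 and bitcast to f64.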
3526 SDValue In64;
3527 if (Subtarget.hasHighWord()) {
3528 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3529 MVT::i64);
3530 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3531 MVT::i64, SDValue(U64, 0), In);
3532 } else {
3533 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3534 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3535 DAG.getConstant(32, DL, MVT::i64));
3536 }
3537 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3538 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3539 DL, MVT::f32, Out64);
3540 }
3541 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3542 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3543 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3544 MVT::f64, SDValue(U64, 0), In);
3545 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3546 if (Subtarget.hasHighWord())
3547 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3548 MVT::i32, Out64);
3549 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3550 DAG.getConstant(32, DL, MVT::i64));
3551 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3552 }
3553 llvm_unreachable("Unexpected bitcast combination");
3554 }
3555
3556 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3557 SelectionDAG &DAG) const {
3558
3559 if (Subtarget.isTargetXPLINK64())
3560 return lowerVASTART_XPLINK(Op, DAG);
3561 else
3562 return lowerVASTART_ELF(Op, DAG);
3563 }
3564
3565 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3566 SelectionDAG &DAG) const {
3567 MachineFunction &MF = DAG.getMachineFunction();
3568 SystemZMachineFunctionInfo *FuncInfo =
3569 MF.getInfo<SystemZMachineFunctionInfo>();
3570
3571 SDLoc DL(Op);
3572
3573 // vastart just stores the address of the VarArgsFrameIndex slot into the
3574 // memory location argument.
3575 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3576 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3577 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3578 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3579 MachinePointerInfo(SV));
3580 }
3581
3582 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3583 SelectionDAG &DAG) const {
3584 MachineFunction &MF = DAG.getMachineFunction();
3585 SystemZMachineFunctionInfo *FuncInfo =
3586 MF.getInfo<SystemZMachineFunctionInfo>();
3587 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3588
3589 SDValue Chain = Op.getOperand(0);
3590 SDValue Addr = Op.getOperand(1);
3591 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3592 SDLoc DL(Op);
3593
3594 // The initial values of each field.
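  // The ELF s390x va_list has four 8-byte fields: __gpr, __fpr,
  // __overflow_arg_area and __reg_save_area, matching Fields[0..3] below.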
3595 const unsigned NumFields = 4;
3596 SDValue Fields[NumFields] = {
3597 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3598 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3599 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3600 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3601 };
3602
3603 // Store each field into its respective slot.
3604 SDValue MemOps[NumFields];
3605 unsigned Offset = 0;
3606 for (unsigned I = 0; I < NumFields; ++I) {
3607 SDValue FieldAddr = Addr;
3608 if (Offset != 0)
3609 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3610 DAG.getIntPtrConstant(Offset, DL));
3611 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3612 MachinePointerInfo(SV, Offset));
3613 Offset += 8;
3614 }
3615 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3616 }
3617
3618 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3619 SelectionDAG &DAG) const {
3620 SDValue Chain = Op.getOperand(0);
3621 SDValue DstPtr = Op.getOperand(1);
3622 SDValue SrcPtr = Op.getOperand(2);
3623 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3624 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3625 SDLoc DL(Op);
3626
3627 uint32_t Sz =
3628 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3629 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3630 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3631 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3632 MachinePointerInfo(SrcSV));
3633 }
3634
3635 SDValue
3636 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3637 SelectionDAG &DAG) const {
3638 if (Subtarget.isTargetXPLINK64())
3639 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3640 else
3641 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3642 }
3643
3644 SDValue
3645 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3646 SelectionDAG &DAG) const {
3647 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3648 MachineFunction &MF = DAG.getMachineFunction();
3649 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3650 SDValue Chain = Op.getOperand(0);
3651 SDValue Size = Op.getOperand(1);
3652 SDValue Align = Op.getOperand(2);
3653 SDLoc DL(Op);
3654
3655   // If the user has set the "no-realign-stack" function attribute, ignore
3656   // alloca alignments.
3657 uint64_t AlignVal =
3658 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3659
3660 uint64_t StackAlign = TFI->getStackAlignment();
3661 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3662 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3663
3664 SDValue NeededSpace = Size;
3665
3666 // Add extra space for alignment if needed.
3667 EVT PtrVT = getPointerTy(MF.getDataLayout());
3668 if (ExtraAlignSpace)
3669 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3670 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3671
3672 bool IsSigned = false;
3673 bool DoesNotReturn = false;
3674 bool IsReturnValueUsed = false;
3675 EVT VT = Op.getValueType();
3676 SDValue AllocaCall =
3677 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
3678 CallingConv::C, IsSigned, DL, DoesNotReturn,
3679 IsReturnValueUsed)
3680 .first;
3681
3682 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
3683 // to end of call in order to ensure it isn't broken up from the call
3684 // sequence.
3685 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3686 Register SPReg = Regs.getStackPointerRegister();
3687 Chain = AllocaCall.getValue(1);
3688 SDValue Glue = AllocaCall.getValue(2);
3689 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3690 Chain = NewSPRegNode.getValue(1);
3691
3692 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3693 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3694 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3695
3696 // Dynamically realign if needed.
3697 if (ExtraAlignSpace) {
3698 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3699 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3700 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3701 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3702 }
3703
3704 SDValue Ops[2] = {Result, Chain};
3705 return DAG.getMergeValues(Ops, DL);
3706 }
3707
3708 SDValue
3709 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3710 SelectionDAG &DAG) const {
3711 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3712 MachineFunction &MF = DAG.getMachineFunction();
3713 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3714 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3715
3716 SDValue Chain = Op.getOperand(0);
3717 SDValue Size = Op.getOperand(1);
3718 SDValue Align = Op.getOperand(2);
3719 SDLoc DL(Op);
3720
3721   // If the user has set the "no-realign-stack" function attribute, ignore
3722   // alloca alignments.
3723 uint64_t AlignVal =
3724 (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3725
3726 uint64_t StackAlign = TFI->getStackAlignment();
3727 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3728 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3729
3730 Register SPReg = getStackPointerRegisterToSaveRestore();
3731 SDValue NeededSpace = Size;
3732
3733 // Get a reference to the stack pointer.
3734 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3735
3736 // If we need a backchain, save it now.
3737 SDValue Backchain;
3738 if (StoreBackchain)
3739 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3740 MachinePointerInfo());
3741
3742 // Add extra space for alignment if needed.
3743 if (ExtraAlignSpace)
3744 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3745 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3746
3747 // Get the new stack pointer value.
3748 SDValue NewSP;
3749 if (hasInlineStackProbe(MF)) {
3750 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3751 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3752 Chain = NewSP.getValue(1);
3753 }
3754 else {
3755 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3756 // Copy the new stack pointer back.
3757 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3758 }
3759
3760 // The allocated data lives above the 160 bytes allocated for the standard
3761 // frame, plus any outgoing stack arguments. We don't know how much that
3762 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3763 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3764 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3765
3766 // Dynamically realign if needed.
3767 if (RequiredAlign > StackAlign) {
3768 Result =
3769 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3770 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3771 Result =
3772 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3773 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3774 }
3775
3776 if (StoreBackchain)
3777 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3778 MachinePointerInfo());
3779
3780 SDValue Ops[2] = { Result, Chain };
3781 return DAG.getMergeValues(Ops, DL);
3782 }
3783
3784 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3785 SDValue Op, SelectionDAG &DAG) const {
3786 SDLoc DL(Op);
3787
3788 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3789 }
3790
3791 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3792 SelectionDAG &DAG) const {
3793 EVT VT = Op.getValueType();
3794 SDLoc DL(Op);
3795 SDValue Ops[2];
3796 if (is32Bit(VT))
3797 // Just do a normal 64-bit multiplication and extract the results.
3798 // We define this so that it can be used for constant division.
3799 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3800 Op.getOperand(1), Ops[1], Ops[0]);
3801 else if (Subtarget.hasMiscellaneousExtensions2())
3802 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3803 // the high result in the even register. ISD::SMUL_LOHI is defined to
3804 // return the low half first, so the results are in reverse order.
3805 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3806 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3807 else {
3808 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3809 //
3810 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3811 //
3812 // but using the fact that the upper halves are either all zeros
3813 // or all ones:
3814 //
3815 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3816 //
3817 // and grouping the right terms together since they are quicker than the
3818 // multiplication:
3819 //
3820 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3821 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3822 SDValue LL = Op.getOperand(0);
3823 SDValue RL = Op.getOperand(1);
3824 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3825 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3826 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3827 // the high result in the even register. ISD::SMUL_LOHI is defined to
3828 // return the low half first, so the results are in reverse order.
3829 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3830 LL, RL, Ops[1], Ops[0]);
3831 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3832 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3833 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3834 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3835 }
3836 return DAG.getMergeValues(Ops, DL);
3837 }
3838
3839 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3840 SelectionDAG &DAG) const {
3841 EVT VT = Op.getValueType();
3842 SDLoc DL(Op);
3843 SDValue Ops[2];
3844 if (is32Bit(VT))
3845 // Just do a normal 64-bit multiplication and extract the results.
3846 // We define this so that it can be used for constant division.
3847 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3848 Op.getOperand(1), Ops[1], Ops[0]);
3849 else
3850 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3851 // the high result in the even register. ISD::UMUL_LOHI is defined to
3852 // return the low half first, so the results are in reverse order.
3853 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3854 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3855 return DAG.getMergeValues(Ops, DL);
3856 }
3857
3858 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3859 SelectionDAG &DAG) const {
3860 SDValue Op0 = Op.getOperand(0);
3861 SDValue Op1 = Op.getOperand(1);
3862 EVT VT = Op.getValueType();
3863 SDLoc DL(Op);
3864
3865 // We use DSGF for 32-bit division. This means the first operand must
3866 // always be 64-bit, and the second operand should be 32-bit whenever
3867 // that is possible, to improve performance.
3868 if (is32Bit(VT))
3869 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3870 else if (DAG.ComputeNumSignBits(Op1) > 32)
3871 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3872
3873 // DSG(F) returns the remainder in the even register and the
3874 // quotient in the odd register.
3875 SDValue Ops[2];
3876 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3877 return DAG.getMergeValues(Ops, DL);
3878 }
3879
3880 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3881 SelectionDAG &DAG) const {
3882 EVT VT = Op.getValueType();
3883 SDLoc DL(Op);
3884
3885 // DL(G) returns the remainder in the even register and the
3886 // quotient in the odd register.
3887 SDValue Ops[2];
3888 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3889 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3890 return DAG.getMergeValues(Ops, DL);
3891 }
3892
3893 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3894 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3895
3896 // Get the known-zero masks for each operand.
3897 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3898 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3899 DAG.computeKnownBits(Ops[1])};
3900
3901 // See if the upper 32 bits of one operand and the lower 32 bits of the
3902 // other are known zero. They are the low and high operands respectively.
3903 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3904 Known[1].Zero.getZExtValue() };
3905 unsigned High, Low;
3906 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3907 High = 1, Low = 0;
3908 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3909 High = 0, Low = 1;
3910 else
3911 return Op;
3912
3913 SDValue LowOp = Ops[Low];
3914 SDValue HighOp = Ops[High];
3915
3916 // If the high part is a constant, we're better off using IILH.
3917 if (HighOp.getOpcode() == ISD::Constant)
3918 return Op;
3919
3920 // If the low part is a constant that is outside the range of LHI,
3921 // then we're better off using IILF.
3922 if (LowOp.getOpcode() == ISD::Constant) {
3923 int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3924 if (!isInt<16>(Value))
3925 return Op;
3926 }
3927
3928 // Check whether the high part is an AND that doesn't change the
3929 // high 32 bits and just masks out low bits. We can skip it if so.
3930 if (HighOp.getOpcode() == ISD::AND &&
3931 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3932 SDValue HighOp0 = HighOp.getOperand(0);
3933 uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3934 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3935 HighOp = HighOp0;
3936 }
3937
3938 // Take advantage of the fact that all GR32 operations only change the
3939 // low 32 bits by truncating Low to an i32 and inserting it directly
3940 // using a subreg. The interesting cases are those where the truncation
3941 // can be folded.
3942 SDLoc DL(Op);
3943 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3944 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3945 MVT::i64, HighOp, Low32);
3946 }
3947
3948 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3949 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3950 SelectionDAG &DAG) const {
3951 SDNode *N = Op.getNode();
3952 SDValue LHS = N->getOperand(0);
3953 SDValue RHS = N->getOperand(1);
3954 SDLoc DL(N);
3955 unsigned BaseOp = 0;
3956 unsigned CCValid = 0;
3957 unsigned CCMask = 0;
3958
3959 switch (Op.getOpcode()) {
3960 default: llvm_unreachable("Unknown instruction!");
3961 case ISD::SADDO:
3962 BaseOp = SystemZISD::SADDO;
3963 CCValid = SystemZ::CCMASK_ARITH;
3964 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3965 break;
3966 case ISD::SSUBO:
3967 BaseOp = SystemZISD::SSUBO;
3968 CCValid = SystemZ::CCMASK_ARITH;
3969 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3970 break;
3971 case ISD::UADDO:
3972 BaseOp = SystemZISD::UADDO;
3973 CCValid = SystemZ::CCMASK_LOGICAL;
3974 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3975 break;
3976 case ISD::USUBO:
3977 BaseOp = SystemZISD::USUBO;
3978 CCValid = SystemZ::CCMASK_LOGICAL;
3979 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3980 break;
3981 }
3982
3983 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3984 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3985
3986 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3987 if (N->getValueType(1) == MVT::i1)
3988 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3989
3990 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3991 }
3992
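// ADDCARRY/SUBCARRY can only use the SystemZ carry/borrow flag if the incoming
// carry can be traced back, through a chain of ADDCARRY/SUBCARRY nodes, to a
// UADDO/USUBO that produced it; these helpers check that.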
3993 static bool isAddCarryChain(SDValue Carry) {
3994 while (Carry.getOpcode() == ISD::ADDCARRY)
3995 Carry = Carry.getOperand(2);
3996 return Carry.getOpcode() == ISD::UADDO;
3997 }
3998
3999 static bool isSubBorrowChain(SDValue Carry) {
4000 while (Carry.getOpcode() == ISD::SUBCARRY)
4001 Carry = Carry.getOperand(2);
4002 return Carry.getOpcode() == ISD::USUBO;
4003 }
4004
4005 // Lower ADDCARRY/SUBCARRY nodes.
4006 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
4007 SelectionDAG &DAG) const {
4008
4009 SDNode *N = Op.getNode();
4010 MVT VT = N->getSimpleValueType(0);
4011
4012 // Let legalize expand this if it isn't a legal type yet.
4013 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4014 return SDValue();
4015
4016 SDValue LHS = N->getOperand(0);
4017 SDValue RHS = N->getOperand(1);
4018 SDValue Carry = Op.getOperand(2);
4019 SDLoc DL(N);
4020 unsigned BaseOp = 0;
4021 unsigned CCValid = 0;
4022 unsigned CCMask = 0;
4023
4024 switch (Op.getOpcode()) {
4025 default: llvm_unreachable("Unknown instruction!");
4026 case ISD::ADDCARRY:
4027 if (!isAddCarryChain(Carry))
4028 return SDValue();
4029
4030 BaseOp = SystemZISD::ADDCARRY;
4031 CCValid = SystemZ::CCMASK_LOGICAL;
4032 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4033 break;
4034 case ISD::SUBCARRY:
4035 if (!isSubBorrowChain(Carry))
4036 return SDValue();
4037
4038 BaseOp = SystemZISD::SUBCARRY;
4039 CCValid = SystemZ::CCMASK_LOGICAL;
4040 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4041 break;
4042 }
4043
4044 // Set the condition code from the carry flag.
4045 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4046 DAG.getConstant(CCValid, DL, MVT::i32),
4047 DAG.getConstant(CCMask, DL, MVT::i32));
4048
4049 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4050 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4051
4052 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4053 if (N->getValueType(1) == MVT::i1)
4054 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4055
4056 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4057 }
4058
4059 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4060 SelectionDAG &DAG) const {
4061 EVT VT = Op.getValueType();
4062 SDLoc DL(Op);
4063 Op = Op.getOperand(0);
4064
4065 // Handle vector types via VPOPCT.
4066 if (VT.isVector()) {
4067 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4068 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4069 switch (VT.getScalarSizeInBits()) {
4070 case 8:
4071 break;
4072 case 16: {
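      // Add each halfword's two byte counts into its high byte, then shift
      // the sum down into the low byte.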
4073 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4074 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4075 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4076 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4077 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4078 break;
4079 }
4080 case 32: {
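      // VSUM adds the four byte counts within each 32-bit element.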
4081 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4082 DAG.getConstant(0, DL, MVT::i32));
4083 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4084 break;
4085 }
4086 case 64: {
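      // Sum the byte counts into 32-bit elements first, then sum those
      // partial sums into 64-bit elements.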
4087 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4088 DAG.getConstant(0, DL, MVT::i32));
4089 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4090 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4091 break;
4092 }
4093 default:
4094 llvm_unreachable("Unexpected type");
4095 }
4096 return Op;
4097 }
4098
4099 // Get the known-zero mask for the operand.
4100 KnownBits Known = DAG.computeKnownBits(Op);
4101 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4102 if (NumSignificantBits == 0)
4103 return DAG.getConstant(0, DL, VT);
4104
4105 // Skip known-zero high parts of the operand.
4106 int64_t OrigBitSize = VT.getSizeInBits();
4107 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4108 BitSize = std::min(BitSize, OrigBitSize);
4109
4110 // The POPCNT instruction counts the number of bits in each byte.
4111 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4112 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4113 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4114
4115 // Add up per-byte counts in a binary tree. All bits of Op at
4116 // position larger than BitSize remain zero throughout.
4117 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4118 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4119 if (BitSize != OrigBitSize)
4120 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4121 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4122 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4123 }
4124
4125 // Extract overall result from high byte.
4126 if (BitSize > 8)
4127 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4128 DAG.getConstant(BitSize - 8, DL, VT));
4129
4130 return Op;
4131 }
4132
4133 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4134 SelectionDAG &DAG) const {
4135 SDLoc DL(Op);
4136 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
4137 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4138 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
4139 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
4140
4141 // The only fence that needs an instruction is a sequentially-consistent
4142 // cross-thread fence.
4143 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4144 FenceSSID == SyncScope::System) {
4145 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4146 Op.getOperand(0)),
4147 0);
4148 }
4149
4150 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4151 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4152 }
4153
4154 // Op is an atomic load. Lower it into a normal volatile load.
4155 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
4156 SelectionDAG &DAG) const {
4157 auto *Node = cast<AtomicSDNode>(Op.getNode());
4158 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
4159 Node->getChain(), Node->getBasePtr(),
4160 Node->getMemoryVT(), Node->getMemOperand());
4161 }
4162
4163 // Op is an atomic store. Lower it into a normal volatile store.
4164 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
4165 SelectionDAG &DAG) const {
4166 auto *Node = cast<AtomicSDNode>(Op.getNode());
4167 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
4168 Node->getBasePtr(), Node->getMemoryVT(),
4169 Node->getMemOperand());
4170 // We have to enforce sequential consistency by performing a
4171 // serialization operation after the store.
4172 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
4173 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
4174 MVT::Other, Chain), 0);
4175 return Chain;
4176 }
4177
4178 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
4179 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4180 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4181 SelectionDAG &DAG,
4182 unsigned Opcode) const {
4183 auto *Node = cast<AtomicSDNode>(Op.getNode());
4184
4185 // 32-bit operations need no code outside the main loop.
4186 EVT NarrowVT = Node->getMemoryVT();
4187 EVT WideVT = MVT::i32;
4188 if (NarrowVT == WideVT)
4189 return Op;
4190
4191 int64_t BitSize = NarrowVT.getSizeInBits();
4192 SDValue ChainIn = Node->getChain();
4193 SDValue Addr = Node->getBasePtr();
4194 SDValue Src2 = Node->getVal();
4195 MachineMemOperand *MMO = Node->getMemOperand();
4196 SDLoc DL(Node);
4197 EVT PtrVT = Addr.getValueType();
4198
4199 // Convert atomic subtracts of constants into additions.
4200 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4201 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4202 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4203 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4204 }
4205
4206 // Get the address of the containing word.
4207 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4208 DAG.getConstant(-4, DL, PtrVT));
4209
4210 // Get the number of bits that the word must be rotated left in order
4211 // to bring the field to the top bits of a GR32.
4212 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4213 DAG.getConstant(3, DL, PtrVT));
4214 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4215
4216 // Get the complementing shift amount, for rotating a field in the top
4217 // bits back to its proper position.
4218 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4219 DAG.getConstant(0, DL, WideVT), BitShift);
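// E.g. for a halfword at byte offset 2 within its aligned word, BitShift is
// 16 (only the low bits of 8 * Addr matter for a rotate) and NegBitShift is
// -16: rotating left by 16 brings the field into the top 16 bits of the
// GR32, and rotating by NegBitShift moves it back.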
4220
4221 // Extend the source operand to 32 bits and prepare it for the inner loop.
4222 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4223 // operations require the source to be shifted in advance. (This shift
4224 // can be folded if the source is constant.) For AND and NAND, the lower
4225 // bits must be set, while for other opcodes they should be left clear.
4226 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4227 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4228 DAG.getConstant(32 - BitSize, DL, WideVT));
4229 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4230 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4231 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4232 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
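// E.g. an 8-bit AND has its operand shifted into the top byte and the low
// 24 bits set to ones (uint32_t(-1) >> 8 == 0x00ffffff), so the AND inside
// the loop leaves the other three bytes of the containing word unchanged.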
4233
4234 // Construct the ATOMIC_LOADW_* node.
4235 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4236 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4237 DAG.getConstant(BitSize, DL, WideVT) };
4238 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4239 NarrowVT, MMO);
4240
4241 // Rotate the result of the final CS so that the field is in the lower
4242 // bits of a GR32, then truncate it.
4243 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4244 DAG.getConstant(BitSize, DL, WideVT));
4245 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4246
4247 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4248 return DAG.getMergeValues(RetOps, DL);
4249 }
4250
4251 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
4252 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
4253 // operations into additions.
4254 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4255 SelectionDAG &DAG) const {
4256 auto *Node = cast<AtomicSDNode>(Op.getNode());
4257 EVT MemVT = Node->getMemoryVT();
4258 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4259 // A full-width operation.
4260 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4261 SDValue Src2 = Node->getVal();
4262 SDValue NegSrc2;
4263 SDLoc DL(Src2);
4264
4265 if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
4266 // Use an addition if the operand is constant and either LAA(G) is
4267 // available or the negative value is in the range of A(G)FHI.
4268 int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
4269 if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
4270 NegSrc2 = DAG.getConstant(Value, DL, MemVT);
4271 } else if (Subtarget.hasInterlockedAccess1())
4272 // Use LAA(G) if available.
4273 NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
4274 Src2);
4275
4276 if (NegSrc2.getNode())
4277 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4278 Node->getChain(), Node->getBasePtr(), NegSrc2,
4279 Node->getMemOperand());
4280
4281 // Use the node as-is.
4282 return Op;
4283 }
4284
4285 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4286 }
4287
4288 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4289 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4290 SelectionDAG &DAG) const {
4291 auto *Node = cast<AtomicSDNode>(Op.getNode());
4292 SDValue ChainIn = Node->getOperand(0);
4293 SDValue Addr = Node->getOperand(1);
4294 SDValue CmpVal = Node->getOperand(2);
4295 SDValue SwapVal = Node->getOperand(3);
4296 MachineMemOperand *MMO = Node->getMemOperand();
4297 SDLoc DL(Node);
4298
4299 // We have native support for 32-bit and 64-bit compare and swap, but we
4300 // still need to expand extracting the "success" result from the CC.
4301 EVT NarrowVT = Node->getMemoryVT();
4302 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4303 if (NarrowVT == WideVT) {
4304 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4305 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4306 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4307 DL, Tys, Ops, NarrowVT, MMO);
4308 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4309 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4310
4311 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4312 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4313 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4314 return SDValue();
4315 }
4316
4317 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4318 // via a fullword ATOMIC_CMP_SWAPW operation.
4319 int64_t BitSize = NarrowVT.getSizeInBits();
4320 EVT PtrVT = Addr.getValueType();
4321
4322 // Get the address of the containing word.
4323 SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4324 DAG.getConstant(-4, DL, PtrVT));
4325
4326 // Get the number of bits that the word must be rotated left in order
4327 // to bring the field to the top bits of a GR32.
4328 SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4329 DAG.getConstant(3, DL, PtrVT));
4330 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4331
4332 // Get the complementing shift amount, for rotating a field in the top
4333 // bits back to its proper position.
4334 SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4335 DAG.getConstant(0, DL, WideVT), BitShift);
4336
4337 // Construct the ATOMIC_CMP_SWAPW node.
4338 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4339 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4340 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4341 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4342 VTList, Ops, NarrowVT, MMO);
4343 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4344 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4345
4346 // emitAtomicCmpSwapW() will zero extend the result (original value).
4347 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4348 DAG.getValueType(NarrowVT));
4349 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4350 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4351 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4352 return SDValue();
4353 }
4354
4355 MachineMemOperand::Flags
4356 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4357 // Because of how we convert atomic_load and atomic_store to normal loads and
4358 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4359 // since DAGCombine hasn't been updated to account for atomic, but
4360 // non-volatile, loads. (See D57601)
4361 if (auto *SI = dyn_cast<StoreInst>(&I))
4362 if (SI->isAtomic())
4363 return MachineMemOperand::MOVolatile;
4364 if (auto *LI = dyn_cast<LoadInst>(&I))
4365 if (LI->isAtomic())
4366 return MachineMemOperand::MOVolatile;
4367 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4368 if (AI->isAtomic())
4369 return MachineMemOperand::MOVolatile;
4370 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4371 if (AI->isAtomic())
4372 return MachineMemOperand::MOVolatile;
4373 return MachineMemOperand::MONone;
4374 }
4375
4376 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4377 SelectionDAG &DAG) const {
4378 MachineFunction &MF = DAG.getMachineFunction();
4379 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4380 auto *Regs = Subtarget->getSpecialRegisters();
4381 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4382 report_fatal_error("Variable-sized stack allocations are not supported "
4383 "in GHC calling convention");
4384 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4385 Regs->getStackPointerRegister(), Op.getValueType());
4386 }
4387
4388 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4389 SelectionDAG &DAG) const {
4390 MachineFunction &MF = DAG.getMachineFunction();
4391 const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
4392 auto *Regs = Subtarget->getSpecialRegisters();
4393 bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
4394
4395 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4396 report_fatal_error("Variable-sized stack allocations are not supported "
4397 "in GHC calling convention");
4398
4399 SDValue Chain = Op.getOperand(0);
4400 SDValue NewSP = Op.getOperand(1);
4401 SDValue Backchain;
4402 SDLoc DL(Op);
4403
4404 if (StoreBackchain) {
4405 SDValue OldSP = DAG.getCopyFromReg(
4406 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4407 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4408 MachinePointerInfo());
4409 }
4410
4411 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4412
4413 if (StoreBackchain)
4414 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4415 MachinePointerInfo());
4416
4417 return Chain;
4418 }
4419
4420 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4421 SelectionDAG &DAG) const {
4422 bool IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
4423 if (!IsData)
4424 // Just preserve the chain.
4425 return Op.getOperand(0);
4426
4427 SDLoc DL(Op);
4428 bool IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
4429 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4430 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4431 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4432 Op.getOperand(1)};
4433 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4434 Node->getVTList(), Ops,
4435 Node->getMemoryVT(), Node->getMemOperand());
4436 }
4437
4438 // Convert condition code in CCReg to an i32 value.
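// IPM inserts the two-bit condition code at bit position SystemZ::IPM_CC of
// the result, so shifting right by that amount below leaves the CC as a
// plain value in the range 0-3.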
4439 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4440 SDLoc DL(CCReg);
4441 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4442 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4443 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4444 }
4445
4446 SDValue
4447 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4448 SelectionDAG &DAG) const {
4449 unsigned Opcode, CCValid;
4450 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4451 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4452 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4453 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4454 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4455 return SDValue();
4456 }
4457
4458 return SDValue();
4459 }
4460
4461 SDValue
4462 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4463 SelectionDAG &DAG) const {
4464 unsigned Opcode, CCValid;
4465 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4466 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4467 if (Op->getNumValues() == 1)
4468 return getCCResult(DAG, SDValue(Node, 0));
4469 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4470 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4471 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4472 }
4473
4474 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4475 switch (Id) {
4476 case Intrinsic::thread_pointer:
4477 return lowerThreadPointer(SDLoc(Op), DAG);
4478
4479 case Intrinsic::s390_vpdi:
4480 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4481 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4482
4483 case Intrinsic::s390_vperm:
4484 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4485 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4486
4487 case Intrinsic::s390_vuphb:
4488 case Intrinsic::s390_vuphh:
4489 case Intrinsic::s390_vuphf:
4490 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4491 Op.getOperand(1));
4492
4493 case Intrinsic::s390_vuplhb:
4494 case Intrinsic::s390_vuplhh:
4495 case Intrinsic::s390_vuplhf:
4496 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4497 Op.getOperand(1));
4498
4499 case Intrinsic::s390_vuplb:
4500 case Intrinsic::s390_vuplhw:
4501 case Intrinsic::s390_vuplf:
4502 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4503 Op.getOperand(1));
4504
4505 case Intrinsic::s390_vupllb:
4506 case Intrinsic::s390_vupllh:
4507 case Intrinsic::s390_vupllf:
4508 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4509 Op.getOperand(1));
4510
4511 case Intrinsic::s390_vsumb:
4512 case Intrinsic::s390_vsumh:
4513 case Intrinsic::s390_vsumgh:
4514 case Intrinsic::s390_vsumgf:
4515 case Intrinsic::s390_vsumqf:
4516 case Intrinsic::s390_vsumqg:
4517 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4518 Op.getOperand(1), Op.getOperand(2));
4519 }
4520
4521 return SDValue();
4522 }
4523
4524 namespace {
4525 // Says that SystemZISD operation Opcode can be used to perform the equivalent
4526 // of a VPERM with permute vector Bytes. If Opcode takes three operands,
4527 // Operand is the constant third operand, otherwise it is the number of
4528 // bytes in each element of the result.
4529 struct Permute {
4530 unsigned Opcode;
4531 unsigned Operand;
4532 unsigned char Bytes[SystemZ::VectorBytes];
4533 };
4534 } // end anonymous namespace
4535
4536 static const Permute PermuteForms[] = {
4537 // VMRHG
4538 { SystemZISD::MERGE_HIGH, 8,
4539 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4540 // VMRHF
4541 { SystemZISD::MERGE_HIGH, 4,
4542 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4543 // VMRHH
4544 { SystemZISD::MERGE_HIGH, 2,
4545 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4546 // VMRHB
4547 { SystemZISD::MERGE_HIGH, 1,
4548 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4549 // VMRLG
4550 { SystemZISD::MERGE_LOW, 8,
4551 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4552 // VMRLF
4553 { SystemZISD::MERGE_LOW, 4,
4554 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4555 // VMRLH
4556 { SystemZISD::MERGE_LOW, 2,
4557 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4558 // VMRLB
4559 { SystemZISD::MERGE_LOW, 1,
4560 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4561 // VPKG
4562 { SystemZISD::PACK, 4,
4563 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4564 // VPKF
4565 { SystemZISD::PACK, 2,
4566 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4567 // VPKH
4568 { SystemZISD::PACK, 1,
4569 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4570 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4571 { SystemZISD::PERMUTE_DWORDS, 4,
4572 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4573 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4574 { SystemZISD::PERMUTE_DWORDS, 1,
4575 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4576 };
4577
4578 // Called after matching a vector shuffle against a particular pattern.
4579 // Both the original shuffle and the pattern have two vector operands.
4580 // OpNos[0] is the operand of the original shuffle that should be used for
4581 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4582 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4583 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4584 // for operands 0 and 1 of the pattern.
4585 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4586 if (OpNos[0] < 0) {
4587 if (OpNos[1] < 0)
4588 return false;
4589 OpNo0 = OpNo1 = OpNos[1];
4590 } else if (OpNos[1] < 0) {
4591 OpNo0 = OpNo1 = OpNos[0];
4592 } else {
4593 OpNo0 = OpNos[0];
4594 OpNo1 = OpNos[1];
4595 }
4596 return true;
4597 }
4598
4599 // Bytes is a VPERM-like permute vector, except that -1 is used for
4600 // undefined bytes. Return true if the VPERM can be implemented using P.
4601 // When returning true set OpNo0 to the VPERM operand that should be
4602 // used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4603 //
4604 // For example, if swapping the VPERM operands allows P to match, OpNo0
4605 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4606 // operand, but rewriting it to use two duplicated operands allows it to
4607 // match P, then OpNo0 and OpNo1 will be the same.
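// E.g. Bytes = { 16..23, 0..7 } matches the VMRHG pattern { 0..7, 16..23 }
// with OpNo0 = 1 and OpNo1 = 0, i.e. with the VPERM operands swapped.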
4608 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4609 unsigned &OpNo0, unsigned &OpNo1) {
4610 int OpNos[] = { -1, -1 };
4611 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4612 int Elt = Bytes[I];
4613 if (Elt >= 0) {
4614 // Make sure that the two permute vectors use the same suboperand
4615 // byte number. Only the operand numbers (the high bits) are
4616 // allowed to differ.
4617 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
4618 return false;
4619 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
4620 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
4621 // Make sure that the operand mappings are consistent with previous
4622 // elements.
4623 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4624 return false;
4625 OpNos[ModelOpNo] = RealOpNo;
4626 }
4627 }
4628 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4629 }
4630
4631 // As above, but search for a matching permute.
4632 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
4633 unsigned &OpNo0, unsigned &OpNo1) {
4634 for (auto &P : PermuteForms)
4635 if (matchPermute(Bytes, P, OpNo0, OpNo1))
4636 return &P;
4637 return nullptr;
4638 }
4639
4640 // Bytes is a VPERM-like permute vector, except that -1 is used for
4641 // undefined bytes. This permute is an operand of an outer permute.
4642 // See whether redistributing the -1 bytes gives a shuffle that can be
4643 // implemented using P. If so, set Transform to a VPERM-like permute vector
4644 // that, when applied to the result of P, gives the original permute in Bytes.
4645 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4646 const Permute &P,
4647 SmallVectorImpl<int> &Transform) {
4648 unsigned To = 0;
4649 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
4650 int Elt = Bytes[From];
4651 if (Elt < 0)
4652 // Byte number From of the result is undefined.
4653 Transform[From] = -1;
4654 else {
4655 while (P.Bytes[To] != Elt) {
4656 To += 1;
4657 if (To == SystemZ::VectorBytes)
4658 return false;
4659 }
4660 Transform[From] = To;
4661 }
4662 }
4663 return true;
4664 }
4665
4666 // As above, but search for a matching permute.
4667 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
4668 SmallVectorImpl<int> &Transform) {
4669 for (auto &P : PermuteForms)
4670 if (matchDoublePermute(Bytes, P, Transform))
4671 return &P;
4672 return nullptr;
4673 }
4674
4675 // Convert the mask of the given shuffle op into a byte-level mask,
4676 // as if it had type vNi8.
4677 static bool getVPermMask(SDValue ShuffleOp,
4678 SmallVectorImpl<int> &Bytes) {
4679 EVT VT = ShuffleOp.getValueType();
4680 unsigned NumElements = VT.getVectorNumElements();
4681 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4682
4683 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
4684 Bytes.resize(NumElements * BytesPerElement, -1);
4685 for (unsigned I = 0; I < NumElements; ++I) {
4686 int Index = VSN->getMaskElt(I);
4687 if (Index >= 0)
4688 for (unsigned J = 0; J < BytesPerElement; ++J)
4689 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4690 }
4691 return true;
4692 }
4693 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
4694 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
4695 unsigned Index = ShuffleOp.getConstantOperandVal(1);
4696 Bytes.resize(NumElements * BytesPerElement, -1);
4697 for (unsigned I = 0; I < NumElements; ++I)
4698 for (unsigned J = 0; J < BytesPerElement; ++J)
4699 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
4700 return true;
4701 }
4702 return false;
4703 }
4704
4705 // Bytes is a VPERM-like permute vector, except that -1 is used for
4706 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
4707 // the result come from a contiguous sequence of bytes from one input.
4708 // Set Base to the selector for the first byte if so.
4709 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
4710 unsigned BytesPerElement, int &Base) {
4711 Base = -1;
4712 for (unsigned I = 0; I < BytesPerElement; ++I) {
4713 if (Bytes[Start + I] >= 0) {
4714 unsigned Elem = Bytes[Start + I];
4715 if (Base < 0) {
4716 Base = Elem - I;
4717 // Make sure the bytes would come from one input operand.
4718 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
4719 return false;
4720 } else if (unsigned(Base) != Elem - I)
4721 return false;
4722 }
4723 }
4724 return true;
4725 }
4726
4727 // Bytes is a VPERM-like permute vector, except that -1 is used for
4728 // undefined bytes. Return true if it can be performed using VSLDB.
4729 // When returning true, set StartIndex to the shift amount and OpNo0
4730 // and OpNo1 to the VPERM operands that should be used as the first
4731 // and second shift operand respectively.
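// E.g. Bytes = { 1, 2, ..., 16 } is a double-vector shift left by one byte:
// StartIndex is 1, bytes 1-15 come from operand 0 and byte 16 from operand 1,
// so OpNo0 = 0 and OpNo1 = 1.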
4732 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
4733 unsigned &StartIndex, unsigned &OpNo0,
4734 unsigned &OpNo1) {
4735 int OpNos[] = { -1, -1 };
4736 int Shift = -1;
4737 for (unsigned I = 0; I < 16; ++I) {
4738 int Index = Bytes[I];
4739 if (Index >= 0) {
4740 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
4741 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
4742 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
4743 if (Shift < 0)
4744 Shift = ExpectedShift;
4745 else if (Shift != ExpectedShift)
4746 return false;
4747 // Make sure that the operand mappings are consistent with previous
4748 // elements.
4749 if (OpNos[ModelOpNo] == 1 - RealOpNo)
4750 return false;
4751 OpNos[ModelOpNo] = RealOpNo;
4752 }
4753 }
4754 StartIndex = Shift;
4755 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
4756 }
4757
4758 // Create a node that performs P on operands Op0 and Op1, casting the
4759 // operands to the appropriate type. The type of the result is determined by P.
4760 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4761 const Permute &P, SDValue Op0, SDValue Op1) {
4762 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
4763 // elements of a PACK are twice as wide as the outputs.
4764 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
4765 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
4766 P.Operand);
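// E.g. for VPKG (PACK with Operand == 4) both inputs are cast to v2i64 and
// the PACK below produces a v4i32; for VPDI the inputs are always v2i64.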
4767 // Cast both operands to the appropriate type.
4768 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
4769 SystemZ::VectorBytes / InBytes);
4770 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
4771 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
4772 SDValue Op;
4773 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
4774 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
4775 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
4776 } else if (P.Opcode == SystemZISD::PACK) {
4777 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
4778 SystemZ::VectorBytes / P.Operand);
4779 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
4780 } else {
4781 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
4782 }
4783 return Op;
4784 }
4785
4786 static bool isZeroVector(SDValue N) {
4787 if (N->getOpcode() == ISD::BITCAST)
4788 N = N->getOperand(0);
4789 if (N->getOpcode() == ISD::SPLAT_VECTOR)
4790 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
4791 return Op->getZExtValue() == 0;
4792 return ISD::isBuildVectorAllZeros(N.getNode());
4793 }
4794
4795 // Return the index of the zero/undef vector, or UINT32_MAX if not found.
4796 static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
4797 for (unsigned I = 0; I < Num ; I++)
4798 if (isZeroVector(Ops[I]))
4799 return I;
4800 return UINT32_MAX;
4801 }
4802
4803 // Bytes is a VPERM-like permute vector, except that -1 is used for
4804 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using
4805 // VSLDB or VPERM.
4806 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
4807 SDValue *Ops,
4808 const SmallVectorImpl<int> &Bytes) {
4809 for (unsigned I = 0; I < 2; ++I)
4810 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
4811
4812 // First see whether VSLDB can be used.
4813 unsigned StartIndex, OpNo0, OpNo1;
4814 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
4815 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
4816 Ops[OpNo1],
4817 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
4818
4819 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
4820 // eliminate a zero vector by reusing any zero index in the permute vector.
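// VPERM reads from the 32 bytes of its two source operands, so a known zero
// operand can often be dropped: result bytes that must be zero are made to
// select a mask byte whose value is itself zero, letting the mask register
// double as the zero source.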
4821 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
4822 if (ZeroVecIdx != UINT32_MAX) {
4823 bool MaskFirst = true;
4824 int ZeroIdx = -1;
4825 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4826 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4827 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4828 if (OpNo == ZeroVecIdx && I == 0) {
4829 // If the first byte is zero, use mask as first operand.
4830 ZeroIdx = 0;
4831 break;
4832 }
4833 if (OpNo != ZeroVecIdx && Byte == 0) {
4834 // If mask contains a zero, use it by placing that vector first.
4835 ZeroIdx = I + SystemZ::VectorBytes;
4836 MaskFirst = false;
4837 break;
4838 }
4839 }
4840 if (ZeroIdx != -1) {
4841 SDValue IndexNodes[SystemZ::VectorBytes];
4842 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
4843 if (Bytes[I] >= 0) {
4844 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
4845 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
4846 if (OpNo == ZeroVecIdx)
4847 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
4848 else {
4849 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
4850 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
4851 }
4852 } else
4853 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4854 }
4855 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4856 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
4857 if (MaskFirst)
4858 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
4859 Mask);
4860 else
4861 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
4862 Mask);
4863 }
4864 }
4865
4866 SDValue IndexNodes[SystemZ::VectorBytes];
4867 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
4868 if (Bytes[I] >= 0)
4869 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
4870 else
4871 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
4872 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
4873 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
4874 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
4875 }
4876
4877 namespace {
4878 // Describes a general N-operand vector shuffle.
4879 struct GeneralShuffle {
4880 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
4881 void addUndef();
4882 bool add(SDValue, unsigned);
4883 SDValue getNode(SelectionDAG &, const SDLoc &);
4884 void tryPrepareForUnpack();
4885 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
4886 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
4887
4888 // The operands of the shuffle.
4889 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
4890
4891 // Index I is -1 if byte I of the result is undefined. Otherwise the
4892 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
4893 // Bytes[I] / SystemZ::VectorBytes.
4894 SmallVector<int, SystemZ::VectorBytes> Bytes;
4895
4896 // The type of the shuffle result.
4897 EVT VT;
4898
4899 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
4900 unsigned UnpackFromEltSize;
4901 };
4902 } // end anonymous namespace
4903
4904 // Add an extra undefined element to the shuffle.
4905 void GeneralShuffle::addUndef() {
4906 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4907 for (unsigned I = 0; I < BytesPerElement; ++I)
4908 Bytes.push_back(-1);
4909 }
4910
4911 // Add an extra element to the shuffle, taking it from element Elem of Op.
4912 // A null Op indicates a vector input whose value will be calculated later;
4913 // there is at most one such input per shuffle and it always has the same
4914 // type as the result. Returns false if the source vector elements of an
4915 // EXTRACT_VECTOR_ELT are smaller than the destination elements (per LLVM
4916 // semantics they are implicitly extended, but this is rare and not optimized).
4917 bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
4918 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
4919
4920 // The source vector can have wider elements than the result,
4921 // either through an explicit TRUNCATE or because of type legalization.
4922 // We want the least significant part.
4923 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
4924 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
4925
4926 // Return false if the source elements are smaller than their destination
4927 // elements.
4928 if (FromBytesPerElement < BytesPerElement)
4929 return false;
4930
4931 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
4932 (FromBytesPerElement - BytesPerElement));
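// E.g. taking element 1 of a v4i32 source for a v8i16 result gives
// Byte == 1 * 4 % 16 + (4 - 2) == 6, i.e. the least significant two bytes
// of that i32 element (the element occupies bytes 4-7, big-endian).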
4933
4934 // Look through things like shuffles and bitcasts.
4935 while (Op.getNode()) {
4936 if (Op.getOpcode() == ISD::BITCAST)
4937 Op = Op.getOperand(0);
4938 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
4939 // See whether the bytes we need come from a contiguous part of one
4940 // operand.
4941 SmallVector<int, SystemZ::VectorBytes> OpBytes;
4942 if (!getVPermMask(Op, OpBytes))
4943 break;
4944 int NewByte;
4945 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
4946 break;
4947 if (NewByte < 0) {
4948 addUndef();
4949 return true;
4950 }
4951 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
4952 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
4953 } else if (Op.isUndef()) {
4954 addUndef();
4955 return true;
4956 } else
4957 break;
4958 }
4959
4960 // Make sure that the source of the extraction is in Ops.
4961 unsigned OpNo = 0;
4962 for (; OpNo < Ops.size(); ++OpNo)
4963 if (Ops[OpNo] == Op)
4964 break;
4965 if (OpNo == Ops.size())
4966 Ops.push_back(Op);
4967
4968 // Add the element to Bytes.
4969 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
4970 for (unsigned I = 0; I < BytesPerElement; ++I)
4971 Bytes.push_back(Base + I);
4972
4973 return true;
4974 }
4975
4976 // Return SDNodes for the completed shuffle.
4977 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
4978 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
4979
4980 if (Ops.size() == 0)
4981 return DAG.getUNDEF(VT);
4982
4983 // Use a single unpack if possible as the last operation.
4984 tryPrepareForUnpack();
4985
4986 // Make sure that there are at least two shuffle operands.
4987 if (Ops.size() == 1)
4988 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
4989
4990 // Create a tree of shuffles, deferring root node until after the loop.
4991 // Try to redistribute the undefined elements of non-root nodes so that
4992 // the non-root shuffles match something like a pack or merge, then adjust
4993 // the parent node's permute vector to compensate for the new order.
4994 // Among other things, this copes with vectors like <2 x i16> that were
4995 // padded with undefined elements during type legalization.
4996 //
4997 // In the best case this redistribution will lead to the whole tree
4998 // using packs and merges. It should rarely be a loss in other cases.
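// E.g. with four operands the first pass below combines Ops[0] with Ops[1]
// and Ops[2] with Ops[3], so the final permute only ever sees two inputs.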
4999 unsigned Stride = 1;
5000 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5001 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5002 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5003
5004 // Create a mask for just these two operands.
5005 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5006 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5007 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5008 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5009 if (OpNo == I)
5010 NewBytes[J] = Byte;
5011 else if (OpNo == I + Stride)
5012 NewBytes[J] = SystemZ::VectorBytes + Byte;
5013 else
5014 NewBytes[J] = -1;
5015 }
5016 // See if it would be better to reorganize NewBytes to avoid using VPERM.
5017 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5018 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5019 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5020 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5021 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5022 if (NewBytes[J] >= 0) {
5023 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5024 "Invalid double permute");
5025 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5026 } else
5027 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5028 }
5029 } else {
5030 // Just use NewBytes on the operands.
5031 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5032 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5033 if (NewBytes[J] >= 0)
5034 Bytes[J] = I * SystemZ::VectorBytes + J;
5035 }
5036 }
5037 }
5038
5039 // Now we just have 2 inputs. Put the second operand in Ops[1].
5040 if (Stride > 1) {
5041 Ops[1] = Ops[Stride];
5042 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5043 if (Bytes[I] >= int(SystemZ::VectorBytes))
5044 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5045 }
5046
5047 // Look for an instruction that can do the permute without resorting
5048 // to VPERM.
5049 unsigned OpNo0, OpNo1;
5050 SDValue Op;
5051 if (unpackWasPrepared() && Ops[1].isUndef())
5052 Op = Ops[0];
5053 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5054 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5055 else
5056 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5057
5058 Op = insertUnpackIfPrepared(DAG, DL, Op);
5059
5060 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5061 }
5062
5063 #ifndef NDEBUG
5064 static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5065 dbgs() << Msg.c_str() << " { ";
5066 for (unsigned i = 0; i < Bytes.size(); i++)
5067 dbgs() << Bytes[i] << " ";
5068 dbgs() << "}\n";
5069 }
5070 #endif
5071
5072 // If the Bytes vector matches an unpack operation, prepare to do the unpack
5073 // after all else by removing the zero vector and the effect of the unpack on
5074 // Bytes.
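// E.g. a byte-to-halfword zero extension interleaves a zero byte with each
// source byte; once the zero vector and those interleaved positions are
// removed from Bytes, the remaining shuffle only has to produce the source
// bytes in the high half of the vector, and the final UNPACKL_HIGH
// re-inserts the zeros.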
5075 void GeneralShuffle::tryPrepareForUnpack() {
5076 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5077 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5078 return;
5079
5080 // Only do this if removing the zero vector reduces the depth, otherwise
5081 // the critical path will increase with the final unpack.
5082 if (Ops.size() > 2 &&
5083 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5084 return;
5085
5086 // Find an unpack that would allow removing the zero vector from Ops.
5087 UnpackFromEltSize = 1;
5088 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5089 bool MatchUnpack = true;
5090 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5091 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5092 unsigned ToEltSize = UnpackFromEltSize * 2;
5093 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5094 if (!IsZextByte)
5095 SrcBytes.push_back(Bytes[Elt]);
5096 if (Bytes[Elt] != -1) {
5097 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5098 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5099 MatchUnpack = false;
5100 break;
5101 }
5102 }
5103 }
5104 if (MatchUnpack) {
5105 if (Ops.size() == 2) {
5106 // Don't use unpack if a single source operand needs rearrangement.
5107 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5108 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5109 UnpackFromEltSize = UINT_MAX;
5110 return;
5111 }
5112 }
5113 break;
5114 }
5115 }
5116 if (UnpackFromEltSize > 4)
5117 return;
5118
5119 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5120 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5121 << ".\n";
5122 dumpBytes(Bytes, "Original Bytes vector:"););
5123
5124 // Apply the unpack in reverse to the Bytes array.
5125 unsigned B = 0;
5126 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5127 Elt += UnpackFromEltSize;
5128 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5129 Bytes[B] = Bytes[Elt];
5130 }
5131 while (B < SystemZ::VectorBytes)
5132 Bytes[B++] = -1;
5133
5134 // Remove the zero vector from Ops
5135 Ops.erase(&Ops[ZeroVecOpNo]);
5136 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5137 if (Bytes[I] >= 0) {
5138 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5139 if (OpNo > ZeroVecOpNo)
5140 Bytes[I] -= SystemZ::VectorBytes;
5141 }
5142
5143 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5144 dbgs() << "\n";);
5145 }
5146
5147 SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5148 const SDLoc &DL,
5149 SDValue Op) {
5150 if (!unpackWasPrepared())
5151 return Op;
5152 unsigned InBits = UnpackFromEltSize * 8;
5153 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5154 SystemZ::VectorBits / InBits);
5155 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5156 unsigned OutBits = InBits * 2;
5157 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5158 SystemZ::VectorBits / OutBits);
5159 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5160 }
5161
5162 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5163 static bool isScalarToVector(SDValue Op) {
5164 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5165 if (!Op.getOperand(I).isUndef())
5166 return false;
5167 return true;
5168 }
5169
5170 // Return a vector of type VT that contains Value in the first element.
5171 // The other elements don't matter.
5172 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5173 SDValue Value) {
5174 // If we have a constant, replicate it to all elements and let the
5175 // BUILD_VECTOR lowering take care of it.
5176 if (Value.getOpcode() == ISD::Constant ||
5177 Value.getOpcode() == ISD::ConstantFP) {
5178 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5179 return DAG.getBuildVector(VT, DL, Ops);
5180 }
5181 if (Value.isUndef())
5182 return DAG.getUNDEF(VT);
5183 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5184 }
5185
5186 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5187 // element 1. Used for cases in which replication is cheap.
5188 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5189 SDValue Op0, SDValue Op1) {
5190 if (Op0.isUndef()) {
5191 if (Op1.isUndef())
5192 return DAG.getUNDEF(VT);
5193 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5194 }
5195 if (Op1.isUndef())
5196 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5197 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5198 buildScalarToVector(DAG, DL, VT, Op0),
5199 buildScalarToVector(DAG, DL, VT, Op1));
5200 }
5201
5202 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5203 // vector for them.
5204 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5205 SDValue Op1) {
5206 if (Op0.isUndef() && Op1.isUndef())
5207 return DAG.getUNDEF(MVT::v2i64);
5208 // If one of the two inputs is undefined then replicate the other one,
5209 // in order to avoid using another register unnecessarily.
5210 if (Op0.isUndef())
5211 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5212 else if (Op1.isUndef())
5213 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5214 else {
5215 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5216 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5217 }
5218 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5219 }
5220
5221 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5222 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5223 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5224 // would benefit from this representation and return it if so.
5225 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5226 BuildVectorSDNode *BVN) {
5227 EVT VT = BVN->getValueType(0);
5228 unsigned NumElements = VT.getVectorNumElements();
5229
5230 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5231 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5232 // need a BUILD_VECTOR, add an additional placeholder operand for that
5233 // BUILD_VECTOR and store its operands in ResidueOps.
5234 GeneralShuffle GS(VT);
5235 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5236 bool FoundOne = false;
5237 for (unsigned I = 0; I < NumElements; ++I) {
5238 SDValue Op = BVN->getOperand(I);
5239 if (Op.getOpcode() == ISD::TRUNCATE)
5240 Op = Op.getOperand(0);
5241 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5242 Op.getOperand(1).getOpcode() == ISD::Constant) {
5243 unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
5244 if (!GS.add(Op.getOperand(0), Elem))
5245 return SDValue();
5246 FoundOne = true;
5247 } else if (Op.isUndef()) {
5248 GS.addUndef();
5249 } else {
5250 if (!GS.add(SDValue(), ResidueOps.size()))
5251 return SDValue();
5252 ResidueOps.push_back(BVN->getOperand(I));
5253 }
5254 }
5255
5256 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5257 if (!FoundOne)
5258 return SDValue();
5259
5260 // Create the BUILD_VECTOR for the remaining elements, if any.
5261 if (!ResidueOps.empty()) {
5262 while (ResidueOps.size() < NumElements)
5263 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5264 for (auto &Op : GS.Ops) {
5265 if (!Op.getNode()) {
5266 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5267 break;
5268 }
5269 }
5270 }
5271 return GS.getNode(DAG, SDLoc(BVN));
5272 }
5273
5274 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5275 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5276 return true;
5277 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5278 return true;
5279 return false;
5280 }
5281
5282 // Combine GPR scalar values Elems into a vector of type VT.
5283 SDValue
5284 SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5285 SmallVectorImpl<SDValue> &Elems) const {
5286 // See whether there is a single replicated value.
5287 SDValue Single;
5288 unsigned int NumElements = Elems.size();
5289 unsigned int Count = 0;
5290 for (auto Elem : Elems) {
5291 if (!Elem.isUndef()) {
5292 if (!Single.getNode())
5293 Single = Elem;
5294 else if (Elem != Single) {
5295 Single = SDValue();
5296 break;
5297 }
5298 Count += 1;
5299 }
5300 }
5301 // There are three cases here:
5302 //
5303 // - if the only defined element is a loaded one, the best sequence
5304 // is a replicating load.
5305 //
5306 // - otherwise, if the only defined element is an i64 value, we will
5307 // end up with the same VLVGP sequence regardless of whether we short-cut
5308 // for replication or fall through to the later code.
5309 //
5310 // - otherwise, if the only defined element is an i32 or smaller value,
5311 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5312 // This is only a win if the single defined element is used more than once.
5313 // In other cases we're better off using a single VLVGx.
5314 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5315 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5316
5317 // If all elements are loads, use VLREP/VLEs (below).
5318 bool AllLoads = true;
5319 for (auto Elem : Elems)
5320 if (!isVectorElementLoad(Elem)) {
5321 AllLoads = false;
5322 break;
5323 }
5324
5325 // The best way of building a v2i64 from two i64s is to use VLVGP.
5326 if (VT == MVT::v2i64 && !AllLoads)
5327 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5328
5329 // Use a 64-bit merge high to combine two doubles.
5330 if (VT == MVT::v2f64 && !AllLoads)
5331 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5332
5333 // Build v4f32 values directly from the FPRs:
5334 //
5335 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
5336 // V V VMRHF
5337 // <ABxx> <CDxx>
5338 // V VMRHG
5339 // <ABCD>
5340 if (VT == MVT::v4f32 && !AllLoads) {
5341 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5342 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5343 // Avoid unnecessary undefs by reusing the other operand.
5344 if (Op01.isUndef())
5345 Op01 = Op23;
5346 else if (Op23.isUndef())
5347 Op23 = Op01;
5348 // Merging identical replications is a no-op.
5349 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5350 return Op01;
5351 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5352 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5353 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5354 DL, MVT::v2i64, Op01, Op23);
5355 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5356 }
5357
5358 // Collect the constant terms.
5359 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5360 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
5361
5362 unsigned NumConstants = 0;
5363 for (unsigned I = 0; I < NumElements; ++I) {
5364 SDValue Elem = Elems[I];
5365 if (Elem.getOpcode() == ISD::Constant ||
5366 Elem.getOpcode() == ISD::ConstantFP) {
5367 NumConstants += 1;
5368 Constants[I] = Elem;
5369 Done[I] = true;
5370 }
5371 }
5372 // If there was at least one constant, fill in the other elements of
5373 // Constants with undefs to get a full vector constant and use that
5374 // as the starting point.
5375 SDValue Result;
5376 SDValue ReplicatedVal;
5377 if (NumConstants > 0) {
5378 for (unsigned I = 0; I < NumElements; ++I)
5379 if (!Constants[I].getNode())
5380 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5381 Result = DAG.getBuildVector(VT, DL, Constants);
5382 } else {
5383 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5384 // avoid a false dependency on any previous contents of the vector
5385 // register.
5386
5387 // Use a VLREP if at least one element is a load. Make sure to replicate
5388 // the load with the most elements having its value.
5389 std::map<const SDNode*, unsigned> UseCounts;
5390 SDNode *LoadMaxUses = nullptr;
5391 for (unsigned I = 0; I < NumElements; ++I)
5392 if (isVectorElementLoad(Elems[I])) {
5393 SDNode *Ld = Elems[I].getNode();
5394 UseCounts[Ld]++;
5395 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5396 LoadMaxUses = Ld;
5397 }
5398 if (LoadMaxUses != nullptr) {
5399 ReplicatedVal = SDValue(LoadMaxUses, 0);
5400 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5401 } else {
5402 // Try to use VLVGP.
5403 unsigned I1 = NumElements / 2 - 1;
5404 unsigned I2 = NumElements - 1;
5405 bool Def1 = !Elems[I1].isUndef();
5406 bool Def2 = !Elems[I2].isUndef();
5407 if (Def1 || Def2) {
5408 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5409 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5410 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5411 joinDwords(DAG, DL, Elem1, Elem2));
5412 Done[I1] = true;
5413 Done[I2] = true;
5414 } else
5415 Result = DAG.getUNDEF(VT);
5416 }
5417 }
5418
5419 // Use VLVGx to insert the other elements.
5420 for (unsigned I = 0; I < NumElements; ++I)
5421 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5422 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5423 DAG.getConstant(I, DL, MVT::i32));
5424 return Result;
5425 }
5426
5427 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5428 SelectionDAG &DAG) const {
5429 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5430 SDLoc DL(Op);
5431 EVT VT = Op.getValueType();
5432
5433 if (BVN->isConstant()) {
5434 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5435 return Op;
5436
5437 // Fall back to loading it from memory.
5438 return SDValue();
5439 }
5440
5441 // See if we should use shuffles to construct the vector from other vectors.
5442 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5443 return Res;
5444
5445 // Detect SCALAR_TO_VECTOR conversions.
5446 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
5447 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5448
5449 // Otherwise use buildVector to build the vector up from GPRs.
5450 unsigned NumElements = Op.getNumOperands();
5451 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5452 for (unsigned I = 0; I < NumElements; ++I)
5453 Ops[I] = Op.getOperand(I);
5454 return buildVector(DAG, DL, VT, Ops);
5455 }
5456
5457 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5458 SelectionDAG &DAG) const {
5459 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5460 SDLoc DL(Op);
5461 EVT VT = Op.getValueType();
5462 unsigned NumElements = VT.getVectorNumElements();
5463
5464 if (VSN->isSplat()) {
5465 SDValue Op0 = Op.getOperand(0);
5466 unsigned Index = VSN->getSplatIndex();
5467 assert(Index < VT.getVectorNumElements() &&
5468 "Splat index should be defined and in first operand");
5469 // See whether the value we're splatting is directly available as a scalar.
5470 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5471 Op0.getOpcode() == ISD::BUILD_VECTOR)
5472 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5473 // Otherwise keep it as a vector-to-vector operation.
5474 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5475 DAG.getTargetConstant(Index, DL, MVT::i32));
5476 }
5477
5478 GeneralShuffle GS(VT);
5479 for (unsigned I = 0; I < NumElements; ++I) {
5480 int Elt = VSN->getMaskElt(I);
5481 if (Elt < 0)
5482 GS.addUndef();
5483 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5484 unsigned(Elt) % NumElements))
5485 return SDValue();
5486 }
5487 return GS.getNode(DAG, SDLoc(VSN));
5488 }
5489
5490 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5491 SelectionDAG &DAG) const {
5492 SDLoc DL(Op);
5493 // Just insert the scalar into element 0 of an undefined vector.
5494 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5495 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5496 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5497 }
5498
5499 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5500 SelectionDAG &DAG) const {
5501 // Handle insertions of floating-point values.
5502 SDLoc DL(Op);
5503 SDValue Op0 = Op.getOperand(0);
5504 SDValue Op1 = Op.getOperand(1);
5505 SDValue Op2 = Op.getOperand(2);
5506 EVT VT = Op.getValueType();
5507
5508 // Insertions into constant indices of a v2f64 can be done using VPDI.
5509 // However, if the inserted value is a bitcast or a constant then it's
5510 // better to use GPRs, as below.
5511 if (VT == MVT::v2f64 &&
5512 Op1.getOpcode() != ISD::BITCAST &&
5513 Op1.getOpcode() != ISD::ConstantFP &&
5514 Op2.getOpcode() == ISD::Constant) {
5515 uint64_t Index = cast<ConstantSDNode>(Op2)->getZExtValue();
5516 unsigned Mask = VT.getVectorNumElements() - 1;
5517 if (Index <= Mask)
5518 return Op;
5519 }
5520
5521 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5522 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5523 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5524 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5525 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5526 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5527 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5528 }
5529
5530 SDValue
5531 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5532 SelectionDAG &DAG) const {
5533 // Handle extractions of floating-point values.
5534 SDLoc DL(Op);
5535 SDValue Op0 = Op.getOperand(0);
5536 SDValue Op1 = Op.getOperand(1);
5537 EVT VT = Op.getValueType();
5538 EVT VecVT = Op0.getValueType();
5539
5540 // Extractions of constant indices can be done directly.
5541 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5542 uint64_t Index = CIndexN->getZExtValue();
5543 unsigned Mask = VecVT.getVectorNumElements() - 1;
5544 if (Index <= Mask)
5545 return Op;
5546 }
5547
5548 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5549 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5550 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5551 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5552 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5553 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5554 }
5555
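// Lower SIGN_EXTEND_VECTOR_INREG by repeatedly unpacking the high part of
// the input with the sign-extending UNPACK_HIGH, doubling the element width
// on each step until the result element width is reached (e.g. a v16i8
// input goes through v8i16 to v4i32 when a v4i32 result is requested).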
5556 SDValue SystemZTargetLowering::
5557 lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5558 SDValue PackedOp = Op.getOperand(0);
5559 EVT OutVT = Op.getValueType();
5560 EVT InVT = PackedOp.getValueType();
5561 unsigned ToBits = OutVT.getScalarSizeInBits();
5562 unsigned FromBits = InVT.getScalarSizeInBits();
5563 do {
5564 FromBits *= 2;
5565 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5566 SystemZ::VectorBits / FromBits);
5567 PackedOp =
5568 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5569 } while (FromBits != ToBits);
5570 return PackedOp;
5571 }
5572
5573 // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5574 SDValue SystemZTargetLowering::
5575 lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5576 SDValue PackedOp = Op.getOperand(0);
5577 SDLoc DL(Op);
5578 EVT OutVT = Op.getValueType();
5579 EVT InVT = PackedOp.getValueType();
5580 unsigned InNumElts = InVT.getVectorNumElements();
5581 unsigned OutNumElts = OutVT.getVectorNumElements();
5582 unsigned NumInPerOut = InNumElts / OutNumElts;
5583
5584 SDValue ZeroVec =
5585 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5586
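  // Build a mask that interleaves zero-vector elements with the input
  // elements so that, on this big-endian target, each output element gets
  // zeros in its high part.  E.g. for a v4i32 input extended to v2i64
  // (NumInPerOut == 2) the mask is <4, 0, 5, 1>.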
5587 SmallVector<int, 16> Mask(InNumElts);
5588 unsigned ZeroVecElt = InNumElts;
5589 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5590 unsigned MaskElt = PackedElt * NumInPerOut;
5591 unsigned End = MaskElt + NumInPerOut - 1;
5592 for (; MaskElt < End; MaskElt++)
5593 Mask[MaskElt] = ZeroVecElt++;
5594 Mask[MaskElt] = PackedElt;
5595 }
5596 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5597 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5598 }
5599
5600 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5601 unsigned ByScalar) const {
5602 // Look for cases where a vector shift can use the *_BY_SCALAR form.
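  // E.g. (shl v4i32 X, (splat 3)) becomes (VSHL_BY_SCALAR X, 3), so the
  // shift amount can live in an immediate or a GPR instead of a vector.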
5603 SDValue Op0 = Op.getOperand(0);
5604 SDValue Op1 = Op.getOperand(1);
5605 SDLoc DL(Op);
5606 EVT VT = Op.getValueType();
5607 unsigned ElemBitSize = VT.getScalarSizeInBits();
5608
5609 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5610 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5611 APInt SplatBits, SplatUndef;
5612 unsigned SplatBitSize;
5613 bool HasAnyUndefs;
5614 // Check for constant splats. Use ElemBitSize as the minimum element
5615 // width and reject splats that need wider elements.
5616 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
5617 ElemBitSize, true) &&
5618 SplatBitSize == ElemBitSize) {
5619 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
5620 DL, MVT::i32);
5621 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5622 }
5623 // Check for variable splats.
5624 BitVector UndefElements;
5625 SDValue Splat = BVN->getSplatValue(&UndefElements);
5626 if (Splat) {
5627 // Since i32 is the smallest legal type, we either need a no-op
5628 // or a truncation.
5629 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
5630 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5631 }
5632 }
5633
5634 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
5635 // and the shift amount is directly available in a GPR.
5636 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
5637 if (VSN->isSplat()) {
5638 SDValue VSNOp0 = VSN->getOperand(0);
5639 unsigned Index = VSN->getSplatIndex();
5640 assert(Index < VT.getVectorNumElements() &&
5641 "Splat index should be defined and in first operand");
5642 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5643 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
5644 // Since i32 is the smallest legal type, we either need a no-op
5645 // or a truncation.
5646 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
5647 VSNOp0.getOperand(Index));
5648 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
5649 }
5650 }
5651 }
5652
5653 // Otherwise just treat the current form as legal.
5654 return Op;
5655 }
5656
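// Lower IS_FPCLASS by translating the requested fcXxx class bits into the
// mask expected by the Test Data Class (TDC) instruction and emitting a
// SystemZISD::TDC node, whose CC result is then converted to the i1 result.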
5657 SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
5658 SelectionDAG &DAG) const {
5659 SDLoc DL(Op);
5660 MVT ResultVT = Op.getSimpleValueType();
5661 SDValue Arg = Op.getOperand(0);
5662 auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
5663 unsigned Check = CNode->getZExtValue();
5664
5665 unsigned TDCMask = 0;
5666 if (Check & fcSNan)
5667 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
5668 if (Check & fcQNan)
5669 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
5670 if (Check & fcPosInf)
5671 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
5672 if (Check & fcNegInf)
5673 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
5674 if (Check & fcPosNormal)
5675 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
5676 if (Check & fcNegNormal)
5677 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
5678 if (Check & fcPosSubnormal)
5679 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
5680 if (Check & fcNegSubnormal)
5681 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
5682 if (Check & fcPosZero)
5683 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
5684 if (Check & fcNegZero)
5685 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
5686 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
5687
5688 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
5689 return getCCResult(DAG, Intr);
5690 }
5691
5692 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
5693 SelectionDAG &DAG) const {
5694 switch (Op.getOpcode()) {
5695 case ISD::FRAMEADDR:
5696 return lowerFRAMEADDR(Op, DAG);
5697 case ISD::RETURNADDR:
5698 return lowerRETURNADDR(Op, DAG);
5699 case ISD::BR_CC:
5700 return lowerBR_CC(Op, DAG);
5701 case ISD::SELECT_CC:
5702 return lowerSELECT_CC(Op, DAG);
5703 case ISD::SETCC:
5704 return lowerSETCC(Op, DAG);
5705 case ISD::STRICT_FSETCC:
5706 return lowerSTRICT_FSETCC(Op, DAG, false);
5707 case ISD::STRICT_FSETCCS:
5708 return lowerSTRICT_FSETCC(Op, DAG, true);
5709 case ISD::GlobalAddress:
5710 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
5711 case ISD::GlobalTLSAddress:
5712 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
5713 case ISD::BlockAddress:
5714 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
5715 case ISD::JumpTable:
5716 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
5717 case ISD::ConstantPool:
5718 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
5719 case ISD::BITCAST:
5720 return lowerBITCAST(Op, DAG);
5721 case ISD::VASTART:
5722 return lowerVASTART(Op, DAG);
5723 case ISD::VACOPY:
5724 return lowerVACOPY(Op, DAG);
5725 case ISD::DYNAMIC_STACKALLOC:
5726 return lowerDYNAMIC_STACKALLOC(Op, DAG);
5727 case ISD::GET_DYNAMIC_AREA_OFFSET:
5728 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
5729 case ISD::SMUL_LOHI:
5730 return lowerSMUL_LOHI(Op, DAG);
5731 case ISD::UMUL_LOHI:
5732 return lowerUMUL_LOHI(Op, DAG);
5733 case ISD::SDIVREM:
5734 return lowerSDIVREM(Op, DAG);
5735 case ISD::UDIVREM:
5736 return lowerUDIVREM(Op, DAG);
5737 case ISD::SADDO:
5738 case ISD::SSUBO:
5739 case ISD::UADDO:
5740 case ISD::USUBO:
5741 return lowerXALUO(Op, DAG);
5742 case ISD::ADDCARRY:
5743 case ISD::SUBCARRY:
5744 return lowerADDSUBCARRY(Op, DAG);
5745 case ISD::OR:
5746 return lowerOR(Op, DAG);
5747 case ISD::CTPOP:
5748 return lowerCTPOP(Op, DAG);
5749 case ISD::ATOMIC_FENCE:
5750 return lowerATOMIC_FENCE(Op, DAG);
5751 case ISD::ATOMIC_SWAP:
5752 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
5753 case ISD::ATOMIC_STORE:
5754 return lowerATOMIC_STORE(Op, DAG);
5755 case ISD::ATOMIC_LOAD:
5756 return lowerATOMIC_LOAD(Op, DAG);
5757 case ISD::ATOMIC_LOAD_ADD:
5758 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
5759 case ISD::ATOMIC_LOAD_SUB:
5760 return lowerATOMIC_LOAD_SUB(Op, DAG);
5761 case ISD::ATOMIC_LOAD_AND:
5762 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
5763 case ISD::ATOMIC_LOAD_OR:
5764 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
5765 case ISD::ATOMIC_LOAD_XOR:
5766 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
5767 case ISD::ATOMIC_LOAD_NAND:
5768 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
5769 case ISD::ATOMIC_LOAD_MIN:
5770 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
5771 case ISD::ATOMIC_LOAD_MAX:
5772 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
5773 case ISD::ATOMIC_LOAD_UMIN:
5774 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
5775 case ISD::ATOMIC_LOAD_UMAX:
5776 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
5777 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
5778 return lowerATOMIC_CMP_SWAP(Op, DAG);
5779 case ISD::STACKSAVE:
5780 return lowerSTACKSAVE(Op, DAG);
5781 case ISD::STACKRESTORE:
5782 return lowerSTACKRESTORE(Op, DAG);
5783 case ISD::PREFETCH:
5784 return lowerPREFETCH(Op, DAG);
5785 case ISD::INTRINSIC_W_CHAIN:
5786 return lowerINTRINSIC_W_CHAIN(Op, DAG);
5787 case ISD::INTRINSIC_WO_CHAIN:
5788 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
5789 case ISD::BUILD_VECTOR:
5790 return lowerBUILD_VECTOR(Op, DAG);
5791 case ISD::VECTOR_SHUFFLE:
5792 return lowerVECTOR_SHUFFLE(Op, DAG);
5793 case ISD::SCALAR_TO_VECTOR:
5794 return lowerSCALAR_TO_VECTOR(Op, DAG);
5795 case ISD::INSERT_VECTOR_ELT:
5796 return lowerINSERT_VECTOR_ELT(Op, DAG);
5797 case ISD::EXTRACT_VECTOR_ELT:
5798 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
5799 case ISD::SIGN_EXTEND_VECTOR_INREG:
5800 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
5801 case ISD::ZERO_EXTEND_VECTOR_INREG:
5802 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
5803 case ISD::SHL:
5804 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
5805 case ISD::SRL:
5806 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
5807 case ISD::SRA:
5808 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
5809 case ISD::IS_FPCLASS:
5810 return lowerIS_FPCLASS(Op, DAG);
5811 case ISD::GET_ROUNDING:
5812 return lowerGET_ROUNDING(Op, DAG);
5813 default:
5814 llvm_unreachable("Unexpected node to lower");
5815 }
5816 }
5817
5818 // Lower operations with invalid operand or result types (currently used
5819 // only for 128-bit integer types).
5820 void
5821 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
5822 SmallVectorImpl<SDValue> &Results,
5823 SelectionDAG &DAG) const {
5824 switch (N->getOpcode()) {
5825 case ISD::ATOMIC_LOAD: {
5826 SDLoc DL(N);
5827 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
5828 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
5829 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5830 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
5831 DL, Tys, Ops, MVT::i128, MMO);
5832 Results.push_back(lowerGR128ToI128(DAG, Res));
5833 Results.push_back(Res.getValue(1));
5834 break;
5835 }
5836 case ISD::ATOMIC_STORE: {
5837 SDLoc DL(N);
5838 SDVTList Tys = DAG.getVTList(MVT::Other);
5839 SDValue Ops[] = { N->getOperand(0),
5840 lowerI128ToGR128(DAG, N->getOperand(2)),
5841 N->getOperand(1) };
5842 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5843 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
5844 DL, Tys, Ops, MVT::i128, MMO);
5845 // We have to enforce sequential consistency by performing a
5846 // serialization operation after the store.
5847 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
5848 AtomicOrdering::SequentiallyConsistent)
5849 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
5850 MVT::Other, Res), 0);
5851 Results.push_back(Res);
5852 break;
5853 }
5854 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
5855 SDLoc DL(N);
5856 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
5857 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
5858 lowerI128ToGR128(DAG, N->getOperand(2)),
5859 lowerI128ToGR128(DAG, N->getOperand(3)) };
5860 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
5861 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
5862 DL, Tys, Ops, MVT::i128, MMO);
5863 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
5864 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5865 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
5866 Results.push_back(lowerGR128ToI128(DAG, Res));
5867 Results.push_back(Success);
5868 Results.push_back(Res.getValue(2));
5869 break;
5870 }
5871 case ISD::BITCAST: {
5872 SDValue Src = N->getOperand(0);
5873 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
5874 !useSoftFloat()) {
5875 SDLoc DL(N);
5876 SDValue Lo, Hi;
5877 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
5878 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
5879 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5880 DAG.getConstant(1, DL, MVT::i32));
5881 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
5882 DAG.getConstant(0, DL, MVT::i32));
5883 } else {
5884 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
5885 "Unrecognized register class for f128.");
5886 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
5887 DL, MVT::f64, Src);
5888 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
5889 DL, MVT::f64, Src);
5890 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
5891 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
5892 }
5893 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
5894 }
5895 break;
5896 }
5897 default:
5898 llvm_unreachable("Unexpected node to lower");
5899 }
5900 }
5901
5902 void
5903 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
5904 SmallVectorImpl<SDValue> &Results,
5905 SelectionDAG &DAG) const {
5906 return LowerOperationWrapper(N, Results, DAG);
5907 }
5908
5909 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
5910 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
5911 switch ((SystemZISD::NodeType)Opcode) {
5912 case SystemZISD::FIRST_NUMBER: break;
5913 OPCODE(RET_FLAG);
5914 OPCODE(CALL);
5915 OPCODE(SIBCALL);
5916 OPCODE(TLS_GDCALL);
5917 OPCODE(TLS_LDCALL);
5918 OPCODE(PCREL_WRAPPER);
5919 OPCODE(PCREL_OFFSET);
5920 OPCODE(ICMP);
5921 OPCODE(FCMP);
5922 OPCODE(STRICT_FCMP);
5923 OPCODE(STRICT_FCMPS);
5924 OPCODE(TM);
5925 OPCODE(BR_CCMASK);
5926 OPCODE(SELECT_CCMASK);
5927 OPCODE(ADJDYNALLOC);
5928 OPCODE(PROBED_ALLOCA);
5929 OPCODE(POPCNT);
5930 OPCODE(SMUL_LOHI);
5931 OPCODE(UMUL_LOHI);
5932 OPCODE(SDIVREM);
5933 OPCODE(UDIVREM);
5934 OPCODE(SADDO);
5935 OPCODE(SSUBO);
5936 OPCODE(UADDO);
5937 OPCODE(USUBO);
5938 OPCODE(ADDCARRY);
5939 OPCODE(SUBCARRY);
5940 OPCODE(GET_CCMASK);
5941 OPCODE(MVC);
5942 OPCODE(NC);
5943 OPCODE(OC);
5944 OPCODE(XC);
5945 OPCODE(CLC);
5946 OPCODE(MEMSET_MVC);
5947 OPCODE(STPCPY);
5948 OPCODE(STRCMP);
5949 OPCODE(SEARCH_STRING);
5950 OPCODE(IPM);
5951 OPCODE(TBEGIN);
5952 OPCODE(TBEGIN_NOFLOAT);
5953 OPCODE(TEND);
5954 OPCODE(BYTE_MASK);
5955 OPCODE(ROTATE_MASK);
5956 OPCODE(REPLICATE);
5957 OPCODE(JOIN_DWORDS);
5958 OPCODE(SPLAT);
5959 OPCODE(MERGE_HIGH);
5960 OPCODE(MERGE_LOW);
5961 OPCODE(SHL_DOUBLE);
5962 OPCODE(PERMUTE_DWORDS);
5963 OPCODE(PERMUTE);
5964 OPCODE(PACK);
5965 OPCODE(PACKS_CC);
5966 OPCODE(PACKLS_CC);
5967 OPCODE(UNPACK_HIGH);
5968 OPCODE(UNPACKL_HIGH);
5969 OPCODE(UNPACK_LOW);
5970 OPCODE(UNPACKL_LOW);
5971 OPCODE(VSHL_BY_SCALAR);
5972 OPCODE(VSRL_BY_SCALAR);
5973 OPCODE(VSRA_BY_SCALAR);
5974 OPCODE(VSUM);
5975 OPCODE(VICMPE);
5976 OPCODE(VICMPH);
5977 OPCODE(VICMPHL);
5978 OPCODE(VICMPES);
5979 OPCODE(VICMPHS);
5980 OPCODE(VICMPHLS);
5981 OPCODE(VFCMPE);
5982 OPCODE(STRICT_VFCMPE);
5983 OPCODE(STRICT_VFCMPES);
5984 OPCODE(VFCMPH);
5985 OPCODE(STRICT_VFCMPH);
5986 OPCODE(STRICT_VFCMPHS);
5987 OPCODE(VFCMPHE);
5988 OPCODE(STRICT_VFCMPHE);
5989 OPCODE(STRICT_VFCMPHES);
5990 OPCODE(VFCMPES);
5991 OPCODE(VFCMPHS);
5992 OPCODE(VFCMPHES);
5993 OPCODE(VFTCI);
5994 OPCODE(VEXTEND);
5995 OPCODE(STRICT_VEXTEND);
5996 OPCODE(VROUND);
5997 OPCODE(STRICT_VROUND);
5998 OPCODE(VTM);
5999 OPCODE(VFAE_CC);
6000 OPCODE(VFAEZ_CC);
6001 OPCODE(VFEE_CC);
6002 OPCODE(VFEEZ_CC);
6003 OPCODE(VFENE_CC);
6004 OPCODE(VFENEZ_CC);
6005 OPCODE(VISTR_CC);
6006 OPCODE(VSTRC_CC);
6007 OPCODE(VSTRCZ_CC);
6008 OPCODE(VSTRS_CC);
6009 OPCODE(VSTRSZ_CC);
6010 OPCODE(TDC);
6011 OPCODE(ATOMIC_SWAPW);
6012 OPCODE(ATOMIC_LOADW_ADD);
6013 OPCODE(ATOMIC_LOADW_SUB);
6014 OPCODE(ATOMIC_LOADW_AND);
6015 OPCODE(ATOMIC_LOADW_OR);
6016 OPCODE(ATOMIC_LOADW_XOR);
6017 OPCODE(ATOMIC_LOADW_NAND);
6018 OPCODE(ATOMIC_LOADW_MIN);
6019 OPCODE(ATOMIC_LOADW_MAX);
6020 OPCODE(ATOMIC_LOADW_UMIN);
6021 OPCODE(ATOMIC_LOADW_UMAX);
6022 OPCODE(ATOMIC_CMP_SWAPW);
6023 OPCODE(ATOMIC_CMP_SWAP);
6024 OPCODE(ATOMIC_LOAD_128);
6025 OPCODE(ATOMIC_STORE_128);
6026 OPCODE(ATOMIC_CMP_SWAP_128);
6027 OPCODE(LRV);
6028 OPCODE(STRV);
6029 OPCODE(VLER);
6030 OPCODE(VSTER);
6031 OPCODE(PREFETCH);
6032 }
6033 return nullptr;
6034 #undef OPCODE
6035 }
6036
6037 // Return true if VT is a vector whose elements are a whole number of bytes
6038 // in width. Also check for presence of vector support.
6039 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6040 if (!Subtarget.hasVector())
6041 return false;
6042
6043 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6044 }
6045
6046 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6047 // producing a result of type ResVT. Op is a possibly bitcast version
6048 // of the input vector and Index is the index (based on type VecVT) that
6049 // should be extracted. Return the new extraction if a simplification
6050 // was possible or if Force is true.
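// For example, if Op is a VECTOR_SHUFFLE whose bytes 4..7 (i32 element 1)
// all come contiguously from bytes 8..11 of one shuffle input, the
// extraction can instead be done as an extraction of i32 element 2 of that
// input.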
6051 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6052 EVT VecVT, SDValue Op,
6053 unsigned Index,
6054 DAGCombinerInfo &DCI,
6055 bool Force) const {
6056 SelectionDAG &DAG = DCI.DAG;
6057
6058 // The number of bytes being extracted.
6059 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6060
6061 for (;;) {
6062 unsigned Opcode = Op.getOpcode();
6063 if (Opcode == ISD::BITCAST)
6064 // Look through bitcasts.
6065 Op = Op.getOperand(0);
6066 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6067 canTreatAsByteVector(Op.getValueType())) {
6068 // Get a VPERM-like permute mask and see whether the bytes covered
6069 // by the extracted element are a contiguous sequence from one
6070 // source operand.
6071 SmallVector<int, SystemZ::VectorBytes> Bytes;
6072 if (!getVPermMask(Op, Bytes))
6073 break;
6074 int First;
6075 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6076 BytesPerElement, First))
6077 break;
6078 if (First < 0)
6079 return DAG.getUNDEF(ResVT);
6080 // Make sure the contiguous sequence starts at a multiple of the
6081 // original element size.
6082 unsigned Byte = unsigned(First) % Bytes.size();
6083 if (Byte % BytesPerElement != 0)
6084 break;
6085 // We can get the extracted value directly from an input.
6086 Index = Byte / BytesPerElement;
6087 Op = Op.getOperand(unsigned(First) / Bytes.size());
6088 Force = true;
6089 } else if (Opcode == ISD::BUILD_VECTOR &&
6090 canTreatAsByteVector(Op.getValueType())) {
6091 // We can only optimize this case if the BUILD_VECTOR elements are
6092 // at least as wide as the extracted value.
6093 EVT OpVT = Op.getValueType();
6094 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6095 if (OpBytesPerElement < BytesPerElement)
6096 break;
6097       // Make sure that the least-significant bit of the extracted value
6098       // is the least-significant bit of an input.
6099 unsigned End = (Index + 1) * BytesPerElement;
6100 if (End % OpBytesPerElement != 0)
6101 break;
6102 // We're extracting the low part of one operand of the BUILD_VECTOR.
6103 Op = Op.getOperand(End / OpBytesPerElement - 1);
6104 if (!Op.getValueType().isInteger()) {
6105 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6106 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6107 DCI.AddToWorklist(Op.getNode());
6108 }
6109 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6110 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6111 if (VT != ResVT) {
6112 DCI.AddToWorklist(Op.getNode());
6113 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6114 }
6115 return Op;
6116 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6117 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6118 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6119 canTreatAsByteVector(Op.getValueType()) &&
6120 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6121 // Make sure that only the unextended bits are significant.
6122 EVT ExtVT = Op.getValueType();
6123 EVT OpVT = Op.getOperand(0).getValueType();
6124 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6125 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6126 unsigned Byte = Index * BytesPerElement;
6127 unsigned SubByte = Byte % ExtBytesPerElement;
6128 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6129 if (SubByte < MinSubByte ||
6130 SubByte + BytesPerElement > ExtBytesPerElement)
6131 break;
6132 // Get the byte offset of the unextended element
6133 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6134 // ...then add the byte offset relative to that element.
6135 Byte += SubByte - MinSubByte;
6136 if (Byte % BytesPerElement != 0)
6137 break;
6138 Op = Op.getOperand(0);
6139 Index = Byte / BytesPerElement;
6140 Force = true;
6141 } else
6142 break;
6143 }
6144 if (Force) {
6145 if (Op.getValueType() != VecVT) {
6146 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6147 DCI.AddToWorklist(Op.getNode());
6148 }
6149 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6150 DAG.getConstant(Index, DL, MVT::i32));
6151 }
6152 return SDValue();
6153 }
6154
6155 // Optimize vector operations in scalar value Op on the basis that Op
6156 // is truncated to TruncVT.
6157 SDValue SystemZTargetLowering::combineTruncateExtract(
6158 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6159 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6160 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6161 // of type TruncVT.
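  // E.g. truncating (extract_vector_elt v4i32 X, 1) to i8 becomes
  // (extract_vector_elt (bitcast X to v16i8), 7): the byte that holds the
  // truncated value is the last (least-significant) byte of the original
  // element.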
6162 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6163 TruncVT.getSizeInBits() % 8 == 0) {
6164 SDValue Vec = Op.getOperand(0);
6165 EVT VecVT = Vec.getValueType();
6166 if (canTreatAsByteVector(VecVT)) {
6167 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6168 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6169 unsigned TruncBytes = TruncVT.getStoreSize();
6170 if (BytesPerElement % TruncBytes == 0) {
6171 // Calculate the value of Y' in the above description. We are
6172 // splitting the original elements into Scale equal-sized pieces
6173 // and for truncation purposes want the last (least-significant)
6174 // of these pieces for IndexN. This is easiest to do by calculating
6175 // the start index of the following element and then subtracting 1.
6176 unsigned Scale = BytesPerElement / TruncBytes;
6177 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6178
6179 // Defer the creation of the bitcast from X to combineExtract,
6180 // which might be able to optimize the extraction.
6181 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6182 VecVT.getStoreSize() / TruncBytes);
6183 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6184 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6185 }
6186 }
6187 }
6188 }
6189 return SDValue();
6190 }
6191
6192 SDValue SystemZTargetLowering::combineZERO_EXTEND(
6193 SDNode *N, DAGCombinerInfo &DCI) const {
6194 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
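  // E.g. (zext i32 -> i64 (select_ccmask 1, 0, Valid, Mask, CC)) becomes
  // (select_ccmask i64 1, 0, Valid, Mask, CC); the constants are simply
  // re-emitted in the wider type.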
6195 SelectionDAG &DAG = DCI.DAG;
6196 SDValue N0 = N->getOperand(0);
6197 EVT VT = N->getValueType(0);
6198 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6199 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6200 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6201 if (TrueOp && FalseOp) {
6202 SDLoc DL(N0);
6203 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6204 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6205 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6206 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6207 // If N0 has multiple uses, change other uses as well.
6208 if (!N0.hasOneUse()) {
6209 SDValue TruncSelect =
6210 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6211 DCI.CombineTo(N0.getNode(), TruncSelect);
6212 }
6213 return NewSelect;
6214 }
6215 }
6216 return SDValue();
6217 }
6218
6219 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6220 SDNode *N, DAGCombinerInfo &DCI) const {
6221 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6222 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6223 // into (select_cc LHS, RHS, -1, 0, COND)
6224 SelectionDAG &DAG = DCI.DAG;
6225 SDValue N0 = N->getOperand(0);
6226 EVT VT = N->getValueType(0);
6227 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6228 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6229 N0 = N0.getOperand(0);
6230 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6231 SDLoc DL(N0);
6232 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6233 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6234 N0.getOperand(2) };
6235 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6236 }
6237 return SDValue();
6238 }
6239
6240 SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6241 SDNode *N, DAGCombinerInfo &DCI) const {
6242 // Convert (sext (ashr (shl X, C1), C2)) to
6243 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6244 // cheap as narrower ones.
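  // E.g. with X : i32, (sext i64 (ashr (shl X, 24), 24)) becomes
  // (ashr (shl (anyext X to i64), 56), 56).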
6245 SelectionDAG &DAG = DCI.DAG;
6246 SDValue N0 = N->getOperand(0);
6247 EVT VT = N->getValueType(0);
6248 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6249 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6250 SDValue Inner = N0.getOperand(0);
6251 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6252 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6253 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6254 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6255 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6256 EVT ShiftVT = N0.getOperand(1).getValueType();
6257 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6258 Inner.getOperand(0));
6259 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6260 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6261 ShiftVT));
6262 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6263 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6264 }
6265 }
6266 }
6267 return SDValue();
6268 }
6269
6270 SDValue SystemZTargetLowering::combineMERGE(
6271 SDNode *N, DAGCombinerInfo &DCI) const {
6272 SelectionDAG &DAG = DCI.DAG;
6273 unsigned Opcode = N->getOpcode();
6274 SDValue Op0 = N->getOperand(0);
6275 SDValue Op1 = N->getOperand(1);
6276 if (Op0.getOpcode() == ISD::BITCAST)
6277 Op0 = Op0.getOperand(0);
6278 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6279 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6280 // for v4f32.
6281 if (Op1 == N->getOperand(0))
6282 return Op1;
6283 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
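    // E.g. (z_merge_high (v16i8 zero), (v16i8 X)) becomes a bitcast of
    // (z_unpackl_high X) : v8i16, since the logical unpack supplies the
    // zero bytes itself.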
6284 EVT VT = Op1.getValueType();
6285 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6286 if (ElemBytes <= 4) {
6287 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6288 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6289 EVT InVT = VT.changeVectorElementTypeToInteger();
6290 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6291 SystemZ::VectorBytes / ElemBytes / 2);
6292 if (VT != InVT) {
6293 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6294 DCI.AddToWorklist(Op1.getNode());
6295 }
6296 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6297 DCI.AddToWorklist(Op.getNode());
6298 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6299 }
6300 }
6301 return SDValue();
6302 }
6303
6304 SDValue SystemZTargetLowering::combineLOAD(
6305 SDNode *N, DAGCombinerInfo &DCI) const {
6306 SelectionDAG &DAG = DCI.DAG;
6307 EVT LdVT = N->getValueType(0);
6308 if (LdVT.isVector() || LdVT.isInteger())
6309 return SDValue();
6310 // Transform a scalar load that is REPLICATEd as well as having other
6311 // use(s) to the form where the other use(s) use the first element of the
6312 // REPLICATE instead of the load. Otherwise instruction selection will not
6313 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6314 // point loads.
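  // E.g. for an f64 load feeding both (z_replicate load) and an fadd, the
  // fadd is rewritten to use (extract_vector_elt Replicate, 0), leaving the
  // REPLICATE as the only user of the loaded value so that it selects to
  // VLREP.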
6315
6316 SDValue Replicate;
6317 SmallVector<SDNode*, 8> OtherUses;
6318 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6319 UI != UE; ++UI) {
6320 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6321 if (Replicate)
6322 return SDValue(); // Should never happen
6323 Replicate = SDValue(*UI, 0);
6324 }
6325 else if (UI.getUse().getResNo() == 0)
6326 OtherUses.push_back(*UI);
6327 }
6328 if (!Replicate || OtherUses.empty())
6329 return SDValue();
6330
6331 SDLoc DL(N);
6332 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6333 Replicate, DAG.getConstant(0, DL, MVT::i32));
6334 // Update uses of the loaded Value while preserving old chains.
6335 for (SDNode *U : OtherUses) {
6336 SmallVector<SDValue, 8> Ops;
6337 for (SDValue Op : U->ops())
6338 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6339 DAG.UpdateNodeOperands(U, Ops);
6340 }
6341 return SDValue(N, 0);
6342 }
6343
6344 bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6345 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6346 return true;
6347 if (Subtarget.hasVectorEnhancements2())
6348 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
6349 return true;
6350 return false;
6351 }
6352
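// Return true if the shuffle mask M reverses the element order of the
// 128-bit vector type VT (e.g. <3, 2, 1, 0> for v4i32), ignoring undef
// mask entries.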
6353 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6354 if (!VT.isVector() || !VT.isSimple() ||
6355 VT.getSizeInBits() != 128 ||
6356 VT.getScalarSizeInBits() % 8 != 0)
6357 return false;
6358
6359 unsigned NumElts = VT.getVectorNumElements();
6360 for (unsigned i = 0; i < NumElts; ++i) {
6361 if (M[i] < 0) continue; // ignore UNDEF indices
6362 if ((unsigned) M[i] != NumElts - 1 - i)
6363 return false;
6364 }
6365
6366 return true;
6367 }
6368
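// Return true if every user of StoredVal is either a store of a "round"
// memory type of at most 16 bytes, or a splat BUILD_VECTOR that is itself
// only used by such stores.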
6369 static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6370 for (auto *U : StoredVal->uses()) {
6371 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6372 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6373 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6374 continue;
6375 } else if (isa<BuildVectorSDNode>(U)) {
6376 SDValue BuildVector = SDValue(U, 0);
6377 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6378 isOnlyUsedByStores(BuildVector, DAG))
6379 continue;
6380 }
6381 return false;
6382 }
6383 return true;
6384 }
6385
6386 SDValue SystemZTargetLowering::combineSTORE(
6387 SDNode *N, DAGCombinerInfo &DCI) const {
6388 SelectionDAG &DAG = DCI.DAG;
6389 auto *SN = cast<StoreSDNode>(N);
6390 auto &Op1 = N->getOperand(1);
6391 EVT MemVT = SN->getMemoryVT();
6392 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6393 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6394 // If X has wider elements then convert it to:
6395 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6396 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6397 if (SDValue Value =
6398 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6399 DCI.AddToWorklist(Value.getNode());
6400
6401 // Rewrite the store with the new form of stored value.
6402 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6403 SN->getBasePtr(), SN->getMemoryVT(),
6404 SN->getMemOperand());
6405 }
6406 }
6407 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6408 if (!SN->isTruncatingStore() &&
6409 Op1.getOpcode() == ISD::BSWAP &&
6410 Op1.getNode()->hasOneUse() &&
6411 canLoadStoreByteSwapped(Op1.getValueType())) {
6412
6413 SDValue BSwapOp = Op1.getOperand(0);
6414
6415 if (BSwapOp.getValueType() == MVT::i16)
6416 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6417
6418 SDValue Ops[] = {
6419 N->getOperand(0), BSwapOp, N->getOperand(2)
6420 };
6421
6422 return
6423 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6424 Ops, MemVT, SN->getMemOperand());
6425 }
6426 // Combine STORE (element-swap) into VSTER
6427 if (!SN->isTruncatingStore() &&
6428 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
6429 Op1.getNode()->hasOneUse() &&
6430 Subtarget.hasVectorEnhancements2()) {
6431 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
6432 ArrayRef<int> ShuffleMask = SVN->getMask();
6433 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
6434 SDValue Ops[] = {
6435 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
6436 };
6437
6438 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
6439 DAG.getVTList(MVT::Other),
6440 Ops, MemVT, SN->getMemOperand());
6441 }
6442 }
6443
6444   // Replicate a reg or immediate with VREP instead of scalar multiply or
6445   // immediate load. It seems best to do this during the first DAGCombine as
6446   // it is straightforward to handle the zero-extend node in the initial
6447   // DAG, and also not to worry about keeping the new MemVT legal (e.g. when
6448   // extracting an i16 element from a v16i8 vector).
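  // E.g. a store of the i64 constant 0x00c000c000c000c0 can instead store a
  // v4i16 splat of 0x00c0, and a store of (mul (zext i16 %val to i64),
  // 0x0001000100010001) can store a v4i16 splat of %val.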
6449 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
6450 isOnlyUsedByStores(Op1, DAG)) {
6451 SDValue Word = SDValue();
6452 EVT WordVT;
6453
6454     // Find a replicated immediate; if one is found, return it in Word and
6455     // its type in WordVT.
6456 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
6457 // Some constants are better handled with a scalar store.
6458 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
6459 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
6460 return;
6461 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
6462 if (VCI.isVectorConstantLegal(Subtarget) &&
6463 VCI.Opcode == SystemZISD::REPLICATE) {
6464 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
6465 WordVT = VCI.VecVT.getScalarType();
6466 }
6467 };
6468
6469     // Find a replicated register; if one is found, return it in Word and its
6470     // type in WordVT.
6471 auto FindReplicatedReg = [&](SDValue MulOp) {
6472 EVT MulVT = MulOp.getValueType();
6473 if (MulOp->getOpcode() == ISD::MUL &&
6474 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
6475 // Find a zero extended value and its type.
6476 SDValue LHS = MulOp->getOperand(0);
6477 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
6478 WordVT = LHS->getOperand(0).getValueType();
6479 else if (LHS->getOpcode() == ISD::AssertZext)
6480 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
6481 else
6482 return;
6483 // Find a replicating constant, e.g. 0x00010001.
6484 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
6485 SystemZVectorConstantInfo VCI(
6486 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
6487 if (VCI.isVectorConstantLegal(Subtarget) &&
6488 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
6489 WordVT == VCI.VecVT.getScalarType())
6490 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
6491 }
6492 }
6493 };
6494
6495 if (isa<BuildVectorSDNode>(Op1) &&
6496 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
6497 SDValue SplatVal = Op1->getOperand(0);
6498 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
6499 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
6500 else
6501 FindReplicatedReg(SplatVal);
6502 } else {
6503 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
6504 FindReplicatedImm(C, MemVT.getStoreSize());
6505 else
6506 FindReplicatedReg(Op1);
6507 }
6508
6509 if (Word != SDValue()) {
6510 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
6511 "Bad type handling");
6512 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
6513 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
6514 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
6515 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
6516 SN->getBasePtr(), SN->getMemOperand());
6517 }
6518 }
6519
6520 return SDValue();
6521 }
6522
6523 SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
6524 SDNode *N, DAGCombinerInfo &DCI) const {
6525 SelectionDAG &DAG = DCI.DAG;
6526 // Combine element-swap (LOAD) into VLER
6527 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6528 N->getOperand(0).hasOneUse() &&
6529 Subtarget.hasVectorEnhancements2()) {
6530 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
6531 ArrayRef<int> ShuffleMask = SVN->getMask();
6532 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
6533 SDValue Load = N->getOperand(0);
6534 LoadSDNode *LD = cast<LoadSDNode>(Load);
6535
6536 // Create the element-swapping load.
6537 SDValue Ops[] = {
6538 LD->getChain(), // Chain
6539 LD->getBasePtr() // Ptr
6540 };
6541 SDValue ESLoad =
6542 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
6543 DAG.getVTList(LD->getValueType(0), MVT::Other),
6544 Ops, LD->getMemoryVT(), LD->getMemOperand());
6545
6546 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
6547 // by the load dead.
6548 DCI.CombineTo(N, ESLoad);
6549
6550       // Next, combine the load away; we give it a bogus result value but a real
6551 // chain result. The result value is dead because the shuffle is dead.
6552 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
6553
6554 // Return N so it doesn't get rechecked!
6555 return SDValue(N, 0);
6556 }
6557 }
6558
6559 return SDValue();
6560 }
6561
6562 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
6563 SDNode *N, DAGCombinerInfo &DCI) const {
6564 SelectionDAG &DAG = DCI.DAG;
6565
6566 if (!Subtarget.hasVector())
6567 return SDValue();
6568
6569 // Look through bitcasts that retain the number of vector elements.
6570 SDValue Op = N->getOperand(0);
6571 if (Op.getOpcode() == ISD::BITCAST &&
6572 Op.getValueType().isVector() &&
6573 Op.getOperand(0).getValueType().isVector() &&
6574 Op.getValueType().getVectorNumElements() ==
6575 Op.getOperand(0).getValueType().getVectorNumElements())
6576 Op = Op.getOperand(0);
6577
6578 // Pull BSWAP out of a vector extraction.
6579 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
6580 EVT VecVT = Op.getValueType();
6581 EVT EltVT = VecVT.getVectorElementType();
6582 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
6583 Op.getOperand(0), N->getOperand(1));
6584 DCI.AddToWorklist(Op.getNode());
6585 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
6586 if (EltVT != N->getValueType(0)) {
6587 DCI.AddToWorklist(Op.getNode());
6588 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
6589 }
6590 return Op;
6591 }
6592
6593 // Try to simplify a vector extraction.
6594 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
6595 SDValue Op0 = N->getOperand(0);
6596 EVT VecVT = Op0.getValueType();
6597 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
6598 IndexN->getZExtValue(), DCI, false);
6599 }
6600 return SDValue();
6601 }
6602
6603 SDValue SystemZTargetLowering::combineJOIN_DWORDS(
6604 SDNode *N, DAGCombinerInfo &DCI) const {
6605 SelectionDAG &DAG = DCI.DAG;
6606 // (join_dwords X, X) == (replicate X)
6607 if (N->getOperand(0) == N->getOperand(1))
6608 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
6609 N->getOperand(0));
6610 return SDValue();
6611 }
6612
6613 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
6614 SDValue Chain1 = N1->getOperand(0);
6615 SDValue Chain2 = N2->getOperand(0);
6616
6617 // Trivial case: both nodes take the same chain.
6618 if (Chain1 == Chain2)
6619 return Chain1;
6620
6621 // FIXME - we could handle more complex cases via TokenFactor,
6622 // assuming we can verify that this would not create a cycle.
6623 return SDValue();
6624 }
6625
6626 SDValue SystemZTargetLowering::combineFP_ROUND(
6627 SDNode *N, DAGCombinerInfo &DCI) const {
6628
6629 if (!Subtarget.hasVector())
6630 return SDValue();
6631
6632 // (fpround (extract_vector_elt X 0))
6633 // (fpround (extract_vector_elt X 1)) ->
6634 // (extract_vector_elt (VROUND X) 0)
6635 // (extract_vector_elt (VROUND X) 2)
6636 //
6637 // This is a special case since the target doesn't really support v2f32s.
6638 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6639 SelectionDAG &DAG = DCI.DAG;
6640 SDValue Op0 = N->getOperand(OpNo);
6641 if (N->getValueType(0) == MVT::f32 &&
6642 Op0.hasOneUse() &&
6643 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6644 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
6645 Op0.getOperand(1).getOpcode() == ISD::Constant &&
6646 cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
6647 SDValue Vec = Op0.getOperand(0);
6648 for (auto *U : Vec->uses()) {
6649 if (U != Op0.getNode() &&
6650 U->hasOneUse() &&
6651 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6652 U->getOperand(0) == Vec &&
6653 U->getOperand(1).getOpcode() == ISD::Constant &&
6654 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
6655 SDValue OtherRound = SDValue(*U->use_begin(), 0);
6656 if (OtherRound.getOpcode() == N->getOpcode() &&
6657 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
6658 OtherRound.getValueType() == MVT::f32) {
6659 SDValue VRound, Chain;
6660 if (N->isStrictFPOpcode()) {
6661 Chain = MergeInputChains(N, OtherRound.getNode());
6662 if (!Chain)
6663 continue;
6664 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
6665 {MVT::v4f32, MVT::Other}, {Chain, Vec});
6666 Chain = VRound.getValue(1);
6667 } else
6668 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
6669 MVT::v4f32, Vec);
6670 DCI.AddToWorklist(VRound.getNode());
6671 SDValue Extract1 =
6672 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
6673 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
6674 DCI.AddToWorklist(Extract1.getNode());
6675 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
6676 if (Chain)
6677 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
6678 SDValue Extract0 =
6679 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
6680 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
6681 if (Chain)
6682 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
6683 N->getVTList(), Extract0, Chain);
6684 return Extract0;
6685 }
6686 }
6687 }
6688 }
6689 return SDValue();
6690 }
6691
6692 SDValue SystemZTargetLowering::combineFP_EXTEND(
6693 SDNode *N, DAGCombinerInfo &DCI) const {
6694
6695 if (!Subtarget.hasVector())
6696 return SDValue();
6697
6698 // (fpextend (extract_vector_elt X 0))
6699 // (fpextend (extract_vector_elt X 2)) ->
6700 // (extract_vector_elt (VEXTEND X) 0)
6701 // (extract_vector_elt (VEXTEND X) 1)
6702 //
6703 // This is a special case since the target doesn't really support v2f32s.
6704 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6705 SelectionDAG &DAG = DCI.DAG;
6706 SDValue Op0 = N->getOperand(OpNo);
6707 if (N->getValueType(0) == MVT::f64 &&
6708 Op0.hasOneUse() &&
6709 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6710 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
6711 Op0.getOperand(1).getOpcode() == ISD::Constant &&
6712 cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
6713 SDValue Vec = Op0.getOperand(0);
6714 for (auto *U : Vec->uses()) {
6715 if (U != Op0.getNode() &&
6716 U->hasOneUse() &&
6717 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6718 U->getOperand(0) == Vec &&
6719 U->getOperand(1).getOpcode() == ISD::Constant &&
6720 cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
6721 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
6722 if (OtherExtend.getOpcode() == N->getOpcode() &&
6723 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
6724 OtherExtend.getValueType() == MVT::f64) {
6725 SDValue VExtend, Chain;
6726 if (N->isStrictFPOpcode()) {
6727 Chain = MergeInputChains(N, OtherExtend.getNode());
6728 if (!Chain)
6729 continue;
6730 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
6731 {MVT::v2f64, MVT::Other}, {Chain, Vec});
6732 Chain = VExtend.getValue(1);
6733 } else
6734 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
6735 MVT::v2f64, Vec);
6736 DCI.AddToWorklist(VExtend.getNode());
6737 SDValue Extract1 =
6738 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
6739 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
6740 DCI.AddToWorklist(Extract1.getNode());
6741 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
6742 if (Chain)
6743 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
6744 SDValue Extract0 =
6745 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
6746 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
6747 if (Chain)
6748 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
6749 N->getVTList(), Extract0, Chain);
6750 return Extract0;
6751 }
6752 }
6753 }
6754 }
6755 return SDValue();
6756 }
6757
6758 SDValue SystemZTargetLowering::combineINT_TO_FP(
6759 SDNode *N, DAGCombinerInfo &DCI) const {
6760 if (DCI.Level != BeforeLegalizeTypes)
6761 return SDValue();
6762 SelectionDAG &DAG = DCI.DAG;
6763 LLVMContext &Ctx = *DAG.getContext();
6764 unsigned Opcode = N->getOpcode();
6765 EVT OutVT = N->getValueType(0);
6766 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
6767 SDValue Op = N->getOperand(0);
6768 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
6769 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
6770
6771 // Insert an extension before type-legalization to avoid scalarization, e.g.:
6772 // v2f64 = uint_to_fp v2i16
6773 // =>
6774 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
6775 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
6776 OutScalarBits <= 64) {
6777 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
6778 EVT ExtVT = EVT::getVectorVT(
6779 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
6780 unsigned ExtOpcode =
6781 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
6782 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
6783 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
6784 }
6785 return SDValue();
6786 }
6787
6788 SDValue SystemZTargetLowering::combineBSWAP(
6789 SDNode *N, DAGCombinerInfo &DCI) const {
6790 SelectionDAG &DAG = DCI.DAG;
6791 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
6792 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
6793 N->getOperand(0).hasOneUse() &&
6794 canLoadStoreByteSwapped(N->getValueType(0))) {
6795 SDValue Load = N->getOperand(0);
6796 LoadSDNode *LD = cast<LoadSDNode>(Load);
6797
6798 // Create the byte-swapping load.
6799 SDValue Ops[] = {
6800 LD->getChain(), // Chain
6801 LD->getBasePtr() // Ptr
6802 };
6803 EVT LoadVT = N->getValueType(0);
6804 if (LoadVT == MVT::i16)
6805 LoadVT = MVT::i32;
6806 SDValue BSLoad =
6807 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
6808 DAG.getVTList(LoadVT, MVT::Other),
6809 Ops, LD->getMemoryVT(), LD->getMemOperand());
6810
6811 // If this is an i16 load, insert the truncate.
6812 SDValue ResVal = BSLoad;
6813 if (N->getValueType(0) == MVT::i16)
6814 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
6815
6816 // First, combine the bswap away. This makes the value produced by the
6817 // load dead.
6818 DCI.CombineTo(N, ResVal);
6819
6820       // Next, combine the load away; we give it a bogus result value but a real
6821 // chain result. The result value is dead because the bswap is dead.
6822 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
6823
6824 // Return N so it doesn't get rechecked!
6825 return SDValue(N, 0);
6826 }
6827
6828 // Look through bitcasts that retain the number of vector elements.
6829 SDValue Op = N->getOperand(0);
6830 if (Op.getOpcode() == ISD::BITCAST &&
6831 Op.getValueType().isVector() &&
6832 Op.getOperand(0).getValueType().isVector() &&
6833 Op.getValueType().getVectorNumElements() ==
6834 Op.getOperand(0).getValueType().getVectorNumElements())
6835 Op = Op.getOperand(0);
6836
6837 // Push BSWAP into a vector insertion if at least one side then simplifies.
6838 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
6839 SDValue Vec = Op.getOperand(0);
6840 SDValue Elt = Op.getOperand(1);
6841 SDValue Idx = Op.getOperand(2);
6842
6843 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
6844 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
6845 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
6846 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
6847 (canLoadStoreByteSwapped(N->getValueType(0)) &&
6848 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
6849 EVT VecVT = N->getValueType(0);
6850 EVT EltVT = N->getValueType(0).getVectorElementType();
6851 if (VecVT != Vec.getValueType()) {
6852 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
6853 DCI.AddToWorklist(Vec.getNode());
6854 }
6855 if (EltVT != Elt.getValueType()) {
6856 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
6857 DCI.AddToWorklist(Elt.getNode());
6858 }
6859 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
6860 DCI.AddToWorklist(Vec.getNode());
6861 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
6862 DCI.AddToWorklist(Elt.getNode());
6863 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
6864 Vec, Elt, Idx);
6865 }
6866 }
6867
6868 // Push BSWAP into a vector shuffle if at least one side then simplifies.
6869 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
6870 if (SV && Op.hasOneUse()) {
6871 SDValue Op0 = Op.getOperand(0);
6872 SDValue Op1 = Op.getOperand(1);
6873
6874 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
6875 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
6876 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
6877 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
6878 EVT VecVT = N->getValueType(0);
6879 if (VecVT != Op0.getValueType()) {
6880 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
6881 DCI.AddToWorklist(Op0.getNode());
6882 }
6883 if (VecVT != Op1.getValueType()) {
6884 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
6885 DCI.AddToWorklist(Op1.getNode());
6886 }
6887 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
6888 DCI.AddToWorklist(Op0.getNode());
6889 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
6890 DCI.AddToWorklist(Op1.getNode());
6891 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
6892 }
6893 }
6894
6895 return SDValue();
6896 }
6897
6898 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
6899   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
6900   // set by the CCReg instruction using the CCValid / CCMask masks.
6901   // If the CCReg instruction is itself an ICMP testing the condition
6902   // code set by some other instruction, see whether we can directly
6903   // use that condition code.
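  // E.g. a BR_CCMASK testing CCMASK_CMP_NE on (icmp (select_ccmask 1, 0,
  // Valid, Mask, CC), 0) branches exactly when the select's condition
  // holds, so it can test CC directly with the select's Valid / Mask values.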
6904
6905 // Verify that we have an ICMP against some constant.
6906 if (CCValid != SystemZ::CCMASK_ICMP)
6907 return false;
6908 auto *ICmp = CCReg.getNode();
6909 if (ICmp->getOpcode() != SystemZISD::ICMP)
6910 return false;
6911 auto *CompareLHS = ICmp->getOperand(0).getNode();
6912 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
6913 if (!CompareRHS)
6914 return false;
6915
6916 // Optimize the case where CompareLHS is a SELECT_CCMASK.
6917 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
6918     // Verify that we have an appropriate mask for an EQ or NE comparison.
6919 bool Invert = false;
6920 if (CCMask == SystemZ::CCMASK_CMP_NE)
6921 Invert = !Invert;
6922 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
6923 return false;
6924
6925     // Verify that the ICMP compares against one of the select values.
6926 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
6927 if (!TrueVal)
6928 return false;
6929 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6930 if (!FalseVal)
6931 return false;
6932 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
6933 Invert = !Invert;
6934 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
6935 return false;
6936
6937 // Compute the effective CC mask for the new branch or select.
6938 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
6939 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
6940 if (!NewCCValid || !NewCCMask)
6941 return false;
6942 CCValid = NewCCValid->getZExtValue();
6943 CCMask = NewCCMask->getZExtValue();
6944 if (Invert)
6945 CCMask ^= CCValid;
6946
6947 // Return the updated CCReg link.
6948 CCReg = CompareLHS->getOperand(4);
6949 return true;
6950 }
6951
6952   // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
6953 if (CompareLHS->getOpcode() == ISD::SRA) {
6954 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
6955 if (!SRACount || SRACount->getZExtValue() != 30)
6956 return false;
6957 auto *SHL = CompareLHS->getOperand(0).getNode();
6958 if (SHL->getOpcode() != ISD::SHL)
6959 return false;
6960 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
6961 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
6962 return false;
6963 auto *IPM = SHL->getOperand(0).getNode();
6964 if (IPM->getOpcode() != SystemZISD::IPM)
6965 return false;
6966
6967 // Avoid introducing CC spills (because SRA would clobber CC).
6968 if (!CompareLHS->hasOneUse())
6969 return false;
6970 // Verify that the ICMP compares against zero.
6971 if (CompareRHS->getZExtValue() != 0)
6972 return false;
6973
6974 // Compute the effective CC mask for the new branch or select.
6975 CCMask = SystemZ::reverseCCMask(CCMask);
6976
6977 // Return the updated CCReg link.
6978 CCReg = IPM->getOperand(0);
6979 return true;
6980 }
6981
6982 return false;
6983 }
6984
6985 SDValue SystemZTargetLowering::combineBR_CCMASK(
6986 SDNode *N, DAGCombinerInfo &DCI) const {
6987 SelectionDAG &DAG = DCI.DAG;
6988
6989 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
6990 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
6991 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
6992 if (!CCValid || !CCMask)
6993 return SDValue();
6994
6995 int CCValidVal = CCValid->getZExtValue();
6996 int CCMaskVal = CCMask->getZExtValue();
6997 SDValue Chain = N->getOperand(0);
6998 SDValue CCReg = N->getOperand(4);
6999
7000 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7001 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7002 Chain,
7003 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7004 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7005 N->getOperand(3), CCReg);
7006 return SDValue();
7007 }
7008
7009 SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7010 SDNode *N, DAGCombinerInfo &DCI) const {
7011 SelectionDAG &DAG = DCI.DAG;
7012
7013 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7014 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7015 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7016 if (!CCValid || !CCMask)
7017 return SDValue();
7018
7019 int CCValidVal = CCValid->getZExtValue();
7020 int CCMaskVal = CCMask->getZExtValue();
7021 SDValue CCReg = N->getOperand(4);
7022
7023 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7024 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7025 N->getOperand(0), N->getOperand(1),
7026 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7027 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7028 CCReg);
7029 return SDValue();
7030 }
7031
7032
7033 SDValue SystemZTargetLowering::combineGET_CCMASK(
7034 SDNode *N, DAGCombinerInfo &DCI) const {
7035
7036   // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible.
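  // (i.e. if the SELECT_CCMASK merely materializes the tested condition as
  // a 0/1 value, reuse the CC value that fed it instead of recomputing CC).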
7037 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7038 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7039 if (!CCValid || !CCMask)
7040 return SDValue();
7041 int CCValidVal = CCValid->getZExtValue();
7042 int CCMaskVal = CCMask->getZExtValue();
7043
7044 SDValue Select = N->getOperand(0);
7045 if (Select->getOpcode() == ISD::TRUNCATE)
7046 Select = Select->getOperand(0);
7047 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7048 return SDValue();
7049
7050 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7051 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7052 if (!SelectCCValid || !SelectCCMask)
7053 return SDValue();
7054 int SelectCCValidVal = SelectCCValid->getZExtValue();
7055 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7056
7057 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7058 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7059 if (!TrueVal || !FalseVal)
7060 return SDValue();
7061 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7062 ;
7063 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7064 SelectCCMaskVal ^= SelectCCValidVal;
7065 else
7066 return SDValue();
7067
7068 if (SelectCCValidVal & ~CCValidVal)
7069 return SDValue();
7070 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7071 return SDValue();
7072
7073 return Select->getOperand(4);
7074 }
7075
7076 SDValue SystemZTargetLowering::combineIntDIVREM(
7077 SDNode *N, DAGCombinerInfo &DCI) const {
7078 SelectionDAG &DAG = DCI.DAG;
7079 EVT VT = N->getValueType(0);
7080   // In the case where the divisor is a vector of constants, a cheaper
7081 // sequence of instructions can replace the divide. BuildSDIV is called to
7082 // do this during DAG combining, but it only succeeds when it can build a
7083 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7084 // since it is not Legal but Custom it can only happen before
7085   // legalization. Therefore we must scalarize this early, before the first
7086   // DAG combine. For widened vectors, this is already the result of type legalization.
7087 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7088 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7089 return DAG.UnrollVectorOp(N);
7090 return SDValue();
7091 }
7092
7093 SDValue SystemZTargetLowering::combineINTRINSIC(
7094 SDNode *N, DAGCombinerInfo &DCI) const {
7095 SelectionDAG &DAG = DCI.DAG;
7096
7097 unsigned Id = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
7098 switch (Id) {
7099 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7100 // or larger is simply a vector load.
7101 case Intrinsic::s390_vll:
7102 case Intrinsic::s390_vlrl:
7103 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7104 if (C->getZExtValue() >= 15)
7105 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7106 N->getOperand(3), MachinePointerInfo());
7107 break;
7108 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7109 case Intrinsic::s390_vstl:
7110 case Intrinsic::s390_vstrl:
7111 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7112 if (C->getZExtValue() >= 15)
7113 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7114 N->getOperand(4), MachinePointerInfo());
7115 break;
7116 }
7117
7118 return SDValue();
7119 }
7120
7121 SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7122 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7123 return N->getOperand(0);
7124 return N;
7125 }
7126
7127 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7128 DAGCombinerInfo &DCI) const {
7129   switch (N->getOpcode()) {
7130 default: break;
7131 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7132 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7133 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7134 case SystemZISD::MERGE_HIGH:
7135 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7136 case ISD::LOAD: return combineLOAD(N, DCI);
7137 case ISD::STORE: return combineSTORE(N, DCI);
7138 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7139 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7140 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7141 case ISD::STRICT_FP_ROUND:
7142 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7143 case ISD::STRICT_FP_EXTEND:
7144 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7145 case ISD::SINT_TO_FP:
7146 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7147 case ISD::BSWAP: return combineBSWAP(N, DCI);
7148 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7149 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7150 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7151 case ISD::SDIV:
7152 case ISD::UDIV:
7153 case ISD::SREM:
7154 case ISD::UREM: return combineIntDIVREM(N, DCI);
7155 case ISD::INTRINSIC_W_CHAIN:
7156 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7157 }
7158
7159 return SDValue();
7160 }
7161
7162 // Return the demanded elements for the OpNo source operand of Op. DemandedElts
7163 // are for Op.
7164 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7165 unsigned OpNo) {
7166 EVT VT = Op.getValueType();
7167 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7168 APInt SrcDemE;
7169 unsigned Opcode = Op.getOpcode();
7170 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7171 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7172 switch (Id) {
7173 case Intrinsic::s390_vpksh: // PACKS
7174 case Intrinsic::s390_vpksf:
7175 case Intrinsic::s390_vpksg:
7176 case Intrinsic::s390_vpkshs: // PACKS_CC
7177 case Intrinsic::s390_vpksfs:
7178 case Intrinsic::s390_vpksgs:
7179 case Intrinsic::s390_vpklsh: // PACKLS
7180 case Intrinsic::s390_vpklsf:
7181 case Intrinsic::s390_vpklsg:
7182 case Intrinsic::s390_vpklshs: // PACKLS_CC
7183 case Intrinsic::s390_vpklsfs:
7184 case Intrinsic::s390_vpklsgs:
7185 // VECTOR PACK truncates the elements of two source vectors into one.
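      // For example, a v16i8 PACK result takes elements 0..7 from operand 1
      // and elements 8..15 from operand 2 (each a v8i16 source).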
7186 SrcDemE = DemandedElts;
7187 if (OpNo == 2)
7188 SrcDemE.lshrInPlace(NumElts / 2);
7189 SrcDemE = SrcDemE.trunc(NumElts / 2);
7190 break;
7191 // VECTOR UNPACK extends half the elements of the source vector.
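    // The source has twice as many elements as the result; UNPACK HIGH reads
    // the first half and UNPACK LOW the second half, in element order.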
7192 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7193 case Intrinsic::s390_vuphh:
7194 case Intrinsic::s390_vuphf:
7195 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7196 case Intrinsic::s390_vuplhh:
7197 case Intrinsic::s390_vuplhf:
7198 SrcDemE = APInt(NumElts * 2, 0);
7199 SrcDemE.insertBits(DemandedElts, 0);
7200 break;
7201 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7202 case Intrinsic::s390_vuplhw:
7203 case Intrinsic::s390_vuplf:
7204 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7205 case Intrinsic::s390_vupllh:
7206 case Intrinsic::s390_vupllf:
7207 SrcDemE = APInt(NumElts * 2, 0);
7208 SrcDemE.insertBits(DemandedElts, NumElts);
7209 break;
7210 case Intrinsic::s390_vpdi: {
7211 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7212 SrcDemE = APInt(NumElts, 0);
7213 if (!DemandedElts[OpNo - 1])
7214 break;
7215 unsigned Mask = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7216 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7217 // Demand input element 0 or 1, given by the mask bit value.
7218     SrcDemE.setBit((Mask & MaskBit) ? 1 : 0);
7219 break;
7220 }
7221 case Intrinsic::s390_vsldb: {
7222 // VECTOR SHIFT LEFT DOUBLE BY BYTE
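      // The result is bytes FirstIdx..15 of operand 1 followed by bytes
      // 0..FirstIdx-1 of operand 2.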
7223 assert(VT == MVT::v16i8 && "Unexpected type.");
7224 unsigned FirstIdx = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
7225 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7226 unsigned NumSrc0Els = 16 - FirstIdx;
7227 SrcDemE = APInt(NumElts, 0);
7228 if (OpNo == 1) {
7229 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7230 SrcDemE.insertBits(DemEls, FirstIdx);
7231 } else {
7232 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7233 SrcDemE.insertBits(DemEls, 0);
7234 }
7235 break;
7236 }
7237 case Intrinsic::s390_vperm:
7238 SrcDemE = APInt(NumElts, 1);
7239 break;
7240 default:
7241 llvm_unreachable("Unhandled intrinsic.");
7242 break;
7243 }
7244 } else {
7245 switch (Opcode) {
7246 case SystemZISD::JOIN_DWORDS:
7247 // Scalar operand.
7248 SrcDemE = APInt(1, 1);
7249 break;
7250 case SystemZISD::SELECT_CCMASK:
7251 SrcDemE = DemandedElts;
7252 break;
7253 default:
7254 llvm_unreachable("Unhandled opcode.");
7255 break;
7256 }
7257 }
7258 return SrcDemE;
7259 }
7260
7261 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7262 const APInt &DemandedElts,
7263 const SelectionDAG &DAG, unsigned Depth,
7264 unsigned OpNo) {
7265 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7266 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7267 KnownBits LHSKnown =
7268 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7269 KnownBits RHSKnown =
7270 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7271 Known = KnownBits::commonBits(LHSKnown, RHSKnown);
7272 }
7273
7274 void
7275 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7276 KnownBits &Known,
7277 const APInt &DemandedElts,
7278 const SelectionDAG &DAG,
7279 unsigned Depth) const {
7280 Known.resetAll();
7281
7282 // Intrinsic CC result is returned in the two low bits.
7283 unsigned tmp0, tmp1; // not used
7284 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7285 Known.Zero.setBitsFrom(2);
7286 return;
7287 }
7288 EVT VT = Op.getValueType();
7289 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7290 return;
7291 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7292 "KnownBits does not match VT in bitwidth");
7293 assert ((!VT.isVector() ||
7294 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7295 "DemandedElts does not match VT number of elements");
7296 unsigned BitWidth = Known.getBitWidth();
7297 unsigned Opcode = Op.getOpcode();
7298 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7299 bool IsLogical = false;
7300 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7301 switch (Id) {
7302 case Intrinsic::s390_vpksh: // PACKS
7303 case Intrinsic::s390_vpksf:
7304 case Intrinsic::s390_vpksg:
7305 case Intrinsic::s390_vpkshs: // PACKS_CC
7306 case Intrinsic::s390_vpksfs:
7307 case Intrinsic::s390_vpksgs:
7308 case Intrinsic::s390_vpklsh: // PACKLS
7309 case Intrinsic::s390_vpklsf:
7310 case Intrinsic::s390_vpklsg:
7311 case Intrinsic::s390_vpklshs: // PACKLS_CC
7312 case Intrinsic::s390_vpklsfs:
7313 case Intrinsic::s390_vpklsgs:
7314 case Intrinsic::s390_vpdi:
7315 case Intrinsic::s390_vsldb:
7316 case Intrinsic::s390_vperm:
7317 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7318 break;
7319 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7320 case Intrinsic::s390_vuplhh:
7321 case Intrinsic::s390_vuplhf:
7322 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7323 case Intrinsic::s390_vupllh:
7324 case Intrinsic::s390_vupllf:
7325 IsLogical = true;
7326 [[fallthrough]];
7327 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7328 case Intrinsic::s390_vuphh:
7329 case Intrinsic::s390_vuphf:
7330 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7331 case Intrinsic::s390_vuplhw:
7332 case Intrinsic::s390_vuplf: {
7333 SDValue SrcOp = Op.getOperand(1);
7334 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7335 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7336 if (IsLogical) {
7337 Known = Known.zext(BitWidth);
7338 } else
7339 Known = Known.sext(BitWidth);
7340 break;
7341 }
7342 default:
7343 break;
7344 }
7345 } else {
7346 switch (Opcode) {
7347 case SystemZISD::JOIN_DWORDS:
7348 case SystemZISD::SELECT_CCMASK:
7349 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7350 break;
7351 case SystemZISD::REPLICATE: {
7352 SDValue SrcOp = Op.getOperand(0);
7353 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7354 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7355       Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
7356 break;
7357 }
7358 default:
7359 break;
7360 }
7361 }
7362
7363 // Known has the width of the source operand(s). Adjust if needed to match
7364 // the passed bitwidth.
7365 if (Known.getBitWidth() != BitWidth)
7366 Known = Known.anyextOrTrunc(BitWidth);
7367 }
7368
7369 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7370 const SelectionDAG &DAG, unsigned Depth,
7371 unsigned OpNo) {
7372 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7373 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7374 if (LHS == 1) return 1; // Early out.
7375 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7376 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7377 if (RHS == 1) return 1; // Early out.
7378 unsigned Common = std::min(LHS, RHS);
7379 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7380 EVT VT = Op.getValueType();
7381 unsigned VTBits = VT.getScalarSizeInBits();
7382 if (SrcBitWidth > VTBits) { // PACK
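    // Each result element is a truncation of a wider source element, so the
    // sign bits dropped by the truncation no longer count.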
7383 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7384 if (Common > SrcExtraBits)
7385 return (Common - SrcExtraBits);
7386 return 1;
7387 }
7388 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7389 return Common;
7390 }
7391
7392 unsigned
7393 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
7394 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7395 unsigned Depth) const {
7396 if (Op.getResNo() != 0)
7397 return 1;
7398 unsigned Opcode = Op.getOpcode();
7399 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7400 unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7401 switch (Id) {
7402 case Intrinsic::s390_vpksh: // PACKS
7403 case Intrinsic::s390_vpksf:
7404 case Intrinsic::s390_vpksg:
7405 case Intrinsic::s390_vpkshs: // PACKS_CC
7406 case Intrinsic::s390_vpksfs:
7407 case Intrinsic::s390_vpksgs:
7408 case Intrinsic::s390_vpklsh: // PACKLS
7409 case Intrinsic::s390_vpklsf:
7410 case Intrinsic::s390_vpklsg:
7411 case Intrinsic::s390_vpklshs: // PACKLS_CC
7412 case Intrinsic::s390_vpklsfs:
7413 case Intrinsic::s390_vpklsgs:
7414 case Intrinsic::s390_vpdi:
7415 case Intrinsic::s390_vsldb:
7416 case Intrinsic::s390_vperm:
7417 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
7418 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7419 case Intrinsic::s390_vuphh:
7420 case Intrinsic::s390_vuphf:
7421 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7422 case Intrinsic::s390_vuplhw:
7423 case Intrinsic::s390_vuplf: {
7424 SDValue PackedOp = Op.getOperand(1);
7425 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
7426 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
7427 EVT VT = Op.getValueType();
7428 unsigned VTBits = VT.getScalarSizeInBits();
7429 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
7430 return Tmp;
7431 }
7432 default:
7433 break;
7434 }
7435 } else {
7436 switch (Opcode) {
7437 case SystemZISD::SELECT_CCMASK:
7438 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
7439 default:
7440 break;
7441 }
7442 }
7443
7444 return 1;
7445 }
7446
7447 unsigned
7448 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
7449 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
7450 unsigned StackAlign = TFI->getStackAlignment();
7451   assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
7452 "Unexpected stack alignment");
7453 // The default stack probe size is 4096 if the function has no
7454 // stack-probe-size attribute.
7455 unsigned StackProbeSize =
7456 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
7457 // Round down to the stack alignment.
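  // For example, a requested 4100 with 8-byte stack alignment becomes 4096.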
7458 StackProbeSize &= ~(StackAlign - 1);
7459 return StackProbeSize ? StackProbeSize : StackAlign;
7460 }
7461
7462 //===----------------------------------------------------------------------===//
7463 // Custom insertion
7464 //===----------------------------------------------------------------------===//
7465
7466 // Force base value Base into a register before MI. Return the register.
7467 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
7468 const SystemZInstrInfo *TII) {
7469 MachineBasicBlock *MBB = MI.getParent();
7470 MachineFunction &MF = *MBB->getParent();
7471 MachineRegisterInfo &MRI = MF.getRegInfo();
7472
7473 if (Base.isReg()) {
7474 // Copy Base into a new virtual register to help register coalescing in
7475 // cases with multiple uses.
7476 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7477 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
7478 .add(Base);
7479 return Reg;
7480 }
7481
7482 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7483 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
7484 .add(Base)
7485 .addImm(0)
7486 .addReg(0);
7487 return Reg;
7488 }
7489
7490 // The CC operand of MI might be missing a kill marker because there
7491 // were multiple uses of CC, and ISel didn't know which to mark.
7492 // Figure out whether MI should have had a kill marker.
7493 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
7494 // Scan forward through BB for a use/def of CC.
7495 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
7496 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
7497 const MachineInstr& mi = *miI;
7498 if (mi.readsRegister(SystemZ::CC))
7499 return false;
7500 if (mi.definesRegister(SystemZ::CC))
7501 break; // Should have kill-flag - update below.
7502 }
7503
7504 // If we hit the end of the block, check whether CC is live into a
7505 // successor.
7506 if (miI == MBB->end()) {
7507 for (const MachineBasicBlock *Succ : MBB->successors())
7508 if (Succ->isLiveIn(SystemZ::CC))
7509 return false;
7510 }
7511
7512 return true;
7513 }
7514
7515 // Return true if it is OK for this Select pseudo-opcode to be cascaded
7516 // together with other Select pseudo-opcodes into a single basic-block with
7517 // a conditional jump around it.
7518 static bool isSelectPseudo(MachineInstr &MI) {
7519 switch (MI.getOpcode()) {
7520 case SystemZ::Select32:
7521 case SystemZ::Select64:
7522 case SystemZ::SelectF32:
7523 case SystemZ::SelectF64:
7524 case SystemZ::SelectF128:
7525 case SystemZ::SelectVR32:
7526 case SystemZ::SelectVR64:
7527 case SystemZ::SelectVR128:
7528 return true;
7529
7530 default:
7531 return false;
7532 }
7533 }
7534
7535 // Helper function that inserts PHI nodes into SinkMBB:
7536 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
7537 // where %FalseValue(i) and %TrueValue(i) are taken from Selects.
7538 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
7539 MachineBasicBlock *TrueMBB,
7540 MachineBasicBlock *FalseMBB,
7541 MachineBasicBlock *SinkMBB) {
7542 MachineFunction *MF = TrueMBB->getParent();
7543 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
7544
7545 MachineInstr *FirstMI = Selects.front();
7546 unsigned CCValid = FirstMI->getOperand(3).getImm();
7547 unsigned CCMask = FirstMI->getOperand(4).getImm();
7548
7549 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
7550
7551 // As we are creating the PHIs, we have to be careful if there is more than
7552 // one. Later Selects may reference the results of earlier Selects, but later
7553 // PHIs have to reference the individual true/false inputs from earlier PHIs.
7554 // That also means that PHI construction must work forward from earlier to
7555   // later, and that the code must maintain a mapping from earlier PHIs'
7556   // destination registers to the registers that went into those PHIs.
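  // For example, if a later Select uses the result of an earlier Select, its
  // PHI must use the earlier Select's true/false inputs (looked up in
  // RegRewriteTable below), not its destination register.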
7557 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
7558
7559 for (auto *MI : Selects) {
7560 Register DestReg = MI->getOperand(0).getReg();
7561 Register TrueReg = MI->getOperand(1).getReg();
7562 Register FalseReg = MI->getOperand(2).getReg();
7563
7564     // If the Select we are generating uses the opposite condition from
7565     // the jump we generated, then we have to swap the operands for the
7566     // PHI that is going to be generated.
7567 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
7568 std::swap(TrueReg, FalseReg);
7569
7570 if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
7571 TrueReg = RegRewriteTable[TrueReg].first;
7572
7573 if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
7574 FalseReg = RegRewriteTable[FalseReg].second;
7575
7576 DebugLoc DL = MI->getDebugLoc();
7577 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
7578 .addReg(TrueReg).addMBB(TrueMBB)
7579 .addReg(FalseReg).addMBB(FalseMBB);
7580
7581 // Add this PHI to the rewrite table.
7582 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
7583 }
7584
7585 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
7586 }
7587
7588 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
7589 MachineBasicBlock *
7590 SystemZTargetLowering::emitSelect(MachineInstr &MI,
7591 MachineBasicBlock *MBB) const {
7592 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
7593 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7594
7595 unsigned CCValid = MI.getOperand(3).getImm();
7596 unsigned CCMask = MI.getOperand(4).getImm();
7597
7598 // If we have a sequence of Select* pseudo instructions using the
7599 // same condition code value, we want to expand all of them into
7600 // a single pair of basic blocks using the same condition.
7601 SmallVector<MachineInstr*, 8> Selects;
7602 SmallVector<MachineInstr*, 8> DbgValues;
7603 Selects.push_back(&MI);
7604 unsigned Count = 0;
7605 for (MachineInstr &NextMI : llvm::make_range(
7606 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
7607 if (isSelectPseudo(NextMI)) {
7608 assert(NextMI.getOperand(3).getImm() == CCValid &&
7609 "Bad CCValid operands since CC was not redefined.");
7610 if (NextMI.getOperand(4).getImm() == CCMask ||
7611 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
7612 Selects.push_back(&NextMI);
7613 continue;
7614 }
7615 break;
7616 }
7617 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook())
7618 break;
7619 bool User = false;
7620 for (auto *SelMI : Selects)
7621 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
7622 User = true;
7623 break;
7624 }
7625 if (NextMI.isDebugInstr()) {
7626 if (User) {
7627 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
7628 DbgValues.push_back(&NextMI);
7629 }
7630 } else if (User || ++Count > 20)
7631 break;
7632 }
7633
7634 MachineInstr *LastMI = Selects.back();
7635 bool CCKilled =
7636 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
7637 MachineBasicBlock *StartMBB = MBB;
7638 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
7639 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
7640
7641 // Unless CC was killed in the last Select instruction, mark it as
7642 // live-in to both FalseMBB and JoinMBB.
7643 if (!CCKilled) {
7644 FalseMBB->addLiveIn(SystemZ::CC);
7645 JoinMBB->addLiveIn(SystemZ::CC);
7646 }
7647
7648 // StartMBB:
7649 // BRC CCMask, JoinMBB
7650 // # fallthrough to FalseMBB
7651 MBB = StartMBB;
7652 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
7653 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
7654 MBB->addSuccessor(JoinMBB);
7655 MBB->addSuccessor(FalseMBB);
7656
7657 // FalseMBB:
7658 // # fallthrough to JoinMBB
7659 MBB = FalseMBB;
7660 MBB->addSuccessor(JoinMBB);
7661
7662 // JoinMBB:
7663 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
7664 // ...
7665 MBB = JoinMBB;
7666 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
7667 for (auto *SelMI : Selects)
7668 SelMI->eraseFromParent();
7669
7670 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
7671 for (auto *DbgMI : DbgValues)
7672 MBB->splice(InsertPos, StartMBB, DbgMI);
7673
7674 return JoinMBB;
7675 }
7676
7677 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
7678 // StoreOpcode is the store to use and Invert says whether the store should
7679 // happen when the condition is false rather than true. If a STORE ON
7680 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
7681 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
7682 MachineBasicBlock *MBB,
7683 unsigned StoreOpcode,
7684 unsigned STOCOpcode,
7685 bool Invert) const {
7686 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7687
7688 Register SrcReg = MI.getOperand(0).getReg();
7689 MachineOperand Base = MI.getOperand(1);
7690 int64_t Disp = MI.getOperand(2).getImm();
7691 Register IndexReg = MI.getOperand(3).getReg();
7692 unsigned CCValid = MI.getOperand(4).getImm();
7693 unsigned CCMask = MI.getOperand(5).getImm();
7694 DebugLoc DL = MI.getDebugLoc();
7695
7696 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
7697
7698 // ISel pattern matching also adds a load memory operand of the same
7699 // address, so take special care to find the storing memory operand.
7700 MachineMemOperand *MMO = nullptr;
7701 for (auto *I : MI.memoperands())
7702 if (I->isStore()) {
7703 MMO = I;
7704 break;
7705 }
7706
7707 // Use STOCOpcode if possible. We could use different store patterns in
7708 // order to avoid matching the index register, but the performance trade-offs
7709 // might be more complicated in that case.
7710 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
7711 if (Invert)
7712 CCMask ^= CCValid;
7713
7714 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
7715 .addReg(SrcReg)
7716 .add(Base)
7717 .addImm(Disp)
7718 .addImm(CCValid)
7719 .addImm(CCMask)
7720 .addMemOperand(MMO);
7721
7722 MI.eraseFromParent();
7723 return MBB;
7724 }
7725
7726 // Get the condition needed to branch around the store.
7727 if (!Invert)
7728 CCMask ^= CCValid;
7729
7730 MachineBasicBlock *StartMBB = MBB;
7731 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
7732 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
7733
7734 // Unless CC was killed in the CondStore instruction, mark it as
7735 // live-in to both FalseMBB and JoinMBB.
7736 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
7737 FalseMBB->addLiveIn(SystemZ::CC);
7738 JoinMBB->addLiveIn(SystemZ::CC);
7739 }
7740
7741 // StartMBB:
7742 // BRC CCMask, JoinMBB
7743 // # fallthrough to FalseMBB
7744 MBB = StartMBB;
7745 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7746 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
7747 MBB->addSuccessor(JoinMBB);
7748 MBB->addSuccessor(FalseMBB);
7749
7750 // FalseMBB:
7751 // store %SrcReg, %Disp(%Index,%Base)
7752 // # fallthrough to JoinMBB
7753 MBB = FalseMBB;
7754 BuildMI(MBB, DL, TII->get(StoreOpcode))
7755 .addReg(SrcReg)
7756 .add(Base)
7757 .addImm(Disp)
7758 .addReg(IndexReg)
7759 .addMemOperand(MMO);
7760 MBB->addSuccessor(JoinMBB);
7761
7762 MI.eraseFromParent();
7763 return JoinMBB;
7764 }
7765
7766 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_LOAD{,W}_*
7767 // or ATOMIC_SWAP{,W} instruction MI. BinOpcode is the instruction that
7768 // performs the binary operation elided by "*", or 0 for ATOMIC_SWAP{,W}.
7769 // BitSize is the width of the field in bits, or 0 if this is a partword
7770 // ATOMIC_LOADW_* or ATOMIC_SWAPW instruction, in which case the bitsize
7771 // is one of the operands. Invert says whether the field should be
7772 // inverted after performing BinOpcode (e.g. for NAND).
7773 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
7774 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
7775 unsigned BitSize, bool Invert) const {
7776 MachineFunction &MF = *MBB->getParent();
7777 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7778 MachineRegisterInfo &MRI = MF.getRegInfo();
7779 bool IsSubWord = (BitSize < 32);
7780
7781 // Extract the operands. Base can be a register or a frame index.
7782 // Src2 can be a register or immediate.
7783 Register Dest = MI.getOperand(0).getReg();
7784 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7785 int64_t Disp = MI.getOperand(2).getImm();
7786 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
7787 Register BitShift = IsSubWord ? MI.getOperand(4).getReg() : Register();
7788 Register NegBitShift = IsSubWord ? MI.getOperand(5).getReg() : Register();
7789 DebugLoc DL = MI.getDebugLoc();
7790 if (IsSubWord)
7791 BitSize = MI.getOperand(6).getImm();
7792
7793 // Subword operations use 32-bit registers.
7794 const TargetRegisterClass *RC = (BitSize <= 32 ?
7795 &SystemZ::GR32BitRegClass :
7796 &SystemZ::GR64BitRegClass);
7797 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
7798 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7799
7800 // Get the right opcodes for the displacement.
7801 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
7802 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7803 assert(LOpcode && CSOpcode && "Displacement out of range");
7804
7805 // Create virtual registers for temporary results.
7806 Register OrigVal = MRI.createVirtualRegister(RC);
7807 Register OldVal = MRI.createVirtualRegister(RC);
7808 Register NewVal = (BinOpcode || IsSubWord ?
7809 MRI.createVirtualRegister(RC) : Src2.getReg());
7810 Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7811 Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7812
7813 // Insert a basic block for the main loop.
7814 MachineBasicBlock *StartMBB = MBB;
7815 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7816 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7817
7818 // StartMBB:
7819 // ...
7820 // %OrigVal = L Disp(%Base)
7821 // # fall through to LoopMBB
7822 MBB = StartMBB;
7823 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7824 MBB->addSuccessor(LoopMBB);
7825
7826 // LoopMBB:
7827 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
7828 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7829 // %RotatedNewVal = OP %RotatedOldVal, %Src2
7830 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7831 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
7832 // JNE LoopMBB
7833 // # fall through to DoneMBB
7834 MBB = LoopMBB;
7835 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7836 .addReg(OrigVal).addMBB(StartMBB)
7837 .addReg(Dest).addMBB(LoopMBB);
7838 if (IsSubWord)
7839 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7840 .addReg(OldVal).addReg(BitShift).addImm(0);
7841 if (Invert) {
7842 // Perform the operation normally and then invert every bit of the field.
7843 Register Tmp = MRI.createVirtualRegister(RC);
7844 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
7845 if (BitSize <= 32)
7846 // XILF with the upper BitSize bits set.
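        // (e.g. 0xFF000000 for an 8-bit field).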
7847 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
7848 .addReg(Tmp).addImm(-1U << (32 - BitSize));
7849 else {
7850 // Use LCGR and add -1 to the result, which is more compact than
7851 // an XILF, XILH pair.
7852 Register Tmp2 = MRI.createVirtualRegister(RC);
7853 BuildMI(MBB, DL, TII->get(SystemZ::LCGR), Tmp2).addReg(Tmp);
7854 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), RotatedNewVal)
7855 .addReg(Tmp2).addImm(-1);
7856 }
7857 } else if (BinOpcode)
7858     // A simple binary operation.
7859 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
7860 .addReg(RotatedOldVal)
7861 .add(Src2);
7862 else if (IsSubWord)
7863 // Use RISBG to rotate Src2 into position and use it to replace the
7864 // field in RotatedOldVal.
7865 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
7866 .addReg(RotatedOldVal).addReg(Src2.getReg())
7867 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
7868 if (IsSubWord)
7869 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7870 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7871 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7872 .addReg(OldVal)
7873 .addReg(NewVal)
7874 .add(Base)
7875 .addImm(Disp);
7876 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7877 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7878 MBB->addSuccessor(LoopMBB);
7879 MBB->addSuccessor(DoneMBB);
7880
7881 MI.eraseFromParent();
7882 return DoneMBB;
7883 }
7884
7885 // Implement EmitInstrWithCustomInserter for pseudo
7886 // ATOMIC_LOAD{,W}_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
7887 // instruction that should be used to compare the current field with the
7888 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
7889 // for when the current field should be kept. BitSize is the width of
7890 // the field in bits, or 0 if this is a partword ATOMIC_LOADW_* instruction.
7891 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
7892 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
7893 unsigned KeepOldMask, unsigned BitSize) const {
7894 MachineFunction &MF = *MBB->getParent();
7895 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
7896 MachineRegisterInfo &MRI = MF.getRegInfo();
7897 bool IsSubWord = (BitSize < 32);
7898
7899 // Extract the operands. Base can be a register or a frame index.
7900 Register Dest = MI.getOperand(0).getReg();
7901 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
7902 int64_t Disp = MI.getOperand(2).getImm();
7903 Register Src2 = MI.getOperand(3).getReg();
7904 Register BitShift = (IsSubWord ? MI.getOperand(4).getReg() : Register());
7905 Register NegBitShift = (IsSubWord ? MI.getOperand(5).getReg() : Register());
7906 DebugLoc DL = MI.getDebugLoc();
7907 if (IsSubWord)
7908 BitSize = MI.getOperand(6).getImm();
7909
7910 // Subword operations use 32-bit registers.
7911 const TargetRegisterClass *RC = (BitSize <= 32 ?
7912 &SystemZ::GR32BitRegClass :
7913 &SystemZ::GR64BitRegClass);
7914 unsigned LOpcode = BitSize <= 32 ? SystemZ::L : SystemZ::LG;
7915 unsigned CSOpcode = BitSize <= 32 ? SystemZ::CS : SystemZ::CSG;
7916
7917 // Get the right opcodes for the displacement.
7918 LOpcode = TII->getOpcodeForOffset(LOpcode, Disp);
7919 CSOpcode = TII->getOpcodeForOffset(CSOpcode, Disp);
7920 assert(LOpcode && CSOpcode && "Displacement out of range");
7921
7922 // Create virtual registers for temporary results.
7923 Register OrigVal = MRI.createVirtualRegister(RC);
7924 Register OldVal = MRI.createVirtualRegister(RC);
7925 Register NewVal = MRI.createVirtualRegister(RC);
7926 Register RotatedOldVal = (IsSubWord ? MRI.createVirtualRegister(RC) : OldVal);
7927 Register RotatedAltVal = (IsSubWord ? MRI.createVirtualRegister(RC) : Src2);
7928 Register RotatedNewVal = (IsSubWord ? MRI.createVirtualRegister(RC) : NewVal);
7929
7930 // Insert 3 basic blocks for the loop.
7931 MachineBasicBlock *StartMBB = MBB;
7932 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
7933 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
7934 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
7935 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
7936
7937 // StartMBB:
7938 // ...
7939 // %OrigVal = L Disp(%Base)
7940 // # fall through to LoopMBB
7941 MBB = StartMBB;
7942 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
7943 MBB->addSuccessor(LoopMBB);
7944
7945 // LoopMBB:
7946 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
7947 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
7948 // CompareOpcode %RotatedOldVal, %Src2
7949 // BRC KeepOldMask, UpdateMBB
7950 MBB = LoopMBB;
7951 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
7952 .addReg(OrigVal).addMBB(StartMBB)
7953 .addReg(Dest).addMBB(UpdateMBB);
7954 if (IsSubWord)
7955 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
7956 .addReg(OldVal).addReg(BitShift).addImm(0);
7957 BuildMI(MBB, DL, TII->get(CompareOpcode))
7958 .addReg(RotatedOldVal).addReg(Src2);
7959 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7960 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
7961 MBB->addSuccessor(UpdateMBB);
7962 MBB->addSuccessor(UseAltMBB);
7963
7964 // UseAltMBB:
7965 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
7966 // # fall through to UpdateMBB
7967 MBB = UseAltMBB;
7968 if (IsSubWord)
7969 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
7970 .addReg(RotatedOldVal).addReg(Src2)
7971 .addImm(32).addImm(31 + BitSize).addImm(0);
7972 MBB->addSuccessor(UpdateMBB);
7973
7974 // UpdateMBB:
7975 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
7976 // [ %RotatedAltVal, UseAltMBB ]
7977 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
7978 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
7979 // JNE LoopMBB
7980 // # fall through to DoneMBB
7981 MBB = UpdateMBB;
7982 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
7983 .addReg(RotatedOldVal).addMBB(LoopMBB)
7984 .addReg(RotatedAltVal).addMBB(UseAltMBB);
7985 if (IsSubWord)
7986 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
7987 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
7988 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
7989 .addReg(OldVal)
7990 .addReg(NewVal)
7991 .add(Base)
7992 .addImm(Disp);
7993 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
7994 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
7995 MBB->addSuccessor(LoopMBB);
7996 MBB->addSuccessor(DoneMBB);
7997
7998 MI.eraseFromParent();
7999 return DoneMBB;
8000 }
8001
8002 // Implement EmitInstrWithCustomInserter for pseudo ATOMIC_CMP_SWAPW
8003 // instruction MI.
8004 MachineBasicBlock *
8005 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8006 MachineBasicBlock *MBB) const {
8007 MachineFunction &MF = *MBB->getParent();
8008 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8009 MachineRegisterInfo &MRI = MF.getRegInfo();
8010
8011 // Extract the operands. Base can be a register or a frame index.
8012 Register Dest = MI.getOperand(0).getReg();
8013 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8014 int64_t Disp = MI.getOperand(2).getImm();
8015 Register CmpVal = MI.getOperand(3).getReg();
8016 Register OrigSwapVal = MI.getOperand(4).getReg();
8017 Register BitShift = MI.getOperand(5).getReg();
8018 Register NegBitShift = MI.getOperand(6).getReg();
8019 int64_t BitSize = MI.getOperand(7).getImm();
8020 DebugLoc DL = MI.getDebugLoc();
8021
8022 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8023
8024 // Get the right opcodes for the displacement and zero-extension.
8025 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8026 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8027 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8028 assert(LOpcode && CSOpcode && "Displacement out of range");
8029
8030 // Create virtual registers for temporary results.
8031 Register OrigOldVal = MRI.createVirtualRegister(RC);
8032 Register OldVal = MRI.createVirtualRegister(RC);
8033 Register SwapVal = MRI.createVirtualRegister(RC);
8034 Register StoreVal = MRI.createVirtualRegister(RC);
8035 Register OldValRot = MRI.createVirtualRegister(RC);
8036 Register RetryOldVal = MRI.createVirtualRegister(RC);
8037 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8038
8039 // Insert 2 basic blocks for the loop.
8040 MachineBasicBlock *StartMBB = MBB;
8041 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8042 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8043 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8044
8045 // StartMBB:
8046 // ...
8047 // %OrigOldVal = L Disp(%Base)
8048 // # fall through to LoopMBB
8049 MBB = StartMBB;
8050 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8051 .add(Base)
8052 .addImm(Disp)
8053 .addReg(0);
8054 MBB->addSuccessor(LoopMBB);
8055
8056 // LoopMBB:
8057 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8058 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8059 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8060 // ^^ The low BitSize bits contain the field
8061 // of interest.
8062 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8063 // ^^ Replace the upper 32-BitSize bits of the
8064 // swap value with those that we loaded and rotated.
8065 // %Dest = LL[CH] %OldValRot
8066 // CR %Dest, %CmpVal
8067 // JNE DoneMBB
8068 // # Fall through to SetMBB
8069 MBB = LoopMBB;
8070 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8071 .addReg(OrigOldVal).addMBB(StartMBB)
8072 .addReg(RetryOldVal).addMBB(SetMBB);
8073 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8074 .addReg(OrigSwapVal).addMBB(StartMBB)
8075 .addReg(RetrySwapVal).addMBB(SetMBB);
8076 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8077 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8078 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8079 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8080 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8081 .addReg(OldValRot);
8082 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8083 .addReg(Dest).addReg(CmpVal);
8084 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8085 .addImm(SystemZ::CCMASK_ICMP)
8086 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
8087 MBB->addSuccessor(DoneMBB);
8088 MBB->addSuccessor(SetMBB);
8089
8090 // SetMBB:
8091 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8092 // ^^ Rotate the new field to its proper position.
8093 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8094 // JNE LoopMBB
8095 // # fall through to ExitMBB
8096 MBB = SetMBB;
8097 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8098 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8099 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8100 .addReg(OldVal)
8101 .addReg(StoreVal)
8102 .add(Base)
8103 .addImm(Disp);
8104 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8105 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
8106 MBB->addSuccessor(LoopMBB);
8107 MBB->addSuccessor(DoneMBB);
8108
8109 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8110 // to the block after the loop. At this point, CC may have been defined
8111 // either by the CR in LoopMBB or by the CS in SetMBB.
8112 if (!MI.registerDefIsDead(SystemZ::CC))
8113 DoneMBB->addLiveIn(SystemZ::CC);
8114
8115 MI.eraseFromParent();
8116 return DoneMBB;
8117 }
8118
8119 // Emit a move from two GR64s to a GR128.
8120 MachineBasicBlock *
8121 SystemZTargetLowering::emitPair128(MachineInstr &MI,
8122 MachineBasicBlock *MBB) const {
8123 MachineFunction &MF = *MBB->getParent();
8124 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8125 MachineRegisterInfo &MRI = MF.getRegInfo();
8126 DebugLoc DL = MI.getDebugLoc();
8127
8128 Register Dest = MI.getOperand(0).getReg();
8129 Register Hi = MI.getOperand(1).getReg();
8130 Register Lo = MI.getOperand(2).getReg();
8131 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8132 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8133
8134 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8135 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8136 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8137 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8138 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8139
8140 MI.eraseFromParent();
8141 return MBB;
8142 }
8143
8144 // Emit an extension from a GR64 to a GR128. ClearEven is true
8145 // if the high register of the GR128 value must be cleared or false if
8146 // it's "don't care".
8147 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8148 MachineBasicBlock *MBB,
8149 bool ClearEven) const {
8150 MachineFunction &MF = *MBB->getParent();
8151 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8152 MachineRegisterInfo &MRI = MF.getRegInfo();
8153 DebugLoc DL = MI.getDebugLoc();
8154
8155 Register Dest = MI.getOperand(0).getReg();
8156 Register Src = MI.getOperand(1).getReg();
8157 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8158
8159 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8160 if (ClearEven) {
8161 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8162 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8163
8164 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8165 .addImm(0);
8166 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8167 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8168 In128 = NewIn128;
8169 }
8170 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8171 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8172
8173 MI.eraseFromParent();
8174 return MBB;
8175 }
8176
8177 MachineBasicBlock *
8178 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8179 MachineBasicBlock *MBB,
8180 unsigned Opcode, bool IsMemset) const {
8181 MachineFunction &MF = *MBB->getParent();
8182 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8183 MachineRegisterInfo &MRI = MF.getRegInfo();
8184 DebugLoc DL = MI.getDebugLoc();
8185
8186 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8187 uint64_t DestDisp = MI.getOperand(1).getImm();
8188 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8189 uint64_t SrcDisp;
8190
8191   // Fold the displacement Disp into the base register if it is out of range.
8192 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8193 if (!isUInt<12>(Disp)) {
8194 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8195 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8196 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8197 .add(Base).addImm(Disp).addReg(0);
8198 Base = MachineOperand::CreateReg(Reg, false);
8199 Disp = 0;
8200 }
8201 };
8202
8203 if (!IsMemset) {
8204 SrcBase = earlyUseOperand(MI.getOperand(2));
8205 SrcDisp = MI.getOperand(3).getImm();
8206 } else {
8207 SrcBase = DestBase;
8208 SrcDisp = DestDisp++;
8209 foldDisplIfNeeded(DestBase, DestDisp);
8210 }
8211
8212 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8213 bool IsImmForm = LengthMO.isImm();
8214 bool IsRegForm = !IsImmForm;
8215
8216 // Build and insert one Opcode of Length, with special treatment for memset.
8217 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8218 MachineBasicBlock::iterator InsPos,
8219 MachineOperand DBase, uint64_t DDisp,
8220 MachineOperand SBase, uint64_t SDisp,
8221 unsigned Length) -> void {
8222 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8223 if (IsMemset) {
8224 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8225 if (ByteMO.isImm())
8226 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8227 .add(SBase).addImm(SDisp).add(ByteMO);
8228 else
8229 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8230 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8231 if (--Length == 0)
8232 return;
8233 }
8234 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8235 .add(DBase).addImm(DDisp).addImm(Length)
8236 .add(SBase).addImm(SDisp)
8237 .setMemRefs(MI.memoperands());
8238 };
8239
8240 bool NeedsLoop = false;
8241 uint64_t ImmLength = 0;
8242 Register LenAdjReg = SystemZ::NoRegister;
8243 if (IsImmForm) {
8244 ImmLength = LengthMO.getImm();
8245 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8246 if (ImmLength == 0) {
8247 MI.eraseFromParent();
8248 return MBB;
8249 }
8250 if (Opcode == SystemZ::CLC) {
8251 if (ImmLength > 3 * 256)
8252 // A two-CLC sequence is a clear win over a loop, not least because
8253 // it needs only one branch. A three-CLC sequence needs the same
8254 // number of branches as a loop (i.e. 2), but is shorter. That
8255 // brings us to lengths greater than 768 bytes. It seems relatively
8256 // likely that a difference will be found within the first 768 bytes,
8257 // so we just optimize for the smallest number of branch
8258 // instructions, in order to avoid polluting the prediction buffer
8259 // too much.
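        // In other words, at most three CLCs (covering up to 768 bytes) are
        // emitted inline; anything longer uses a loop.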
8260 NeedsLoop = true;
8261 } else if (ImmLength > 6 * 256)
8262 // The heuristic we use is to prefer loops for anything that would
8263 // require 7 or more MVCs. With these kinds of sizes there isn't much
8264 // to choose between straight-line code and looping code, since the
8265 // time will be dominated by the MVCs themselves.
8266 NeedsLoop = true;
8267 } else {
8268 NeedsLoop = true;
8269 LenAdjReg = LengthMO.getReg();
8270 }
8271
8272 // When generating more than one CLC, all but the last will need to
8273 // branch to the end when a difference is found.
8274 MachineBasicBlock *EndMBB =
8275 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8276 ? SystemZ::splitBlockAfter(MI, MBB)
8277 : nullptr);
8278
8279 if (NeedsLoop) {
8280 Register StartCountReg =
8281 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8282 if (IsImmForm) {
8283 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8284 ImmLength &= 255;
8285 } else {
8286 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8287 .addReg(LenAdjReg)
8288 .addReg(0)
8289 .addImm(8);
8290 }
8291
8292 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8293 auto loadZeroAddress = [&]() -> MachineOperand {
8294 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8295 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8296 return MachineOperand::CreateReg(Reg, false);
8297 };
8298 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8299 DestBase = loadZeroAddress();
8300 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8301 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8302
8303 MachineBasicBlock *StartMBB = nullptr;
8304 MachineBasicBlock *LoopMBB = nullptr;
8305 MachineBasicBlock *NextMBB = nullptr;
8306 MachineBasicBlock *DoneMBB = nullptr;
8307 MachineBasicBlock *AllDoneMBB = nullptr;
8308
8309 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8310 Register StartDestReg =
8311 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8312
8313 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8314 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8315 Register ThisDestReg =
8316 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8317 Register NextSrcReg = MRI.createVirtualRegister(RC);
8318 Register NextDestReg =
8319 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8320 RC = &SystemZ::GR64BitRegClass;
8321 Register ThisCountReg = MRI.createVirtualRegister(RC);
8322 Register NextCountReg = MRI.createVirtualRegister(RC);
8323
8324 if (IsRegForm) {
8325 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8326 StartMBB = SystemZ::emitBlockAfter(MBB);
8327 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8328 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8329 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8330
8331 // MBB:
8332     // # Jump to AllDoneMBB if LenAdjReg denotes a length of 0, or fall thru to StartMBB.
8333 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8334 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
8335 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8336 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8337 .addMBB(AllDoneMBB);
8338 MBB->addSuccessor(AllDoneMBB);
8339 if (!IsMemset)
8340 MBB->addSuccessor(StartMBB);
8341 else {
8342 // MemsetOneCheckMBB:
8343 // # Jump to MemsetOneMBB for a memset of length 1, or
8344 // # fall thru to StartMBB.
8345 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8346 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8347 MBB->addSuccessor(MemsetOneCheckMBB);
8348 MBB = MemsetOneCheckMBB;
8349 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8350 .addReg(LenAdjReg).addImm(-1);
8351 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8352 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8353 .addMBB(MemsetOneMBB);
8354 MBB->addSuccessor(MemsetOneMBB, {10, 100});
8355 MBB->addSuccessor(StartMBB, {90, 100});
8356
8357 // MemsetOneMBB:
8358 // # Jump back to AllDoneMBB after a single MVI or STC.
8359 MBB = MemsetOneMBB;
8360 insertMemMemOp(MBB, MBB->end(),
8361 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
8362 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
8363 1);
8364 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
8365 MBB->addSuccessor(AllDoneMBB);
8366 }
8367
8368 // StartMBB:
8369 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
8370 MBB = StartMBB;
8371 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8372 .addReg(StartCountReg).addImm(0);
8373 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8374 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8375 .addMBB(DoneMBB);
8376 MBB->addSuccessor(DoneMBB);
8377 MBB->addSuccessor(LoopMBB);
8378 }
8379 else {
8380 StartMBB = MBB;
8381 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8382 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8383 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8384
8385 // StartMBB:
8386 // # fall through to LoopMBB
8387 MBB->addSuccessor(LoopMBB);
8388
8389 DestBase = MachineOperand::CreateReg(NextDestReg, false);
8390 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
8391 if (EndMBB && !ImmLength)
8392 // If the loop handled the whole CLC range, DoneMBB will be empty with
8393 // CC live-through into EndMBB, so add it as live-in.
8394 DoneMBB->addLiveIn(SystemZ::CC);
8395 }
8396
8397 // LoopMBB:
8398 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
8399 // [ %NextDestReg, NextMBB ]
8400 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
8401 // [ %NextSrcReg, NextMBB ]
8402 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
8403 // [ %NextCountReg, NextMBB ]
8404 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
8405 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
8406 // ( JLH EndMBB )
8407 //
8408 // The prefetch is used only for MVC. The JLH is used only for CLC.
8409 MBB = LoopMBB;
8410 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
8411 .addReg(StartDestReg).addMBB(StartMBB)
8412 .addReg(NextDestReg).addMBB(NextMBB);
8413 if (!HaveSingleBase)
8414 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
8415 .addReg(StartSrcReg).addMBB(StartMBB)
8416 .addReg(NextSrcReg).addMBB(NextMBB);
8417 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
8418 .addReg(StartCountReg).addMBB(StartMBB)
8419 .addReg(NextCountReg).addMBB(NextMBB);
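// For MVC, prefetch the destination for writing a few blocks (768 bytes)
// ahead of the current iteration.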
8420 if (Opcode == SystemZ::MVC)
8421 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
8422 .addImm(SystemZ::PFD_WRITE)
8423 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
8424 insertMemMemOp(MBB, MBB->end(),
8425 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
8426 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
8427 if (EndMBB) {
8428 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8429 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8430 .addMBB(EndMBB);
8431 MBB->addSuccessor(EndMBB);
8432 MBB->addSuccessor(NextMBB);
8433 }
8434
8435 // NextMBB:
8436 // %NextDestReg = LA 256(%ThisDestReg)
8437 // %NextSrcReg = LA 256(%ThisSrcReg)
8438 // %NextCountReg = AGHI %ThisCountReg, -1
8439 // CGHI %NextCountReg, 0
8440 // JLH LoopMBB
8441 // # fall through to DoneMBB
8442 //
8443 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
8444 MBB = NextMBB;
8445 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
8446 .addReg(ThisDestReg).addImm(256).addReg(0);
8447 if (!HaveSingleBase)
8448 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
8449 .addReg(ThisSrcReg).addImm(256).addReg(0);
8450 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
8451 .addReg(ThisCountReg).addImm(-1);
8452 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8453 .addReg(NextCountReg).addImm(0);
8454 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8455 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8456 .addMBB(LoopMBB);
8457 MBB->addSuccessor(LoopMBB);
8458 MBB->addSuccessor(DoneMBB);
8459
8460 MBB = DoneMBB;
8461 if (IsRegForm) {
8462 // DoneMBB:
8463 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
8464 // # Use EXecute Relative Long for the remainder of the bytes. The target
8465 // instruction of the EXRL will have a length field of 1 since 0 is an
8466 // illegal value. The number of bytes processed becomes (%LenAdjReg &
8467 // 0xff) + 1.
8468 // # Fall through to AllDoneMBB.
8469 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8470 Register RemDestReg = HaveSingleBase ? RemSrcReg
8471 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8472 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
8473 .addReg(StartDestReg).addMBB(StartMBB)
8474 .addReg(NextDestReg).addMBB(NextMBB);
8475 if (!HaveSingleBase)
8476 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
8477 .addReg(StartSrcReg).addMBB(StartMBB)
8478 .addReg(NextSrcReg).addMBB(NextMBB);
8479 if (IsMemset)
8480 insertMemMemOp(MBB, MBB->end(),
8481 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
8482 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
8483 MachineInstrBuilder EXRL_MIB =
8484 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
8485 .addImm(Opcode)
8486 .addReg(LenAdjReg)
8487 .addReg(RemDestReg).addImm(DestDisp)
8488 .addReg(RemSrcReg).addImm(SrcDisp);
8489 MBB->addSuccessor(AllDoneMBB);
8490 MBB = AllDoneMBB;
8491 if (Opcode != SystemZ::MVC) {
8492 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
8493 if (EndMBB)
8494 MBB->addLiveIn(SystemZ::CC);
8495 }
8496 }
8497 MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8498 }
8499
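// For example, if 600 bytes were still outstanding at this point, this would
// emit two 256-byte operations followed by an 88-byte one, bumping the
// displacements after each (and, for CLC, branching to EndMBB in between
// whenever a difference is found).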
8500 // Handle any remaining bytes with straight-line code.
8501 while (ImmLength > 0) {
8502 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
8503 // The previous iteration might have created out-of-range displacements.
8504 // Apply them using LA/LAY if so.
8505 foldDisplIfNeeded(DestBase, DestDisp);
8506 foldDisplIfNeeded(SrcBase, SrcDisp);
8507 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
8508 DestDisp += ThisLength;
8509 SrcDisp += ThisLength;
8510 ImmLength -= ThisLength;
8511 // If there's another CLC to go, branch to the end if a difference
8512 // was found.
8513 if (EndMBB && ImmLength > 0) {
8514 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
8515 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8516 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
8517 .addMBB(EndMBB);
8518 MBB->addSuccessor(EndMBB);
8519 MBB->addSuccessor(NextMBB);
8520 MBB = NextMBB;
8521 }
8522 }
8523 if (EndMBB) {
8524 MBB->addSuccessor(EndMBB);
8525 MBB = EndMBB;
8526 MBB->addLiveIn(SystemZ::CC);
8527 }
8528
8529 MI.eraseFromParent();
8530 return MBB;
8531 }
8532
8533 // Decompose string pseudo-instruction MI into a loop that continually performs
8534 // Opcode until CC != 3.
8535 MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
8536 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
8537 MachineFunction &MF = *MBB->getParent();
8538 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8539 MachineRegisterInfo &MRI = MF.getRegInfo();
8540 DebugLoc DL = MI.getDebugLoc();
8541
8542 Register End1Reg = MI.getOperand(0).getReg();
8543 Register Start1Reg = MI.getOperand(1).getReg();
8544 Register Start2Reg = MI.getOperand(2).getReg();
8545 Register CharReg = MI.getOperand(3).getReg();
8546
8547 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
8548 Register This1Reg = MRI.createVirtualRegister(RC);
8549 Register This2Reg = MRI.createVirtualRegister(RC);
8550 Register End2Reg = MRI.createVirtualRegister(RC);
8551
8552 MachineBasicBlock *StartMBB = MBB;
8553 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8554 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8555
8556 // StartMBB:
8557 // # fall through to LoopMBB
8558 MBB->addSuccessor(LoopMBB);
8559
8560 // LoopMBB:
8561 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
8562 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
8563 // R0L = %CharReg
8564 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
8565 // JO LoopMBB
8566 // # fall through to DoneMBB
8567 //
8568 // The load of R0L can be hoisted by post-RA LICM.
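//
// CLST, MVST and SRST set CC 3 when they stop after processing a
// CPU-determined number of bytes, so the loop reissues the instruction
// until it completes.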
8569 MBB = LoopMBB;
8570
8571 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
8572 .addReg(Start1Reg).addMBB(StartMBB)
8573 .addReg(End1Reg).addMBB(LoopMBB);
8574 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
8575 .addReg(Start2Reg).addMBB(StartMBB)
8576 .addReg(End2Reg).addMBB(LoopMBB);
8577 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
8578 BuildMI(MBB, DL, TII->get(Opcode))
8579 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
8580 .addReg(This1Reg).addReg(This2Reg);
8581 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8582 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
8583 MBB->addSuccessor(LoopMBB);
8584 MBB->addSuccessor(DoneMBB);
8585
8586 DoneMBB->addLiveIn(SystemZ::CC);
8587
8588 MI.eraseFromParent();
8589 return DoneMBB;
8590 }
8591
8592 // Update TBEGIN instruction with final opcode and register clobbers.
8593 MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
8594 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
8595 bool NoFloat) const {
8596 MachineFunction &MF = *MBB->getParent();
8597 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8598 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8599
8600 // Update opcode.
8601 MI.setDesc(TII->get(Opcode));
8602
8603 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
8604 // Make sure to add the corresponding GRSM bits if they are missing.
8605 uint64_t Control = MI.getOperand(2).getImm();
8606 static const unsigned GPRControlBit[16] = {
8607 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
8608 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
8609 };
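// Each bit of the general-register save mask covers an even/odd GPR pair
// (0x8000 for r0/r1 down to 0x0100 for r14/r15), which is why adjacent
// entries repeat. Pairs whose bit is clear are presumably not restored on
// abort, so they are modeled as clobbered below.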
8610 Control |= GPRControlBit[15];
8611 if (TFI->hasFP(MF))
8612 Control |= GPRControlBit[11];
8613 MI.getOperand(2).setImm(Control);
8614
8615 // Add GPR clobbers.
8616 for (int I = 0; I < 16; I++) {
8617 if ((Control & GPRControlBit[I]) == 0) {
8618 unsigned Reg = SystemZMC::GR64Regs[I];
8619 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8620 }
8621 }
8622
8623 // Add FPR/VR clobbers.
8624 if (!NoFloat && (Control & 4) != 0) {
8625 if (Subtarget.hasVector()) {
8626 for (unsigned Reg : SystemZMC::VR128Regs) {
8627 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8628 }
8629 } else {
8630 for (unsigned Reg : SystemZMC::FP64Regs) {
8631 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
8632 }
8633 }
8634 }
8635
8636 return MBB;
8637 }
8638
8639 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
8640 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
8641 MachineFunction &MF = *MBB->getParent();
8642 MachineRegisterInfo *MRI = &MF.getRegInfo();
8643 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8644 DebugLoc DL = MI.getDebugLoc();
8645
8646 Register SrcReg = MI.getOperand(0).getReg();
8647
8648 // Create new virtual register of the same class as source.
8649 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
8650 Register DstReg = MRI->createVirtualRegister(RC);
8651
8652 // Replace pseudo with a normal load-and-test that models the def as
8653 // well.
8654 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
8655 .addReg(SrcReg)
8656 .setMIFlags(MI.getFlags());
8657 MI.eraseFromParent();
8658
8659 return MBB;
8660 }
8661
8662 MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
8663 MachineInstr &MI, MachineBasicBlock *MBB) const {
8664 MachineFunction &MF = *MBB->getParent();
8665 MachineRegisterInfo *MRI = &MF.getRegInfo();
8666 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8667 DebugLoc DL = MI.getDebugLoc();
8668 const unsigned ProbeSize = getStackProbeSize(MF);
8669 Register DstReg = MI.getOperand(0).getReg();
8670 Register SizeReg = MI.getOperand(2).getReg();
8671
8672 MachineBasicBlock *StartMBB = MBB;
8673 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
8674 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
8675 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
8676 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
8677 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
8678
8679 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
8680 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
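// The probing compares below exist only to touch memory; the volatile memory
// operand keeps these otherwise unused loads from being deleted or reordered.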
8681
8682 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8683 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
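// Overall structure: allocate the requested size in ProbeSize-byte chunks,
// probing each chunk with a load, then allocate and probe any remainder
// smaller than ProbeSize.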
8684
8685 // LoopTestMBB
8686 // BRC TailTestMBB
8687 // # fallthrough to LoopBodyMBB
8688 StartMBB->addSuccessor(LoopTestMBB);
8689 MBB = LoopTestMBB;
8690 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
8691 .addReg(SizeReg)
8692 .addMBB(StartMBB)
8693 .addReg(IncReg)
8694 .addMBB(LoopBodyMBB);
8695 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
8696 .addReg(PHIReg)
8697 .addImm(ProbeSize);
8698 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8699 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
8700 .addMBB(TailTestMBB);
8701 MBB->addSuccessor(LoopBodyMBB);
8702 MBB->addSuccessor(TailTestMBB);
8703
8704 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
8705 // J LoopTestMBB
8706 MBB = LoopBodyMBB;
8707 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
8708 .addReg(PHIReg)
8709 .addImm(ProbeSize);
8710 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
8711 .addReg(SystemZ::R15D)
8712 .addImm(ProbeSize);
8713 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
8714 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
8715 .setMemRefs(VolLdMMO);
8716 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
8717 MBB->addSuccessor(LoopTestMBB);
8718
8719 // TailTestMBB
8720 // BRC DoneMBB
8721 // # fallthrough to TailMBB
8722 MBB = TailTestMBB;
8723 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8724 .addReg(PHIReg)
8725 .addImm(0);
8726 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8727 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
8728 .addMBB(DoneMBB);
8729 MBB->addSuccessor(TailMBB);
8730 MBB->addSuccessor(DoneMBB);
8731
8732 // TailMBB
8733 // # fallthrough to DoneMBB
8734 MBB = TailMBB;
8735 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
8736 .addReg(SystemZ::R15D)
8737 .addReg(PHIReg);
8738 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
8739 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
8740 .setMemRefs(VolLdMMO);
8741 MBB->addSuccessor(DoneMBB);
8742
8743 // DoneMBB
8744 MBB = DoneMBB;
8745 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
8746 .addReg(SystemZ::R15D);
8747
8748 MI.eraseFromParent();
8749 return DoneMBB;
8750 }
8751
8752 SDValue SystemZTargetLowering::
8753 getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
8754 MachineFunction &MF = DAG.getMachineFunction();
8755 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
8756 SDLoc DL(SP);
8757 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
8758 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
8759 }
8760
8761 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
8762 MachineInstr &MI, MachineBasicBlock *MBB) const {
8763 switch (MI.getOpcode()) {
8764 case SystemZ::Select32:
8765 case SystemZ::Select64:
8766 case SystemZ::SelectF32:
8767 case SystemZ::SelectF64:
8768 case SystemZ::SelectF128:
8769 case SystemZ::SelectVR32:
8770 case SystemZ::SelectVR64:
8771 case SystemZ::SelectVR128:
8772 return emitSelect(MI, MBB);
8773
8774 case SystemZ::CondStore8Mux:
8775 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
8776 case SystemZ::CondStore8MuxInv:
8777 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
8778 case SystemZ::CondStore16Mux:
8779 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
8780 case SystemZ::CondStore16MuxInv:
8781 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
8782 case SystemZ::CondStore32Mux:
8783 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
8784 case SystemZ::CondStore32MuxInv:
8785 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
8786 case SystemZ::CondStore8:
8787 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
8788 case SystemZ::CondStore8Inv:
8789 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
8790 case SystemZ::CondStore16:
8791 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
8792 case SystemZ::CondStore16Inv:
8793 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
8794 case SystemZ::CondStore32:
8795 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
8796 case SystemZ::CondStore32Inv:
8797 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
8798 case SystemZ::CondStore64:
8799 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
8800 case SystemZ::CondStore64Inv:
8801 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
8802 case SystemZ::CondStoreF32:
8803 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
8804 case SystemZ::CondStoreF32Inv:
8805 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
8806 case SystemZ::CondStoreF64:
8807 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
8808 case SystemZ::CondStoreF64Inv:
8809 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
8810
8811 case SystemZ::PAIR128:
8812 return emitPair128(MI, MBB);
8813 case SystemZ::AEXT128:
8814 return emitExt128(MI, MBB, false);
8815 case SystemZ::ZEXT128:
8816 return emitExt128(MI, MBB, true);
8817
8818 case SystemZ::ATOMIC_SWAPW:
8819 return emitAtomicLoadBinary(MI, MBB, 0, 0);
8820 case SystemZ::ATOMIC_SWAP_32:
8821 return emitAtomicLoadBinary(MI, MBB, 0, 32);
8822 case SystemZ::ATOMIC_SWAP_64:
8823 return emitAtomicLoadBinary(MI, MBB, 0, 64);
8824
8825 case SystemZ::ATOMIC_LOADW_AR:
8826 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 0);
8827 case SystemZ::ATOMIC_LOADW_AFI:
8828 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 0);
8829 case SystemZ::ATOMIC_LOAD_AR:
8830 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR, 32);
8831 case SystemZ::ATOMIC_LOAD_AHI:
8832 return emitAtomicLoadBinary(MI, MBB, SystemZ::AHI, 32);
8833 case SystemZ::ATOMIC_LOAD_AFI:
8834 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI, 32);
8835 case SystemZ::ATOMIC_LOAD_AGR:
8836 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGR, 64);
8837 case SystemZ::ATOMIC_LOAD_AGHI:
8838 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGHI, 64);
8839 case SystemZ::ATOMIC_LOAD_AGFI:
8840 return emitAtomicLoadBinary(MI, MBB, SystemZ::AGFI, 64);
8841
8842 case SystemZ::ATOMIC_LOADW_SR:
8843 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 0);
8844 case SystemZ::ATOMIC_LOAD_SR:
8845 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR, 32);
8846 case SystemZ::ATOMIC_LOAD_SGR:
8847 return emitAtomicLoadBinary(MI, MBB, SystemZ::SGR, 64);
8848
8849 case SystemZ::ATOMIC_LOADW_NR:
8850 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0);
8851 case SystemZ::ATOMIC_LOADW_NILH:
8852 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0);
8853 case SystemZ::ATOMIC_LOAD_NR:
8854 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32);
8855 case SystemZ::ATOMIC_LOAD_NILL:
8856 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32);
8857 case SystemZ::ATOMIC_LOAD_NILH:
8858 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32);
8859 case SystemZ::ATOMIC_LOAD_NILF:
8860 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32);
8861 case SystemZ::ATOMIC_LOAD_NGR:
8862 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64);
8863 case SystemZ::ATOMIC_LOAD_NILL64:
8864 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64);
8865 case SystemZ::ATOMIC_LOAD_NILH64:
8866 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64);
8867 case SystemZ::ATOMIC_LOAD_NIHL64:
8868 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64);
8869 case SystemZ::ATOMIC_LOAD_NIHH64:
8870 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64);
8871 case SystemZ::ATOMIC_LOAD_NILF64:
8872 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64);
8873 case SystemZ::ATOMIC_LOAD_NIHF64:
8874 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64);
8875
8876 case SystemZ::ATOMIC_LOADW_OR:
8877 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 0);
8878 case SystemZ::ATOMIC_LOADW_OILH:
8879 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 0);
8880 case SystemZ::ATOMIC_LOAD_OR:
8881 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR, 32);
8882 case SystemZ::ATOMIC_LOAD_OILL:
8883 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL, 32);
8884 case SystemZ::ATOMIC_LOAD_OILH:
8885 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH, 32);
8886 case SystemZ::ATOMIC_LOAD_OILF:
8887 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF, 32);
8888 case SystemZ::ATOMIC_LOAD_OGR:
8889 return emitAtomicLoadBinary(MI, MBB, SystemZ::OGR, 64);
8890 case SystemZ::ATOMIC_LOAD_OILL64:
8891 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILL64, 64);
8892 case SystemZ::ATOMIC_LOAD_OILH64:
8893 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH64, 64);
8894 case SystemZ::ATOMIC_LOAD_OIHL64:
8895 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHL64, 64);
8896 case SystemZ::ATOMIC_LOAD_OIHH64:
8897 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHH64, 64);
8898 case SystemZ::ATOMIC_LOAD_OILF64:
8899 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILF64, 64);
8900 case SystemZ::ATOMIC_LOAD_OIHF64:
8901 return emitAtomicLoadBinary(MI, MBB, SystemZ::OIHF64, 64);
8902
8903 case SystemZ::ATOMIC_LOADW_XR:
8904 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 0);
8905 case SystemZ::ATOMIC_LOADW_XILF:
8906 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 0);
8907 case SystemZ::ATOMIC_LOAD_XR:
8908 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR, 32);
8909 case SystemZ::ATOMIC_LOAD_XILF:
8910 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF, 32);
8911 case SystemZ::ATOMIC_LOAD_XGR:
8912 return emitAtomicLoadBinary(MI, MBB, SystemZ::XGR, 64);
8913 case SystemZ::ATOMIC_LOAD_XILF64:
8914 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF64, 64);
8915 case SystemZ::ATOMIC_LOAD_XIHF64:
8916 return emitAtomicLoadBinary(MI, MBB, SystemZ::XIHF64, 64);
8917
8918 case SystemZ::ATOMIC_LOADW_NRi:
8919 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 0, true);
8920 case SystemZ::ATOMIC_LOADW_NILHi:
8921 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 0, true);
8922 case SystemZ::ATOMIC_LOAD_NRi:
8923 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, 32, true);
8924 case SystemZ::ATOMIC_LOAD_NILLi:
8925 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL, 32, true);
8926 case SystemZ::ATOMIC_LOAD_NILHi:
8927 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, 32, true);
8928 case SystemZ::ATOMIC_LOAD_NILFi:
8929 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF, 32, true);
8930 case SystemZ::ATOMIC_LOAD_NGRi:
8931 return emitAtomicLoadBinary(MI, MBB, SystemZ::NGR, 64, true);
8932 case SystemZ::ATOMIC_LOAD_NILL64i:
8933 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILL64, 64, true);
8934 case SystemZ::ATOMIC_LOAD_NILH64i:
8935 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH64, 64, true);
8936 case SystemZ::ATOMIC_LOAD_NIHL64i:
8937 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHL64, 64, true);
8938 case SystemZ::ATOMIC_LOAD_NIHH64i:
8939 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHH64, 64, true);
8940 case SystemZ::ATOMIC_LOAD_NILF64i:
8941 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILF64, 64, true);
8942 case SystemZ::ATOMIC_LOAD_NIHF64i:
8943 return emitAtomicLoadBinary(MI, MBB, SystemZ::NIHF64, 64, true);
8944
8945 case SystemZ::ATOMIC_LOADW_MIN:
8946 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8947 SystemZ::CCMASK_CMP_LE, 0);
8948 case SystemZ::ATOMIC_LOAD_MIN_32:
8949 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8950 SystemZ::CCMASK_CMP_LE, 32);
8951 case SystemZ::ATOMIC_LOAD_MIN_64:
8952 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
8953 SystemZ::CCMASK_CMP_LE, 64);
8954
8955 case SystemZ::ATOMIC_LOADW_MAX:
8956 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8957 SystemZ::CCMASK_CMP_GE, 0);
8958 case SystemZ::ATOMIC_LOAD_MAX_32:
8959 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR,
8960 SystemZ::CCMASK_CMP_GE, 32);
8961 case SystemZ::ATOMIC_LOAD_MAX_64:
8962 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CGR,
8963 SystemZ::CCMASK_CMP_GE, 64);
8964
8965 case SystemZ::ATOMIC_LOADW_UMIN:
8966 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8967 SystemZ::CCMASK_CMP_LE, 0);
8968 case SystemZ::ATOMIC_LOAD_UMIN_32:
8969 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8970 SystemZ::CCMASK_CMP_LE, 32);
8971 case SystemZ::ATOMIC_LOAD_UMIN_64:
8972 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
8973 SystemZ::CCMASK_CMP_LE, 64);
8974
8975 case SystemZ::ATOMIC_LOADW_UMAX:
8976 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8977 SystemZ::CCMASK_CMP_GE, 0);
8978 case SystemZ::ATOMIC_LOAD_UMAX_32:
8979 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR,
8980 SystemZ::CCMASK_CMP_GE, 32);
8981 case SystemZ::ATOMIC_LOAD_UMAX_64:
8982 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLGR,
8983 SystemZ::CCMASK_CMP_GE, 64);
8984
8985 case SystemZ::ATOMIC_CMP_SWAPW:
8986 return emitAtomicCmpSwapW(MI, MBB);
8987 case SystemZ::MVCImm:
8988 case SystemZ::MVCReg:
8989 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
8990 case SystemZ::NCImm:
8991 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
8992 case SystemZ::OCImm:
8993 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
8994 case SystemZ::XCImm:
8995 case SystemZ::XCReg:
8996 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
8997 case SystemZ::CLCImm:
8998 case SystemZ::CLCReg:
8999 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9000 case SystemZ::MemsetImmImm:
9001 case SystemZ::MemsetImmReg:
9002 case SystemZ::MemsetRegImm:
9003 case SystemZ::MemsetRegReg:
9004 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9005 case SystemZ::CLSTLoop:
9006 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9007 case SystemZ::MVSTLoop:
9008 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9009 case SystemZ::SRSTLoop:
9010 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9011 case SystemZ::TBEGIN:
9012 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9013 case SystemZ::TBEGIN_nofloat:
9014 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9015 case SystemZ::TBEGINC:
9016 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9017 case SystemZ::LTEBRCompare_VecPseudo:
9018 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9019 case SystemZ::LTDBRCompare_VecPseudo:
9020 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9021 case SystemZ::LTXBRCompare_VecPseudo:
9022 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9023
9024 case SystemZ::PROBED_ALLOCA:
9025 return emitProbedAlloca(MI, MBB);
9026
9027 case TargetOpcode::STACKMAP:
9028 case TargetOpcode::PATCHPOINT:
9029 return emitPatchPoint(MI, MBB);
9030
9031 default:
9032 llvm_unreachable("Unexpected instr type to insert");
9033 }
9034 }
9035
9036 // This is only used by the isel schedulers, and is needed only to prevent
9037 // the compiler from crashing when list-ilp is used.
9038 const TargetRegisterClass *
9039 SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9040 if (VT == MVT::Untyped)
9041 return &SystemZ::ADDR128BitRegClass;
9042 return TargetLowering::getRepRegClassFor(VT);
9043 }
9044
9045 SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9046 SelectionDAG &DAG) const {
9047 SDLoc dl(Op);
9048 /*
9049 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9050 settings:
9051 00 Round to nearest
9052 01 Round to 0
9053 10 Round to +inf
9054 11 Round to -inf
9055
9056 FLT_ROUNDS, on the other hand, expects the following:
9057 -1 Undefined
9058 0 Round to 0
9059 1 Round to nearest
9060 2 Round to +inf
9061 3 Round to -inf
9062 */
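// Worked example of the transform below: FPC rounding bits 0b10 ("round to
// +inf") give CWD1 = 2 and CWD1 >> 1 = 1, so RetVal = (2 ^ 1) ^ 1 = 2, the
// FLT_ROUNDS value for "round to +inf".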
9063
9064 // Save FPC to register.
9065 SDValue Chain = Op.getOperand(0);
9066 SDValue EFPC(
9067 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9068 Chain = EFPC.getValue(1);
9069
9070 // Transform as necessary
9071 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9072 DAG.getConstant(3, dl, MVT::i32));
9073 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9074 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9075 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9076 DAG.getConstant(1, dl, MVT::i32)));
9077
9078 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9079 DAG.getConstant(1, dl, MVT::i32));
9080 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9081
9082 return DAG.getMergeValues({RetVal, Chain}, dl);
9083 }
9084