//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This is the parent TargetLowering class for hardware code gen
/// targets.
//
//===----------------------------------------------------------------------===//
140b57cec5SDimitry Andric 
150b57cec5SDimitry Andric #include "AMDGPUISelLowering.h"
160b57cec5SDimitry Andric #include "AMDGPU.h"
17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h"
18e8d8bef9SDimitry Andric #include "AMDGPUMachineFunction.h"
190b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h"
200b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h"
2106c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
2281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h"
230b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
24e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h"
2506c3fb27SDimitry Andric #include "llvm/IR/PatternMatch.h"
26e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h"
270b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
28e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h"
29e8d8bef9SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric #include "AMDGPUGenCallingConv.inc"
330b57cec5SDimitry Andric 
345ffd83dbSDimitry Andric static cl::opt<bool> AMDGPUBypassSlowDiv(
355ffd83dbSDimitry Andric   "amdgpu-bypass-slow-div",
365ffd83dbSDimitry Andric   cl::desc("Skip 64-bit divide for dynamic 32-bit values"),
375ffd83dbSDimitry Andric   cl::init(true));
385ffd83dbSDimitry Andric 
390b57cec5SDimitry Andric // Find a larger type to do a load / store of a vector with.
getEquivalentMemType(LLVMContext & Ctx,EVT VT)400b57cec5SDimitry Andric EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
410b57cec5SDimitry Andric   unsigned StoreSize = VT.getStoreSizeInBits();
420b57cec5SDimitry Andric   if (StoreSize <= 32)
430b57cec5SDimitry Andric     return EVT::getIntegerVT(Ctx, StoreSize);
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric   assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
460b57cec5SDimitry Andric   return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
470b57cec5SDimitry Andric }
480b57cec5SDimitry Andric 
numBitsUnsigned(SDValue Op,SelectionDAG & DAG)490b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) {
50349cc55cSDimitry Andric   return DAG.computeKnownBits(Op).countMaxActiveBits();
510b57cec5SDimitry Andric }
520b57cec5SDimitry Andric 
numBitsSigned(SDValue Op,SelectionDAG & DAG)530b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) {
540b57cec5SDimitry Andric   // In order for this to be a signed 24-bit value, bit 23, must
550b57cec5SDimitry Andric   // be a sign bit.
5604eeddc0SDimitry Andric   return DAG.ComputeMaxSignificantBits(Op);
570b57cec5SDimitry Andric }
580b57cec5SDimitry Andric 
AMDGPUTargetLowering(const TargetMachine & TM,const AMDGPUSubtarget & STI)590b57cec5SDimitry Andric AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
600b57cec5SDimitry Andric                                            const AMDGPUSubtarget &STI)
610b57cec5SDimitry Andric     : TargetLowering(TM), Subtarget(&STI) {
620b57cec5SDimitry Andric   // Lower floating point store/load to integer store/load to reduce the number
630b57cec5SDimitry Andric   // of patterns in tablegen.
640b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::f32, Promote);
650b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
660b57cec5SDimitry Andric 
670b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
680b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
690b57cec5SDimitry Andric 
700b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v3f32, Promote);
710b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32);
720b57cec5SDimitry Andric 
730b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
740b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
750b57cec5SDimitry Andric 
760b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v5f32, Promote);
770b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32);
780b57cec5SDimitry Andric 
79fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v6f32, Promote);
80fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32);
81fe6060f1SDimitry Andric 
82fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v7f32, Promote);
83fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32);
84fe6060f1SDimitry Andric 
850b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
860b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
870b57cec5SDimitry Andric 
88bdd1243dSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v9f32, Promote);
89bdd1243dSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v9f32, MVT::v9i32);
90bdd1243dSDimitry Andric 
91bdd1243dSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v10f32, Promote);
92bdd1243dSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v10f32, MVT::v10i32);
93bdd1243dSDimitry Andric 
94bdd1243dSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v11f32, Promote);
95bdd1243dSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v11f32, MVT::v11i32);
96bdd1243dSDimitry Andric 
97bdd1243dSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v12f32, Promote);
98bdd1243dSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v12f32, MVT::v12i32);
99bdd1243dSDimitry Andric 
1000b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
1010b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v32f32, Promote);
1040b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32);
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::i64, Promote);
1070b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
1080b57cec5SDimitry Andric 
1090b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1100b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
1110b57cec5SDimitry Andric 
1120b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::f64, Promote);
1130b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32);
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v2f64, Promote);
1160b57cec5SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32);
1170b57cec5SDimitry Andric 
118fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v3i64, Promote);
119fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32);
120fe6060f1SDimitry Andric 
1215ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v4i64, Promote);
1225ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32);
1235ffd83dbSDimitry Andric 
124fe6060f1SDimitry Andric   setOperationAction(ISD::LOAD, MVT::v3f64, Promote);
125fe6060f1SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32);
126fe6060f1SDimitry Andric 
1275ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v4f64, Promote);
1285ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32);
1295ffd83dbSDimitry Andric 
1305ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v8i64, Promote);
1315ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32);
1325ffd83dbSDimitry Andric 
1335ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v8f64, Promote);
1345ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32);
1355ffd83dbSDimitry Andric 
1365ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v16i64, Promote);
1375ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32);
1385ffd83dbSDimitry Andric 
1395ffd83dbSDimitry Andric   setOperationAction(ISD::LOAD, MVT::v16f64, Promote);
1405ffd83dbSDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32);
1415ffd83dbSDimitry Andric 
14206c3fb27SDimitry Andric   setOperationAction(ISD::LOAD, MVT::i128, Promote);
14306c3fb27SDimitry Andric   AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32);
14406c3fb27SDimitry Andric 
1450b57cec5SDimitry Andric   // There are no 64-bit extloads. These should be done as a 32-bit extload and
1460b57cec5SDimitry Andric   // an extension to 64-bit.
14781ad6265SDimitry Andric   for (MVT VT : MVT::integer_valuetypes())
14881ad6265SDimitry Andric     setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT,
14981ad6265SDimitry Andric                      Expand);
1500b57cec5SDimitry Andric 
1510b57cec5SDimitry Andric   for (MVT VT : MVT::integer_valuetypes()) {
1520b57cec5SDimitry Andric     if (VT == MVT::i64)
1530b57cec5SDimitry Andric       continue;
1540b57cec5SDimitry Andric 
15581ad6265SDimitry Andric     for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) {
15681ad6265SDimitry Andric       setLoadExtAction(Op, VT, MVT::i1, Promote);
15781ad6265SDimitry Andric       setLoadExtAction(Op, VT, MVT::i8, Legal);
15881ad6265SDimitry Andric       setLoadExtAction(Op, VT, MVT::i16, Legal);
15981ad6265SDimitry Andric       setLoadExtAction(Op, VT, MVT::i32, Expand);
16081ad6265SDimitry Andric     }
1610b57cec5SDimitry Andric   }
1620b57cec5SDimitry Andric 
16381ad6265SDimitry Andric   for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
16481ad6265SDimitry Andric     for (auto MemVT :
16581ad6265SDimitry Andric          {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16})
16681ad6265SDimitry Andric       setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT,
16781ad6265SDimitry Andric                        Expand);
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
170bdd1243dSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
1710b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
172cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
1738bcb0991SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand);
174cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand);
1750b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
176cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
1770b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand);
178cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand);
1798bcb0991SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand);
180cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand);
1818bcb0991SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand);
182cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand);
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
1850b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
186fe6060f1SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand);
1870b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
1880b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand);
1895ffd83dbSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand);
1900b57cec5SDimitry Andric 
1910b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
192bdd1243dSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
1930b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
194cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
195fe6060f1SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand);
196cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand);
1970b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
198cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
1990b57cec5SDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand);
200cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand);
2015ffd83dbSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand);
202cb14a3feSDimitry Andric   setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand);
2030b57cec5SDimitry Andric 
2040b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::f32, Promote);
2050b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v2f32, Promote);
2080b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
2090b57cec5SDimitry Andric 
2100b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v3f32, Promote);
2110b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32);
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v4f32, Promote);
2140b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
2150b57cec5SDimitry Andric 
2160b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v5f32, Promote);
2170b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32);
2180b57cec5SDimitry Andric 
219fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v6f32, Promote);
220fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32);
221fe6060f1SDimitry Andric 
222fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v7f32, Promote);
223fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32);
224fe6060f1SDimitry Andric 
2250b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v8f32, Promote);
2260b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
2270b57cec5SDimitry Andric 
228bdd1243dSDimitry Andric   setOperationAction(ISD::STORE, MVT::v9f32, Promote);
229bdd1243dSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v9f32, MVT::v9i32);
230bdd1243dSDimitry Andric 
231bdd1243dSDimitry Andric   setOperationAction(ISD::STORE, MVT::v10f32, Promote);
232bdd1243dSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v10f32, MVT::v10i32);
233bdd1243dSDimitry Andric 
234bdd1243dSDimitry Andric   setOperationAction(ISD::STORE, MVT::v11f32, Promote);
235bdd1243dSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v11f32, MVT::v11i32);
236bdd1243dSDimitry Andric 
237bdd1243dSDimitry Andric   setOperationAction(ISD::STORE, MVT::v12f32, Promote);
238bdd1243dSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v12f32, MVT::v12i32);
239bdd1243dSDimitry Andric 
2400b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v16f32, Promote);
2410b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v32f32, Promote);
2440b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32);
2450b57cec5SDimitry Andric 
2460b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::i64, Promote);
2470b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v2i64, Promote);
2500b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
2510b57cec5SDimitry Andric 
2520b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::f64, Promote);
2530b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32);
2540b57cec5SDimitry Andric 
2550b57cec5SDimitry Andric   setOperationAction(ISD::STORE, MVT::v2f64, Promote);
2560b57cec5SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32);
2570b57cec5SDimitry Andric 
258fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v3i64, Promote);
259fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32);
260fe6060f1SDimitry Andric 
261fe6060f1SDimitry Andric   setOperationAction(ISD::STORE, MVT::v3f64, Promote);
262fe6060f1SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32);
263fe6060f1SDimitry Andric 
2645ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v4i64, Promote);
2655ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32);
2665ffd83dbSDimitry Andric 
2675ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v4f64, Promote);
2685ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32);
2695ffd83dbSDimitry Andric 
2705ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v8i64, Promote);
2715ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32);
2725ffd83dbSDimitry Andric 
2735ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v8f64, Promote);
2745ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32);
2755ffd83dbSDimitry Andric 
2765ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v16i64, Promote);
2775ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32);
2785ffd83dbSDimitry Andric 
2795ffd83dbSDimitry Andric   setOperationAction(ISD::STORE, MVT::v16f64, Promote);
2805ffd83dbSDimitry Andric   AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32);
2815ffd83dbSDimitry Andric 
28206c3fb27SDimitry Andric   setOperationAction(ISD::STORE, MVT::i128, Promote);
28306c3fb27SDimitry Andric   AddPromotedToType(ISD::STORE, MVT::i128, MVT::v4i32);
28406c3fb27SDimitry Andric 
2850b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i1, Expand);
2860b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i8, Expand);
2870b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
2880b57cec5SDimitry Andric   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand);
2910b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand);
2920b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand);
2930b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
2940b57cec5SDimitry Andric 
295bdd1243dSDimitry Andric   setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
2960b57cec5SDimitry Andric   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
2970b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand);
2988bcb0991SDimitry Andric   setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand);
2990b57cec5SDimitry Andric   setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand);
3000b57cec5SDimitry Andric   setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand);
3018bcb0991SDimitry Andric   setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand);
3028bcb0991SDimitry Andric   setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand);
3030b57cec5SDimitry Andric 
304bdd1243dSDimitry Andric   setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
3050b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
3060b57cec5SDimitry Andric   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
3070b57cec5SDimitry Andric 
3080b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
3090b57cec5SDimitry Andric   setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
3100b57cec5SDimitry Andric 
311fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand);
312fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand);
313fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand);
314fe6060f1SDimitry Andric   setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand);
315fe6060f1SDimitry Andric 
3165ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand);
3175ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand);
3180b57cec5SDimitry Andric   setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand);
3190b57cec5SDimitry Andric   setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand);
3200b57cec5SDimitry Andric 
3210b57cec5SDimitry Andric   setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand);
3220b57cec5SDimitry Andric   setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand);
3230b57cec5SDimitry Andric 
3245ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand);
3255ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand);
3265ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
3275ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand);
3285ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
3295ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand);
3305ffd83dbSDimitry Andric   setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand);
3310b57cec5SDimitry Andric 
33281ad6265SDimitry Andric   setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal);
33381ad6265SDimitry Andric   setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal);
3340b57cec5SDimitry Andric 
33581ad6265SDimitry Andric   setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand);
3360b57cec5SDimitry Andric 
3375f757f3fSDimitry Andric   // For R600, this is totally unsupported, just custom lower to produce an
3385f757f3fSDimitry Andric   // error.
3390b57cec5SDimitry Andric   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
3400b57cec5SDimitry Andric 
3410b57cec5SDimitry Andric   // Library functions.  These default to Expand, but we have instructions
3420b57cec5SDimitry Andric   // for them.
3435f757f3fSDimitry Andric   setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR,
3445f757f3fSDimitry Andric                       ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
34581ad6265SDimitry Andric                      MVT::f32, Legal);
3460b57cec5SDimitry Andric 
34706c3fb27SDimitry Andric   setOperationAction(ISD::FLOG2, MVT::f32, Custom);
34881ad6265SDimitry Andric   setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
3490b57cec5SDimitry Andric 
3505f757f3fSDimitry Andric   setOperationAction(
3515f757f3fSDimitry Andric       {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32,
35206c3fb27SDimitry Andric       Custom);
3530b57cec5SDimitry Andric 
354bdd1243dSDimitry Andric   setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
355bdd1243dSDimitry Andric 
3565f757f3fSDimitry Andric   setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
3570b57cec5SDimitry Andric 
35881ad6265SDimitry Andric   setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom);
3590b57cec5SDimitry Andric 
360bdd1243dSDimitry Andric   if (Subtarget->has16BitInsts())
361bdd1243dSDimitry Andric     setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
36206c3fb27SDimitry Andric   else {
363bdd1243dSDimitry Andric     setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
36406c3fb27SDimitry Andric     setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
36506c3fb27SDimitry Andric   }
36606c3fb27SDimitry Andric 
3675f757f3fSDimitry Andric   setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16,
3685f757f3fSDimitry Andric                      Custom);
369bdd1243dSDimitry Andric 
370bdd1243dSDimitry Andric   // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
371bdd1243dSDimitry Andric   // scalarization code. Can be removed when IS_FPCLASS expand isn't called by
372bdd1243dSDimitry Andric   // default unless marked custom/legal.
373bdd1243dSDimitry Andric   setOperationAction(
374bdd1243dSDimitry Andric       ISD::IS_FPCLASS,
375bdd1243dSDimitry Andric       {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32,
376bdd1243dSDimitry Andric        MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32,
377bdd1243dSDimitry Andric        MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64},
378bdd1243dSDimitry Andric       Custom);
379bdd1243dSDimitry Andric 
3800b57cec5SDimitry Andric   // Expand to fneg + fadd.
3810b57cec5SDimitry Andric   setOperationAction(ISD::FSUB, MVT::f64, Expand);
3820b57cec5SDimitry Andric 
38381ad6265SDimitry Andric   setOperationAction(ISD::CONCAT_VECTORS,
38481ad6265SDimitry Andric                      {MVT::v3i32,  MVT::v3f32,  MVT::v4i32,  MVT::v4f32,
38581ad6265SDimitry Andric                       MVT::v5i32,  MVT::v5f32,  MVT::v6i32,  MVT::v6f32,
386bdd1243dSDimitry Andric                       MVT::v7i32,  MVT::v7f32,  MVT::v8i32,  MVT::v8f32,
387bdd1243dSDimitry Andric                       MVT::v9i32,  MVT::v9f32,  MVT::v10i32, MVT::v10f32,
388bdd1243dSDimitry Andric                       MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32},
38981ad6265SDimitry Andric                      Custom);
3901db9f3b2SDimitry Andric 
3911db9f3b2SDimitry Andric   // FIXME: Why is v8f16/v8bf16 missing?
39281ad6265SDimitry Andric   setOperationAction(
39381ad6265SDimitry Andric       ISD::EXTRACT_SUBVECTOR,
3941db9f3b2SDimitry Andric       {MVT::v2f16,  MVT::v2bf16, MVT::v2i16,  MVT::v4f16,  MVT::v4bf16,
3951db9f3b2SDimitry Andric        MVT::v4i16,  MVT::v2f32,  MVT::v2i32,  MVT::v3f32,  MVT::v3i32,
3961db9f3b2SDimitry Andric        MVT::v4f32,  MVT::v4i32,  MVT::v5f32,  MVT::v5i32,  MVT::v6f32,
3971db9f3b2SDimitry Andric        MVT::v6i32,  MVT::v7f32,  MVT::v7i32,  MVT::v8f32,  MVT::v8i32,
3981db9f3b2SDimitry Andric        MVT::v9f32,  MVT::v9i32,  MVT::v10i32, MVT::v10f32, MVT::v11i32,
3991db9f3b2SDimitry Andric        MVT::v11f32, MVT::v12i32, MVT::v12f32, MVT::v16f16, MVT::v16bf16,
4001db9f3b2SDimitry Andric        MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32,
4011db9f3b2SDimitry Andric        MVT::v2f64,  MVT::v2i64,  MVT::v3f64,  MVT::v3i64,  MVT::v4f64,
4021db9f3b2SDimitry Andric        MVT::v4i64,  MVT::v8f64,  MVT::v8i64,  MVT::v16f64, MVT::v16i64,
4031db9f3b2SDimitry Andric        MVT::v32i16, MVT::v32f16, MVT::v32bf16},
40481ad6265SDimitry Andric       Custom);
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
40781ad6265SDimitry Andric   setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom);
4080b57cec5SDimitry Andric 
4090b57cec5SDimitry Andric   const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
4100b57cec5SDimitry Andric   for (MVT VT : ScalarIntVTs) {
4110b57cec5SDimitry Andric     // These should use [SU]DIVREM, so set them to expand
41281ad6265SDimitry Andric     setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT,
41381ad6265SDimitry Andric                        Expand);
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric     // GPU does not have divrem function for signed or unsigned.
41681ad6265SDimitry Andric     setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom);
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric     // GPU does not have [S|U]MUL_LOHI functions as a single instruction.
41981ad6265SDimitry Andric     setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
4200b57cec5SDimitry Andric 
42181ad6265SDimitry Andric     setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand);
4220b57cec5SDimitry Andric 
4230b57cec5SDimitry Andric     // AMDGPU uses ADDC/SUBC/ADDE/SUBE
42481ad6265SDimitry Andric     setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal);
4250b57cec5SDimitry Andric   }
4260b57cec5SDimitry Andric 
4275ffd83dbSDimitry Andric   // The hardware supports 32-bit FSHR, but not FSHL.
4285ffd83dbSDimitry Andric   setOperationAction(ISD::FSHR, MVT::i32, Legal);
4295ffd83dbSDimitry Andric 
4300b57cec5SDimitry Andric   // The hardware supports 32-bit ROTR, but not ROTL.
43181ad6265SDimitry Andric   setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand);
4320b57cec5SDimitry Andric   setOperationAction(ISD::ROTR, MVT::i64, Expand);
4330b57cec5SDimitry Andric 
43481ad6265SDimitry Andric   setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand);
435e8d8bef9SDimitry Andric 
43681ad6265SDimitry Andric   setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand);
43781ad6265SDimitry Andric   setOperationAction(
43881ad6265SDimitry Andric       {ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
43981ad6265SDimitry Andric       MVT::i64, Custom);
4400b57cec5SDimitry Andric   setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
4410b57cec5SDimitry Andric 
44281ad6265SDimitry Andric   setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32,
44381ad6265SDimitry Andric                      Legal);
4440b57cec5SDimitry Andric 
44581ad6265SDimitry Andric   setOperationAction(
44681ad6265SDimitry Andric       {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
44781ad6265SDimitry Andric       MVT::i64, Custom);
4480b57cec5SDimitry Andric 
4497a6dacacSDimitry Andric   for (auto VT : {MVT::i8, MVT::i16})
4507a6dacacSDimitry Andric     setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, VT, Custom);
4517a6dacacSDimitry Andric 
4520b57cec5SDimitry Andric   static const MVT::SimpleValueType VectorIntTypes[] = {
453bdd1243dSDimitry Andric       MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32,
454bdd1243dSDimitry Andric       MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32};
4550b57cec5SDimitry Andric 
4560b57cec5SDimitry Andric   for (MVT VT : VectorIntTypes) {
4570b57cec5SDimitry Andric     // Expand the following operations for the current type by default.
45881ad6265SDimitry Andric     setOperationAction({ISD::ADD,        ISD::AND,     ISD::FP_TO_SINT,
45981ad6265SDimitry Andric                         ISD::FP_TO_UINT, ISD::MUL,     ISD::MULHU,
46081ad6265SDimitry Andric                         ISD::MULHS,      ISD::OR,      ISD::SHL,
46181ad6265SDimitry Andric                         ISD::SRA,        ISD::SRL,     ISD::ROTL,
46281ad6265SDimitry Andric                         ISD::ROTR,       ISD::SUB,     ISD::SINT_TO_FP,
46381ad6265SDimitry Andric                         ISD::UINT_TO_FP, ISD::SDIV,    ISD::UDIV,
46481ad6265SDimitry Andric                         ISD::SREM,       ISD::UREM,    ISD::SMUL_LOHI,
46581ad6265SDimitry Andric                         ISD::UMUL_LOHI,  ISD::SDIVREM, ISD::UDIVREM,
46681ad6265SDimitry Andric                         ISD::SELECT,     ISD::VSELECT, ISD::SELECT_CC,
46781ad6265SDimitry Andric                         ISD::XOR,        ISD::BSWAP,   ISD::CTPOP,
46881ad6265SDimitry Andric                         ISD::CTTZ,       ISD::CTLZ,    ISD::VECTOR_SHUFFLE,
46981ad6265SDimitry Andric                         ISD::SETCC},
47081ad6265SDimitry Andric                        VT, Expand);
4710b57cec5SDimitry Andric   }
4720b57cec5SDimitry Andric 
4730b57cec5SDimitry Andric   static const MVT::SimpleValueType FloatVectorTypes[] = {
474bdd1243dSDimitry Andric       MVT::v2f32, MVT::v3f32,  MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32,
475bdd1243dSDimitry Andric       MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32};
4760b57cec5SDimitry Andric 
4770b57cec5SDimitry Andric   for (MVT VT : FloatVectorTypes) {
47881ad6265SDimitry Andric     setOperationAction(
4795f757f3fSDimitry Andric         {ISD::FABS,          ISD::FMINNUM,        ISD::FMAXNUM,
4805f757f3fSDimitry Andric          ISD::FADD,          ISD::FCEIL,          ISD::FCOS,
4815f757f3fSDimitry Andric          ISD::FDIV,          ISD::FEXP2,          ISD::FEXP,
4825f757f3fSDimitry Andric          ISD::FEXP10,        ISD::FLOG2,          ISD::FREM,
4835f757f3fSDimitry Andric          ISD::FLOG,          ISD::FLOG10,         ISD::FPOW,
4845f757f3fSDimitry Andric          ISD::FFLOOR,        ISD::FTRUNC,         ISD::FMUL,
4855f757f3fSDimitry Andric          ISD::FMA,           ISD::FRINT,          ISD::FNEARBYINT,
4865f757f3fSDimitry Andric          ISD::FSQRT,         ISD::FSIN,           ISD::FSUB,
4875f757f3fSDimitry Andric          ISD::FNEG,          ISD::VSELECT,        ISD::SELECT_CC,
4885f757f3fSDimitry Andric          ISD::FCOPYSIGN,     ISD::VECTOR_SHUFFLE, ISD::SETCC,
4895f757f3fSDimitry Andric          ISD::FCANONICALIZE, ISD::FROUNDEVEN},
49081ad6265SDimitry Andric         VT, Expand);
4910b57cec5SDimitry Andric   }
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric   // This causes using an unrolled select operation rather than expansion with
4940b57cec5SDimitry Andric   // bit operations. This is in general better, but the alternative using BFI
4950b57cec5SDimitry Andric   // instructions may be better if the select sources are SGPRs.
4960b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v2f32, Promote);
4970b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32);
4980b57cec5SDimitry Andric 
4990b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v3f32, Promote);
5000b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32);
5010b57cec5SDimitry Andric 
5020b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
5030b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
5040b57cec5SDimitry Andric 
5050b57cec5SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v5f32, Promote);
5060b57cec5SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32);
5070b57cec5SDimitry Andric 
508fe6060f1SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v6f32, Promote);
509fe6060f1SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32);
510fe6060f1SDimitry Andric 
511fe6060f1SDimitry Andric   setOperationAction(ISD::SELECT, MVT::v7f32, Promote);
512fe6060f1SDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32);
513fe6060f1SDimitry Andric 
514bdd1243dSDimitry Andric   setOperationAction(ISD::SELECT, MVT::v9f32, Promote);
515bdd1243dSDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v9f32, MVT::v9i32);
516bdd1243dSDimitry Andric 
517bdd1243dSDimitry Andric   setOperationAction(ISD::SELECT, MVT::v10f32, Promote);
518bdd1243dSDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v10f32, MVT::v10i32);
519bdd1243dSDimitry Andric 
520bdd1243dSDimitry Andric   setOperationAction(ISD::SELECT, MVT::v11f32, Promote);
521bdd1243dSDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v11f32, MVT::v11i32);
522bdd1243dSDimitry Andric 
523bdd1243dSDimitry Andric   setOperationAction(ISD::SELECT, MVT::v12f32, Promote);
524bdd1243dSDimitry Andric   AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32);
525bdd1243dSDimitry Andric 
526cb14a3feSDimitry Andric   // Disable most libcalls.
527cb14a3feSDimitry Andric   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
528cb14a3feSDimitry Andric     if (I < RTLIB::ATOMIC_LOAD || I > RTLIB::ATOMIC_FETCH_NAND_16)
5290b57cec5SDimitry Andric       setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
530cb14a3feSDimitry Andric   }
5310b57cec5SDimitry Andric 
5320b57cec5SDimitry Andric   setSchedulingPreference(Sched::RegPressure);
5330b57cec5SDimitry Andric   setJumpIsExpensive(true);
5340b57cec5SDimitry Andric 
5350b57cec5SDimitry Andric   // FIXME: This is only partially true. If we have to do vector compares, any
5360b57cec5SDimitry Andric   // SGPR pair can be a condition register. If we have a uniform condition, we
5370b57cec5SDimitry Andric   // are better off doing SALU operations, where there is only one SCC. For now,
5380b57cec5SDimitry Andric   // we don't have a way of knowing during instruction selection if a condition
5390b57cec5SDimitry Andric   // will be uniform and we always use vector compares. Assume we are using
5400b57cec5SDimitry Andric   // vector compares until that is fixed.
5410b57cec5SDimitry Andric   setHasMultipleConditionRegisters(true);
5420b57cec5SDimitry Andric 
5430b57cec5SDimitry Andric   setMinCmpXchgSizeInBits(32);
5440b57cec5SDimitry Andric   setSupportsUnalignedAtomics(false);
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric   PredictableSelectIsExpensive = false;
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric   // We want to find all load dependencies for long chains of stores to enable
5490b57cec5SDimitry Andric   // merging into very wide vectors. The problem is with vectors with > 4
5500b57cec5SDimitry Andric   // elements. MergeConsecutiveStores will attempt to merge these because x8/x16
5510b57cec5SDimitry Andric   // vectors are a legal type, even though we have to split the loads
5520b57cec5SDimitry Andric   // usually. When we can more precisely specify load legality per address
5530b57cec5SDimitry Andric   // space, we should be able to make FindBetterChain/MergeConsecutiveStores
5540b57cec5SDimitry Andric   // smarter so that they can figure out what to do in 2 iterations without all
5550b57cec5SDimitry Andric   // N > 4 stores on the same chain.
5560b57cec5SDimitry Andric   GatherAllAliasesMaxDepth = 16;
5570b57cec5SDimitry Andric 
5580b57cec5SDimitry Andric   // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry
5590b57cec5SDimitry Andric   // about these during lowering.
5600b57cec5SDimitry Andric   MaxStoresPerMemcpy  = 0xffffffff;
5610b57cec5SDimitry Andric   MaxStoresPerMemmove = 0xffffffff;
5620b57cec5SDimitry Andric   MaxStoresPerMemset  = 0xffffffff;
5630b57cec5SDimitry Andric 
5645ffd83dbSDimitry Andric   // The expansion for 64-bit division is enormous.
5655ffd83dbSDimitry Andric   if (AMDGPUBypassSlowDiv)
5665ffd83dbSDimitry Andric     addBypassSlowDiv(64, 32);
5675ffd83dbSDimitry Andric 
56881ad6265SDimitry Andric   setTargetDAGCombine({ISD::BITCAST,    ISD::SHL,
56981ad6265SDimitry Andric                        ISD::SRA,        ISD::SRL,
57081ad6265SDimitry Andric                        ISD::TRUNCATE,   ISD::MUL,
57181ad6265SDimitry Andric                        ISD::SMUL_LOHI,  ISD::UMUL_LOHI,
57281ad6265SDimitry Andric                        ISD::MULHU,      ISD::MULHS,
57381ad6265SDimitry Andric                        ISD::SELECT,     ISD::SELECT_CC,
57481ad6265SDimitry Andric                        ISD::STORE,      ISD::FADD,
57581ad6265SDimitry Andric                        ISD::FSUB,       ISD::FNEG,
57681ad6265SDimitry Andric                        ISD::FABS,       ISD::AssertZext,
57781ad6265SDimitry Andric                        ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN});
578cb14a3feSDimitry Andric 
579cb14a3feSDimitry Andric   setMaxAtomicSizeInBitsSupported(64);
580b3edf446SDimitry Andric   setMaxDivRemBitWidthSupported(64);
5810b57cec5SDimitry Andric }
5820b57cec5SDimitry Andric 
mayIgnoreSignedZero(SDValue Op) const583e8d8bef9SDimitry Andric bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
584e8d8bef9SDimitry Andric   if (getTargetMachine().Options.NoSignedZerosFPMath)
585e8d8bef9SDimitry Andric     return true;
586e8d8bef9SDimitry Andric 
587e8d8bef9SDimitry Andric   const auto Flags = Op.getNode()->getFlags();
588e8d8bef9SDimitry Andric   if (Flags.hasNoSignedZeros())
589e8d8bef9SDimitry Andric     return true;
590e8d8bef9SDimitry Andric 
591e8d8bef9SDimitry Andric   return false;
592e8d8bef9SDimitry Andric }
593e8d8bef9SDimitry Andric 
5940b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
5950b57cec5SDimitry Andric // Target Information
5960b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
5970b57cec5SDimitry Andric 
5980b57cec5SDimitry Andric LLVM_READNONE
fnegFoldsIntoOpcode(unsigned Opc)59906c3fb27SDimitry Andric static bool fnegFoldsIntoOpcode(unsigned Opc) {
6000b57cec5SDimitry Andric   switch (Opc) {
6010b57cec5SDimitry Andric   case ISD::FADD:
6020b57cec5SDimitry Andric   case ISD::FSUB:
6030b57cec5SDimitry Andric   case ISD::FMUL:
6040b57cec5SDimitry Andric   case ISD::FMA:
6050b57cec5SDimitry Andric   case ISD::FMAD:
6060b57cec5SDimitry Andric   case ISD::FMINNUM:
6070b57cec5SDimitry Andric   case ISD::FMAXNUM:
6080b57cec5SDimitry Andric   case ISD::FMINNUM_IEEE:
6090b57cec5SDimitry Andric   case ISD::FMAXNUM_IEEE:
6105f757f3fSDimitry Andric   case ISD::FMINIMUM:
6115f757f3fSDimitry Andric   case ISD::FMAXIMUM:
61206c3fb27SDimitry Andric   case ISD::SELECT:
6130b57cec5SDimitry Andric   case ISD::FSIN:
6140b57cec5SDimitry Andric   case ISD::FTRUNC:
6150b57cec5SDimitry Andric   case ISD::FRINT:
6160b57cec5SDimitry Andric   case ISD::FNEARBYINT:
6175f757f3fSDimitry Andric   case ISD::FROUNDEVEN:
6180b57cec5SDimitry Andric   case ISD::FCANONICALIZE:
6190b57cec5SDimitry Andric   case AMDGPUISD::RCP:
6200b57cec5SDimitry Andric   case AMDGPUISD::RCP_LEGACY:
6210b57cec5SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
6220b57cec5SDimitry Andric   case AMDGPUISD::SIN_HW:
6230b57cec5SDimitry Andric   case AMDGPUISD::FMUL_LEGACY:
6240b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY:
6250b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY:
6260b57cec5SDimitry Andric   case AMDGPUISD::FMED3:
627e8d8bef9SDimitry Andric     // TODO: handle llvm.amdgcn.fma.legacy
6280b57cec5SDimitry Andric     return true;
62906c3fb27SDimitry Andric   case ISD::BITCAST:
63006c3fb27SDimitry Andric     llvm_unreachable("bitcast is special cased");
6310b57cec5SDimitry Andric   default:
6320b57cec5SDimitry Andric     return false;
6330b57cec5SDimitry Andric   }
6340b57cec5SDimitry Andric }
6350b57cec5SDimitry Andric 
fnegFoldsIntoOp(const SDNode * N)63606c3fb27SDimitry Andric static bool fnegFoldsIntoOp(const SDNode *N) {
63706c3fb27SDimitry Andric   unsigned Opc = N->getOpcode();
63806c3fb27SDimitry Andric   if (Opc == ISD::BITCAST) {
63906c3fb27SDimitry Andric     // TODO: Is there a benefit to checking the conditions performFNegCombine
64006c3fb27SDimitry Andric     // does? We don't for the other cases.
64106c3fb27SDimitry Andric     SDValue BCSrc = N->getOperand(0);
64206c3fb27SDimitry Andric     if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
64306c3fb27SDimitry Andric       return BCSrc.getNumOperands() == 2 &&
64406c3fb27SDimitry Andric              BCSrc.getOperand(1).getValueSizeInBits() == 32;
64506c3fb27SDimitry Andric     }
64606c3fb27SDimitry Andric 
64706c3fb27SDimitry Andric     return BCSrc.getOpcode() == ISD::SELECT && BCSrc.getValueType() == MVT::f32;
64806c3fb27SDimitry Andric   }
64906c3fb27SDimitry Andric 
65006c3fb27SDimitry Andric   return fnegFoldsIntoOpcode(Opc);
65106c3fb27SDimitry Andric }
65206c3fb27SDimitry Andric 
6530b57cec5SDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit
6540b57cec5SDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source
6550b57cec5SDimitry Andric /// modifiers.
6560b57cec5SDimitry Andric LLVM_READONLY
opMustUseVOP3Encoding(const SDNode * N,MVT VT)6570b57cec5SDimitry Andric static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
65806c3fb27SDimitry Andric   return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||
65906c3fb27SDimitry Andric          VT == MVT::f64;
66006c3fb27SDimitry Andric }
66106c3fb27SDimitry Andric 
66206c3fb27SDimitry Andric /// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the
66306c3fb27SDimitry Andric /// type for ISD::SELECT.
66406c3fb27SDimitry Andric LLVM_READONLY
selectSupportsSourceMods(const SDNode * N)66506c3fb27SDimitry Andric static bool selectSupportsSourceMods(const SDNode *N) {
66606c3fb27SDimitry Andric   // TODO: Only applies if select will be vector
66706c3fb27SDimitry Andric   return N->getValueType(0) == MVT::f32;
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric 
6700b57cec5SDimitry Andric // Most FP instructions support source modifiers, but this could be refined
6710b57cec5SDimitry Andric // slightly.
6720b57cec5SDimitry Andric LLVM_READONLY
hasSourceMods(const SDNode * N)6730b57cec5SDimitry Andric static bool hasSourceMods(const SDNode *N) {
6740b57cec5SDimitry Andric   if (isa<MemSDNode>(N))
6750b57cec5SDimitry Andric     return false;
6760b57cec5SDimitry Andric 
6770b57cec5SDimitry Andric   switch (N->getOpcode()) {
6780b57cec5SDimitry Andric   case ISD::CopyToReg:
6790b57cec5SDimitry Andric   case ISD::FDIV:
6800b57cec5SDimitry Andric   case ISD::FREM:
6810b57cec5SDimitry Andric   case ISD::INLINEASM:
6820b57cec5SDimitry Andric   case ISD::INLINEASM_BR:
6830b57cec5SDimitry Andric   case AMDGPUISD::DIV_SCALE:
6848bcb0991SDimitry Andric   case ISD::INTRINSIC_W_CHAIN:
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric   // TODO: Should really be looking at the users of the bitcast. These are
6870b57cec5SDimitry Andric   // problematic because bitcasts are used to legalize all stores to integer
6880b57cec5SDimitry Andric   // types.
6890b57cec5SDimitry Andric   case ISD::BITCAST:
6900b57cec5SDimitry Andric     return false;
6918bcb0991SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
692647cbc5dSDimitry Andric     switch (N->getConstantOperandVal(0)) {
6938bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p1:
6948bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p2:
6958bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_mov:
6968bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p1_f16:
6978bcb0991SDimitry Andric     case Intrinsic::amdgcn_interp_p2_f16:
6988bcb0991SDimitry Andric       return false;
6998bcb0991SDimitry Andric     default:
7008bcb0991SDimitry Andric       return true;
7018bcb0991SDimitry Andric     }
7028bcb0991SDimitry Andric   }
70306c3fb27SDimitry Andric   case ISD::SELECT:
70406c3fb27SDimitry Andric     return selectSupportsSourceMods(N);
7050b57cec5SDimitry Andric   default:
7060b57cec5SDimitry Andric     return true;
7070b57cec5SDimitry Andric   }
7080b57cec5SDimitry Andric }
7090b57cec5SDimitry Andric 
allUsesHaveSourceMods(const SDNode * N,unsigned CostThreshold)7100b57cec5SDimitry Andric bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
7110b57cec5SDimitry Andric                                                  unsigned CostThreshold) {
7120b57cec5SDimitry Andric   // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus
7130b57cec5SDimitry Andric   // it is truly free to use a source modifier in all cases. If there are
7140b57cec5SDimitry Andric   // multiple users but for each one will necessitate using VOP3, there will be
7150b57cec5SDimitry Andric   // a code size increase. Try to avoid increasing code size unless we know it
7160b57cec5SDimitry Andric   // will save on the instruction count.
7170b57cec5SDimitry Andric   unsigned NumMayIncreaseSize = 0;
7180b57cec5SDimitry Andric   MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
7190b57cec5SDimitry Andric 
72006c3fb27SDimitry Andric   assert(!N->use_empty());
72106c3fb27SDimitry Andric 
7220b57cec5SDimitry Andric   // XXX - Should this limit number of uses to check?
7230b57cec5SDimitry Andric   for (const SDNode *U : N->uses()) {
7240b57cec5SDimitry Andric     if (!hasSourceMods(U))
7250b57cec5SDimitry Andric       return false;
7260b57cec5SDimitry Andric 
7270b57cec5SDimitry Andric     if (!opMustUseVOP3Encoding(U, VT)) {
7280b57cec5SDimitry Andric       if (++NumMayIncreaseSize > CostThreshold)
7290b57cec5SDimitry Andric         return false;
7300b57cec5SDimitry Andric     }
7310b57cec5SDimitry Andric   }
7320b57cec5SDimitry Andric 
7330b57cec5SDimitry Andric   return true;
7340b57cec5SDimitry Andric }
7350b57cec5SDimitry Andric 
getTypeForExtReturn(LLVMContext & Context,EVT VT,ISD::NodeType ExtendKind) const7365ffd83dbSDimitry Andric EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
7375ffd83dbSDimitry Andric                                               ISD::NodeType ExtendKind) const {
7385ffd83dbSDimitry Andric   assert(!VT.isVector() && "only scalar expected");
7395ffd83dbSDimitry Andric 
7405ffd83dbSDimitry Andric   // Round to the next multiple of 32-bits.
7415ffd83dbSDimitry Andric   unsigned Size = VT.getSizeInBits();
7425ffd83dbSDimitry Andric   if (Size <= 32)
7435ffd83dbSDimitry Andric     return MVT::i32;
7445ffd83dbSDimitry Andric   return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32));
7455ffd83dbSDimitry Andric }
7465ffd83dbSDimitry Andric 
getVectorIdxTy(const DataLayout &) const7470b57cec5SDimitry Andric MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const {
7480b57cec5SDimitry Andric   return MVT::i32;
7490b57cec5SDimitry Andric }
7500b57cec5SDimitry Andric 
isSelectSupported(SelectSupportKind SelType) const7510b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
7520b57cec5SDimitry Andric   return true;
7530b57cec5SDimitry Andric }
7540b57cec5SDimitry Andric 
7550b57cec5SDimitry Andric // The backend supports 32 and 64 bit floating point immediates.
7560b57cec5SDimitry Andric // FIXME: Why are we reporting vectors of FP immediates as legal?
isFPImmLegal(const APFloat & Imm,EVT VT,bool ForCodeSize) const7570b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
7580b57cec5SDimitry Andric                                         bool ForCodeSize) const {
7590b57cec5SDimitry Andric   EVT ScalarVT = VT.getScalarType();
7600b57cec5SDimitry Andric   return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
7610b57cec5SDimitry Andric          (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
7620b57cec5SDimitry Andric }
7630b57cec5SDimitry Andric 
7640b57cec5SDimitry Andric // We don't want to shrink f64 / f32 constants.
ShouldShrinkFPConstant(EVT VT) const7650b57cec5SDimitry Andric bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
7660b57cec5SDimitry Andric   EVT ScalarVT = VT.getScalarType();
7670b57cec5SDimitry Andric   return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
7680b57cec5SDimitry Andric }
7690b57cec5SDimitry Andric 
shouldReduceLoadWidth(SDNode * N,ISD::LoadExtType ExtTy,EVT NewVT) const7700b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
7710b57cec5SDimitry Andric                                                  ISD::LoadExtType ExtTy,
7720b57cec5SDimitry Andric                                                  EVT NewVT) const {
7730b57cec5SDimitry Andric   // TODO: This may be worth removing. Check regression tests for diffs.
7740b57cec5SDimitry Andric   if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT))
7750b57cec5SDimitry Andric     return false;
7760b57cec5SDimitry Andric 
7770b57cec5SDimitry Andric   unsigned NewSize = NewVT.getStoreSizeInBits();
7780b57cec5SDimitry Andric 
7795ffd83dbSDimitry Andric   // If we are reducing to a 32-bit load or a smaller multi-dword load,
7805ffd83dbSDimitry Andric   // this is always better.
7815ffd83dbSDimitry Andric   if (NewSize >= 32)
7820b57cec5SDimitry Andric     return true;
7830b57cec5SDimitry Andric 
7840b57cec5SDimitry Andric   EVT OldVT = N->getValueType(0);
7850b57cec5SDimitry Andric   unsigned OldSize = OldVT.getStoreSizeInBits();
7860b57cec5SDimitry Andric 
7870b57cec5SDimitry Andric   MemSDNode *MN = cast<MemSDNode>(N);
7880b57cec5SDimitry Andric   unsigned AS = MN->getAddressSpace();
7890b57cec5SDimitry Andric   // Do not shrink an aligned scalar load to sub-dword.
7900b57cec5SDimitry Andric   // Scalar engine cannot do sub-dword loads.
7917a6dacacSDimitry Andric   // TODO: Update this for GFX12 which does have scalar sub-dword loads.
79281ad6265SDimitry Andric   if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) &&
7930b57cec5SDimitry Andric       (AS == AMDGPUAS::CONSTANT_ADDRESS ||
7940b57cec5SDimitry Andric        AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
79581ad6265SDimitry Andric        (isa<LoadSDNode>(N) && AS == AMDGPUAS::GLOBAL_ADDRESS &&
79681ad6265SDimitry Andric         MN->isInvariant())) &&
7970b57cec5SDimitry Andric       AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand()))
7980b57cec5SDimitry Andric     return false;
7990b57cec5SDimitry Andric 
8000b57cec5SDimitry Andric   // Don't produce extloads from sub 32-bit types. SI doesn't have scalar
8010b57cec5SDimitry Andric   // extloads, so doing one requires using a buffer_load. In cases where we
8020b57cec5SDimitry Andric   // still couldn't use a scalar load, using the wider load shouldn't really
8030b57cec5SDimitry Andric   // hurt anything.
8040b57cec5SDimitry Andric 
8050b57cec5SDimitry Andric   // If the old size already had to be an extload, there's no harm in continuing
8060b57cec5SDimitry Andric   // to reduce the width.
8070b57cec5SDimitry Andric   return (OldSize < 32);
8080b57cec5SDimitry Andric }
8090b57cec5SDimitry Andric 
isLoadBitCastBeneficial(EVT LoadTy,EVT CastTy,const SelectionDAG & DAG,const MachineMemOperand & MMO) const8100b57cec5SDimitry Andric bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
8110b57cec5SDimitry Andric                                                    const SelectionDAG &DAG,
8120b57cec5SDimitry Andric                                                    const MachineMemOperand &MMO) const {
8130b57cec5SDimitry Andric 
8140b57cec5SDimitry Andric   assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
8150b57cec5SDimitry Andric 
8160b57cec5SDimitry Andric   if (LoadTy.getScalarType() == MVT::i32)
8170b57cec5SDimitry Andric     return false;
8180b57cec5SDimitry Andric 
8190b57cec5SDimitry Andric   unsigned LScalarSize = LoadTy.getScalarSizeInBits();
8200b57cec5SDimitry Andric   unsigned CastScalarSize = CastTy.getScalarSizeInBits();
8210b57cec5SDimitry Andric 
8220b57cec5SDimitry Andric   if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
8230b57cec5SDimitry Andric     return false;
8240b57cec5SDimitry Andric 
825bdd1243dSDimitry Andric   unsigned Fast = 0;
8268bcb0991SDimitry Andric   return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
8278bcb0991SDimitry Andric                                         CastTy, MMO, &Fast) &&
8288bcb0991SDimitry Andric          Fast;
8290b57cec5SDimitry Andric }
8300b57cec5SDimitry Andric 
8310b57cec5SDimitry Andric // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
8320b57cec5SDimitry Andric // profitable with the expansion for 64-bit since it's generally good to
8330b57cec5SDimitry Andric // speculate things.
isCheapToSpeculateCttz(Type * Ty) const834bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
8350b57cec5SDimitry Andric   return true;
8360b57cec5SDimitry Andric }
8370b57cec5SDimitry Andric 
isCheapToSpeculateCtlz(Type * Ty) const838bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
8390b57cec5SDimitry Andric   return true;
8400b57cec5SDimitry Andric }
8410b57cec5SDimitry Andric 
isSDNodeAlwaysUniform(const SDNode * N) const8420b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const {
8430b57cec5SDimitry Andric   switch (N->getOpcode()) {
8440b57cec5SDimitry Andric   case ISD::EntryToken:
8450b57cec5SDimitry Andric   case ISD::TokenFactor:
8460b57cec5SDimitry Andric     return true;
847e8d8bef9SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
848647cbc5dSDimitry Andric     unsigned IntrID = N->getConstantOperandVal(0);
8490b57cec5SDimitry Andric     switch (IntrID) {
8500b57cec5SDimitry Andric     case Intrinsic::amdgcn_readfirstlane:
8510b57cec5SDimitry Andric     case Intrinsic::amdgcn_readlane:
8520b57cec5SDimitry Andric       return true;
8530b57cec5SDimitry Andric     }
854e8d8bef9SDimitry Andric     return false;
8550b57cec5SDimitry Andric   }
8560b57cec5SDimitry Andric   case ISD::LOAD:
8578bcb0991SDimitry Andric     if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() ==
8588bcb0991SDimitry Andric         AMDGPUAS::CONSTANT_ADDRESS_32BIT)
8590b57cec5SDimitry Andric       return true;
8600b57cec5SDimitry Andric     return false;
86181ad6265SDimitry Andric   case AMDGPUISD::SETCC: // ballot-style instruction
86281ad6265SDimitry Andric     return true;
8630b57cec5SDimitry Andric   }
864e8d8bef9SDimitry Andric   return false;
8650b57cec5SDimitry Andric }
8660b57cec5SDimitry Andric 
getNegatedExpression(SDValue Op,SelectionDAG & DAG,bool LegalOperations,bool ForCodeSize,NegatibleCost & Cost,unsigned Depth) const8675ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::getNegatedExpression(
8685ffd83dbSDimitry Andric     SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize,
8695ffd83dbSDimitry Andric     NegatibleCost &Cost, unsigned Depth) const {
8705ffd83dbSDimitry Andric 
8715ffd83dbSDimitry Andric   switch (Op.getOpcode()) {
8725ffd83dbSDimitry Andric   case ISD::FMA:
8735ffd83dbSDimitry Andric   case ISD::FMAD: {
8745ffd83dbSDimitry Andric     // Negating a fma is not free if it has users without source mods.
8755ffd83dbSDimitry Andric     if (!allUsesHaveSourceMods(Op.getNode()))
8765ffd83dbSDimitry Andric       return SDValue();
8775ffd83dbSDimitry Andric     break;
8785ffd83dbSDimitry Andric   }
87906c3fb27SDimitry Andric   case AMDGPUISD::RCP: {
88006c3fb27SDimitry Andric     SDValue Src = Op.getOperand(0);
88106c3fb27SDimitry Andric     EVT VT = Op.getValueType();
88206c3fb27SDimitry Andric     SDLoc SL(Op);
88306c3fb27SDimitry Andric 
88406c3fb27SDimitry Andric     SDValue NegSrc = getNegatedExpression(Src, DAG, LegalOperations,
88506c3fb27SDimitry Andric                                           ForCodeSize, Cost, Depth + 1);
88606c3fb27SDimitry Andric     if (NegSrc)
88706c3fb27SDimitry Andric       return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags());
88806c3fb27SDimitry Andric     return SDValue();
88906c3fb27SDimitry Andric   }
8905ffd83dbSDimitry Andric   default:
8915ffd83dbSDimitry Andric     break;
8925ffd83dbSDimitry Andric   }
8935ffd83dbSDimitry Andric 
8945ffd83dbSDimitry Andric   return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
8955ffd83dbSDimitry Andric                                               ForCodeSize, Cost, Depth);
8965ffd83dbSDimitry Andric }
8975ffd83dbSDimitry Andric 
8980b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
8990b57cec5SDimitry Andric // Target Properties
9000b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
9010b57cec5SDimitry Andric 
isFAbsFree(EVT VT) const9020b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
9030b57cec5SDimitry Andric   assert(VT.isFloatingPoint());
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric   // Packed operations do not have a fabs modifier.
9060b57cec5SDimitry Andric   return VT == MVT::f32 || VT == MVT::f64 ||
9070b57cec5SDimitry Andric          (Subtarget->has16BitInsts() && VT == MVT::f16);
9080b57cec5SDimitry Andric }
9090b57cec5SDimitry Andric 
isFNegFree(EVT VT) const9100b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
9110b57cec5SDimitry Andric   assert(VT.isFloatingPoint());
912fe6060f1SDimitry Andric   // Report this based on the end legalized type.
913fe6060f1SDimitry Andric   VT = VT.getScalarType();
914fe6060f1SDimitry Andric   return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16;
9150b57cec5SDimitry Andric }
9160b57cec5SDimitry Andric 
storeOfVectorConstantIsCheap(bool IsZero,EVT MemVT,unsigned NumElem,unsigned AS) const91706c3fb27SDimitry Andric bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
9180b57cec5SDimitry Andric                                                          unsigned NumElem,
9190b57cec5SDimitry Andric                                                          unsigned AS) const {
9200b57cec5SDimitry Andric   return true;
9210b57cec5SDimitry Andric }
9220b57cec5SDimitry Andric 
aggressivelyPreferBuildVectorSources(EVT VecVT) const9230b57cec5SDimitry Andric bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const {
9240b57cec5SDimitry Andric   // There are few operations which truly have vector input operands. Any vector
9250b57cec5SDimitry Andric   // operation is going to involve operations on each component, and a
9260b57cec5SDimitry Andric   // build_vector will be a copy per element, so it always makes sense to use a
9270b57cec5SDimitry Andric   // build_vector input in place of the extracted element to avoid a copy into a
9280b57cec5SDimitry Andric   // super register.
9290b57cec5SDimitry Andric   //
9300b57cec5SDimitry Andric   // We should probably only do this if all users are extracts only, but this
9310b57cec5SDimitry Andric   // should be the common case.
9320b57cec5SDimitry Andric   return true;
9330b57cec5SDimitry Andric }
9340b57cec5SDimitry Andric 
isTruncateFree(EVT Source,EVT Dest) const9350b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
9360b57cec5SDimitry Andric   // Truncate is just accessing a subregister.
9370b57cec5SDimitry Andric 
9380b57cec5SDimitry Andric   unsigned SrcSize = Source.getSizeInBits();
9390b57cec5SDimitry Andric   unsigned DestSize = Dest.getSizeInBits();
9400b57cec5SDimitry Andric 
9410b57cec5SDimitry Andric   return DestSize < SrcSize && DestSize % 32 == 0 ;
9420b57cec5SDimitry Andric }
9430b57cec5SDimitry Andric 
isTruncateFree(Type * Source,Type * Dest) const9440b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
9450b57cec5SDimitry Andric   // Truncate is just accessing a subregister.
9460b57cec5SDimitry Andric 
9470b57cec5SDimitry Andric   unsigned SrcSize = Source->getScalarSizeInBits();
9480b57cec5SDimitry Andric   unsigned DestSize = Dest->getScalarSizeInBits();
9490b57cec5SDimitry Andric 
9500b57cec5SDimitry Andric   if (DestSize== 16 && Subtarget->has16BitInsts())
9510b57cec5SDimitry Andric     return SrcSize >= 32;
9520b57cec5SDimitry Andric 
9530b57cec5SDimitry Andric   return DestSize < SrcSize && DestSize % 32 == 0;
9540b57cec5SDimitry Andric }
9550b57cec5SDimitry Andric 
isZExtFree(Type * Src,Type * Dest) const9560b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
9570b57cec5SDimitry Andric   unsigned SrcSize = Src->getScalarSizeInBits();
9580b57cec5SDimitry Andric   unsigned DestSize = Dest->getScalarSizeInBits();
9590b57cec5SDimitry Andric 
9600b57cec5SDimitry Andric   if (SrcSize == 16 && Subtarget->has16BitInsts())
9610b57cec5SDimitry Andric     return DestSize >= 32;
9620b57cec5SDimitry Andric 
9630b57cec5SDimitry Andric   return SrcSize == 32 && DestSize == 64;
9640b57cec5SDimitry Andric }
9650b57cec5SDimitry Andric 
isZExtFree(EVT Src,EVT Dest) const9660b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
9670b57cec5SDimitry Andric   // Any register load of a 64-bit value really requires 2 32-bit moves. For all
9680b57cec5SDimitry Andric   // practical purposes, the extra mov 0 to load a 64-bit is free.  As used,
9690b57cec5SDimitry Andric   // this will enable reducing 64-bit operations the 32-bit, which is always
9700b57cec5SDimitry Andric   // good.
9710b57cec5SDimitry Andric 
9720b57cec5SDimitry Andric   if (Src == MVT::i16)
9730b57cec5SDimitry Andric     return Dest == MVT::i32 ||Dest == MVT::i64 ;
9740b57cec5SDimitry Andric 
9750b57cec5SDimitry Andric   return Src == MVT::i32 && Dest == MVT::i64;
9760b57cec5SDimitry Andric }
9770b57cec5SDimitry Andric 
isNarrowingProfitable(EVT SrcVT,EVT DestVT) const9780b57cec5SDimitry Andric bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
9790b57cec5SDimitry Andric   // There aren't really 64-bit registers, but pairs of 32-bit ones and only a
9800b57cec5SDimitry Andric   // limited number of native 64-bit operations. Shrinking an operation to fit
9810b57cec5SDimitry Andric   // in a single 32-bit register should always be helpful. As currently used,
9820b57cec5SDimitry Andric   // this is much less general than the name suggests, and is only used in
9830b57cec5SDimitry Andric   // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is
9840b57cec5SDimitry Andric   // not profitable, and may actually be harmful.
9850b57cec5SDimitry Andric   return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32;
9860b57cec5SDimitry Andric }
9870b57cec5SDimitry Andric 
isDesirableToCommuteWithShift(const SDNode * N,CombineLevel Level) const988bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isDesirableToCommuteWithShift(
989bdd1243dSDimitry Andric     const SDNode* N, CombineLevel Level) const {
990bdd1243dSDimitry Andric   assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
991bdd1243dSDimitry Andric           N->getOpcode() == ISD::SRL) &&
992bdd1243dSDimitry Andric          "Expected shift op");
993bdd1243dSDimitry Andric   // Always commute pre-type legalization and right shifts.
994bdd1243dSDimitry Andric   // We're looking for shl(or(x,y),z) patterns.
995bdd1243dSDimitry Andric   if (Level < CombineLevel::AfterLegalizeTypes ||
996bdd1243dSDimitry Andric       N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR)
997bdd1243dSDimitry Andric     return true;
998bdd1243dSDimitry Andric 
999bdd1243dSDimitry Andric   // If only user is a i32 right-shift, then don't destroy a BFE pattern.
1000bdd1243dSDimitry Andric   if (N->getValueType(0) == MVT::i32 && N->use_size() == 1 &&
1001bdd1243dSDimitry Andric       (N->use_begin()->getOpcode() == ISD::SRA ||
1002bdd1243dSDimitry Andric        N->use_begin()->getOpcode() == ISD::SRL))
1003bdd1243dSDimitry Andric     return false;
1004bdd1243dSDimitry Andric 
1005bdd1243dSDimitry Andric   // Don't destroy or(shl(load_zext(),c), load_zext()) patterns.
1006bdd1243dSDimitry Andric   auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) {
1007bdd1243dSDimitry Andric     if (LHS.getOpcode() != ISD::SHL)
1008bdd1243dSDimitry Andric       return false;
1009bdd1243dSDimitry Andric     auto *RHSLd = dyn_cast<LoadSDNode>(RHS);
1010bdd1243dSDimitry Andric     auto *LHS0 = dyn_cast<LoadSDNode>(LHS.getOperand(0));
1011bdd1243dSDimitry Andric     auto *LHS1 = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
1012bdd1243dSDimitry Andric     return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD &&
1013bdd1243dSDimitry Andric            LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() &&
1014bdd1243dSDimitry Andric            RHSLd->getExtensionType() == ISD::ZEXTLOAD;
1015bdd1243dSDimitry Andric   };
1016bdd1243dSDimitry Andric   SDValue LHS = N->getOperand(0).getOperand(0);
1017bdd1243dSDimitry Andric   SDValue RHS = N->getOperand(0).getOperand(1);
1018bdd1243dSDimitry Andric   return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS));
1019bdd1243dSDimitry Andric }
1020bdd1243dSDimitry Andric 
10210b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
10220b57cec5SDimitry Andric // TargetLowering Callbacks
10230b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
10240b57cec5SDimitry Andric 
CCAssignFnForCall(CallingConv::ID CC,bool IsVarArg)10250b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC,
10260b57cec5SDimitry Andric                                                   bool IsVarArg) {
10270b57cec5SDimitry Andric   switch (CC) {
10280b57cec5SDimitry Andric   case CallingConv::AMDGPU_VS:
10290b57cec5SDimitry Andric   case CallingConv::AMDGPU_GS:
10300b57cec5SDimitry Andric   case CallingConv::AMDGPU_PS:
10310b57cec5SDimitry Andric   case CallingConv::AMDGPU_CS:
10320b57cec5SDimitry Andric   case CallingConv::AMDGPU_HS:
10330b57cec5SDimitry Andric   case CallingConv::AMDGPU_ES:
10340b57cec5SDimitry Andric   case CallingConv::AMDGPU_LS:
10350b57cec5SDimitry Andric     return CC_AMDGPU;
10365f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_Chain:
10375f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_ChainPreserve:
10385f757f3fSDimitry Andric     return CC_AMDGPU_CS_CHAIN;
10390b57cec5SDimitry Andric   case CallingConv::C:
10400b57cec5SDimitry Andric   case CallingConv::Fast:
10410b57cec5SDimitry Andric   case CallingConv::Cold:
10420b57cec5SDimitry Andric     return CC_AMDGPU_Func;
1043e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_Gfx:
1044e8d8bef9SDimitry Andric     return CC_SI_Gfx;
10450b57cec5SDimitry Andric   case CallingConv::AMDGPU_KERNEL:
10460b57cec5SDimitry Andric   case CallingConv::SPIR_KERNEL:
10470b57cec5SDimitry Andric   default:
10480b57cec5SDimitry Andric     report_fatal_error("Unsupported calling convention for call");
10490b57cec5SDimitry Andric   }
10500b57cec5SDimitry Andric }
10510b57cec5SDimitry Andric 
CCAssignFnForReturn(CallingConv::ID CC,bool IsVarArg)10520b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC,
10530b57cec5SDimitry Andric                                                     bool IsVarArg) {
10540b57cec5SDimitry Andric   switch (CC) {
10550b57cec5SDimitry Andric   case CallingConv::AMDGPU_KERNEL:
10560b57cec5SDimitry Andric   case CallingConv::SPIR_KERNEL:
10570b57cec5SDimitry Andric     llvm_unreachable("kernels should not be handled here");
10580b57cec5SDimitry Andric   case CallingConv::AMDGPU_VS:
10590b57cec5SDimitry Andric   case CallingConv::AMDGPU_GS:
10600b57cec5SDimitry Andric   case CallingConv::AMDGPU_PS:
10610b57cec5SDimitry Andric   case CallingConv::AMDGPU_CS:
10625f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_Chain:
10635f757f3fSDimitry Andric   case CallingConv::AMDGPU_CS_ChainPreserve:
10640b57cec5SDimitry Andric   case CallingConv::AMDGPU_HS:
10650b57cec5SDimitry Andric   case CallingConv::AMDGPU_ES:
10660b57cec5SDimitry Andric   case CallingConv::AMDGPU_LS:
10670b57cec5SDimitry Andric     return RetCC_SI_Shader;
1068e8d8bef9SDimitry Andric   case CallingConv::AMDGPU_Gfx:
1069e8d8bef9SDimitry Andric     return RetCC_SI_Gfx;
10700b57cec5SDimitry Andric   case CallingConv::C:
10710b57cec5SDimitry Andric   case CallingConv::Fast:
10720b57cec5SDimitry Andric   case CallingConv::Cold:
10730b57cec5SDimitry Andric     return RetCC_AMDGPU_Func;
10740b57cec5SDimitry Andric   default:
10750b57cec5SDimitry Andric     report_fatal_error("Unsupported calling convention.");
10760b57cec5SDimitry Andric   }
10770b57cec5SDimitry Andric }
10780b57cec5SDimitry Andric 
10790b57cec5SDimitry Andric /// The SelectionDAGBuilder will automatically promote function arguments
10800b57cec5SDimitry Andric /// with illegal types.  However, this does not work for the AMDGPU targets
10810b57cec5SDimitry Andric /// since the function arguments are stored in memory as these illegal types.
10820b57cec5SDimitry Andric /// In order to handle this properly we need to get the original types sizes
10830b57cec5SDimitry Andric /// from the LLVM IR Function and fixup the ISD:InputArg values before
10840b57cec5SDimitry Andric /// passing them to AnalyzeFormalArguments()
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
10870b57cec5SDimitry Andric /// input values across multiple registers.  Each item in the Ins array
10880b57cec5SDimitry Andric /// represents a single value that will be stored in registers.  Ins[x].VT is
10890b57cec5SDimitry Andric /// the value type of the value that will be stored in the register, so
10900b57cec5SDimitry Andric /// whatever SDNode we lower the argument to needs to be this type.
10910b57cec5SDimitry Andric ///
10920b57cec5SDimitry Andric /// In order to correctly lower the arguments we need to know the size of each
10930b57cec5SDimitry Andric /// argument.  Since Ins[x].VT gives us the size of the register that will
10940b57cec5SDimitry Andric /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
1095349cc55cSDimitry Andric /// for the original function argument so that we can deduce the correct memory
10960b57cec5SDimitry Andric /// type to use for Ins[x].  In most cases the correct memory type will be
10970b57cec5SDimitry Andric /// Ins[x].ArgVT.  However, this will not always be the case.  If, for example,
10980b57cec5SDimitry Andric /// we have a kernel argument of type v8i8, this argument will be split into
10990b57cec5SDimitry Andric /// 8 parts and each part will be represented by its own item in the Ins array.
11000b57cec5SDimitry Andric /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
11010b57cec5SDimitry Andric /// the argument before it was split.  From this, we deduce that the memory type
11020b57cec5SDimitry Andric /// for each individual part is i8.  We pass the memory type as LocVT to the
11030b57cec5SDimitry Andric /// calling convention analysis function and the register type (Ins[x].VT) as
11040b57cec5SDimitry Andric /// the ValVT.
analyzeFormalArgumentsCompute(CCState & State,const SmallVectorImpl<ISD::InputArg> & Ins) const11050b57cec5SDimitry Andric void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
11060b57cec5SDimitry Andric   CCState &State,
11070b57cec5SDimitry Andric   const SmallVectorImpl<ISD::InputArg> &Ins) const {
11080b57cec5SDimitry Andric   const MachineFunction &MF = State.getMachineFunction();
11090b57cec5SDimitry Andric   const Function &Fn = MF.getFunction();
11100b57cec5SDimitry Andric   LLVMContext &Ctx = Fn.getParent()->getContext();
11110b57cec5SDimitry Andric   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
111206c3fb27SDimitry Andric   const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();
11130b57cec5SDimitry Andric   CallingConv::ID CC = Fn.getCallingConv();
11140b57cec5SDimitry Andric 
11155ffd83dbSDimitry Andric   Align MaxAlign = Align(1);
11160b57cec5SDimitry Andric   uint64_t ExplicitArgOffset = 0;
11170b57cec5SDimitry Andric   const DataLayout &DL = Fn.getParent()->getDataLayout();
11180b57cec5SDimitry Andric 
11190b57cec5SDimitry Andric   unsigned InIndex = 0;
11200b57cec5SDimitry Andric 
11210b57cec5SDimitry Andric   for (const Argument &Arg : Fn.args()) {
1122e8d8bef9SDimitry Andric     const bool IsByRef = Arg.hasByRefAttr();
11230b57cec5SDimitry Andric     Type *BaseArgTy = Arg.getType();
1124e8d8bef9SDimitry Andric     Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy;
112581ad6265SDimitry Andric     Align Alignment = DL.getValueOrABITypeAlignment(
1126bdd1243dSDimitry Andric         IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy);
112781ad6265SDimitry Andric     MaxAlign = std::max(Alignment, MaxAlign);
1128e8d8bef9SDimitry Andric     uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy);
11290b57cec5SDimitry Andric 
11305ffd83dbSDimitry Andric     uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset;
11315ffd83dbSDimitry Andric     ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize;
11320b57cec5SDimitry Andric 
11330b57cec5SDimitry Andric     // We're basically throwing away everything passed into us and starting over
11340b57cec5SDimitry Andric     // to get accurate in-memory offsets. The "PartOffset" is completely useless
11350b57cec5SDimitry Andric     // to us as computed in Ins.
11360b57cec5SDimitry Andric     //
11370b57cec5SDimitry Andric     // We also need to figure out what type legalization is trying to do to get
11380b57cec5SDimitry Andric     // the correct memory offsets.
11390b57cec5SDimitry Andric 
11400b57cec5SDimitry Andric     SmallVector<EVT, 16> ValueVTs;
11410b57cec5SDimitry Andric     SmallVector<uint64_t, 16> Offsets;
11420b57cec5SDimitry Andric     ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset);
11430b57cec5SDimitry Andric 
11440b57cec5SDimitry Andric     for (unsigned Value = 0, NumValues = ValueVTs.size();
11450b57cec5SDimitry Andric          Value != NumValues; ++Value) {
11460b57cec5SDimitry Andric       uint64_t BasePartOffset = Offsets[Value];
11470b57cec5SDimitry Andric 
11480b57cec5SDimitry Andric       EVT ArgVT = ValueVTs[Value];
11490b57cec5SDimitry Andric       EVT MemVT = ArgVT;
11500b57cec5SDimitry Andric       MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT);
11510b57cec5SDimitry Andric       unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT);
11520b57cec5SDimitry Andric 
11530b57cec5SDimitry Andric       if (NumRegs == 1) {
11540b57cec5SDimitry Andric         // This argument is not split, so the IR type is the memory type.
11550b57cec5SDimitry Andric         if (ArgVT.isExtended()) {
11560b57cec5SDimitry Andric           // We have an extended type, like i24, so we should just use the
11570b57cec5SDimitry Andric           // register type.
11580b57cec5SDimitry Andric           MemVT = RegisterVT;
11590b57cec5SDimitry Andric         } else {
11600b57cec5SDimitry Andric           MemVT = ArgVT;
11610b57cec5SDimitry Andric         }
11620b57cec5SDimitry Andric       } else if (ArgVT.isVector() && RegisterVT.isVector() &&
11630b57cec5SDimitry Andric                  ArgVT.getScalarType() == RegisterVT.getScalarType()) {
11640b57cec5SDimitry Andric         assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements());
11650b57cec5SDimitry Andric         // We have a vector value which has been split into a vector with
11660b57cec5SDimitry Andric         // the same scalar type, but fewer elements.  This should handle
11670b57cec5SDimitry Andric         // all the floating-point vector types.
11680b57cec5SDimitry Andric         MemVT = RegisterVT;
11690b57cec5SDimitry Andric       } else if (ArgVT.isVector() &&
11700b57cec5SDimitry Andric                  ArgVT.getVectorNumElements() == NumRegs) {
11710b57cec5SDimitry Andric         // This arg has been split so that each element is stored in a separate
11720b57cec5SDimitry Andric         // register.
11730b57cec5SDimitry Andric         MemVT = ArgVT.getScalarType();
11740b57cec5SDimitry Andric       } else if (ArgVT.isExtended()) {
11750b57cec5SDimitry Andric         // We have an extended type, like i65.
11760b57cec5SDimitry Andric         MemVT = RegisterVT;
11770b57cec5SDimitry Andric       } else {
11780b57cec5SDimitry Andric         unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs;
11790b57cec5SDimitry Andric         assert(ArgVT.getStoreSizeInBits() % NumRegs == 0);
11800b57cec5SDimitry Andric         if (RegisterVT.isInteger()) {
11810b57cec5SDimitry Andric           MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
11820b57cec5SDimitry Andric         } else if (RegisterVT.isVector()) {
11830b57cec5SDimitry Andric           assert(!RegisterVT.getScalarType().isFloatingPoint());
11840b57cec5SDimitry Andric           unsigned NumElements = RegisterVT.getVectorNumElements();
11850b57cec5SDimitry Andric           assert(MemoryBits % NumElements == 0);
11860b57cec5SDimitry Andric           // This vector type has been split into another vector type with
11870b57cec5SDimitry Andric           // a different elements size.
11880b57cec5SDimitry Andric           EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
11890b57cec5SDimitry Andric                                            MemoryBits / NumElements);
11900b57cec5SDimitry Andric           MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
11910b57cec5SDimitry Andric         } else {
11920b57cec5SDimitry Andric           llvm_unreachable("cannot deduce memory type.");
11930b57cec5SDimitry Andric         }
11940b57cec5SDimitry Andric       }
11950b57cec5SDimitry Andric 
11960b57cec5SDimitry Andric       // Convert one element vectors to scalar.
11970b57cec5SDimitry Andric       if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
11980b57cec5SDimitry Andric         MemVT = MemVT.getScalarType();
11990b57cec5SDimitry Andric 
12000b57cec5SDimitry Andric       // Round up vec3/vec5 argument.
12010b57cec5SDimitry Andric       if (MemVT.isVector() && !MemVT.isPow2VectorType()) {
12020b57cec5SDimitry Andric         assert(MemVT.getVectorNumElements() == 3 ||
1203bdd1243dSDimitry Andric                MemVT.getVectorNumElements() == 5 ||
1204bdd1243dSDimitry Andric                (MemVT.getVectorNumElements() >= 9 &&
1205bdd1243dSDimitry Andric                 MemVT.getVectorNumElements() <= 12));
12060b57cec5SDimitry Andric         MemVT = MemVT.getPow2VectorType(State.getContext());
12075ffd83dbSDimitry Andric       } else if (!MemVT.isSimple() && !MemVT.isVector()) {
12085ffd83dbSDimitry Andric         MemVT = MemVT.getRoundIntegerType(State.getContext());
12090b57cec5SDimitry Andric       }
12100b57cec5SDimitry Andric 
12110b57cec5SDimitry Andric       unsigned PartOffset = 0;
12120b57cec5SDimitry Andric       for (unsigned i = 0; i != NumRegs; ++i) {
12130b57cec5SDimitry Andric         State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT,
12140b57cec5SDimitry Andric                                                BasePartOffset + PartOffset,
12150b57cec5SDimitry Andric                                                MemVT.getSimpleVT(),
12160b57cec5SDimitry Andric                                                CCValAssign::Full));
12170b57cec5SDimitry Andric         PartOffset += MemVT.getStoreSize();
12180b57cec5SDimitry Andric       }
12190b57cec5SDimitry Andric     }
12200b57cec5SDimitry Andric   }
12210b57cec5SDimitry Andric }
12220b57cec5SDimitry Andric 
LowerReturn(SDValue Chain,CallingConv::ID CallConv,bool isVarArg,const SmallVectorImpl<ISD::OutputArg> & Outs,const SmallVectorImpl<SDValue> & OutVals,const SDLoc & DL,SelectionDAG & DAG) const12230b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerReturn(
12240b57cec5SDimitry Andric   SDValue Chain, CallingConv::ID CallConv,
12250b57cec5SDimitry Andric   bool isVarArg,
12260b57cec5SDimitry Andric   const SmallVectorImpl<ISD::OutputArg> &Outs,
12270b57cec5SDimitry Andric   const SmallVectorImpl<SDValue> &OutVals,
12280b57cec5SDimitry Andric   const SDLoc &DL, SelectionDAG &DAG) const {
12290b57cec5SDimitry Andric   // FIXME: Fails for r600 tests
12300b57cec5SDimitry Andric   //assert(!isVarArg && Outs.empty() && OutVals.empty() &&
12310b57cec5SDimitry Andric   // "wave terminate should not have return values");
12320b57cec5SDimitry Andric   return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain);
12330b57cec5SDimitry Andric }
12340b57cec5SDimitry Andric 
12350b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
12360b57cec5SDimitry Andric // Target specific lowering
12370b57cec5SDimitry Andric //===---------------------------------------------------------------------===//
12380b57cec5SDimitry Andric 
12390b57cec5SDimitry Andric /// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFnForCall(CallingConv::ID CC,bool IsVarArg)12400b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
12410b57cec5SDimitry Andric                                                     bool IsVarArg) {
12420b57cec5SDimitry Andric   return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg);
12430b57cec5SDimitry Andric }
12440b57cec5SDimitry Andric 
CCAssignFnForReturn(CallingConv::ID CC,bool IsVarArg)12450b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
12460b57cec5SDimitry Andric                                                       bool IsVarArg) {
12470b57cec5SDimitry Andric   return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg);
12480b57cec5SDimitry Andric }
12490b57cec5SDimitry Andric 
addTokenForArgument(SDValue Chain,SelectionDAG & DAG,MachineFrameInfo & MFI,int ClobberedFI) const12500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain,
12510b57cec5SDimitry Andric                                                   SelectionDAG &DAG,
12520b57cec5SDimitry Andric                                                   MachineFrameInfo &MFI,
12530b57cec5SDimitry Andric                                                   int ClobberedFI) const {
12540b57cec5SDimitry Andric   SmallVector<SDValue, 8> ArgChains;
12550b57cec5SDimitry Andric   int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
12560b57cec5SDimitry Andric   int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
12570b57cec5SDimitry Andric 
12580b57cec5SDimitry Andric   // Include the original chain at the beginning of the list. When this is
12590b57cec5SDimitry Andric   // used by target LowerCall hooks, this helps legalize find the
12600b57cec5SDimitry Andric   // CALLSEQ_BEGIN node.
12610b57cec5SDimitry Andric   ArgChains.push_back(Chain);
12620b57cec5SDimitry Andric 
12630b57cec5SDimitry Andric   // Add a chain value for each stack argument corresponding
1264349cc55cSDimitry Andric   for (SDNode *U : DAG.getEntryNode().getNode()->uses()) {
1265349cc55cSDimitry Andric     if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) {
12660b57cec5SDimitry Andric       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) {
12670b57cec5SDimitry Andric         if (FI->getIndex() < 0) {
12680b57cec5SDimitry Andric           int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
12690b57cec5SDimitry Andric           int64_t InLastByte = InFirstByte;
12700b57cec5SDimitry Andric           InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
12710b57cec5SDimitry Andric 
12720b57cec5SDimitry Andric           if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
12730b57cec5SDimitry Andric               (FirstByte <= InFirstByte && InFirstByte <= LastByte))
12740b57cec5SDimitry Andric             ArgChains.push_back(SDValue(L, 1));
12750b57cec5SDimitry Andric         }
12760b57cec5SDimitry Andric       }
12770b57cec5SDimitry Andric     }
12780b57cec5SDimitry Andric   }
12790b57cec5SDimitry Andric 
12800b57cec5SDimitry Andric   // Build a tokenfactor for all the chains.
12810b57cec5SDimitry Andric   return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
12820b57cec5SDimitry Andric }
12830b57cec5SDimitry Andric 
lowerUnhandledCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals,StringRef Reason) const12840b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI,
12850b57cec5SDimitry Andric                                                  SmallVectorImpl<SDValue> &InVals,
12860b57cec5SDimitry Andric                                                  StringRef Reason) const {
12870b57cec5SDimitry Andric   SDValue Callee = CLI.Callee;
12880b57cec5SDimitry Andric   SelectionDAG &DAG = CLI.DAG;
12890b57cec5SDimitry Andric 
12900b57cec5SDimitry Andric   const Function &Fn = DAG.getMachineFunction().getFunction();
12910b57cec5SDimitry Andric 
12920b57cec5SDimitry Andric   StringRef FuncName("<unknown>");
12930b57cec5SDimitry Andric 
12940b57cec5SDimitry Andric   if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee))
12950b57cec5SDimitry Andric     FuncName = G->getSymbol();
12960b57cec5SDimitry Andric   else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
12970b57cec5SDimitry Andric     FuncName = G->getGlobal()->getName();
12980b57cec5SDimitry Andric 
12990b57cec5SDimitry Andric   DiagnosticInfoUnsupported NoCalls(
13000b57cec5SDimitry Andric     Fn, Reason + FuncName, CLI.DL.getDebugLoc());
13010b57cec5SDimitry Andric   DAG.getContext()->diagnose(NoCalls);
13020b57cec5SDimitry Andric 
13030b57cec5SDimitry Andric   if (!CLI.IsTailCall) {
13040b57cec5SDimitry Andric     for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
13050b57cec5SDimitry Andric       InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
13060b57cec5SDimitry Andric   }
13070b57cec5SDimitry Andric 
13080b57cec5SDimitry Andric   return DAG.getEntryNode();
13090b57cec5SDimitry Andric }
13100b57cec5SDimitry Andric 
LowerCall(CallLoweringInfo & CLI,SmallVectorImpl<SDValue> & InVals) const13110b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
13120b57cec5SDimitry Andric                                         SmallVectorImpl<SDValue> &InVals) const {
13130b57cec5SDimitry Andric   return lowerUnhandledCall(CLI, InVals, "unsupported call to function ");
13140b57cec5SDimitry Andric }
13150b57cec5SDimitry Andric 
LowerDYNAMIC_STACKALLOC(SDValue Op,SelectionDAG & DAG) const13160b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
13170b57cec5SDimitry Andric                                                       SelectionDAG &DAG) const {
13180b57cec5SDimitry Andric   const Function &Fn = DAG.getMachineFunction().getFunction();
13190b57cec5SDimitry Andric 
13200b57cec5SDimitry Andric   DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca",
13210b57cec5SDimitry Andric                                             SDLoc(Op).getDebugLoc());
13220b57cec5SDimitry Andric   DAG.getContext()->diagnose(NoDynamicAlloca);
13230b57cec5SDimitry Andric   auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
13240b57cec5SDimitry Andric   return DAG.getMergeValues(Ops, SDLoc());
13250b57cec5SDimitry Andric }
13260b57cec5SDimitry Andric 
LowerOperation(SDValue Op,SelectionDAG & DAG) const13270b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
13280b57cec5SDimitry Andric                                              SelectionDAG &DAG) const {
13290b57cec5SDimitry Andric   switch (Op.getOpcode()) {
13300b57cec5SDimitry Andric   default:
13310b57cec5SDimitry Andric     Op->print(errs(), &DAG);
13320b57cec5SDimitry Andric     llvm_unreachable("Custom lowering code for this "
13330b57cec5SDimitry Andric                      "instruction is not implemented yet!");
13340b57cec5SDimitry Andric     break;
13350b57cec5SDimitry Andric   case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
13360b57cec5SDimitry Andric   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
13370b57cec5SDimitry Andric   case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
13380b57cec5SDimitry Andric   case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
13390b57cec5SDimitry Andric   case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
13400b57cec5SDimitry Andric   case ISD::FREM: return LowerFREM(Op, DAG);
13410b57cec5SDimitry Andric   case ISD::FCEIL: return LowerFCEIL(Op, DAG);
13420b57cec5SDimitry Andric   case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
13430b57cec5SDimitry Andric   case ISD::FRINT: return LowerFRINT(Op, DAG);
13440b57cec5SDimitry Andric   case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
1345bdd1243dSDimitry Andric   case ISD::FROUNDEVEN:
1346bdd1243dSDimitry Andric     return LowerFROUNDEVEN(Op, DAG);
13470b57cec5SDimitry Andric   case ISD::FROUND: return LowerFROUND(Op, DAG);
13480b57cec5SDimitry Andric   case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
134906c3fb27SDimitry Andric   case ISD::FLOG2:
135006c3fb27SDimitry Andric     return LowerFLOG2(Op, DAG);
13510b57cec5SDimitry Andric   case ISD::FLOG:
13520b57cec5SDimitry Andric   case ISD::FLOG10:
135306c3fb27SDimitry Andric     return LowerFLOGCommon(Op, DAG);
13540b57cec5SDimitry Andric   case ISD::FEXP:
13555f757f3fSDimitry Andric   case ISD::FEXP10:
13560b57cec5SDimitry Andric     return lowerFEXP(Op, DAG);
135706c3fb27SDimitry Andric   case ISD::FEXP2:
135806c3fb27SDimitry Andric     return lowerFEXP2(Op, DAG);
13590b57cec5SDimitry Andric   case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
13600b57cec5SDimitry Andric   case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
13610b57cec5SDimitry Andric   case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
1362fe6060f1SDimitry Andric   case ISD::FP_TO_SINT:
1363fe6060f1SDimitry Andric   case ISD::FP_TO_UINT:
1364fe6060f1SDimitry Andric     return LowerFP_TO_INT(Op, DAG);
13650b57cec5SDimitry Andric   case ISD::CTTZ:
13660b57cec5SDimitry Andric   case ISD::CTTZ_ZERO_UNDEF:
13670b57cec5SDimitry Andric   case ISD::CTLZ:
13680b57cec5SDimitry Andric   case ISD::CTLZ_ZERO_UNDEF:
13690b57cec5SDimitry Andric     return LowerCTLZ_CTTZ(Op, DAG);
13700b57cec5SDimitry Andric   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
13710b57cec5SDimitry Andric   }
13720b57cec5SDimitry Andric   return Op;
13730b57cec5SDimitry Andric }
13740b57cec5SDimitry Andric 
ReplaceNodeResults(SDNode * N,SmallVectorImpl<SDValue> & Results,SelectionDAG & DAG) const13750b57cec5SDimitry Andric void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
13760b57cec5SDimitry Andric                                               SmallVectorImpl<SDValue> &Results,
13770b57cec5SDimitry Andric                                               SelectionDAG &DAG) const {
13780b57cec5SDimitry Andric   switch (N->getOpcode()) {
13790b57cec5SDimitry Andric   case ISD::SIGN_EXTEND_INREG:
13800b57cec5SDimitry Andric     // Different parts of legalization seem to interpret which type of
13810b57cec5SDimitry Andric     // sign_extend_inreg is the one to check for custom lowering. The extended
13820b57cec5SDimitry Andric     // from type is what really matters, but some places check for custom
13830b57cec5SDimitry Andric     // lowering of the result type. This results in trying to use
13840b57cec5SDimitry Andric     // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
13850b57cec5SDimitry Andric     // nothing here and let the illegal result integer be handled normally.
13860b57cec5SDimitry Andric     return;
138706c3fb27SDimitry Andric   case ISD::FLOG2:
138806c3fb27SDimitry Andric     if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG))
138906c3fb27SDimitry Andric       Results.push_back(Lowered);
139006c3fb27SDimitry Andric     return;
139106c3fb27SDimitry Andric   case ISD::FLOG:
139206c3fb27SDimitry Andric   case ISD::FLOG10:
139306c3fb27SDimitry Andric     if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG))
139406c3fb27SDimitry Andric       Results.push_back(Lowered);
139506c3fb27SDimitry Andric     return;
139606c3fb27SDimitry Andric   case ISD::FEXP2:
139706c3fb27SDimitry Andric     if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG))
139806c3fb27SDimitry Andric       Results.push_back(Lowered);
139906c3fb27SDimitry Andric     return;
140006c3fb27SDimitry Andric   case ISD::FEXP:
14015f757f3fSDimitry Andric   case ISD::FEXP10:
140206c3fb27SDimitry Andric     if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG))
140306c3fb27SDimitry Andric       Results.push_back(Lowered);
140406c3fb27SDimitry Andric     return;
14057a6dacacSDimitry Andric   case ISD::CTLZ:
14067a6dacacSDimitry Andric   case ISD::CTLZ_ZERO_UNDEF:
14077a6dacacSDimitry Andric     if (auto Lowered = lowerCTLZResults(SDValue(N, 0u), DAG))
14087a6dacacSDimitry Andric       Results.push_back(Lowered);
14097a6dacacSDimitry Andric     return;
14100b57cec5SDimitry Andric   default:
14110b57cec5SDimitry Andric     return;
14120b57cec5SDimitry Andric   }
14130b57cec5SDimitry Andric }
14140b57cec5SDimitry Andric 
LowerGlobalAddress(AMDGPUMachineFunction * MFI,SDValue Op,SelectionDAG & DAG) const14150b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
14160b57cec5SDimitry Andric                                                  SDValue Op,
14170b57cec5SDimitry Andric                                                  SelectionDAG &DAG) const {
14180b57cec5SDimitry Andric 
14190b57cec5SDimitry Andric   const DataLayout &DL = DAG.getDataLayout();
14200b57cec5SDimitry Andric   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
14210b57cec5SDimitry Andric   const GlobalValue *GV = G->getGlobal();
14220b57cec5SDimitry Andric 
142306c3fb27SDimitry Andric   if (!MFI->isModuleEntryFunction()) {
142406c3fb27SDimitry Andric     if (std::optional<uint32_t> Address =
142506c3fb27SDimitry Andric             AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
142606c3fb27SDimitry Andric       return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
142706c3fb27SDimitry Andric     }
142806c3fb27SDimitry Andric   }
142906c3fb27SDimitry Andric 
14300b57cec5SDimitry Andric   if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
14310b57cec5SDimitry Andric       G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
1432fe6060f1SDimitry Andric     if (!MFI->isModuleEntryFunction() &&
1433fe6060f1SDimitry Andric         !GV->getName().equals("llvm.amdgcn.module.lds")) {
14345ffd83dbSDimitry Andric       SDLoc DL(Op);
14350b57cec5SDimitry Andric       const Function &Fn = DAG.getMachineFunction().getFunction();
14360b57cec5SDimitry Andric       DiagnosticInfoUnsupported BadLDSDecl(
14375ffd83dbSDimitry Andric         Fn, "local memory global used by non-kernel function",
14385ffd83dbSDimitry Andric         DL.getDebugLoc(), DS_Warning);
14390b57cec5SDimitry Andric       DAG.getContext()->diagnose(BadLDSDecl);
14405ffd83dbSDimitry Andric 
14415ffd83dbSDimitry Andric       // We currently don't have a way to correctly allocate LDS objects that
14425ffd83dbSDimitry Andric       // aren't directly associated with a kernel. We do force inlining of
14435ffd83dbSDimitry Andric       // functions that use local objects. However, if these dead functions are
14445ffd83dbSDimitry Andric       // not eliminated, we don't want a compile time error. Just emit a warning
14455ffd83dbSDimitry Andric       // and a trap, since there should be no callable path here.
14465ffd83dbSDimitry Andric       SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode());
14475ffd83dbSDimitry Andric       SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
14485ffd83dbSDimitry Andric                                         Trap, DAG.getRoot());
14495ffd83dbSDimitry Andric       DAG.setRoot(OutputChain);
14505ffd83dbSDimitry Andric       return DAG.getUNDEF(Op.getValueType());
14510b57cec5SDimitry Andric     }
14520b57cec5SDimitry Andric 
14530b57cec5SDimitry Andric     // XXX: What does the value of G->getOffset() mean?
14540b57cec5SDimitry Andric     assert(G->getOffset() == 0 &&
14550b57cec5SDimitry Andric          "Do not know what to do with an non-zero offset");
14560b57cec5SDimitry Andric 
14570b57cec5SDimitry Andric     // TODO: We could emit code to handle the initialization somewhere.
1458349cc55cSDimitry Andric     // We ignore the initializer for now and legalize it to allow selection.
1459349cc55cSDimitry Andric     // The initializer will anyway get errored out during assembly emission.
14605ffd83dbSDimitry Andric     unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV));
14610b57cec5SDimitry Andric     return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
14620b57cec5SDimitry Andric   }
14630b57cec5SDimitry Andric   return SDValue();
14640b57cec5SDimitry Andric }
14650b57cec5SDimitry Andric 
LowerCONCAT_VECTORS(SDValue Op,SelectionDAG & DAG) const14660b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
14670b57cec5SDimitry Andric                                                   SelectionDAG &DAG) const {
14680b57cec5SDimitry Andric   SmallVector<SDValue, 8> Args;
1469bdd1243dSDimitry Andric   SDLoc SL(Op);
14700b57cec5SDimitry Andric 
14710b57cec5SDimitry Andric   EVT VT = Op.getValueType();
1472bdd1243dSDimitry Andric   if (VT.getVectorElementType().getSizeInBits() < 32) {
1473bdd1243dSDimitry Andric     unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits();
1474bdd1243dSDimitry Andric     if (OpBitSize >= 32 && OpBitSize % 32 == 0) {
1475bdd1243dSDimitry Andric       unsigned NewNumElt = OpBitSize / 32;
1476bdd1243dSDimitry Andric       EVT NewEltVT = (NewNumElt == 1) ? MVT::i32
1477bdd1243dSDimitry Andric                                       : EVT::getVectorVT(*DAG.getContext(),
1478bdd1243dSDimitry Andric                                                          MVT::i32, NewNumElt);
1479bdd1243dSDimitry Andric       for (const SDUse &U : Op->ops()) {
1480bdd1243dSDimitry Andric         SDValue In = U.get();
1481bdd1243dSDimitry Andric         SDValue NewIn = DAG.getNode(ISD::BITCAST, SL, NewEltVT, In);
1482bdd1243dSDimitry Andric         if (NewNumElt > 1)
1483bdd1243dSDimitry Andric           DAG.ExtractVectorElements(NewIn, Args);
1484bdd1243dSDimitry Andric         else
1485bdd1243dSDimitry Andric           Args.push_back(NewIn);
1486bdd1243dSDimitry Andric       }
14870b57cec5SDimitry Andric 
1488bdd1243dSDimitry Andric       EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
1489bdd1243dSDimitry Andric                                    NewNumElt * Op.getNumOperands());
1490bdd1243dSDimitry Andric       SDValue BV = DAG.getBuildVector(NewVT, SL, Args);
14910b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, VT, BV);
14920b57cec5SDimitry Andric     }
1493bdd1243dSDimitry Andric   }
14940b57cec5SDimitry Andric 
14950b57cec5SDimitry Andric   for (const SDUse &U : Op->ops())
14960b57cec5SDimitry Andric     DAG.ExtractVectorElements(U.get(), Args);
14970b57cec5SDimitry Andric 
1498bdd1243dSDimitry Andric   return DAG.getBuildVector(Op.getValueType(), SL, Args);
14990b57cec5SDimitry Andric }
15000b57cec5SDimitry Andric 
LowerEXTRACT_SUBVECTOR(SDValue Op,SelectionDAG & DAG) const15010b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
15020b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
150306c3fb27SDimitry Andric   SDLoc SL(Op);
15040b57cec5SDimitry Andric   SmallVector<SDValue, 8> Args;
1505647cbc5dSDimitry Andric   unsigned Start = Op.getConstantOperandVal(1);
15060b57cec5SDimitry Andric   EVT VT = Op.getValueType();
1507fe6060f1SDimitry Andric   EVT SrcVT = Op.getOperand(0).getValueType();
1508fe6060f1SDimitry Andric 
150906c3fb27SDimitry Andric   if (VT.getScalarSizeInBits() == 16 && Start % 2 == 0) {
151006c3fb27SDimitry Andric     unsigned NumElt = VT.getVectorNumElements();
151106c3fb27SDimitry Andric     unsigned NumSrcElt = SrcVT.getVectorNumElements();
151206c3fb27SDimitry Andric     assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types");
1513fe6060f1SDimitry Andric 
151406c3fb27SDimitry Andric     // Extract 32-bit registers at a time.
151506c3fb27SDimitry Andric     EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumSrcElt / 2);
151606c3fb27SDimitry Andric     EVT NewVT = NumElt == 2
151706c3fb27SDimitry Andric                     ? MVT::i32
151806c3fb27SDimitry Andric                     : EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElt / 2);
151906c3fb27SDimitry Andric     SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0));
152004eeddc0SDimitry Andric 
152106c3fb27SDimitry Andric     DAG.ExtractVectorElements(Tmp, Args, Start / 2, NumElt / 2);
152206c3fb27SDimitry Andric     if (NumElt == 2)
152306c3fb27SDimitry Andric       Tmp = Args[0];
152406c3fb27SDimitry Andric     else
152506c3fb27SDimitry Andric       Tmp = DAG.getBuildVector(NewVT, SL, Args);
152606c3fb27SDimitry Andric 
152706c3fb27SDimitry Andric     return DAG.getNode(ISD::BITCAST, SL, VT, Tmp);
152806c3fb27SDimitry Andric   }
152981ad6265SDimitry Andric 
15300b57cec5SDimitry Andric   DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
15310b57cec5SDimitry Andric                             VT.getVectorNumElements());
15320b57cec5SDimitry Andric 
153306c3fb27SDimitry Andric   return DAG.getBuildVector(Op.getValueType(), SL, Args);
15340b57cec5SDimitry Andric }
15350b57cec5SDimitry Andric 
153606c3fb27SDimitry Andric // TODO: Handle fabs too
peekFNeg(SDValue Val)153706c3fb27SDimitry Andric static SDValue peekFNeg(SDValue Val) {
153806c3fb27SDimitry Andric   if (Val.getOpcode() == ISD::FNEG)
153906c3fb27SDimitry Andric     return Val.getOperand(0);
15400b57cec5SDimitry Andric 
154106c3fb27SDimitry Andric   return Val;
154206c3fb27SDimitry Andric }
154306c3fb27SDimitry Andric 
peekFPSignOps(SDValue Val)154406c3fb27SDimitry Andric static SDValue peekFPSignOps(SDValue Val) {
154506c3fb27SDimitry Andric   if (Val.getOpcode() == ISD::FNEG)
154606c3fb27SDimitry Andric     Val = Val.getOperand(0);
154706c3fb27SDimitry Andric   if (Val.getOpcode() == ISD::FABS)
154806c3fb27SDimitry Andric     Val = Val.getOperand(0);
154906c3fb27SDimitry Andric   if (Val.getOpcode() == ISD::FCOPYSIGN)
155006c3fb27SDimitry Andric     Val = Val.getOperand(0);
155106c3fb27SDimitry Andric   return Val;
155206c3fb27SDimitry Andric }
155306c3fb27SDimitry Andric 
combineFMinMaxLegacyImpl(const SDLoc & DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,DAGCombinerInfo & DCI) const155406c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
155506c3fb27SDimitry Andric     const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
155606c3fb27SDimitry Andric     SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
15570b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
15580b57cec5SDimitry Andric   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
15590b57cec5SDimitry Andric   switch (CCOpcode) {
15600b57cec5SDimitry Andric   case ISD::SETOEQ:
15610b57cec5SDimitry Andric   case ISD::SETONE:
15620b57cec5SDimitry Andric   case ISD::SETUNE:
15630b57cec5SDimitry Andric   case ISD::SETNE:
15640b57cec5SDimitry Andric   case ISD::SETUEQ:
15650b57cec5SDimitry Andric   case ISD::SETEQ:
15660b57cec5SDimitry Andric   case ISD::SETFALSE:
15670b57cec5SDimitry Andric   case ISD::SETFALSE2:
15680b57cec5SDimitry Andric   case ISD::SETTRUE:
15690b57cec5SDimitry Andric   case ISD::SETTRUE2:
15700b57cec5SDimitry Andric   case ISD::SETUO:
15710b57cec5SDimitry Andric   case ISD::SETO:
15720b57cec5SDimitry Andric     break;
15730b57cec5SDimitry Andric   case ISD::SETULE:
15740b57cec5SDimitry Andric   case ISD::SETULT: {
15750b57cec5SDimitry Andric     if (LHS == True)
15760b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
15770b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
15780b57cec5SDimitry Andric   }
15790b57cec5SDimitry Andric   case ISD::SETOLE:
15800b57cec5SDimitry Andric   case ISD::SETOLT:
15810b57cec5SDimitry Andric   case ISD::SETLE:
15820b57cec5SDimitry Andric   case ISD::SETLT: {
15830b57cec5SDimitry Andric     // Ordered. Assume ordered for undefined.
15840b57cec5SDimitry Andric 
15850b57cec5SDimitry Andric     // Only do this after legalization to avoid interfering with other combines
15860b57cec5SDimitry Andric     // which might occur.
15870b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
15880b57cec5SDimitry Andric         !DCI.isCalledByLegalizer())
15890b57cec5SDimitry Andric       return SDValue();
15900b57cec5SDimitry Andric 
15910b57cec5SDimitry Andric     // We need to permute the operands to get the correct NaN behavior. The
15920b57cec5SDimitry Andric     // selected operand is the second one based on the failing compare with NaN,
15930b57cec5SDimitry Andric     // so permute it based on the compare type the hardware uses.
15940b57cec5SDimitry Andric     if (LHS == True)
15950b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
15960b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
15970b57cec5SDimitry Andric   }
15980b57cec5SDimitry Andric   case ISD::SETUGE:
15990b57cec5SDimitry Andric   case ISD::SETUGT: {
16000b57cec5SDimitry Andric     if (LHS == True)
16010b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS);
16020b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS);
16030b57cec5SDimitry Andric   }
16040b57cec5SDimitry Andric   case ISD::SETGT:
16050b57cec5SDimitry Andric   case ISD::SETGE:
16060b57cec5SDimitry Andric   case ISD::SETOGE:
16070b57cec5SDimitry Andric   case ISD::SETOGT: {
16080b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG &&
16090b57cec5SDimitry Andric         !DCI.isCalledByLegalizer())
16100b57cec5SDimitry Andric       return SDValue();
16110b57cec5SDimitry Andric 
16120b57cec5SDimitry Andric     if (LHS == True)
16130b57cec5SDimitry Andric       return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS);
16140b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS);
16150b57cec5SDimitry Andric   }
16160b57cec5SDimitry Andric   case ISD::SETCC_INVALID:
16170b57cec5SDimitry Andric     llvm_unreachable("Invalid setcc condcode!");
16180b57cec5SDimitry Andric   }
16190b57cec5SDimitry Andric   return SDValue();
16200b57cec5SDimitry Andric }
16210b57cec5SDimitry Andric 
162206c3fb27SDimitry Andric /// Generate Min/Max node
combineFMinMaxLegacy(const SDLoc & DL,EVT VT,SDValue LHS,SDValue RHS,SDValue True,SDValue False,SDValue CC,DAGCombinerInfo & DCI) const162306c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
162406c3fb27SDimitry Andric                                                    SDValue LHS, SDValue RHS,
162506c3fb27SDimitry Andric                                                    SDValue True, SDValue False,
162606c3fb27SDimitry Andric                                                    SDValue CC,
162706c3fb27SDimitry Andric                                                    DAGCombinerInfo &DCI) const {
162806c3fb27SDimitry Andric   if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
162906c3fb27SDimitry Andric     return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI);
163006c3fb27SDimitry Andric 
163106c3fb27SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
163206c3fb27SDimitry Andric 
163306c3fb27SDimitry Andric   // If we can't directly match this, try to see if we can fold an fneg to
163406c3fb27SDimitry Andric   // match.
163506c3fb27SDimitry Andric 
163606c3fb27SDimitry Andric   ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
163706c3fb27SDimitry Andric   ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
163806c3fb27SDimitry Andric   SDValue NegTrue = peekFNeg(True);
163906c3fb27SDimitry Andric 
164006c3fb27SDimitry Andric   // Undo the combine foldFreeOpFromSelect does if it helps us match the
164106c3fb27SDimitry Andric   // fmin/fmax.
164206c3fb27SDimitry Andric   //
164306c3fb27SDimitry Andric   // select (fcmp olt (lhs, K)), (fneg lhs), -K
164406c3fb27SDimitry Andric   // -> fneg (fmin_legacy lhs, K)
164506c3fb27SDimitry Andric   //
164606c3fb27SDimitry Andric   // TODO: Use getNegatedExpression
164706c3fb27SDimitry Andric   if (LHS == NegTrue && CFalse && CRHS) {
164806c3fb27SDimitry Andric     APFloat NegRHS = neg(CRHS->getValueAPF());
164906c3fb27SDimitry Andric     if (NegRHS == CFalse->getValueAPF()) {
165006c3fb27SDimitry Andric       SDValue Combined =
165106c3fb27SDimitry Andric           combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI);
165206c3fb27SDimitry Andric       if (Combined)
165306c3fb27SDimitry Andric         return DAG.getNode(ISD::FNEG, DL, VT, Combined);
165406c3fb27SDimitry Andric       return SDValue();
165506c3fb27SDimitry Andric     }
165606c3fb27SDimitry Andric   }
165706c3fb27SDimitry Andric 
165806c3fb27SDimitry Andric   return SDValue();
165906c3fb27SDimitry Andric }
166006c3fb27SDimitry Andric 
16610b57cec5SDimitry Andric std::pair<SDValue, SDValue>
split64BitValue(SDValue Op,SelectionDAG & DAG) const16620b57cec5SDimitry Andric AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
16630b57cec5SDimitry Andric   SDLoc SL(Op);
16640b57cec5SDimitry Andric 
16650b57cec5SDimitry Andric   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
16660b57cec5SDimitry Andric 
16670b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
16680b57cec5SDimitry Andric   const SDValue One = DAG.getConstant(1, SL, MVT::i32);
16690b57cec5SDimitry Andric 
16700b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
16710b57cec5SDimitry Andric   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
16720b57cec5SDimitry Andric 
1673bdd1243dSDimitry Andric   return std::pair(Lo, Hi);
16740b57cec5SDimitry Andric }
16750b57cec5SDimitry Andric 
getLoHalf64(SDValue Op,SelectionDAG & DAG) const16760b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const {
16770b57cec5SDimitry Andric   SDLoc SL(Op);
16780b57cec5SDimitry Andric 
16790b57cec5SDimitry Andric   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
16800b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
16810b57cec5SDimitry Andric   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
16820b57cec5SDimitry Andric }
16830b57cec5SDimitry Andric 
getHiHalf64(SDValue Op,SelectionDAG & DAG) const16840b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const {
16850b57cec5SDimitry Andric   SDLoc SL(Op);
16860b57cec5SDimitry Andric 
16870b57cec5SDimitry Andric   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
16880b57cec5SDimitry Andric   const SDValue One = DAG.getConstant(1, SL, MVT::i32);
16890b57cec5SDimitry Andric   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
16900b57cec5SDimitry Andric }
16910b57cec5SDimitry Andric 
16920b57cec5SDimitry Andric // Split a vector type into two parts. The first part is a power of two vector.
16930b57cec5SDimitry Andric // The second part is whatever is left over, and is a scalar if it would
16940b57cec5SDimitry Andric // otherwise be a 1-vector.
16950b57cec5SDimitry Andric std::pair<EVT, EVT>
getSplitDestVTs(const EVT & VT,SelectionDAG & DAG) const16960b57cec5SDimitry Andric AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const {
16970b57cec5SDimitry Andric   EVT LoVT, HiVT;
16980b57cec5SDimitry Andric   EVT EltVT = VT.getVectorElementType();
16990b57cec5SDimitry Andric   unsigned NumElts = VT.getVectorNumElements();
17000b57cec5SDimitry Andric   unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2);
17010b57cec5SDimitry Andric   LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts);
17020b57cec5SDimitry Andric   HiVT = NumElts - LoNumElts == 1
17030b57cec5SDimitry Andric              ? EltVT
17040b57cec5SDimitry Andric              : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts);
1705bdd1243dSDimitry Andric   return std::pair(LoVT, HiVT);
17060b57cec5SDimitry Andric }
17070b57cec5SDimitry Andric 
17080b57cec5SDimitry Andric // Split a vector value into two parts of types LoVT and HiVT. HiVT could be
17090b57cec5SDimitry Andric // scalar.
17100b57cec5SDimitry Andric std::pair<SDValue, SDValue>
splitVector(const SDValue & N,const SDLoc & DL,const EVT & LoVT,const EVT & HiVT,SelectionDAG & DAG) const17110b57cec5SDimitry Andric AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL,
17120b57cec5SDimitry Andric                                   const EVT &LoVT, const EVT &HiVT,
17130b57cec5SDimitry Andric                                   SelectionDAG &DAG) const {
17140b57cec5SDimitry Andric   assert(LoVT.getVectorNumElements() +
17150b57cec5SDimitry Andric                  (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <=
17160b57cec5SDimitry Andric              N.getValueType().getVectorNumElements() &&
17170b57cec5SDimitry Andric          "More vector elements requested than available!");
17180b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N,
17195ffd83dbSDimitry Andric                            DAG.getVectorIdxConstant(0, DL));
17200b57cec5SDimitry Andric   SDValue Hi = DAG.getNode(
17210b57cec5SDimitry Andric       HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL,
17225ffd83dbSDimitry Andric       HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL));
1723bdd1243dSDimitry Andric   return std::pair(Lo, Hi);
17240b57cec5SDimitry Andric }
17250b57cec5SDimitry Andric 
SplitVectorLoad(const SDValue Op,SelectionDAG & DAG) const17260b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
17270b57cec5SDimitry Andric                                               SelectionDAG &DAG) const {
17280b57cec5SDimitry Andric   LoadSDNode *Load = cast<LoadSDNode>(Op);
17290b57cec5SDimitry Andric   EVT VT = Op.getValueType();
1730480093f4SDimitry Andric   SDLoc SL(Op);
17310b57cec5SDimitry Andric 
17320b57cec5SDimitry Andric 
17330b57cec5SDimitry Andric   // If this is a 2 element vector, we really want to scalarize and not create
17340b57cec5SDimitry Andric   // weird 1 element vectors.
1735480093f4SDimitry Andric   if (VT.getVectorNumElements() == 2) {
1736480093f4SDimitry Andric     SDValue Ops[2];
1737480093f4SDimitry Andric     std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG);
1738480093f4SDimitry Andric     return DAG.getMergeValues(Ops, SL);
1739480093f4SDimitry Andric   }
17400b57cec5SDimitry Andric 
17410b57cec5SDimitry Andric   SDValue BasePtr = Load->getBasePtr();
17420b57cec5SDimitry Andric   EVT MemVT = Load->getMemoryVT();
17430b57cec5SDimitry Andric 
17440b57cec5SDimitry Andric   const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
17450b57cec5SDimitry Andric 
17460b57cec5SDimitry Andric   EVT LoVT, HiVT;
17470b57cec5SDimitry Andric   EVT LoMemVT, HiMemVT;
17480b57cec5SDimitry Andric   SDValue Lo, Hi;
17490b57cec5SDimitry Andric 
17500b57cec5SDimitry Andric   std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
17510b57cec5SDimitry Andric   std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
17520b57cec5SDimitry Andric   std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG);
17530b57cec5SDimitry Andric 
17540b57cec5SDimitry Andric   unsigned Size = LoMemVT.getStoreSize();
175581ad6265SDimitry Andric   Align BaseAlign = Load->getAlign();
175681ad6265SDimitry Andric   Align HiAlign = commonAlignment(BaseAlign, Size);
17570b57cec5SDimitry Andric 
17580b57cec5SDimitry Andric   SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT,
17590b57cec5SDimitry Andric                                   Load->getChain(), BasePtr, SrcValue, LoMemVT,
17600b57cec5SDimitry Andric                                   BaseAlign, Load->getMemOperand()->getFlags());
17615f757f3fSDimitry Andric   SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size));
17620b57cec5SDimitry Andric   SDValue HiLoad =
17630b57cec5SDimitry Andric       DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(),
17640b57cec5SDimitry Andric                      HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()),
17650b57cec5SDimitry Andric                      HiMemVT, HiAlign, Load->getMemOperand()->getFlags());
17660b57cec5SDimitry Andric 
17670b57cec5SDimitry Andric   SDValue Join;
17680b57cec5SDimitry Andric   if (LoVT == HiVT) {
17690b57cec5SDimitry Andric     // This is the case that the vector is power of two so was evenly split.
17700b57cec5SDimitry Andric     Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad);
17710b57cec5SDimitry Andric   } else {
17720b57cec5SDimitry Andric     Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad,
17735ffd83dbSDimitry Andric                        DAG.getVectorIdxConstant(0, SL));
17745ffd83dbSDimitry Andric     Join = DAG.getNode(
17755ffd83dbSDimitry Andric         HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL,
17765ffd83dbSDimitry Andric         VT, Join, HiLoad,
17775ffd83dbSDimitry Andric         DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL));
17780b57cec5SDimitry Andric   }
17790b57cec5SDimitry Andric 
17800b57cec5SDimitry Andric   SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
17810b57cec5SDimitry Andric                                      LoLoad.getValue(1), HiLoad.getValue(1))};
17820b57cec5SDimitry Andric 
17830b57cec5SDimitry Andric   return DAG.getMergeValues(Ops, SL);
17840b57cec5SDimitry Andric }
17850b57cec5SDimitry Andric 
WidenOrSplitVectorLoad(SDValue Op,SelectionDAG & DAG) const1786e8d8bef9SDimitry Andric SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op,
17870b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
17880b57cec5SDimitry Andric   LoadSDNode *Load = cast<LoadSDNode>(Op);
17890b57cec5SDimitry Andric   EVT VT = Op.getValueType();
17900b57cec5SDimitry Andric   SDValue BasePtr = Load->getBasePtr();
17910b57cec5SDimitry Andric   EVT MemVT = Load->getMemoryVT();
17920b57cec5SDimitry Andric   SDLoc SL(Op);
17930b57cec5SDimitry Andric   const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo();
179481ad6265SDimitry Andric   Align BaseAlign = Load->getAlign();
1795e8d8bef9SDimitry Andric   unsigned NumElements = MemVT.getVectorNumElements();
1796e8d8bef9SDimitry Andric 
1797e8d8bef9SDimitry Andric   // Widen from vec3 to vec4 when the load is at least 8-byte aligned
1798e8d8bef9SDimitry Andric   // or 16-byte fully dereferenceable. Otherwise, split the vector load.
1799e8d8bef9SDimitry Andric   if (NumElements != 3 ||
180081ad6265SDimitry Andric       (BaseAlign < Align(8) &&
1801e8d8bef9SDimitry Andric        !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout())))
1802e8d8bef9SDimitry Andric     return SplitVectorLoad(Op, DAG);
1803e8d8bef9SDimitry Andric 
1804e8d8bef9SDimitry Andric   assert(NumElements == 3);
18050b57cec5SDimitry Andric 
18060b57cec5SDimitry Andric   EVT WideVT =
18070b57cec5SDimitry Andric       EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4);
18080b57cec5SDimitry Andric   EVT WideMemVT =
18090b57cec5SDimitry Andric       EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4);
18100b57cec5SDimitry Andric   SDValue WideLoad = DAG.getExtLoad(
18110b57cec5SDimitry Andric       Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue,
18120b57cec5SDimitry Andric       WideMemVT, BaseAlign, Load->getMemOperand()->getFlags());
18130b57cec5SDimitry Andric   return DAG.getMergeValues(
18140b57cec5SDimitry Andric       {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad,
18155ffd83dbSDimitry Andric                    DAG.getVectorIdxConstant(0, SL)),
18160b57cec5SDimitry Andric        WideLoad.getValue(1)},
18170b57cec5SDimitry Andric       SL);
18180b57cec5SDimitry Andric }
18190b57cec5SDimitry Andric 
SplitVectorStore(SDValue Op,SelectionDAG & DAG) const18200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
18210b57cec5SDimitry Andric                                                SelectionDAG &DAG) const {
18220b57cec5SDimitry Andric   StoreSDNode *Store = cast<StoreSDNode>(Op);
18230b57cec5SDimitry Andric   SDValue Val = Store->getValue();
18240b57cec5SDimitry Andric   EVT VT = Val.getValueType();
18250b57cec5SDimitry Andric 
18260b57cec5SDimitry Andric   // If this is a 2 element vector, we really want to scalarize and not create
18270b57cec5SDimitry Andric   // weird 1 element vectors.
18280b57cec5SDimitry Andric   if (VT.getVectorNumElements() == 2)
18290b57cec5SDimitry Andric     return scalarizeVectorStore(Store, DAG);
18300b57cec5SDimitry Andric 
18310b57cec5SDimitry Andric   EVT MemVT = Store->getMemoryVT();
18320b57cec5SDimitry Andric   SDValue Chain = Store->getChain();
18330b57cec5SDimitry Andric   SDValue BasePtr = Store->getBasePtr();
18340b57cec5SDimitry Andric   SDLoc SL(Op);
18350b57cec5SDimitry Andric 
18360b57cec5SDimitry Andric   EVT LoVT, HiVT;
18370b57cec5SDimitry Andric   EVT LoMemVT, HiMemVT;
18380b57cec5SDimitry Andric   SDValue Lo, Hi;
18390b57cec5SDimitry Andric 
18400b57cec5SDimitry Andric   std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG);
18410b57cec5SDimitry Andric   std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG);
18420b57cec5SDimitry Andric   std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG);
18430b57cec5SDimitry Andric 
18440b57cec5SDimitry Andric   SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize());
18450b57cec5SDimitry Andric 
18460b57cec5SDimitry Andric   const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo();
184781ad6265SDimitry Andric   Align BaseAlign = Store->getAlign();
18480b57cec5SDimitry Andric   unsigned Size = LoMemVT.getStoreSize();
184981ad6265SDimitry Andric   Align HiAlign = commonAlignment(BaseAlign, Size);
18500b57cec5SDimitry Andric 
18510b57cec5SDimitry Andric   SDValue LoStore =
18520b57cec5SDimitry Andric       DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign,
18530b57cec5SDimitry Andric                         Store->getMemOperand()->getFlags());
18540b57cec5SDimitry Andric   SDValue HiStore =
18550b57cec5SDimitry Andric       DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size),
18560b57cec5SDimitry Andric                         HiMemVT, HiAlign, Store->getMemOperand()->getFlags());
18570b57cec5SDimitry Andric 
18580b57cec5SDimitry Andric   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
18590b57cec5SDimitry Andric }
18600b57cec5SDimitry Andric 
18610b57cec5SDimitry Andric // This is a shortcut for integer division because we have fast i32<->f32
18620b57cec5SDimitry Andric // conversions, and fast f32 reciprocal instructions. The fractional part of a
18630b57cec5SDimitry Andric // float is enough to accurately represent up to a 24-bit signed integer.
LowerDIVREM24(SDValue Op,SelectionDAG & DAG,bool Sign) const18640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
18650b57cec5SDimitry Andric                                             bool Sign) const {
18660b57cec5SDimitry Andric   SDLoc DL(Op);
18670b57cec5SDimitry Andric   EVT VT = Op.getValueType();
18680b57cec5SDimitry Andric   SDValue LHS = Op.getOperand(0);
18690b57cec5SDimitry Andric   SDValue RHS = Op.getOperand(1);
18700b57cec5SDimitry Andric   MVT IntVT = MVT::i32;
18710b57cec5SDimitry Andric   MVT FltVT = MVT::f32;
18720b57cec5SDimitry Andric 
18730b57cec5SDimitry Andric   unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS);
18740b57cec5SDimitry Andric   if (LHSSignBits < 9)
18750b57cec5SDimitry Andric     return SDValue();
18760b57cec5SDimitry Andric 
18770b57cec5SDimitry Andric   unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS);
18780b57cec5SDimitry Andric   if (RHSSignBits < 9)
18790b57cec5SDimitry Andric     return SDValue();
18800b57cec5SDimitry Andric 
18810b57cec5SDimitry Andric   unsigned BitSize = VT.getSizeInBits();
18820b57cec5SDimitry Andric   unsigned SignBits = std::min(LHSSignBits, RHSSignBits);
18830b57cec5SDimitry Andric   unsigned DivBits = BitSize - SignBits;
18840b57cec5SDimitry Andric   if (Sign)
18850b57cec5SDimitry Andric     ++DivBits;
18860b57cec5SDimitry Andric 
18870b57cec5SDimitry Andric   ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
18880b57cec5SDimitry Andric   ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
18890b57cec5SDimitry Andric 
18900b57cec5SDimitry Andric   SDValue jq = DAG.getConstant(1, DL, IntVT);
18910b57cec5SDimitry Andric 
18920b57cec5SDimitry Andric   if (Sign) {
18930b57cec5SDimitry Andric     // char|short jq = ia ^ ib;
18940b57cec5SDimitry Andric     jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
18950b57cec5SDimitry Andric 
18960b57cec5SDimitry Andric     // jq = jq >> (bitsize - 2)
18970b57cec5SDimitry Andric     jq = DAG.getNode(ISD::SRA, DL, VT, jq,
18980b57cec5SDimitry Andric                      DAG.getConstant(BitSize - 2, DL, VT));
18990b57cec5SDimitry Andric 
19000b57cec5SDimitry Andric     // jq = jq | 0x1
19010b57cec5SDimitry Andric     jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT));
19020b57cec5SDimitry Andric   }
19030b57cec5SDimitry Andric 
19040b57cec5SDimitry Andric   // int ia = (int)LHS;
19050b57cec5SDimitry Andric   SDValue ia = LHS;
19060b57cec5SDimitry Andric 
19070b57cec5SDimitry Andric   // int ib, (int)RHS;
19080b57cec5SDimitry Andric   SDValue ib = RHS;
19090b57cec5SDimitry Andric 
19100b57cec5SDimitry Andric   // float fa = (float)ia;
19110b57cec5SDimitry Andric   SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia);
19120b57cec5SDimitry Andric 
19130b57cec5SDimitry Andric   // float fb = (float)ib;
19140b57cec5SDimitry Andric   SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib);
19150b57cec5SDimitry Andric 
19160b57cec5SDimitry Andric   SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT,
19170b57cec5SDimitry Andric                            fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb));
19180b57cec5SDimitry Andric 
19190b57cec5SDimitry Andric   // fq = trunc(fq);
19200b57cec5SDimitry Andric   fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq);
19210b57cec5SDimitry Andric 
19220b57cec5SDimitry Andric   // float fqneg = -fq;
19230b57cec5SDimitry Andric   SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq);
19240b57cec5SDimitry Andric 
1925480093f4SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
1926bdd1243dSDimitry Andric 
1927bdd1243dSDimitry Andric   bool UseFmadFtz = false;
1928bdd1243dSDimitry Andric   if (Subtarget->isGCN()) {
1929bdd1243dSDimitry Andric     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
193006c3fb27SDimitry Andric     UseFmadFtz =
193106c3fb27SDimitry Andric         MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign();
1932bdd1243dSDimitry Andric   }
1933480093f4SDimitry Andric 
19340b57cec5SDimitry Andric   // float fr = mad(fqneg, fb, fa);
1935bdd1243dSDimitry Andric   unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
1936bdd1243dSDimitry Andric                     : UseFmadFtz ? (unsigned)AMDGPUISD::FMAD_FTZ
1937bdd1243dSDimitry Andric                                  : (unsigned)ISD::FMAD;
19380b57cec5SDimitry Andric   SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa);
19390b57cec5SDimitry Andric 
19400b57cec5SDimitry Andric   // int iq = (int)fq;
19410b57cec5SDimitry Andric   SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq);
19420b57cec5SDimitry Andric 
19430b57cec5SDimitry Andric   // fr = fabs(fr);
19440b57cec5SDimitry Andric   fr = DAG.getNode(ISD::FABS, DL, FltVT, fr);
19450b57cec5SDimitry Andric 
19460b57cec5SDimitry Andric   // fb = fabs(fb);
19470b57cec5SDimitry Andric   fb = DAG.getNode(ISD::FABS, DL, FltVT, fb);
19480b57cec5SDimitry Andric 
19490b57cec5SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
19500b57cec5SDimitry Andric 
19510b57cec5SDimitry Andric   // int cv = fr >= fb;
19520b57cec5SDimitry Andric   SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE);
19530b57cec5SDimitry Andric 
19540b57cec5SDimitry Andric   // jq = (cv ? jq : 0);
19550b57cec5SDimitry Andric   jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT));
19560b57cec5SDimitry Andric 
19570b57cec5SDimitry Andric   // dst = iq + jq;
19580b57cec5SDimitry Andric   SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq);
19590b57cec5SDimitry Andric 
19600b57cec5SDimitry Andric   // Rem needs compensation, it's easier to recompute it
19610b57cec5SDimitry Andric   SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS);
19620b57cec5SDimitry Andric   Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem);
19630b57cec5SDimitry Andric 
19640b57cec5SDimitry Andric   // Truncate to number of bits this divide really is.
19650b57cec5SDimitry Andric   if (Sign) {
19660b57cec5SDimitry Andric     SDValue InRegSize
19670b57cec5SDimitry Andric       = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits));
19680b57cec5SDimitry Andric     Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize);
19690b57cec5SDimitry Andric     Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize);
19700b57cec5SDimitry Andric   } else {
19710b57cec5SDimitry Andric     SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT);
19720b57cec5SDimitry Andric     Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask);
19730b57cec5SDimitry Andric     Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask);
19740b57cec5SDimitry Andric   }
19750b57cec5SDimitry Andric 
19760b57cec5SDimitry Andric   return DAG.getMergeValues({ Div, Rem }, DL);
19770b57cec5SDimitry Andric }
19780b57cec5SDimitry Andric 
LowerUDIVREM64(SDValue Op,SelectionDAG & DAG,SmallVectorImpl<SDValue> & Results) const19790b57cec5SDimitry Andric void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
19800b57cec5SDimitry Andric                                       SelectionDAG &DAG,
19810b57cec5SDimitry Andric                                       SmallVectorImpl<SDValue> &Results) const {
19820b57cec5SDimitry Andric   SDLoc DL(Op);
19830b57cec5SDimitry Andric   EVT VT = Op.getValueType();
19840b57cec5SDimitry Andric 
19850b57cec5SDimitry Andric   assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64");
19860b57cec5SDimitry Andric 
19870b57cec5SDimitry Andric   EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
19880b57cec5SDimitry Andric 
19890b57cec5SDimitry Andric   SDValue One = DAG.getConstant(1, DL, HalfVT);
19900b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, DL, HalfVT);
19910b57cec5SDimitry Andric 
19920b57cec5SDimitry Andric   //HiLo split
199306c3fb27SDimitry Andric   SDValue LHS_Lo, LHS_Hi;
19940b57cec5SDimitry Andric   SDValue LHS = Op.getOperand(0);
199506c3fb27SDimitry Andric   std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT);
19960b57cec5SDimitry Andric 
199706c3fb27SDimitry Andric   SDValue RHS_Lo, RHS_Hi;
19980b57cec5SDimitry Andric   SDValue RHS = Op.getOperand(1);
199906c3fb27SDimitry Andric   std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT);
20000b57cec5SDimitry Andric 
20010b57cec5SDimitry Andric   if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
20020b57cec5SDimitry Andric       DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
20030b57cec5SDimitry Andric 
20040b57cec5SDimitry Andric     SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
20050b57cec5SDimitry Andric                               LHS_Lo, RHS_Lo);
20060b57cec5SDimitry Andric 
20070b57cec5SDimitry Andric     SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero});
20080b57cec5SDimitry Andric     SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero});
20090b57cec5SDimitry Andric 
20100b57cec5SDimitry Andric     Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV));
20110b57cec5SDimitry Andric     Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM));
20120b57cec5SDimitry Andric     return;
20130b57cec5SDimitry Andric   }
20140b57cec5SDimitry Andric 
20150b57cec5SDimitry Andric   if (isTypeLegal(MVT::i64)) {
2016349cc55cSDimitry Andric     // The algorithm here is based on ideas from "Software Integer Division",
2017349cc55cSDimitry Andric     // Tom Rodeheffer, August 2008.
2018349cc55cSDimitry Andric 
2019480093f4SDimitry Andric     MachineFunction &MF = DAG.getMachineFunction();
2020480093f4SDimitry Andric     const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2021480093f4SDimitry Andric 
20220b57cec5SDimitry Andric     // Compute denominator reciprocal.
202306c3fb27SDimitry Andric     unsigned FMAD =
202406c3fb27SDimitry Andric         !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
202506c3fb27SDimitry Andric         : MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()
202606c3fb27SDimitry Andric             ? (unsigned)ISD::FMAD
202706c3fb27SDimitry Andric             : (unsigned)AMDGPUISD::FMAD_FTZ;
20280b57cec5SDimitry Andric 
20290b57cec5SDimitry Andric     SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
20300b57cec5SDimitry Andric     SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
20310b57cec5SDimitry Andric     SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi,
20320b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32),
20330b57cec5SDimitry Andric       Cvt_Lo);
20340b57cec5SDimitry Andric     SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1);
20350b57cec5SDimitry Andric     SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp,
20360b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32));
20370b57cec5SDimitry Andric     SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1,
20380b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32));
20390b57cec5SDimitry Andric     SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2);
20400b57cec5SDimitry Andric     SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc,
20410b57cec5SDimitry Andric       DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32),
20420b57cec5SDimitry Andric       Mul1);
20430b57cec5SDimitry Andric     SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2);
20440b57cec5SDimitry Andric     SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc);
20450b57cec5SDimitry Andric     SDValue Rcp64 = DAG.getBitcast(VT,
20460b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi}));
20470b57cec5SDimitry Andric 
20480b57cec5SDimitry Andric     SDValue Zero64 = DAG.getConstant(0, DL, VT);
20490b57cec5SDimitry Andric     SDValue One64  = DAG.getConstant(1, DL, VT);
20500b57cec5SDimitry Andric     SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1);
20510b57cec5SDimitry Andric     SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1);
20520b57cec5SDimitry Andric 
2053349cc55cSDimitry Andric     // First round of UNR (Unsigned integer Newton-Raphson).
20540b57cec5SDimitry Andric     SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
20550b57cec5SDimitry Andric     SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
20560b57cec5SDimitry Andric     SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
205706c3fb27SDimitry Andric     SDValue Mulhi1_Lo, Mulhi1_Hi;
205806c3fb27SDimitry Andric     std::tie(Mulhi1_Lo, Mulhi1_Hi) =
205906c3fb27SDimitry Andric         DAG.SplitScalar(Mulhi1, DL, HalfVT, HalfVT);
206006c3fb27SDimitry Andric     SDValue Add1_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Lo,
20610b57cec5SDimitry Andric                                   Mulhi1_Lo, Zero1);
206206c3fb27SDimitry Andric     SDValue Add1_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Hi,
20630b57cec5SDimitry Andric                                   Mulhi1_Hi, Add1_Lo.getValue(1));
20640b57cec5SDimitry Andric     SDValue Add1 = DAG.getBitcast(VT,
20650b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
20660b57cec5SDimitry Andric 
2067349cc55cSDimitry Andric     // Second round of UNR.
20680b57cec5SDimitry Andric     SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
20690b57cec5SDimitry Andric     SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
207006c3fb27SDimitry Andric     SDValue Mulhi2_Lo, Mulhi2_Hi;
207106c3fb27SDimitry Andric     std::tie(Mulhi2_Lo, Mulhi2_Hi) =
207206c3fb27SDimitry Andric         DAG.SplitScalar(Mulhi2, DL, HalfVT, HalfVT);
207306c3fb27SDimitry Andric     SDValue Add2_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Lo,
20740b57cec5SDimitry Andric                                   Mulhi2_Lo, Zero1);
207506c3fb27SDimitry Andric     SDValue Add2_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Hi,
2076349cc55cSDimitry Andric                                   Mulhi2_Hi, Add2_Lo.getValue(1));
20770b57cec5SDimitry Andric     SDValue Add2 = DAG.getBitcast(VT,
20780b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
2079349cc55cSDimitry Andric 
20800b57cec5SDimitry Andric     SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2);
20810b57cec5SDimitry Andric 
20820b57cec5SDimitry Andric     SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
20830b57cec5SDimitry Andric 
208406c3fb27SDimitry Andric     SDValue Mul3_Lo, Mul3_Hi;
208506c3fb27SDimitry Andric     std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT);
208606c3fb27SDimitry Andric     SDValue Sub1_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Lo,
20870b57cec5SDimitry Andric                                   Mul3_Lo, Zero1);
208806c3fb27SDimitry Andric     SDValue Sub1_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Hi,
20890b57cec5SDimitry Andric                                   Mul3_Hi, Sub1_Lo.getValue(1));
20900b57cec5SDimitry Andric     SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi);
20910b57cec5SDimitry Andric     SDValue Sub1 = DAG.getBitcast(VT,
20920b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi}));
20930b57cec5SDimitry Andric 
20940b57cec5SDimitry Andric     SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT);
20950b57cec5SDimitry Andric     SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero,
20960b57cec5SDimitry Andric                                  ISD::SETUGE);
20970b57cec5SDimitry Andric     SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero,
20980b57cec5SDimitry Andric                                  ISD::SETUGE);
20990b57cec5SDimitry Andric     SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ);
21000b57cec5SDimitry Andric 
21010b57cec5SDimitry Andric     // TODO: Here and below portions of the code can be enclosed into if/endif.
21020b57cec5SDimitry Andric     // Currently control flow is unconditional and we have 4 selects after
21030b57cec5SDimitry Andric     // potential endif to substitute PHIs.
21040b57cec5SDimitry Andric 
21050b57cec5SDimitry Andric     // if C3 != 0 ...
210606c3fb27SDimitry Andric     SDValue Sub2_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Lo,
21070b57cec5SDimitry Andric                                   RHS_Lo, Zero1);
210806c3fb27SDimitry Andric     SDValue Sub2_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Mi,
21090b57cec5SDimitry Andric                                   RHS_Hi, Sub1_Lo.getValue(1));
211006c3fb27SDimitry Andric     SDValue Sub2_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
21110b57cec5SDimitry Andric                                   Zero, Sub2_Lo.getValue(1));
21120b57cec5SDimitry Andric     SDValue Sub2 = DAG.getBitcast(VT,
21130b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
21140b57cec5SDimitry Andric 
21150b57cec5SDimitry Andric     SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64);
21160b57cec5SDimitry Andric 
21170b57cec5SDimitry Andric     SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero,
21180b57cec5SDimitry Andric                                  ISD::SETUGE);
21190b57cec5SDimitry Andric     SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero,
21200b57cec5SDimitry Andric                                  ISD::SETUGE);
21210b57cec5SDimitry Andric     SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ);
21220b57cec5SDimitry Andric 
21230b57cec5SDimitry Andric     // if (C6 != 0)
21240b57cec5SDimitry Andric     SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
21250b57cec5SDimitry Andric 
212606c3fb27SDimitry Andric     SDValue Sub3_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Lo,
21270b57cec5SDimitry Andric                                   RHS_Lo, Zero1);
212806c3fb27SDimitry Andric     SDValue Sub3_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
21290b57cec5SDimitry Andric                                   RHS_Hi, Sub2_Lo.getValue(1));
213006c3fb27SDimitry Andric     SDValue Sub3_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub3_Mi,
21310b57cec5SDimitry Andric                                   Zero, Sub3_Lo.getValue(1));
21320b57cec5SDimitry Andric     SDValue Sub3 = DAG.getBitcast(VT,
21330b57cec5SDimitry Andric                         DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
21340b57cec5SDimitry Andric 
21350b57cec5SDimitry Andric     // endif C6
21360b57cec5SDimitry Andric     // endif C3
21370b57cec5SDimitry Andric 
21380b57cec5SDimitry Andric     SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE);
21390b57cec5SDimitry Andric     SDValue Div  = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE);
21400b57cec5SDimitry Andric 
21410b57cec5SDimitry Andric     SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE);
21420b57cec5SDimitry Andric     SDValue Rem  = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE);
21430b57cec5SDimitry Andric 
21440b57cec5SDimitry Andric     Results.push_back(Div);
21450b57cec5SDimitry Andric     Results.push_back(Rem);
21460b57cec5SDimitry Andric 
21470b57cec5SDimitry Andric     return;
21480b57cec5SDimitry Andric   }
21490b57cec5SDimitry Andric 
21500b57cec5SDimitry Andric   // r600 expandion.
21510b57cec5SDimitry Andric   // Get Speculative values
21520b57cec5SDimitry Andric   SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo);
21530b57cec5SDimitry Andric   SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo);
21540b57cec5SDimitry Andric 
21550b57cec5SDimitry Andric   SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ);
21560b57cec5SDimitry Andric   SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero});
21570b57cec5SDimitry Andric   REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM);
21580b57cec5SDimitry Andric 
21590b57cec5SDimitry Andric   SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ);
21600b57cec5SDimitry Andric   SDValue DIV_Lo = Zero;
21610b57cec5SDimitry Andric 
21620b57cec5SDimitry Andric   const unsigned halfBitWidth = HalfVT.getSizeInBits();
21630b57cec5SDimitry Andric 
21640b57cec5SDimitry Andric   for (unsigned i = 0; i < halfBitWidth; ++i) {
21650b57cec5SDimitry Andric     const unsigned bitPos = halfBitWidth - i - 1;
21660b57cec5SDimitry Andric     SDValue POS = DAG.getConstant(bitPos, DL, HalfVT);
21670b57cec5SDimitry Andric     // Get value of high bit
21680b57cec5SDimitry Andric     SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS);
21690b57cec5SDimitry Andric     HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One);
21700b57cec5SDimitry Andric     HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit);
21710b57cec5SDimitry Andric 
21720b57cec5SDimitry Andric     // Shift
21730b57cec5SDimitry Andric     REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT));
21740b57cec5SDimitry Andric     // Add LHS high bit
21750b57cec5SDimitry Andric     REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit);
21760b57cec5SDimitry Andric 
21770b57cec5SDimitry Andric     SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT);
21780b57cec5SDimitry Andric     SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE);
21790b57cec5SDimitry Andric 
21800b57cec5SDimitry Andric     DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT);
21810b57cec5SDimitry Andric 
21820b57cec5SDimitry Andric     // Update REM
21830b57cec5SDimitry Andric     SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS);
21840b57cec5SDimitry Andric     REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE);
21850b57cec5SDimitry Andric   }
21860b57cec5SDimitry Andric 
21870b57cec5SDimitry Andric   SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi});
21880b57cec5SDimitry Andric   DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV);
21890b57cec5SDimitry Andric   Results.push_back(DIV);
21900b57cec5SDimitry Andric   Results.push_back(REM);
21910b57cec5SDimitry Andric }
21920b57cec5SDimitry Andric 
LowerUDIVREM(SDValue Op,SelectionDAG & DAG) const21930b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
21940b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
21950b57cec5SDimitry Andric   SDLoc DL(Op);
21960b57cec5SDimitry Andric   EVT VT = Op.getValueType();
21970b57cec5SDimitry Andric 
21980b57cec5SDimitry Andric   if (VT == MVT::i64) {
21990b57cec5SDimitry Andric     SmallVector<SDValue, 2> Results;
22000b57cec5SDimitry Andric     LowerUDIVREM64(Op, DAG, Results);
22010b57cec5SDimitry Andric     return DAG.getMergeValues(Results, DL);
22020b57cec5SDimitry Andric   }
22030b57cec5SDimitry Andric 
22040b57cec5SDimitry Andric   if (VT == MVT::i32) {
22050b57cec5SDimitry Andric     if (SDValue Res = LowerDIVREM24(Op, DAG, false))
22060b57cec5SDimitry Andric       return Res;
22070b57cec5SDimitry Andric   }
22080b57cec5SDimitry Andric 
22095ffd83dbSDimitry Andric   SDValue X = Op.getOperand(0);
22105ffd83dbSDimitry Andric   SDValue Y = Op.getOperand(1);
22110b57cec5SDimitry Andric 
22125ffd83dbSDimitry Andric   // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the
22135ffd83dbSDimitry Andric   // algorithm used here.
22140b57cec5SDimitry Andric 
22155ffd83dbSDimitry Andric   // Initial estimate of inv(y).
22165ffd83dbSDimitry Andric   SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y);
22170b57cec5SDimitry Andric 
22185ffd83dbSDimitry Andric   // One round of UNR.
22195ffd83dbSDimitry Andric   SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y);
22205ffd83dbSDimitry Andric   SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z);
22215ffd83dbSDimitry Andric   Z = DAG.getNode(ISD::ADD, DL, VT, Z,
22225ffd83dbSDimitry Andric                   DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ));
22230b57cec5SDimitry Andric 
22245ffd83dbSDimitry Andric   // Quotient/remainder estimate.
22255ffd83dbSDimitry Andric   SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z);
22265ffd83dbSDimitry Andric   SDValue R =
22275ffd83dbSDimitry Andric       DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y));
22280b57cec5SDimitry Andric 
22295ffd83dbSDimitry Andric   // First quotient/remainder refinement.
22305ffd83dbSDimitry Andric   EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
22315ffd83dbSDimitry Andric   SDValue One = DAG.getConstant(1, DL, VT);
22325ffd83dbSDimitry Andric   SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
22335ffd83dbSDimitry Andric   Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22345ffd83dbSDimitry Andric                   DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
22355ffd83dbSDimitry Andric   R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22365ffd83dbSDimitry Andric                   DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
22370b57cec5SDimitry Andric 
22385ffd83dbSDimitry Andric   // Second quotient/remainder refinement.
22395ffd83dbSDimitry Andric   Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE);
22405ffd83dbSDimitry Andric   Q = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22415ffd83dbSDimitry Andric                   DAG.getNode(ISD::ADD, DL, VT, Q, One), Q);
22425ffd83dbSDimitry Andric   R = DAG.getNode(ISD::SELECT, DL, VT, Cond,
22435ffd83dbSDimitry Andric                   DAG.getNode(ISD::SUB, DL, VT, R, Y), R);
22440b57cec5SDimitry Andric 
22455ffd83dbSDimitry Andric   return DAG.getMergeValues({Q, R}, DL);
22460b57cec5SDimitry Andric }
22470b57cec5SDimitry Andric 
LowerSDIVREM(SDValue Op,SelectionDAG & DAG) const22480b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
22490b57cec5SDimitry Andric                                            SelectionDAG &DAG) const {
22500b57cec5SDimitry Andric   SDLoc DL(Op);
22510b57cec5SDimitry Andric   EVT VT = Op.getValueType();
22520b57cec5SDimitry Andric 
22530b57cec5SDimitry Andric   SDValue LHS = Op.getOperand(0);
22540b57cec5SDimitry Andric   SDValue RHS = Op.getOperand(1);
22550b57cec5SDimitry Andric 
22560b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, DL, VT);
22570b57cec5SDimitry Andric   SDValue NegOne = DAG.getConstant(-1, DL, VT);
22580b57cec5SDimitry Andric 
22590b57cec5SDimitry Andric   if (VT == MVT::i32) {
22600b57cec5SDimitry Andric     if (SDValue Res = LowerDIVREM24(Op, DAG, true))
22610b57cec5SDimitry Andric       return Res;
22620b57cec5SDimitry Andric   }
22630b57cec5SDimitry Andric 
22640b57cec5SDimitry Andric   if (VT == MVT::i64 &&
22650b57cec5SDimitry Andric       DAG.ComputeNumSignBits(LHS) > 32 &&
22660b57cec5SDimitry Andric       DAG.ComputeNumSignBits(RHS) > 32) {
22670b57cec5SDimitry Andric     EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
22680b57cec5SDimitry Andric 
22690b57cec5SDimitry Andric     //HiLo split
22700b57cec5SDimitry Andric     SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
22710b57cec5SDimitry Andric     SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
22720b57cec5SDimitry Andric     SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT),
22730b57cec5SDimitry Andric                                  LHS_Lo, RHS_Lo);
22740b57cec5SDimitry Andric     SDValue Res[2] = {
22750b57cec5SDimitry Andric       DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)),
22760b57cec5SDimitry Andric       DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1))
22770b57cec5SDimitry Andric     };
22780b57cec5SDimitry Andric     return DAG.getMergeValues(Res, DL);
22790b57cec5SDimitry Andric   }
22800b57cec5SDimitry Andric 
22810b57cec5SDimitry Andric   SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
22820b57cec5SDimitry Andric   SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
22830b57cec5SDimitry Andric   SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
22840b57cec5SDimitry Andric   SDValue RSign = LHSign; // Remainder sign is the same as LHS
22850b57cec5SDimitry Andric 
22860b57cec5SDimitry Andric   LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
22870b57cec5SDimitry Andric   RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
22880b57cec5SDimitry Andric 
22890b57cec5SDimitry Andric   LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
22900b57cec5SDimitry Andric   RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
22910b57cec5SDimitry Andric 
22920b57cec5SDimitry Andric   SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
22930b57cec5SDimitry Andric   SDValue Rem = Div.getValue(1);
22940b57cec5SDimitry Andric 
22950b57cec5SDimitry Andric   Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
22960b57cec5SDimitry Andric   Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
22970b57cec5SDimitry Andric 
22980b57cec5SDimitry Andric   Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
22990b57cec5SDimitry Andric   Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
23000b57cec5SDimitry Andric 
23010b57cec5SDimitry Andric   SDValue Res[2] = {
23020b57cec5SDimitry Andric     Div,
23030b57cec5SDimitry Andric     Rem
23040b57cec5SDimitry Andric   };
23050b57cec5SDimitry Andric   return DAG.getMergeValues(Res, DL);
23060b57cec5SDimitry Andric }
23070b57cec5SDimitry Andric 
2308e8d8bef9SDimitry Andric // (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x)
LowerFREM(SDValue Op,SelectionDAG & DAG) const23090b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const {
23100b57cec5SDimitry Andric   SDLoc SL(Op);
23110b57cec5SDimitry Andric   EVT VT = Op.getValueType();
2312e8d8bef9SDimitry Andric   auto Flags = Op->getFlags();
23130b57cec5SDimitry Andric   SDValue X = Op.getOperand(0);
23140b57cec5SDimitry Andric   SDValue Y = Op.getOperand(1);
23150b57cec5SDimitry Andric 
2316e8d8bef9SDimitry Andric   SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags);
2317e8d8bef9SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags);
2318e8d8bef9SDimitry Andric   SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags);
2319e8d8bef9SDimitry Andric   // TODO: For f32 use FMAD instead if !hasFastFMA32?
2320e8d8bef9SDimitry Andric   return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags);
23210b57cec5SDimitry Andric }
23220b57cec5SDimitry Andric 
LowerFCEIL(SDValue Op,SelectionDAG & DAG) const23230b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
23240b57cec5SDimitry Andric   SDLoc SL(Op);
23250b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
23260b57cec5SDimitry Andric 
23270b57cec5SDimitry Andric   // result = trunc(src)
23280b57cec5SDimitry Andric   // if (src > 0.0 && src != result)
23290b57cec5SDimitry Andric   //   result += 1.0
23300b57cec5SDimitry Andric 
23310b57cec5SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
23320b57cec5SDimitry Andric 
23330b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
23340b57cec5SDimitry Andric   const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64);
23350b57cec5SDimitry Andric 
23360b57cec5SDimitry Andric   EVT SetCCVT =
23370b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
23380b57cec5SDimitry Andric 
23390b57cec5SDimitry Andric   SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
23400b57cec5SDimitry Andric   SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
23410b57cec5SDimitry Andric   SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
23420b57cec5SDimitry Andric 
23430b57cec5SDimitry Andric   SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
23440b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
23450b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
23460b57cec5SDimitry Andric }
23470b57cec5SDimitry Andric 
extractF64Exponent(SDValue Hi,const SDLoc & SL,SelectionDAG & DAG)23480b57cec5SDimitry Andric static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL,
23490b57cec5SDimitry Andric                                   SelectionDAG &DAG) {
23500b57cec5SDimitry Andric   const unsigned FractBits = 52;
23510b57cec5SDimitry Andric   const unsigned ExpBits = 11;
23520b57cec5SDimitry Andric 
23530b57cec5SDimitry Andric   SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32,
23540b57cec5SDimitry Andric                                 Hi,
23550b57cec5SDimitry Andric                                 DAG.getConstant(FractBits - 32, SL, MVT::i32),
23560b57cec5SDimitry Andric                                 DAG.getConstant(ExpBits, SL, MVT::i32));
23570b57cec5SDimitry Andric   SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
23580b57cec5SDimitry Andric                             DAG.getConstant(1023, SL, MVT::i32));
23590b57cec5SDimitry Andric 
23600b57cec5SDimitry Andric   return Exp;
23610b57cec5SDimitry Andric }
23620b57cec5SDimitry Andric 
LowerFTRUNC(SDValue Op,SelectionDAG & DAG) const23630b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
23640b57cec5SDimitry Andric   SDLoc SL(Op);
23650b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
23660b57cec5SDimitry Andric 
23670b57cec5SDimitry Andric   assert(Op.getValueType() == MVT::f64);
23680b57cec5SDimitry Andric 
23690b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
23700b57cec5SDimitry Andric 
23710b57cec5SDimitry Andric   // Extract the upper half, since this is where we will find the sign and
23720b57cec5SDimitry Andric   // exponent.
2373349cc55cSDimitry Andric   SDValue Hi = getHiHalf64(Src, DAG);
23740b57cec5SDimitry Andric 
23750b57cec5SDimitry Andric   SDValue Exp = extractF64Exponent(Hi, SL, DAG);
23760b57cec5SDimitry Andric 
23770b57cec5SDimitry Andric   const unsigned FractBits = 52;
23780b57cec5SDimitry Andric 
23790b57cec5SDimitry Andric   // Extract the sign bit.
23800b57cec5SDimitry Andric   const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32);
23810b57cec5SDimitry Andric   SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
23820b57cec5SDimitry Andric 
23830b57cec5SDimitry Andric   // Extend back to 64-bits.
23840b57cec5SDimitry Andric   SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit});
23850b57cec5SDimitry Andric   SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
23860b57cec5SDimitry Andric 
23870b57cec5SDimitry Andric   SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
23880b57cec5SDimitry Andric   const SDValue FractMask
23890b57cec5SDimitry Andric     = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64);
23900b57cec5SDimitry Andric 
23910b57cec5SDimitry Andric   SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
23920b57cec5SDimitry Andric   SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
23930b57cec5SDimitry Andric   SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
23940b57cec5SDimitry Andric 
23950b57cec5SDimitry Andric   EVT SetCCVT =
23960b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
23970b57cec5SDimitry Andric 
23980b57cec5SDimitry Andric   const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32);
23990b57cec5SDimitry Andric 
24000b57cec5SDimitry Andric   SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
24010b57cec5SDimitry Andric   SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
24020b57cec5SDimitry Andric 
24030b57cec5SDimitry Andric   SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
24040b57cec5SDimitry Andric   SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
24050b57cec5SDimitry Andric 
24060b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
24070b57cec5SDimitry Andric }
24080b57cec5SDimitry Andric 
LowerFROUNDEVEN(SDValue Op,SelectionDAG & DAG) const24095f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op,
24105f757f3fSDimitry Andric                                               SelectionDAG &DAG) const {
24110b57cec5SDimitry Andric   SDLoc SL(Op);
24120b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
24130b57cec5SDimitry Andric 
24140b57cec5SDimitry Andric   assert(Op.getValueType() == MVT::f64);
24150b57cec5SDimitry Andric 
24160b57cec5SDimitry Andric   APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
24170b57cec5SDimitry Andric   SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
24180b57cec5SDimitry Andric   SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
24190b57cec5SDimitry Andric 
24200b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
24210b57cec5SDimitry Andric 
24220b57cec5SDimitry Andric   SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
24230b57cec5SDimitry Andric   SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
24240b57cec5SDimitry Andric 
24250b57cec5SDimitry Andric   SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
24260b57cec5SDimitry Andric 
24270b57cec5SDimitry Andric   APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
24280b57cec5SDimitry Andric   SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
24290b57cec5SDimitry Andric 
24300b57cec5SDimitry Andric   EVT SetCCVT =
24310b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
24320b57cec5SDimitry Andric   SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
24330b57cec5SDimitry Andric 
24340b57cec5SDimitry Andric   return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
24350b57cec5SDimitry Andric }
24360b57cec5SDimitry Andric 
LowerFNEARBYINT(SDValue Op,SelectionDAG & DAG) const24375f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op,
24385f757f3fSDimitry Andric                                               SelectionDAG &DAG) const {
24390b57cec5SDimitry Andric   // FNEARBYINT and FRINT are the same, except in their handling of FP
24400b57cec5SDimitry Andric   // exceptions. Those aren't really meaningful for us, and OpenCL only has
24410b57cec5SDimitry Andric   // rint, so just treat them as equivalent.
24425f757f3fSDimitry Andric   return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(),
24435f757f3fSDimitry Andric                      Op.getOperand(0));
24440b57cec5SDimitry Andric }
24450b57cec5SDimitry Andric 
LowerFRINT(SDValue Op,SelectionDAG & DAG) const24465f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
2447bdd1243dSDimitry Andric   auto VT = Op.getValueType();
2448bdd1243dSDimitry Andric   auto Arg = Op.getOperand(0u);
24495f757f3fSDimitry Andric   return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg);
2450bdd1243dSDimitry Andric }
2451bdd1243dSDimitry Andric 
24520b57cec5SDimitry Andric // XXX - May require not supporting f32 denormals?
24530b57cec5SDimitry Andric 
24540b57cec5SDimitry Andric // Don't handle v2f16. The extra instructions to scalarize and repack around the
24550b57cec5SDimitry Andric // compare and vselect end up producing worse code than scalarizing the whole
24560b57cec5SDimitry Andric // operation.
LowerFROUND(SDValue Op,SelectionDAG & DAG) const24575ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const {
24580b57cec5SDimitry Andric   SDLoc SL(Op);
24590b57cec5SDimitry Andric   SDValue X = Op.getOperand(0);
24600b57cec5SDimitry Andric   EVT VT = Op.getValueType();
24610b57cec5SDimitry Andric 
24620b57cec5SDimitry Andric   SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X);
24630b57cec5SDimitry Andric 
24640b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
24650b57cec5SDimitry Andric 
24660b57cec5SDimitry Andric   SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T);
24670b57cec5SDimitry Andric 
24680b57cec5SDimitry Andric   SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff);
24690b57cec5SDimitry Andric 
24700b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
24710b57cec5SDimitry Andric   const SDValue One = DAG.getConstantFP(1.0, SL, VT);
24720b57cec5SDimitry Andric 
24730b57cec5SDimitry Andric   EVT SetCCVT =
24740b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
24750b57cec5SDimitry Andric 
24765f757f3fSDimitry Andric   const SDValue Half = DAG.getConstantFP(0.5, SL, VT);
24770b57cec5SDimitry Andric   SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE);
24785f757f3fSDimitry Andric   SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero);
24790b57cec5SDimitry Andric 
24805f757f3fSDimitry Andric   SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X);
24815f757f3fSDimitry Andric   return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset);
24820b57cec5SDimitry Andric }
24830b57cec5SDimitry Andric 
LowerFFLOOR(SDValue Op,SelectionDAG & DAG) const24840b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
24850b57cec5SDimitry Andric   SDLoc SL(Op);
24860b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
24870b57cec5SDimitry Andric 
24880b57cec5SDimitry Andric   // result = trunc(src);
24890b57cec5SDimitry Andric   // if (src < 0.0 && src != result)
24900b57cec5SDimitry Andric   //   result += -1.0.
24910b57cec5SDimitry Andric 
24920b57cec5SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
24930b57cec5SDimitry Andric 
24940b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64);
24950b57cec5SDimitry Andric   const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64);
24960b57cec5SDimitry Andric 
24970b57cec5SDimitry Andric   EVT SetCCVT =
24980b57cec5SDimitry Andric       getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64);
24990b57cec5SDimitry Andric 
25000b57cec5SDimitry Andric   SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT);
25010b57cec5SDimitry Andric   SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
25020b57cec5SDimitry Andric   SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc);
25030b57cec5SDimitry Andric 
25040b57cec5SDimitry Andric   SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero);
25050b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
25060b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
25070b57cec5SDimitry Andric }
25080b57cec5SDimitry Andric 
250906c3fb27SDimitry Andric /// Return true if it's known that \p Src can never be an f32 denormal value.
valueIsKnownNeverF32Denorm(SDValue Src)251006c3fb27SDimitry Andric static bool valueIsKnownNeverF32Denorm(SDValue Src) {
251106c3fb27SDimitry Andric   switch (Src.getOpcode()) {
251206c3fb27SDimitry Andric   case ISD::FP_EXTEND:
251306c3fb27SDimitry Andric     return Src.getOperand(0).getValueType() == MVT::f16;
251406c3fb27SDimitry Andric   case ISD::FP16_TO_FP:
25155f757f3fSDimitry Andric   case ISD::FFREXP:
251606c3fb27SDimitry Andric     return true;
25175f757f3fSDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
2518647cbc5dSDimitry Andric     unsigned IntrinsicID = Src.getConstantOperandVal(0);
25195f757f3fSDimitry Andric     switch (IntrinsicID) {
25205f757f3fSDimitry Andric     case Intrinsic::amdgcn_frexp_mant:
25215f757f3fSDimitry Andric       return true;
25225f757f3fSDimitry Andric     default:
25235f757f3fSDimitry Andric       return false;
25245f757f3fSDimitry Andric     }
25255f757f3fSDimitry Andric   }
252606c3fb27SDimitry Andric   default:
252706c3fb27SDimitry Andric     return false;
25280b57cec5SDimitry Andric   }
25290b57cec5SDimitry Andric 
253006c3fb27SDimitry Andric   llvm_unreachable("covered opcode switch");
253106c3fb27SDimitry Andric }
253206c3fb27SDimitry Andric 
allowApproxFunc(const SelectionDAG & DAG,SDNodeFlags Flags)25335f757f3fSDimitry Andric bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG,
25345f757f3fSDimitry Andric                                            SDNodeFlags Flags) {
253506c3fb27SDimitry Andric   if (Flags.hasApproximateFuncs())
253606c3fb27SDimitry Andric     return true;
253706c3fb27SDimitry Andric   auto &Options = DAG.getTarget().Options;
253806c3fb27SDimitry Andric   return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
253906c3fb27SDimitry Andric }
254006c3fb27SDimitry Andric 
needsDenormHandlingF32(const SelectionDAG & DAG,SDValue Src,SDNodeFlags Flags)25415f757f3fSDimitry Andric bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG,
25425f757f3fSDimitry Andric                                                   SDValue Src,
254306c3fb27SDimitry Andric                                                   SDNodeFlags Flags) {
254406c3fb27SDimitry Andric   return !valueIsKnownNeverF32Denorm(Src) &&
254506c3fb27SDimitry Andric          DAG.getMachineFunction()
254606c3fb27SDimitry Andric                  .getDenormalMode(APFloat::IEEEsingle())
254706c3fb27SDimitry Andric                  .Input != DenormalMode::PreserveSign;
254806c3fb27SDimitry Andric }
254906c3fb27SDimitry Andric 
getIsLtSmallestNormal(SelectionDAG & DAG,SDValue Src,SDNodeFlags Flags) const255006c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
255106c3fb27SDimitry Andric                                                     SDValue Src,
255206c3fb27SDimitry Andric                                                     SDNodeFlags Flags) const {
255306c3fb27SDimitry Andric   SDLoc SL(Src);
255406c3fb27SDimitry Andric   EVT VT = Src.getValueType();
255506c3fb27SDimitry Andric   const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
255606c3fb27SDimitry Andric   SDValue SmallestNormal =
255706c3fb27SDimitry Andric       DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
255806c3fb27SDimitry Andric 
255906c3fb27SDimitry Andric   // Want to scale denormals up, but negatives and 0 work just as well on the
256006c3fb27SDimitry Andric   // scaled path.
256106c3fb27SDimitry Andric   SDValue IsLtSmallestNormal = DAG.getSetCC(
256206c3fb27SDimitry Andric       SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
256306c3fb27SDimitry Andric       SmallestNormal, ISD::SETOLT);
256406c3fb27SDimitry Andric 
256506c3fb27SDimitry Andric   return IsLtSmallestNormal;
256606c3fb27SDimitry Andric }
256706c3fb27SDimitry Andric 
getIsFinite(SelectionDAG & DAG,SDValue Src,SDNodeFlags Flags) const256806c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
256906c3fb27SDimitry Andric                                           SDNodeFlags Flags) const {
257006c3fb27SDimitry Andric   SDLoc SL(Src);
257106c3fb27SDimitry Andric   EVT VT = Src.getValueType();
257206c3fb27SDimitry Andric   const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
257306c3fb27SDimitry Andric   SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), SL, VT);
257406c3fb27SDimitry Andric 
257506c3fb27SDimitry Andric   SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);
257606c3fb27SDimitry Andric   SDValue IsFinite = DAG.getSetCC(
257706c3fb27SDimitry Andric       SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Fabs,
257806c3fb27SDimitry Andric       Inf, ISD::SETOLT);
257906c3fb27SDimitry Andric   return IsFinite;
258006c3fb27SDimitry Andric }
258106c3fb27SDimitry Andric 
258206c3fb27SDimitry Andric /// If denormal handling is required return the scaled input to FLOG2, and the
258306c3fb27SDimitry Andric /// check for denormal range. Otherwise, return null values.
258406c3fb27SDimitry Andric std::pair<SDValue, SDValue>
getScaledLogInput(SelectionDAG & DAG,const SDLoc SL,SDValue Src,SDNodeFlags Flags) const258506c3fb27SDimitry Andric AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
258606c3fb27SDimitry Andric                                         SDValue Src, SDNodeFlags Flags) const {
25878a4dda33SDimitry Andric   if (!needsDenormHandlingF32(DAG, Src, Flags))
258806c3fb27SDimitry Andric     return {};
258906c3fb27SDimitry Andric 
259006c3fb27SDimitry Andric   MVT VT = MVT::f32;
259106c3fb27SDimitry Andric   const fltSemantics &Semantics = APFloat::IEEEsingle();
259206c3fb27SDimitry Andric   SDValue SmallestNormal =
259306c3fb27SDimitry Andric       DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
259406c3fb27SDimitry Andric 
259506c3fb27SDimitry Andric   SDValue IsLtSmallestNormal = DAG.getSetCC(
259606c3fb27SDimitry Andric       SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
259706c3fb27SDimitry Andric       SmallestNormal, ISD::SETOLT);
259806c3fb27SDimitry Andric 
259906c3fb27SDimitry Andric   SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT);
260006c3fb27SDimitry Andric   SDValue One = DAG.getConstantFP(1.0, SL, VT);
260106c3fb27SDimitry Andric   SDValue ScaleFactor =
260206c3fb27SDimitry Andric       DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);
260306c3fb27SDimitry Andric 
260406c3fb27SDimitry Andric   SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags);
260506c3fb27SDimitry Andric   return {ScaledInput, IsLtSmallestNormal};
260606c3fb27SDimitry Andric }
260706c3fb27SDimitry Andric 
LowerFLOG2(SDValue Op,SelectionDAG & DAG) const260806c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const {
260906c3fb27SDimitry Andric   // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
261006c3fb27SDimitry Andric   // If we have to handle denormals, scale up the input and adjust the result.
261106c3fb27SDimitry Andric 
261206c3fb27SDimitry Andric   // scaled = x * (is_denormal ? 0x1.0p+32 : 1.0)
261306c3fb27SDimitry Andric   // log2 = amdgpu_log2 - (is_denormal ? 32.0 : 0.0)
261406c3fb27SDimitry Andric 
261506c3fb27SDimitry Andric   SDLoc SL(Op);
261606c3fb27SDimitry Andric   EVT VT = Op.getValueType();
261706c3fb27SDimitry Andric   SDValue Src = Op.getOperand(0);
261806c3fb27SDimitry Andric   SDNodeFlags Flags = Op->getFlags();
261906c3fb27SDimitry Andric 
262006c3fb27SDimitry Andric   if (VT == MVT::f16) {
262106c3fb27SDimitry Andric     // Nothing in half is a denormal when promoted to f32.
262206c3fb27SDimitry Andric     assert(!Subtarget->has16BitInsts());
262306c3fb27SDimitry Andric     SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
262406c3fb27SDimitry Andric     SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags);
262506c3fb27SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
262606c3fb27SDimitry Andric                        DAG.getTargetConstant(0, SL, MVT::i32), Flags);
262706c3fb27SDimitry Andric   }
262806c3fb27SDimitry Andric 
262906c3fb27SDimitry Andric   auto [ScaledInput, IsLtSmallestNormal] =
263006c3fb27SDimitry Andric       getScaledLogInput(DAG, SL, Src, Flags);
263106c3fb27SDimitry Andric   if (!ScaledInput)
263206c3fb27SDimitry Andric     return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);
263306c3fb27SDimitry Andric 
263406c3fb27SDimitry Andric   SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
263506c3fb27SDimitry Andric 
263606c3fb27SDimitry Andric   SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT);
263706c3fb27SDimitry Andric   SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
263806c3fb27SDimitry Andric   SDValue ResultOffset =
263906c3fb27SDimitry Andric       DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero);
264006c3fb27SDimitry Andric   return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags);
264106c3fb27SDimitry Andric }
264206c3fb27SDimitry Andric 
getMad(SelectionDAG & DAG,const SDLoc & SL,EVT VT,SDValue X,SDValue Y,SDValue C,SDNodeFlags Flags=SDNodeFlags ())264306c3fb27SDimitry Andric static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
264406c3fb27SDimitry Andric                       SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) {
264506c3fb27SDimitry Andric   SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags);
264606c3fb27SDimitry Andric   return DAG.getNode(ISD::FADD, SL, VT, Mul, C, Flags);
264706c3fb27SDimitry Andric }
264806c3fb27SDimitry Andric 
LowerFLOGCommon(SDValue Op,SelectionDAG & DAG) const264906c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
265006c3fb27SDimitry Andric                                               SelectionDAG &DAG) const {
265106c3fb27SDimitry Andric   SDValue X = Op.getOperand(0);
265206c3fb27SDimitry Andric   EVT VT = Op.getValueType();
265306c3fb27SDimitry Andric   SDNodeFlags Flags = Op->getFlags();
265406c3fb27SDimitry Andric   SDLoc DL(Op);
265506c3fb27SDimitry Andric 
265606c3fb27SDimitry Andric   const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;
265706c3fb27SDimitry Andric   assert(IsLog10 || Op.getOpcode() == ISD::FLOG);
265806c3fb27SDimitry Andric 
265906c3fb27SDimitry Andric   const auto &Options = getTargetMachine().Options;
266006c3fb27SDimitry Andric   if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
266106c3fb27SDimitry Andric       Options.ApproxFuncFPMath || Options.UnsafeFPMath) {
266206c3fb27SDimitry Andric 
266306c3fb27SDimitry Andric     if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
266406c3fb27SDimitry Andric       // Log and multiply in f32 is good enough for f16.
266506c3fb27SDimitry Andric       X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
266606c3fb27SDimitry Andric     }
266706c3fb27SDimitry Andric 
26688a4dda33SDimitry Andric     SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags);
266906c3fb27SDimitry Andric     if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
267006c3fb27SDimitry Andric       return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered,
267106c3fb27SDimitry Andric                          DAG.getTargetConstant(0, DL, MVT::i32), Flags);
267206c3fb27SDimitry Andric     }
267306c3fb27SDimitry Andric 
267406c3fb27SDimitry Andric     return Lowered;
267506c3fb27SDimitry Andric   }
267606c3fb27SDimitry Andric 
267706c3fb27SDimitry Andric   auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags);
267806c3fb27SDimitry Andric   if (ScaledInput)
267906c3fb27SDimitry Andric     X = ScaledInput;
268006c3fb27SDimitry Andric 
268106c3fb27SDimitry Andric   SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags);
268206c3fb27SDimitry Andric 
268306c3fb27SDimitry Andric   SDValue R;
268406c3fb27SDimitry Andric   if (Subtarget->hasFastFMAF32()) {
268506c3fb27SDimitry Andric     // c+cc are ln(2)/ln(10) to more than 49 bits
268606c3fb27SDimitry Andric     const float c_log10 = 0x1.344134p-2f;
268706c3fb27SDimitry Andric     const float cc_log10 = 0x1.09f79ep-26f;
268806c3fb27SDimitry Andric 
268906c3fb27SDimitry Andric     // c + cc is ln(2) to more than 49 bits
269006c3fb27SDimitry Andric     const float c_log = 0x1.62e42ep-1f;
269106c3fb27SDimitry Andric     const float cc_log = 0x1.efa39ep-25f;
269206c3fb27SDimitry Andric 
269306c3fb27SDimitry Andric     SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT);
269406c3fb27SDimitry Andric     SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT);
269506c3fb27SDimitry Andric 
269606c3fb27SDimitry Andric     R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags);
269706c3fb27SDimitry Andric     SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags);
269806c3fb27SDimitry Andric     SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags);
269906c3fb27SDimitry Andric     SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags);
270006c3fb27SDimitry Andric     R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags);
270106c3fb27SDimitry Andric   } else {
270206c3fb27SDimitry Andric     // ch+ct is ln(2)/ln(10) to more than 36 bits
270306c3fb27SDimitry Andric     const float ch_log10 = 0x1.344000p-2f;
270406c3fb27SDimitry Andric     const float ct_log10 = 0x1.3509f6p-18f;
270506c3fb27SDimitry Andric 
270606c3fb27SDimitry Andric     // ch + ct is ln(2) to more than 36 bits
270706c3fb27SDimitry Andric     const float ch_log = 0x1.62e000p-1f;
270806c3fb27SDimitry Andric     const float ct_log = 0x1.0bfbe8p-15f;
270906c3fb27SDimitry Andric 
271006c3fb27SDimitry Andric     SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT);
271106c3fb27SDimitry Andric     SDValue CT = DAG.getConstantFP(IsLog10 ? ct_log10 : ct_log, DL, VT);
271206c3fb27SDimitry Andric 
271306c3fb27SDimitry Andric     SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y);
271406c3fb27SDimitry Andric     SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32);
271506c3fb27SDimitry Andric     SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst);
271606c3fb27SDimitry Andric     SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);
271706c3fb27SDimitry Andric     SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags);
271806c3fb27SDimitry Andric 
271906c3fb27SDimitry Andric     SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags);
272006c3fb27SDimitry Andric     SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);
272106c3fb27SDimitry Andric     SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags);
272206c3fb27SDimitry Andric     R = getMad(DAG, DL, VT, YH, CH, Mad1);
272306c3fb27SDimitry Andric   }
272406c3fb27SDimitry Andric 
272506c3fb27SDimitry Andric   const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
272606c3fb27SDimitry Andric                             (Flags.hasNoInfs() || Options.NoInfsFPMath);
272706c3fb27SDimitry Andric 
272806c3fb27SDimitry Andric   // TODO: Check if known finite from source value.
272906c3fb27SDimitry Andric   if (!IsFiniteOnly) {
273006c3fb27SDimitry Andric     SDValue IsFinite = getIsFinite(DAG, Y, Flags);
273106c3fb27SDimitry Andric     R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags);
273206c3fb27SDimitry Andric   }
273306c3fb27SDimitry Andric 
273406c3fb27SDimitry Andric   if (IsScaled) {
273506c3fb27SDimitry Andric     SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
273606c3fb27SDimitry Andric     SDValue ShiftK =
273706c3fb27SDimitry Andric         DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);
273806c3fb27SDimitry Andric     SDValue Shift =
273906c3fb27SDimitry Andric         DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags);
274006c3fb27SDimitry Andric     R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags);
274106c3fb27SDimitry Andric   }
274206c3fb27SDimitry Andric 
274306c3fb27SDimitry Andric   return R;
274406c3fb27SDimitry Andric }
274506c3fb27SDimitry Andric 
LowerFLOG10(SDValue Op,SelectionDAG & DAG) const274606c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const {
274706c3fb27SDimitry Andric   return LowerFLOGCommon(Op, DAG);
274806c3fb27SDimitry Andric }
274906c3fb27SDimitry Andric 
275006c3fb27SDimitry Andric // Do f32 fast math expansion for flog2 or flog10. This is accurate enough for a
275106c3fb27SDimitry Andric // promote f16 operation.
LowerFLOGUnsafe(SDValue Src,const SDLoc & SL,SelectionDAG & DAG,bool IsLog10,SDNodeFlags Flags) const275206c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
27538a4dda33SDimitry Andric                                               SelectionDAG &DAG, bool IsLog10,
275406c3fb27SDimitry Andric                                               SDNodeFlags Flags) const {
275506c3fb27SDimitry Andric   EVT VT = Src.getValueType();
27565f757f3fSDimitry Andric   unsigned LogOp =
27575f757f3fSDimitry Andric       VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2;
27588a4dda33SDimitry Andric 
27598a4dda33SDimitry Andric   double Log2BaseInverted =
27608a4dda33SDimitry Andric       IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
27618a4dda33SDimitry Andric 
27628a4dda33SDimitry Andric   if (VT == MVT::f32) {
27638a4dda33SDimitry Andric     auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags);
27648a4dda33SDimitry Andric     if (ScaledInput) {
27658a4dda33SDimitry Andric       SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
27668a4dda33SDimitry Andric       SDValue ScaledResultOffset =
27678a4dda33SDimitry Andric           DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT);
27688a4dda33SDimitry Andric 
27698a4dda33SDimitry Andric       SDValue Zero = DAG.getConstantFP(0.0f, SL, VT);
27708a4dda33SDimitry Andric 
27718a4dda33SDimitry Andric       SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled,
27728a4dda33SDimitry Andric                                          ScaledResultOffset, Zero, Flags);
27738a4dda33SDimitry Andric 
27748a4dda33SDimitry Andric       SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT);
27758a4dda33SDimitry Andric 
27768a4dda33SDimitry Andric       if (Subtarget->hasFastFMAF32())
27778a4dda33SDimitry Andric         return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset,
27788a4dda33SDimitry Andric                            Flags);
27798a4dda33SDimitry Andric       SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags);
27808a4dda33SDimitry Andric       return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset);
27818a4dda33SDimitry Andric     }
27828a4dda33SDimitry Andric   }
27838a4dda33SDimitry Andric 
278406c3fb27SDimitry Andric   SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
278506c3fb27SDimitry Andric   SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
278606c3fb27SDimitry Andric 
278706c3fb27SDimitry Andric   return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
278806c3fb27SDimitry Andric                      Flags);
278906c3fb27SDimitry Andric }
279006c3fb27SDimitry Andric 
lowerFEXP2(SDValue Op,SelectionDAG & DAG) const279106c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
279206c3fb27SDimitry Andric   // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
279306c3fb27SDimitry Andric   // If we have to handle denormals, scale up the input and adjust the result.
279406c3fb27SDimitry Andric 
279506c3fb27SDimitry Andric   SDLoc SL(Op);
279606c3fb27SDimitry Andric   EVT VT = Op.getValueType();
279706c3fb27SDimitry Andric   SDValue Src = Op.getOperand(0);
279806c3fb27SDimitry Andric   SDNodeFlags Flags = Op->getFlags();
279906c3fb27SDimitry Andric 
280006c3fb27SDimitry Andric   if (VT == MVT::f16) {
280106c3fb27SDimitry Andric     // Nothing in half is a denormal when promoted to f32.
280206c3fb27SDimitry Andric     assert(!Subtarget->has16BitInsts());
280306c3fb27SDimitry Andric     SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
280406c3fb27SDimitry Andric     SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);
280506c3fb27SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
280606c3fb27SDimitry Andric                        DAG.getTargetConstant(0, SL, MVT::i32), Flags);
280706c3fb27SDimitry Andric   }
280806c3fb27SDimitry Andric 
280906c3fb27SDimitry Andric   assert(VT == MVT::f32);
281006c3fb27SDimitry Andric 
28118a4dda33SDimitry Andric   if (!needsDenormHandlingF32(DAG, Src, Flags))
281206c3fb27SDimitry Andric     return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
281306c3fb27SDimitry Andric 
281406c3fb27SDimitry Andric   // bool needs_scaling = x < -0x1.f80000p+6f;
281506c3fb27SDimitry Andric   // v_exp_f32(x + (s ? 0x1.0p+6f : 0.0f)) * (s ? 0x1.0p-64f : 1.0f);
281606c3fb27SDimitry Andric 
281706c3fb27SDimitry Andric   // -nextafter(128.0, -1)
281806c3fb27SDimitry Andric   SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT);
281906c3fb27SDimitry Andric 
282006c3fb27SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
282106c3fb27SDimitry Andric 
282206c3fb27SDimitry Andric   SDValue NeedsScaling =
282306c3fb27SDimitry Andric       DAG.getSetCC(SL, SetCCVT, Src, RangeCheckConst, ISD::SETOLT);
282406c3fb27SDimitry Andric 
282506c3fb27SDimitry Andric   SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT);
282606c3fb27SDimitry Andric   SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
282706c3fb27SDimitry Andric 
282806c3fb27SDimitry Andric   SDValue AddOffset =
282906c3fb27SDimitry Andric       DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, SixtyFour, Zero);
283006c3fb27SDimitry Andric 
283106c3fb27SDimitry Andric   SDValue AddInput = DAG.getNode(ISD::FADD, SL, VT, Src, AddOffset, Flags);
283206c3fb27SDimitry Andric   SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);
283306c3fb27SDimitry Andric 
283406c3fb27SDimitry Andric   SDValue TwoExpNeg64 = DAG.getConstantFP(0x1.0p-64f, SL, VT);
283506c3fb27SDimitry Andric   SDValue One = DAG.getConstantFP(1.0, SL, VT);
283606c3fb27SDimitry Andric   SDValue ResultScale =
283706c3fb27SDimitry Andric       DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, TwoExpNeg64, One);
283806c3fb27SDimitry Andric 
283906c3fb27SDimitry Andric   return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
284006c3fb27SDimitry Andric }
284106c3fb27SDimitry Andric 
lowerFEXPUnsafe(SDValue X,const SDLoc & SL,SelectionDAG & DAG,SDNodeFlags Flags) const28425f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL,
284306c3fb27SDimitry Andric                                               SelectionDAG &DAG,
284406c3fb27SDimitry Andric                                               SDNodeFlags Flags) const {
28455f757f3fSDimitry Andric   EVT VT = X.getValueType();
28465f757f3fSDimitry Andric   const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT);
28475f757f3fSDimitry Andric 
28485f757f3fSDimitry Andric   if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
28490b57cec5SDimitry Andric     // exp2(M_LOG2E_F * f);
28505f757f3fSDimitry Andric     SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags);
28515f757f3fSDimitry Andric     return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP
28525f757f3fSDimitry Andric                                       : (unsigned)ISD::FEXP2,
28535f757f3fSDimitry Andric                        SL, VT, Mul, Flags);
28545f757f3fSDimitry Andric   }
28555f757f3fSDimitry Andric 
28565f757f3fSDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
28575f757f3fSDimitry Andric 
28585f757f3fSDimitry Andric   SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT);
28595f757f3fSDimitry Andric   SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
28605f757f3fSDimitry Andric 
28615f757f3fSDimitry Andric   SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT);
28625f757f3fSDimitry Andric 
28635f757f3fSDimitry Andric   SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
28645f757f3fSDimitry Andric 
28655f757f3fSDimitry Andric   SDValue AdjustedX =
28665f757f3fSDimitry Andric       DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
28675f757f3fSDimitry Andric 
28685f757f3fSDimitry Andric   SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags);
28695f757f3fSDimitry Andric 
28705f757f3fSDimitry Andric   SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags);
28715f757f3fSDimitry Andric 
28725f757f3fSDimitry Andric   SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT);
28735f757f3fSDimitry Andric   SDValue AdjustedResult =
28745f757f3fSDimitry Andric       DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags);
28755f757f3fSDimitry Andric 
28765f757f3fSDimitry Andric   return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2,
28775f757f3fSDimitry Andric                      Flags);
28785f757f3fSDimitry Andric }
28795f757f3fSDimitry Andric 
28805f757f3fSDimitry Andric /// Emit approx-funcs appropriate lowering for exp10. inf/nan should still be
28815f757f3fSDimitry Andric /// handled correctly.
lowerFEXP10Unsafe(SDValue X,const SDLoc & SL,SelectionDAG & DAG,SDNodeFlags Flags) const28825f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL,
28835f757f3fSDimitry Andric                                                 SelectionDAG &DAG,
28845f757f3fSDimitry Andric                                                 SDNodeFlags Flags) const {
28855f757f3fSDimitry Andric   const EVT VT = X.getValueType();
28865f757f3fSDimitry Andric   const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2;
28875f757f3fSDimitry Andric 
28885f757f3fSDimitry Andric   if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) {
28895f757f3fSDimitry Andric     // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f);
28905f757f3fSDimitry Andric     SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
28915f757f3fSDimitry Andric     SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
28925f757f3fSDimitry Andric 
28935f757f3fSDimitry Andric     SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, X, K0, Flags);
28945f757f3fSDimitry Andric     SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
28955f757f3fSDimitry Andric     SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, X, K1, Flags);
28965f757f3fSDimitry Andric     SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
28975f757f3fSDimitry Andric     return DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1);
28985f757f3fSDimitry Andric   }
28995f757f3fSDimitry Andric 
29005f757f3fSDimitry Andric   // bool s = x < -0x1.2f7030p+5f;
29015f757f3fSDimitry Andric   // x += s ? 0x1.0p+5f : 0.0f;
29025f757f3fSDimitry Andric   // exp10 = exp2(x * 0x1.a92000p+1f) *
29035f757f3fSDimitry Andric   //        exp2(x * 0x1.4f0978p-11f) *
29045f757f3fSDimitry Andric   //        (s ? 0x1.9f623ep-107f : 1.0f);
29055f757f3fSDimitry Andric 
29065f757f3fSDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
29075f757f3fSDimitry Andric 
29085f757f3fSDimitry Andric   SDValue Threshold = DAG.getConstantFP(-0x1.2f7030p+5f, SL, VT);
29095f757f3fSDimitry Andric   SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT);
29105f757f3fSDimitry Andric 
29115f757f3fSDimitry Andric   SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+5f, SL, VT);
29125f757f3fSDimitry Andric   SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags);
29135f757f3fSDimitry Andric   SDValue AdjustedX =
29145f757f3fSDimitry Andric       DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X);
29155f757f3fSDimitry Andric 
29165f757f3fSDimitry Andric   SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT);
29175f757f3fSDimitry Andric   SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT);
29185f757f3fSDimitry Andric 
29195f757f3fSDimitry Andric   SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K0, Flags);
29205f757f3fSDimitry Andric   SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags);
29215f757f3fSDimitry Andric   SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K1, Flags);
29225f757f3fSDimitry Andric   SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags);
29235f757f3fSDimitry Andric 
29245f757f3fSDimitry Andric   SDValue MulExps = DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags);
29255f757f3fSDimitry Andric 
29265f757f3fSDimitry Andric   SDValue ResultScaleFactor = DAG.getConstantFP(0x1.9f623ep-107f, SL, VT);
29275f757f3fSDimitry Andric   SDValue AdjustedResult =
29285f757f3fSDimitry Andric       DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags);
29295f757f3fSDimitry Andric 
29305f757f3fSDimitry Andric   return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps,
293106c3fb27SDimitry Andric                      Flags);
293206c3fb27SDimitry Andric }
293306c3fb27SDimitry Andric 
lowerFEXP(SDValue Op,SelectionDAG & DAG) const29340b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
29350b57cec5SDimitry Andric   EVT VT = Op.getValueType();
29360b57cec5SDimitry Andric   SDLoc SL(Op);
293706c3fb27SDimitry Andric   SDValue X = Op.getOperand(0);
293806c3fb27SDimitry Andric   SDNodeFlags Flags = Op->getFlags();
29395f757f3fSDimitry Andric   const bool IsExp10 = Op.getOpcode() == ISD::FEXP10;
29400b57cec5SDimitry Andric 
294106c3fb27SDimitry Andric   if (VT.getScalarType() == MVT::f16) {
294206c3fb27SDimitry Andric     // v_exp_f16 (fmul x, log2e)
294306c3fb27SDimitry Andric     if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
294406c3fb27SDimitry Andric       return lowerFEXPUnsafe(X, SL, DAG, Flags);
294506c3fb27SDimitry Andric 
294606c3fb27SDimitry Andric     if (VT.isVector())
294706c3fb27SDimitry Andric       return SDValue();
294806c3fb27SDimitry Andric 
294906c3fb27SDimitry Andric     // exp(f16 x) ->
295006c3fb27SDimitry Andric     //   fptrunc (v_exp_f32 (fmul (fpext x), log2e))
295106c3fb27SDimitry Andric 
295206c3fb27SDimitry Andric     // Nothing in half is a denormal when promoted to f32.
295306c3fb27SDimitry Andric     SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
295406c3fb27SDimitry Andric     SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
295506c3fb27SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
295606c3fb27SDimitry Andric                        DAG.getTargetConstant(0, SL, MVT::i32), Flags);
295706c3fb27SDimitry Andric   }
295806c3fb27SDimitry Andric 
295906c3fb27SDimitry Andric   assert(VT == MVT::f32);
296006c3fb27SDimitry Andric 
296106c3fb27SDimitry Andric   // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
296206c3fb27SDimitry Andric   // library behavior. Also, is known-not-daz source sufficient?
29635f757f3fSDimitry Andric   if (allowApproxFunc(DAG, Flags)) {
29645f757f3fSDimitry Andric     return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags)
29655f757f3fSDimitry Andric                    : lowerFEXPUnsafe(X, SL, DAG, Flags);
296606c3fb27SDimitry Andric   }
296706c3fb27SDimitry Andric 
296806c3fb27SDimitry Andric   //    Algorithm:
296906c3fb27SDimitry Andric   //
297006c3fb27SDimitry Andric   //    e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64)
297106c3fb27SDimitry Andric   //
297206c3fb27SDimitry Andric   //    x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer
297306c3fb27SDimitry Andric   //    n = 64*m + j,   0 <= j < 64
297406c3fb27SDimitry Andric   //
297506c3fb27SDimitry Andric   //    e^x = 2^((64*m + j + f)/64)
297606c3fb27SDimitry Andric   //        = (2^m) * (2^(j/64)) * 2^(f/64)
297706c3fb27SDimitry Andric   //        = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
297806c3fb27SDimitry Andric   //
297906c3fb27SDimitry Andric   //    f = x*(64/ln(2)) - n
298006c3fb27SDimitry Andric   //    r = f*(ln(2)/64) = x - n*(ln(2)/64)
298106c3fb27SDimitry Andric   //
298206c3fb27SDimitry Andric   //    e^x = (2^m) * (2^(j/64)) * e^r
298306c3fb27SDimitry Andric   //
298406c3fb27SDimitry Andric   //    (2^(j/64)) is precomputed
298506c3fb27SDimitry Andric   //
298606c3fb27SDimitry Andric   //    e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
298706c3fb27SDimitry Andric   //    e^r = 1 + q
298806c3fb27SDimitry Andric   //
298906c3fb27SDimitry Andric   //    q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
299006c3fb27SDimitry Andric   //
299106c3fb27SDimitry Andric   //    e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) )
299206c3fb27SDimitry Andric   SDNodeFlags FlagsNoContract = Flags;
299306c3fb27SDimitry Andric   FlagsNoContract.setAllowContract(false);
299406c3fb27SDimitry Andric 
299506c3fb27SDimitry Andric   SDValue PH, PL;
299606c3fb27SDimitry Andric   if (Subtarget->hasFastFMAF32()) {
299706c3fb27SDimitry Andric     const float c_exp = numbers::log2ef;
299806c3fb27SDimitry Andric     const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits
299906c3fb27SDimitry Andric     const float c_exp10 = 0x1.a934f0p+1f;
300006c3fb27SDimitry Andric     const float cc_exp10 = 0x1.2f346ep-24f;
300106c3fb27SDimitry Andric 
300206c3fb27SDimitry Andric     SDValue C = DAG.getConstantFP(IsExp10 ? c_exp10 : c_exp, SL, VT);
300306c3fb27SDimitry Andric     SDValue CC = DAG.getConstantFP(IsExp10 ? cc_exp10 : cc_exp, SL, VT);
300406c3fb27SDimitry Andric 
300506c3fb27SDimitry Andric     PH = DAG.getNode(ISD::FMUL, SL, VT, X, C, Flags);
300606c3fb27SDimitry Andric     SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags);
300706c3fb27SDimitry Andric     SDValue FMA0 = DAG.getNode(ISD::FMA, SL, VT, X, C, NegPH, Flags);
300806c3fb27SDimitry Andric     PL = DAG.getNode(ISD::FMA, SL, VT, X, CC, FMA0, Flags);
300906c3fb27SDimitry Andric   } else {
301006c3fb27SDimitry Andric     const float ch_exp = 0x1.714000p+0f;
301106c3fb27SDimitry Andric     const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits
301206c3fb27SDimitry Andric 
301306c3fb27SDimitry Andric     const float ch_exp10 = 0x1.a92000p+1f;
301406c3fb27SDimitry Andric     const float cl_exp10 = 0x1.4f0978p-11f;
301506c3fb27SDimitry Andric 
301606c3fb27SDimitry Andric     SDValue CH = DAG.getConstantFP(IsExp10 ? ch_exp10 : ch_exp, SL, VT);
301706c3fb27SDimitry Andric     SDValue CL = DAG.getConstantFP(IsExp10 ? cl_exp10 : cl_exp, SL, VT);
301806c3fb27SDimitry Andric 
301906c3fb27SDimitry Andric     SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X);
302006c3fb27SDimitry Andric     SDValue MaskConst = DAG.getConstant(0xfffff000, SL, MVT::i32);
302106c3fb27SDimitry Andric     SDValue XHAsInt = DAG.getNode(ISD::AND, SL, MVT::i32, XAsInt, MaskConst);
302206c3fb27SDimitry Andric     SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt);
302306c3fb27SDimitry Andric     SDValue XL = DAG.getNode(ISD::FSUB, SL, VT, X, XH, Flags);
302406c3fb27SDimitry Andric 
302506c3fb27SDimitry Andric     PH = DAG.getNode(ISD::FMUL, SL, VT, XH, CH, Flags);
302606c3fb27SDimitry Andric 
302706c3fb27SDimitry Andric     SDValue XLCL = DAG.getNode(ISD::FMUL, SL, VT, XL, CL, Flags);
302806c3fb27SDimitry Andric     SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags);
302906c3fb27SDimitry Andric     PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
303006c3fb27SDimitry Andric   }
303106c3fb27SDimitry Andric 
30325f757f3fSDimitry Andric   SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags);
303306c3fb27SDimitry Andric 
303406c3fb27SDimitry Andric   // It is unsafe to contract this fsub into the PH multiply.
303506c3fb27SDimitry Andric   SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract);
303606c3fb27SDimitry Andric 
303706c3fb27SDimitry Andric   SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags);
303806c3fb27SDimitry Andric   SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E);
303906c3fb27SDimitry Andric   SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags);
304006c3fb27SDimitry Andric 
304106c3fb27SDimitry Andric   SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags);
304206c3fb27SDimitry Andric 
304306c3fb27SDimitry Andric   SDValue UnderflowCheckConst =
304406c3fb27SDimitry Andric       DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);
304506c3fb27SDimitry Andric 
304606c3fb27SDimitry Andric   EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
304706c3fb27SDimitry Andric   SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
304806c3fb27SDimitry Andric   SDValue Underflow =
304906c3fb27SDimitry Andric       DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT);
305006c3fb27SDimitry Andric 
305106c3fb27SDimitry Andric   R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R);
305206c3fb27SDimitry Andric   const auto &Options = getTargetMachine().Options;
305306c3fb27SDimitry Andric 
305406c3fb27SDimitry Andric   if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) {
305506c3fb27SDimitry Andric     SDValue OverflowCheckConst =
305606c3fb27SDimitry Andric         DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
305706c3fb27SDimitry Andric     SDValue Overflow =
305806c3fb27SDimitry Andric         DAG.getSetCC(SL, SetCCVT, X, OverflowCheckConst, ISD::SETOGT);
305906c3fb27SDimitry Andric     SDValue Inf =
306006c3fb27SDimitry Andric         DAG.getConstantFP(APFloat::getInf(APFloat::IEEEsingle()), SL, VT);
306106c3fb27SDimitry Andric     R = DAG.getNode(ISD::SELECT, SL, VT, Overflow, Inf, R);
306206c3fb27SDimitry Andric   }
306306c3fb27SDimitry Andric 
306406c3fb27SDimitry Andric   return R;
30650b57cec5SDimitry Andric }
30660b57cec5SDimitry Andric 
isCtlzOpc(unsigned Opc)30670b57cec5SDimitry Andric static bool isCtlzOpc(unsigned Opc) {
30680b57cec5SDimitry Andric   return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
30690b57cec5SDimitry Andric }
30700b57cec5SDimitry Andric 
isCttzOpc(unsigned Opc)30710b57cec5SDimitry Andric static bool isCttzOpc(unsigned Opc) {
30720b57cec5SDimitry Andric   return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF;
30730b57cec5SDimitry Andric }
30740b57cec5SDimitry Andric 
lowerCTLZResults(SDValue Op,SelectionDAG & DAG) const30757a6dacacSDimitry Andric SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op,
30767a6dacacSDimitry Andric                                                SelectionDAG &DAG) const {
30777a6dacacSDimitry Andric   auto SL = SDLoc(Op);
30787a6dacacSDimitry Andric   auto Arg = Op.getOperand(0u);
30797a6dacacSDimitry Andric   auto ResultVT = Op.getValueType();
30807a6dacacSDimitry Andric 
30817a6dacacSDimitry Andric   if (ResultVT != MVT::i8 && ResultVT != MVT::i16)
30827a6dacacSDimitry Andric     return {};
30837a6dacacSDimitry Andric 
30847a6dacacSDimitry Andric   assert(isCtlzOpc(Op.getOpcode()));
30857a6dacacSDimitry Andric   assert(ResultVT == Arg.getValueType());
30867a6dacacSDimitry Andric 
30877a6dacacSDimitry Andric   auto const LeadingZeroes = 32u - ResultVT.getFixedSizeInBits();
30887a6dacacSDimitry Andric   auto NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg);
30897a6dacacSDimitry Andric   auto ShiftVal = DAG.getConstant(LeadingZeroes, SL, MVT::i32);
30907a6dacacSDimitry Andric   NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, ShiftVal);
30917a6dacacSDimitry Andric   NewOp = DAG.getNode(Op.getOpcode(), SL, MVT::i32, NewOp);
30927a6dacacSDimitry Andric   return DAG.getNode(ISD::TRUNCATE, SL, ResultVT, NewOp);
30937a6dacacSDimitry Andric }
30947a6dacacSDimitry Andric 
LowerCTLZ_CTTZ(SDValue Op,SelectionDAG & DAG) const30950b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const {
30960b57cec5SDimitry Andric   SDLoc SL(Op);
30970b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
30980b57cec5SDimitry Andric 
3099349cc55cSDimitry Andric   assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode()));
3100349cc55cSDimitry Andric   bool Ctlz = isCtlzOpc(Op.getOpcode());
3101349cc55cSDimitry Andric   unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32;
31020b57cec5SDimitry Andric 
3103349cc55cSDimitry Andric   bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ||
3104349cc55cSDimitry Andric                    Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF;
3105cb14a3feSDimitry Andric   bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64;
31060b57cec5SDimitry Andric 
3107cb14a3feSDimitry Andric   if (Src.getValueType() == MVT::i32 || Is64BitScalar) {
3108349cc55cSDimitry Andric     // (ctlz hi:lo) -> (umin (ffbh src), 32)
3109349cc55cSDimitry Andric     // (cttz hi:lo) -> (umin (ffbl src), 32)
3110349cc55cSDimitry Andric     // (ctlz_zero_undef src) -> (ffbh src)
3111349cc55cSDimitry Andric     // (cttz_zero_undef src) -> (ffbl src)
3112cb14a3feSDimitry Andric 
3113cb14a3feSDimitry Andric     //  64-bit scalar version produce 32-bit result
3114cb14a3feSDimitry Andric     // (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64)
3115cb14a3feSDimitry Andric     // (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64)
3116cb14a3feSDimitry Andric     // (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src)
3117cb14a3feSDimitry Andric     // (cttz_zero_undef src) -> (S_FF1_I32_B64 src)
3118349cc55cSDimitry Andric     SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src);
3119349cc55cSDimitry Andric     if (!ZeroUndef) {
3120cb14a3feSDimitry Andric       const SDValue ConstVal = DAG.getConstant(
3121cb14a3feSDimitry Andric           Op.getValueType().getScalarSizeInBits(), SL, MVT::i32);
3122cb14a3feSDimitry Andric       NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal);
3123349cc55cSDimitry Andric     }
3124cb14a3feSDimitry Andric     return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr);
31250b57cec5SDimitry Andric   }
31260b57cec5SDimitry Andric 
3127349cc55cSDimitry Andric   SDValue Lo, Hi;
3128349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
3129349cc55cSDimitry Andric 
3130349cc55cSDimitry Andric   SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo);
3131349cc55cSDimitry Andric   SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi);
3132349cc55cSDimitry Andric 
3133349cc55cSDimitry Andric   // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64)
3134349cc55cSDimitry Andric   // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64)
3135349cc55cSDimitry Andric   // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32))
3136349cc55cSDimitry Andric   // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo))
3137349cc55cSDimitry Andric 
3138349cc55cSDimitry Andric   unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT;
3139349cc55cSDimitry Andric   const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32);
3140349cc55cSDimitry Andric   if (Ctlz)
3141349cc55cSDimitry Andric     OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32);
3142349cc55cSDimitry Andric   else
3143349cc55cSDimitry Andric     OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32);
3144349cc55cSDimitry Andric 
3145349cc55cSDimitry Andric   SDValue NewOpr;
3146349cc55cSDimitry Andric   NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi);
31470b57cec5SDimitry Andric   if (!ZeroUndef) {
3148349cc55cSDimitry Andric     const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32);
3149349cc55cSDimitry Andric     NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64);
31500b57cec5SDimitry Andric   }
31510b57cec5SDimitry Andric 
31520b57cec5SDimitry Andric   return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr);
31530b57cec5SDimitry Andric }
31540b57cec5SDimitry Andric 
LowerINT_TO_FP32(SDValue Op,SelectionDAG & DAG,bool Signed) const31550b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
31560b57cec5SDimitry Andric                                                bool Signed) const {
3157349cc55cSDimitry Andric   // The regular method converting a 64-bit integer to float roughly consists of
3158349cc55cSDimitry Andric   // 2 steps: normalization and rounding. In fact, after normalization, the
3159349cc55cSDimitry Andric   // conversion from a 64-bit integer to a float is essentially the same as the
3160349cc55cSDimitry Andric   // one from a 32-bit integer. The only difference is that it has more
3161349cc55cSDimitry Andric   // trailing bits to be rounded. To leverage the native 32-bit conversion, a
3162349cc55cSDimitry Andric   // 64-bit integer could be preprocessed and fit into a 32-bit integer then
3163349cc55cSDimitry Andric   // converted into the correct float number. The basic steps for the unsigned
3164349cc55cSDimitry Andric   // conversion are illustrated in the following pseudo code:
3165349cc55cSDimitry Andric   //
3166349cc55cSDimitry Andric   // f32 uitofp(i64 u) {
3167349cc55cSDimitry Andric   //   i32 hi, lo = split(u);
3168349cc55cSDimitry Andric   //   // Only count the leading zeros in hi as we have native support of the
3169349cc55cSDimitry Andric   //   // conversion from i32 to f32. If hi is all 0s, the conversion is
3170349cc55cSDimitry Andric   //   // reduced to a 32-bit one automatically.
3171349cc55cSDimitry Andric   //   i32 shamt = clz(hi); // Return 32 if hi is all 0s.
3172349cc55cSDimitry Andric   //   u <<= shamt;
3173349cc55cSDimitry Andric   //   hi, lo = split(u);
3174349cc55cSDimitry Andric   //   hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo.
3175349cc55cSDimitry Andric   //   // convert it as a 32-bit integer and scale the result back.
3176349cc55cSDimitry Andric   //   return uitofp(hi) * 2^(32 - shamt);
31770b57cec5SDimitry Andric   // }
3178349cc55cSDimitry Andric   //
3179349cc55cSDimitry Andric   // The signed one follows the same principle but uses 'ffbh_i32' to count its
3180349cc55cSDimitry Andric   // sign bits instead. If 'ffbh_i32' is not available, its absolute value is
3181349cc55cSDimitry Andric   // converted instead followed by negation based its sign bit.
31820b57cec5SDimitry Andric 
31830b57cec5SDimitry Andric   SDLoc SL(Op);
31840b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
31850b57cec5SDimitry Andric 
3186349cc55cSDimitry Andric   SDValue Lo, Hi;
3187349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
3188349cc55cSDimitry Andric   SDValue Sign;
3189349cc55cSDimitry Andric   SDValue ShAmt;
3190349cc55cSDimitry Andric   if (Signed && Subtarget->isGCN()) {
3191349cc55cSDimitry Andric     // We also need to consider the sign bit in Lo if Hi has just sign bits,
3192349cc55cSDimitry Andric     // i.e. Hi is 0 or -1. However, that only needs to take the MSB into
3193349cc55cSDimitry Andric     // account. That is, the maximal shift is
3194349cc55cSDimitry Andric     // - 32 if Lo and Hi have opposite signs;
3195349cc55cSDimitry Andric     // - 33 if Lo and Hi have the same sign.
3196349cc55cSDimitry Andric     //
3197349cc55cSDimitry Andric     // Or, MaxShAmt = 33 + OppositeSign, where
3198349cc55cSDimitry Andric     //
3199349cc55cSDimitry Andric     // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is
3200349cc55cSDimitry Andric     // - -1 if Lo and Hi have opposite signs; and
3201349cc55cSDimitry Andric     // -  0 otherwise.
3202349cc55cSDimitry Andric     //
3203349cc55cSDimitry Andric     // All in all, ShAmt is calculated as
3204349cc55cSDimitry Andric     //
3205349cc55cSDimitry Andric     //  umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1.
3206349cc55cSDimitry Andric     //
3207349cc55cSDimitry Andric     // or
3208349cc55cSDimitry Andric     //
3209349cc55cSDimitry Andric     //  umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31).
3210349cc55cSDimitry Andric     //
3211349cc55cSDimitry Andric     // to reduce the critical path.
3212349cc55cSDimitry Andric     SDValue OppositeSign = DAG.getNode(
3213349cc55cSDimitry Andric         ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi),
3214349cc55cSDimitry Andric         DAG.getConstant(31, SL, MVT::i32));
3215349cc55cSDimitry Andric     SDValue MaxShAmt =
3216349cc55cSDimitry Andric         DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
3217349cc55cSDimitry Andric                     OppositeSign);
3218349cc55cSDimitry Andric     // Count the leading sign bits.
3219349cc55cSDimitry Andric     ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi);
3220349cc55cSDimitry Andric     // Different from unsigned conversion, the shift should be one bit less to
3221349cc55cSDimitry Andric     // preserve the sign bit.
3222349cc55cSDimitry Andric     ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt,
3223349cc55cSDimitry Andric                         DAG.getConstant(1, SL, MVT::i32));
3224349cc55cSDimitry Andric     ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt);
3225349cc55cSDimitry Andric   } else {
32260b57cec5SDimitry Andric     if (Signed) {
3227349cc55cSDimitry Andric       // Without 'ffbh_i32', only leading zeros could be counted. Take the
3228349cc55cSDimitry Andric       // absolute value first.
3229349cc55cSDimitry Andric       Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src,
3230349cc55cSDimitry Andric                          DAG.getConstant(63, SL, MVT::i64));
3231349cc55cSDimitry Andric       SDValue Abs =
3232349cc55cSDimitry Andric           DAG.getNode(ISD::XOR, SL, MVT::i64,
3233349cc55cSDimitry Andric                       DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign);
3234349cc55cSDimitry Andric       std::tie(Lo, Hi) = split64BitValue(Abs, DAG);
32350b57cec5SDimitry Andric     }
3236349cc55cSDimitry Andric     // Count the leading zeros.
3237349cc55cSDimitry Andric     ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi);
3238349cc55cSDimitry Andric     // The shift amount for signed integers is [0, 32].
3239349cc55cSDimitry Andric   }
3240349cc55cSDimitry Andric   // Normalize the given 64-bit integer.
3241349cc55cSDimitry Andric   SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt);
3242349cc55cSDimitry Andric   // Split it again.
3243349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Norm, DAG);
3244349cc55cSDimitry Andric   // Calculate the adjust bit for rounding.
3245349cc55cSDimitry Andric   // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo)
3246349cc55cSDimitry Andric   SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32,
3247349cc55cSDimitry Andric                                DAG.getConstant(1, SL, MVT::i32), Lo);
3248349cc55cSDimitry Andric   // Get the 32-bit normalized integer.
3249349cc55cSDimitry Andric   Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust);
3250349cc55cSDimitry Andric   // Convert the normalized 32-bit integer into f32.
3251349cc55cSDimitry Andric   unsigned Opc =
3252349cc55cSDimitry Andric       (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
3253349cc55cSDimitry Andric   SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm);
32540b57cec5SDimitry Andric 
3255349cc55cSDimitry Andric   // Finally, need to scale back the converted floating number as the original
3256349cc55cSDimitry Andric   // 64-bit integer is converted as a 32-bit one.
3257349cc55cSDimitry Andric   ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32),
3258349cc55cSDimitry Andric                       ShAmt);
3259349cc55cSDimitry Andric   // On GCN, use LDEXP directly.
3260349cc55cSDimitry Andric   if (Subtarget->isGCN())
326106c3fb27SDimitry Andric     return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);
32620b57cec5SDimitry Andric 
3263349cc55cSDimitry Andric   // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
3264349cc55cSDimitry Andric   // part directly to emulate the multiplication of 2^ShAmt. That 8-bit
3265349cc55cSDimitry Andric   // exponent is enough to avoid overflowing into the sign bit.
3266349cc55cSDimitry Andric   SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt,
3267349cc55cSDimitry Andric                             DAG.getConstant(23, SL, MVT::i32));
3268349cc55cSDimitry Andric   SDValue IVal =
3269349cc55cSDimitry Andric       DAG.getNode(ISD::ADD, SL, MVT::i32,
3270349cc55cSDimitry Andric                   DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp);
3271349cc55cSDimitry Andric   if (Signed) {
3272349cc55cSDimitry Andric     // Set the sign bit.
3273349cc55cSDimitry Andric     Sign = DAG.getNode(ISD::SHL, SL, MVT::i32,
3274349cc55cSDimitry Andric                        DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign),
3275349cc55cSDimitry Andric                        DAG.getConstant(31, SL, MVT::i32));
3276349cc55cSDimitry Andric     IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign);
3277349cc55cSDimitry Andric   }
3278349cc55cSDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal);
32790b57cec5SDimitry Andric }
32800b57cec5SDimitry Andric 
LowerINT_TO_FP64(SDValue Op,SelectionDAG & DAG,bool Signed) const32810b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
32820b57cec5SDimitry Andric                                                bool Signed) const {
32830b57cec5SDimitry Andric   SDLoc SL(Op);
32840b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
32850b57cec5SDimitry Andric 
3286349cc55cSDimitry Andric   SDValue Lo, Hi;
3287349cc55cSDimitry Andric   std::tie(Lo, Hi) = split64BitValue(Src, DAG);
32880b57cec5SDimitry Andric 
32890b57cec5SDimitry Andric   SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP,
32900b57cec5SDimitry Andric                               SL, MVT::f64, Hi);
32910b57cec5SDimitry Andric 
32920b57cec5SDimitry Andric   SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
32930b57cec5SDimitry Andric 
329406c3fb27SDimitry Andric   SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,
32950b57cec5SDimitry Andric                               DAG.getConstant(32, SL, MVT::i32));
32960b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
32970b57cec5SDimitry Andric   return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
32980b57cec5SDimitry Andric }
32990b57cec5SDimitry Andric 
LowerUINT_TO_FP(SDValue Op,SelectionDAG & DAG) const33000b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
33010b57cec5SDimitry Andric                                                SelectionDAG &DAG) const {
33020b57cec5SDimitry Andric   // TODO: Factor out code common with LowerSINT_TO_FP.
33030b57cec5SDimitry Andric   EVT DestVT = Op.getValueType();
3304480093f4SDimitry Andric   SDValue Src = Op.getOperand(0);
3305480093f4SDimitry Andric   EVT SrcVT = Src.getValueType();
3306480093f4SDimitry Andric 
3307480093f4SDimitry Andric   if (SrcVT == MVT::i16) {
3308480093f4SDimitry Andric     if (DestVT == MVT::f16)
3309480093f4SDimitry Andric       return Op;
3310480093f4SDimitry Andric     SDLoc DL(Op);
3311480093f4SDimitry Andric 
3312480093f4SDimitry Andric     // Promote src to i32
3313480093f4SDimitry Andric     SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src);
3314480093f4SDimitry Andric     return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext);
3315480093f4SDimitry Andric   }
3316480093f4SDimitry Andric 
33171db9f3b2SDimitry Andric   if (DestVT == MVT::bf16) {
33181db9f3b2SDimitry Andric     SDLoc SL(Op);
33191db9f3b2SDimitry Andric     SDValue ToF32 = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f32, Src);
33201db9f3b2SDimitry Andric     SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true);
33211db9f3b2SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag);
33221db9f3b2SDimitry Andric   }
33231db9f3b2SDimitry Andric 
33241db9f3b2SDimitry Andric   if (SrcVT != MVT::i64)
33251db9f3b2SDimitry Andric     return Op;
3326480093f4SDimitry Andric 
33270b57cec5SDimitry Andric   if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
33280b57cec5SDimitry Andric     SDLoc DL(Op);
33290b57cec5SDimitry Andric 
33300b57cec5SDimitry Andric     SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
3331bdd1243dSDimitry Andric     SDValue FPRoundFlag =
3332bdd1243dSDimitry Andric         DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true);
33330b57cec5SDimitry Andric     SDValue FPRound =
33340b57cec5SDimitry Andric         DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
33350b57cec5SDimitry Andric 
33360b57cec5SDimitry Andric     return FPRound;
33370b57cec5SDimitry Andric   }
33380b57cec5SDimitry Andric 
33390b57cec5SDimitry Andric   if (DestVT == MVT::f32)
33400b57cec5SDimitry Andric     return LowerINT_TO_FP32(Op, DAG, false);
33410b57cec5SDimitry Andric 
33420b57cec5SDimitry Andric   assert(DestVT == MVT::f64);
33430b57cec5SDimitry Andric   return LowerINT_TO_FP64(Op, DAG, false);
33440b57cec5SDimitry Andric }
33450b57cec5SDimitry Andric 
LowerSINT_TO_FP(SDValue Op,SelectionDAG & DAG) const33460b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
33470b57cec5SDimitry Andric                                               SelectionDAG &DAG) const {
3348480093f4SDimitry Andric   EVT DestVT = Op.getValueType();
3349480093f4SDimitry Andric 
3350480093f4SDimitry Andric   SDValue Src = Op.getOperand(0);
3351480093f4SDimitry Andric   EVT SrcVT = Src.getValueType();
3352480093f4SDimitry Andric 
3353480093f4SDimitry Andric   if (SrcVT == MVT::i16) {
3354480093f4SDimitry Andric     if (DestVT == MVT::f16)
3355480093f4SDimitry Andric       return Op;
3356480093f4SDimitry Andric 
3357480093f4SDimitry Andric     SDLoc DL(Op);
3358480093f4SDimitry Andric     // Promote src to i32
3359480093f4SDimitry Andric     SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src);
3360480093f4SDimitry Andric     return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext);
3361480093f4SDimitry Andric   }
3362480093f4SDimitry Andric 
33631db9f3b2SDimitry Andric   if (DestVT == MVT::bf16) {
33641db9f3b2SDimitry Andric     SDLoc SL(Op);
33651db9f3b2SDimitry Andric     SDValue ToF32 = DAG.getNode(ISD::SINT_TO_FP, SL, MVT::f32, Src);
33661db9f3b2SDimitry Andric     SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true);
33671db9f3b2SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag);
33681db9f3b2SDimitry Andric   }
33691db9f3b2SDimitry Andric 
33701db9f3b2SDimitry Andric   if (SrcVT != MVT::i64)
33711db9f3b2SDimitry Andric     return Op;
33720b57cec5SDimitry Andric 
33730b57cec5SDimitry Andric   // TODO: Factor out code common with LowerUINT_TO_FP.
33740b57cec5SDimitry Andric 
33750b57cec5SDimitry Andric   if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
33760b57cec5SDimitry Andric     SDLoc DL(Op);
33770b57cec5SDimitry Andric     SDValue Src = Op.getOperand(0);
33780b57cec5SDimitry Andric 
33790b57cec5SDimitry Andric     SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
3380bdd1243dSDimitry Andric     SDValue FPRoundFlag =
3381bdd1243dSDimitry Andric         DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true);
33820b57cec5SDimitry Andric     SDValue FPRound =
33830b57cec5SDimitry Andric         DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
33840b57cec5SDimitry Andric 
33850b57cec5SDimitry Andric     return FPRound;
33860b57cec5SDimitry Andric   }
33870b57cec5SDimitry Andric 
33880b57cec5SDimitry Andric   if (DestVT == MVT::f32)
33890b57cec5SDimitry Andric     return LowerINT_TO_FP32(Op, DAG, true);
33900b57cec5SDimitry Andric 
33910b57cec5SDimitry Andric   assert(DestVT == MVT::f64);
33920b57cec5SDimitry Andric   return LowerINT_TO_FP64(Op, DAG, true);
33930b57cec5SDimitry Andric }
33940b57cec5SDimitry Andric 
LowerFP_TO_INT64(SDValue Op,SelectionDAG & DAG,bool Signed) const3395fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
33960b57cec5SDimitry Andric                                                bool Signed) const {
33970b57cec5SDimitry Andric   SDLoc SL(Op);
33980b57cec5SDimitry Andric 
33990b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
3400fe6060f1SDimitry Andric   EVT SrcVT = Src.getValueType();
34010b57cec5SDimitry Andric 
3402fe6060f1SDimitry Andric   assert(SrcVT == MVT::f32 || SrcVT == MVT::f64);
34030b57cec5SDimitry Andric 
3404fe6060f1SDimitry Andric   // The basic idea of converting a floating point number into a pair of 32-bit
3405fe6060f1SDimitry Andric   // integers is illustrated as follows:
3406fe6060f1SDimitry Andric   //
3407fe6060f1SDimitry Andric   //     tf := trunc(val);
3408fe6060f1SDimitry Andric   //    hif := floor(tf * 2^-32);
3409fe6060f1SDimitry Andric   //    lof := tf - hif * 2^32; // lof is always positive due to floor.
3410fe6060f1SDimitry Andric   //     hi := fptoi(hif);
3411fe6060f1SDimitry Andric   //     lo := fptoi(lof);
3412fe6060f1SDimitry Andric   //
3413fe6060f1SDimitry Andric   SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src);
3414fe6060f1SDimitry Andric   SDValue Sign;
3415fe6060f1SDimitry Andric   if (Signed && SrcVT == MVT::f32) {
3416fe6060f1SDimitry Andric     // However, a 32-bit floating point number has only 23 bits mantissa and
3417fe6060f1SDimitry Andric     // it's not enough to hold all the significant bits of `lof` if val is
3418fe6060f1SDimitry Andric     // negative. To avoid the loss of precision, We need to take the absolute
3419fe6060f1SDimitry Andric     // value after truncating and flip the result back based on the original
3420fe6060f1SDimitry Andric     // signedness.
3421fe6060f1SDimitry Andric     Sign = DAG.getNode(ISD::SRA, SL, MVT::i32,
3422fe6060f1SDimitry Andric                        DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc),
3423fe6060f1SDimitry Andric                        DAG.getConstant(31, SL, MVT::i32));
3424fe6060f1SDimitry Andric     Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc);
3425fe6060f1SDimitry Andric   }
3426fe6060f1SDimitry Andric 
3427fe6060f1SDimitry Andric   SDValue K0, K1;
3428fe6060f1SDimitry Andric   if (SrcVT == MVT::f64) {
342906c3fb27SDimitry Andric     K0 = DAG.getConstantFP(
343006c3fb27SDimitry Andric         llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)), SL,
343106c3fb27SDimitry Andric         SrcVT);
343206c3fb27SDimitry Andric     K1 = DAG.getConstantFP(
343306c3fb27SDimitry Andric         llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), SL,
343406c3fb27SDimitry Andric         SrcVT);
3435fe6060f1SDimitry Andric   } else {
343606c3fb27SDimitry Andric     K0 = DAG.getConstantFP(
343706c3fb27SDimitry Andric         llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)), SL, SrcVT);
343806c3fb27SDimitry Andric     K1 = DAG.getConstantFP(
343906c3fb27SDimitry Andric         llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)), SL, SrcVT);
3440fe6060f1SDimitry Andric   }
34410b57cec5SDimitry Andric   // TODO: Should this propagate fast-math-flags?
3442fe6060f1SDimitry Andric   SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0);
34430b57cec5SDimitry Andric 
3444fe6060f1SDimitry Andric   SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul);
34450b57cec5SDimitry Andric 
3446fe6060f1SDimitry Andric   SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc);
34470b57cec5SDimitry Andric 
3448fe6060f1SDimitry Andric   SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT
3449fe6060f1SDimitry Andric                                                          : ISD::FP_TO_UINT,
3450fe6060f1SDimitry Andric                            SL, MVT::i32, FloorMul);
34510b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma);
34520b57cec5SDimitry Andric 
3453fe6060f1SDimitry Andric   SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
3454fe6060f1SDimitry Andric                                DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi}));
34550b57cec5SDimitry Andric 
3456fe6060f1SDimitry Andric   if (Signed && SrcVT == MVT::f32) {
3457fe6060f1SDimitry Andric     assert(Sign);
3458fe6060f1SDimitry Andric     // Flip the result based on the signedness, which is either all 0s or 1s.
3459fe6060f1SDimitry Andric     Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64,
3460fe6060f1SDimitry Andric                        DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign}));
3461fe6060f1SDimitry Andric     // r := xor(r, sign) - sign;
3462fe6060f1SDimitry Andric     Result =
3463fe6060f1SDimitry Andric         DAG.getNode(ISD::SUB, SL, MVT::i64,
3464fe6060f1SDimitry Andric                     DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign);
3465fe6060f1SDimitry Andric   }
3466fe6060f1SDimitry Andric 
3467fe6060f1SDimitry Andric   return Result;
34680b57cec5SDimitry Andric }
34690b57cec5SDimitry Andric 
LowerFP_TO_FP16(SDValue Op,SelectionDAG & DAG) const34700b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
34710b57cec5SDimitry Andric   SDLoc DL(Op);
34720b57cec5SDimitry Andric   SDValue N0 = Op.getOperand(0);
34730b57cec5SDimitry Andric 
34740b57cec5SDimitry Andric   // Convert to target node to get known bits
34750b57cec5SDimitry Andric   if (N0.getValueType() == MVT::f32)
34760b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0);
34770b57cec5SDimitry Andric 
34780b57cec5SDimitry Andric   if (getTargetMachine().Options.UnsafeFPMath) {
34790b57cec5SDimitry Andric     // There is a generic expand for FP_TO_FP16 with unsafe fast math.
34800b57cec5SDimitry Andric     return SDValue();
34810b57cec5SDimitry Andric   }
34820b57cec5SDimitry Andric 
34830b57cec5SDimitry Andric   assert(N0.getSimpleValueType() == MVT::f64);
34840b57cec5SDimitry Andric 
34850b57cec5SDimitry Andric   // f64 -> f16 conversion using round-to-nearest-even rounding mode.
34860b57cec5SDimitry Andric   const unsigned ExpMask = 0x7ff;
34870b57cec5SDimitry Andric   const unsigned ExpBiasf64 = 1023;
34880b57cec5SDimitry Andric   const unsigned ExpBiasf16 = 15;
34890b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
34900b57cec5SDimitry Andric   SDValue One = DAG.getConstant(1, DL, MVT::i32);
34910b57cec5SDimitry Andric   SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
34920b57cec5SDimitry Andric   SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
34930b57cec5SDimitry Andric                            DAG.getConstant(32, DL, MVT::i64));
34940b57cec5SDimitry Andric   UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
34950b57cec5SDimitry Andric   U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
34960b57cec5SDimitry Andric   SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
34970b57cec5SDimitry Andric                           DAG.getConstant(20, DL, MVT::i64));
34980b57cec5SDimitry Andric   E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
34990b57cec5SDimitry Andric                   DAG.getConstant(ExpMask, DL, MVT::i32));
35000b57cec5SDimitry Andric   // Subtract the fp64 exponent bias (1023) to get the real exponent and
35010b57cec5SDimitry Andric   // add the f16 bias (15) to get the biased exponent for the f16 format.
35020b57cec5SDimitry Andric   E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
35030b57cec5SDimitry Andric                   DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
35040b57cec5SDimitry Andric 
35050b57cec5SDimitry Andric   SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
35060b57cec5SDimitry Andric                           DAG.getConstant(8, DL, MVT::i32));
35070b57cec5SDimitry Andric   M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
35080b57cec5SDimitry Andric                   DAG.getConstant(0xffe, DL, MVT::i32));
35090b57cec5SDimitry Andric 
35100b57cec5SDimitry Andric   SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
35110b57cec5SDimitry Andric                                   DAG.getConstant(0x1ff, DL, MVT::i32));
35120b57cec5SDimitry Andric   MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
35130b57cec5SDimitry Andric 
35140b57cec5SDimitry Andric   SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
35150b57cec5SDimitry Andric   M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
35160b57cec5SDimitry Andric 
35170b57cec5SDimitry Andric   // (M != 0 ? 0x0200 : 0) | 0x7c00;
35180b57cec5SDimitry Andric   SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
35190b57cec5SDimitry Andric       DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
35200b57cec5SDimitry Andric                       Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
35210b57cec5SDimitry Andric 
35220b57cec5SDimitry Andric   // N = M | (E << 12);
35230b57cec5SDimitry Andric   SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
35240b57cec5SDimitry Andric       DAG.getNode(ISD::SHL, DL, MVT::i32, E,
35250b57cec5SDimitry Andric                   DAG.getConstant(12, DL, MVT::i32)));
35260b57cec5SDimitry Andric 
35270b57cec5SDimitry Andric   // B = clamp(1-E, 0, 13);
35280b57cec5SDimitry Andric   SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
35290b57cec5SDimitry Andric                                   One, E);
35300b57cec5SDimitry Andric   SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
35310b57cec5SDimitry Andric   B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
35320b57cec5SDimitry Andric                   DAG.getConstant(13, DL, MVT::i32));
35330b57cec5SDimitry Andric 
35340b57cec5SDimitry Andric   SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
35350b57cec5SDimitry Andric                                    DAG.getConstant(0x1000, DL, MVT::i32));
35360b57cec5SDimitry Andric 
35370b57cec5SDimitry Andric   SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
35380b57cec5SDimitry Andric   SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
35390b57cec5SDimitry Andric   SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
35400b57cec5SDimitry Andric   D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
35410b57cec5SDimitry Andric 
35420b57cec5SDimitry Andric   SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
35430b57cec5SDimitry Andric   SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
35440b57cec5SDimitry Andric                               DAG.getConstant(0x7, DL, MVT::i32));
35450b57cec5SDimitry Andric   V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
35460b57cec5SDimitry Andric                   DAG.getConstant(2, DL, MVT::i32));
35470b57cec5SDimitry Andric   SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
35480b57cec5SDimitry Andric                                One, Zero, ISD::SETEQ);
35490b57cec5SDimitry Andric   SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
35500b57cec5SDimitry Andric                                One, Zero, ISD::SETGT);
35510b57cec5SDimitry Andric   V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
35520b57cec5SDimitry Andric   V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
35530b57cec5SDimitry Andric 
35540b57cec5SDimitry Andric   V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
35550b57cec5SDimitry Andric                       DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
35560b57cec5SDimitry Andric   V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
35570b57cec5SDimitry Andric                       I, V, ISD::SETEQ);
35580b57cec5SDimitry Andric 
35590b57cec5SDimitry Andric   // Extract the sign bit.
35600b57cec5SDimitry Andric   SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
35610b57cec5SDimitry Andric                             DAG.getConstant(16, DL, MVT::i32));
35620b57cec5SDimitry Andric   Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
35630b57cec5SDimitry Andric                      DAG.getConstant(0x8000, DL, MVT::i32));
35640b57cec5SDimitry Andric 
35650b57cec5SDimitry Andric   V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
35660b57cec5SDimitry Andric   return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
35670b57cec5SDimitry Andric }
35680b57cec5SDimitry Andric 
/// Custom lowering for FP_TO_SINT / FP_TO_UINT.
///
/// Cases are handled in this order:
///  - f16 -> i16: returned unchanged.
///  - bf16 source: extended to f32 and converted again.
///  - f32/f64 -> i16: converted to i32 and truncated.
///  - any other non-i64 result: returned unchanged.
///  - f16 (or f32 produced by FP16_TO_FP) -> i64: converted to i32 and
///    sign- or zero-extended according to the opcode.
///  - f32/f64 -> i64: delegated to LowerFP_TO_INT64.
/// Anything else yields an empty SDValue.
SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op,
                                             SelectionDAG &DAG) const {
  const unsigned Opc = Op.getOpcode();
  const bool IsSigned = Opc == ISD::FP_TO_SINT;
  SDValue Src = Op.getOperand(0);
  const EVT SrcVT = Src.getValueType();
  const EVT DestVT = Op.getValueType();
  const SDLoc DL(Op);

  const bool SrcIsF32OrF64 = SrcVT == MVT::f32 || SrcVT == MVT::f64;

  // Will be selected natively
  if (SrcVT == MVT::f16 && DestVT == MVT::i16)
    return Op;

  if (SrcVT == MVT::bf16) {
    SDValue AsF32 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
    return DAG.getNode(Opc, DL, DestVT, AsF32);
  }

  // Promote i16 to i32
  if (DestVT == MVT::i16 && SrcIsF32OrF64) {
    SDValue AsI32 = DAG.getNode(Opc, DL, MVT::i32, Src);
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, AsI32);
  }

  if (DestVT != MVT::i64)
    return Op;

  // A half-precision value always fits in 32 bits of integer, so convert to
  // i32 and extend.
  const bool IsHalfValue =
      SrcVT == MVT::f16 ||
      (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP);
  if (IsHalfValue) {
    SDValue AsI32 = DAG.getNode(Opc, DL, MVT::i32, Src);
    const unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, MVT::i64, AsI32);
  }

  if (SrcIsF32OrF64)
    return LowerFP_TO_INT64(Op, DAG, IsSigned);

  return SDValue();
}
36120b57cec5SDimitry Andric 
LowerSIGN_EXTEND_INREG(SDValue Op,SelectionDAG & DAG) const36130b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
36140b57cec5SDimitry Andric                                                      SelectionDAG &DAG) const {
36150b57cec5SDimitry Andric   EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
36160b57cec5SDimitry Andric   MVT VT = Op.getSimpleValueType();
36170b57cec5SDimitry Andric   MVT ScalarVT = VT.getScalarType();
36180b57cec5SDimitry Andric 
36190b57cec5SDimitry Andric   assert(VT.isVector());
36200b57cec5SDimitry Andric 
36210b57cec5SDimitry Andric   SDValue Src = Op.getOperand(0);
36220b57cec5SDimitry Andric   SDLoc DL(Op);
36230b57cec5SDimitry Andric 
36240b57cec5SDimitry Andric   // TODO: Don't scalarize on Evergreen?
36250b57cec5SDimitry Andric   unsigned NElts = VT.getVectorNumElements();
36260b57cec5SDimitry Andric   SmallVector<SDValue, 8> Args;
36270b57cec5SDimitry Andric   DAG.ExtractVectorElements(Src, Args, 0, NElts);
36280b57cec5SDimitry Andric 
36290b57cec5SDimitry Andric   SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
36300b57cec5SDimitry Andric   for (unsigned I = 0; I < NElts; ++I)
36310b57cec5SDimitry Andric     Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
36320b57cec5SDimitry Andric 
36330b57cec5SDimitry Andric   return DAG.getBuildVector(VT, DL, Args);
36340b57cec5SDimitry Andric }
36350b57cec5SDimitry Andric 
36360b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
36370b57cec5SDimitry Andric // Custom DAG optimizations
36380b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
36390b57cec5SDimitry Andric 
isU24(SDValue Op,SelectionDAG & DAG)36400b57cec5SDimitry Andric static bool isU24(SDValue Op, SelectionDAG &DAG) {
36410b57cec5SDimitry Andric   return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24;
36420b57cec5SDimitry Andric }
36430b57cec5SDimitry Andric 
isI24(SDValue Op,SelectionDAG & DAG)36440b57cec5SDimitry Andric static bool isI24(SDValue Op, SelectionDAG &DAG) {
36450b57cec5SDimitry Andric   EVT VT = Op.getValueType();
36460b57cec5SDimitry Andric   return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated
36470b57cec5SDimitry Andric                                      // as unsigned 24-bit values.
3648349cc55cSDimitry Andric          AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24;
36490b57cec5SDimitry Andric }
36500b57cec5SDimitry Andric 
simplifyMul24(SDNode * Node24,TargetLowering::DAGCombinerInfo & DCI)3651fe6060f1SDimitry Andric static SDValue simplifyMul24(SDNode *Node24,
36520b57cec5SDimitry Andric                              TargetLowering::DAGCombinerInfo &DCI) {
36530b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
36545ffd83dbSDimitry Andric   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
36558bcb0991SDimitry Andric   bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN;
36568bcb0991SDimitry Andric 
36578bcb0991SDimitry Andric   SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0);
36588bcb0991SDimitry Andric   SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1);
36598bcb0991SDimitry Andric   unsigned NewOpcode = Node24->getOpcode();
36608bcb0991SDimitry Andric   if (IsIntrin) {
3661647cbc5dSDimitry Andric     unsigned IID = Node24->getConstantOperandVal(0);
3662349cc55cSDimitry Andric     switch (IID) {
3663349cc55cSDimitry Andric     case Intrinsic::amdgcn_mul_i24:
3664349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MUL_I24;
3665349cc55cSDimitry Andric       break;
3666349cc55cSDimitry Andric     case Intrinsic::amdgcn_mul_u24:
3667349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MUL_U24;
3668349cc55cSDimitry Andric       break;
3669349cc55cSDimitry Andric     case Intrinsic::amdgcn_mulhi_i24:
3670349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MULHI_I24;
3671349cc55cSDimitry Andric       break;
3672349cc55cSDimitry Andric     case Intrinsic::amdgcn_mulhi_u24:
3673349cc55cSDimitry Andric       NewOpcode = AMDGPUISD::MULHI_U24;
3674349cc55cSDimitry Andric       break;
3675349cc55cSDimitry Andric     default:
3676349cc55cSDimitry Andric       llvm_unreachable("Expected 24-bit mul intrinsic");
3677349cc55cSDimitry Andric     }
36788bcb0991SDimitry Andric   }
36790b57cec5SDimitry Andric 
36800b57cec5SDimitry Andric   APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24);
36810b57cec5SDimitry Andric 
36825ffd83dbSDimitry Andric   // First try to simplify using SimplifyMultipleUseDemandedBits which allows
36835ffd83dbSDimitry Andric   // the operands to have other uses, but will only perform simplifications that
36845ffd83dbSDimitry Andric   // involve bypassing some nodes for this user.
36855ffd83dbSDimitry Andric   SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG);
36865ffd83dbSDimitry Andric   SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG);
36870b57cec5SDimitry Andric   if (DemandedLHS || DemandedRHS)
36888bcb0991SDimitry Andric     return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(),
36890b57cec5SDimitry Andric                        DemandedLHS ? DemandedLHS : LHS,
36900b57cec5SDimitry Andric                        DemandedRHS ? DemandedRHS : RHS);
36910b57cec5SDimitry Andric 
36920b57cec5SDimitry Andric   // Now try SimplifyDemandedBits which can simplify the nodes used by our
36930b57cec5SDimitry Andric   // operands if this node is the only user.
36940b57cec5SDimitry Andric   if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI))
36950b57cec5SDimitry Andric     return SDValue(Node24, 0);
36960b57cec5SDimitry Andric   if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI))
36970b57cec5SDimitry Andric     return SDValue(Node24, 0);
36980b57cec5SDimitry Andric 
36990b57cec5SDimitry Andric   return SDValue();
37000b57cec5SDimitry Andric }
37010b57cec5SDimitry Andric 
37020b57cec5SDimitry Andric template <typename IntTy>
constantFoldBFE(SelectionDAG & DAG,IntTy Src0,uint32_t Offset,uint32_t Width,const SDLoc & DL)37030b57cec5SDimitry Andric static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset,
37040b57cec5SDimitry Andric                                uint32_t Width, const SDLoc &DL) {
37050b57cec5SDimitry Andric   if (Width + Offset < 32) {
37060b57cec5SDimitry Andric     uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width);
37070b57cec5SDimitry Andric     IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width);
37080b57cec5SDimitry Andric     return DAG.getConstant(Result, DL, MVT::i32);
37090b57cec5SDimitry Andric   }
37100b57cec5SDimitry Andric 
37110b57cec5SDimitry Andric   return DAG.getConstant(Src0 >> Offset, DL, MVT::i32);
37120b57cec5SDimitry Andric }
37130b57cec5SDimitry Andric 
hasVolatileUser(SDNode * Val)37140b57cec5SDimitry Andric static bool hasVolatileUser(SDNode *Val) {
37150b57cec5SDimitry Andric   for (SDNode *U : Val->uses()) {
37160b57cec5SDimitry Andric     if (MemSDNode *M = dyn_cast<MemSDNode>(U)) {
37170b57cec5SDimitry Andric       if (M->isVolatile())
37180b57cec5SDimitry Andric         return true;
37190b57cec5SDimitry Andric     }
37200b57cec5SDimitry Andric   }
37210b57cec5SDimitry Andric 
37220b57cec5SDimitry Andric   return false;
37230b57cec5SDimitry Andric }
37240b57cec5SDimitry Andric 
shouldCombineMemoryType(EVT VT) const37250b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const {
37260b57cec5SDimitry Andric   // i32 vectors are the canonical memory type.
37270b57cec5SDimitry Andric   if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT))
37280b57cec5SDimitry Andric     return false;
37290b57cec5SDimitry Andric 
37300b57cec5SDimitry Andric   if (!VT.isByteSized())
37310b57cec5SDimitry Andric     return false;
37320b57cec5SDimitry Andric 
37330b57cec5SDimitry Andric   unsigned Size = VT.getStoreSize();
37340b57cec5SDimitry Andric 
37350b57cec5SDimitry Andric   if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector())
37360b57cec5SDimitry Andric     return false;
37370b57cec5SDimitry Andric 
37380b57cec5SDimitry Andric   if (Size == 3 || (Size > 4 && (Size % 4 != 0)))
37390b57cec5SDimitry Andric     return false;
37400b57cec5SDimitry Andric 
37410b57cec5SDimitry Andric   return true;
37420b57cec5SDimitry Andric }
37430b57cec5SDimitry Andric 
37440b57cec5SDimitry Andric // Replace load of an illegal type with a store of a bitcast to a friendlier
37450b57cec5SDimitry Andric // type.
performLoadCombine(SDNode * N,DAGCombinerInfo & DCI) const37460b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
37470b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
37480b57cec5SDimitry Andric   if (!DCI.isBeforeLegalize())
37490b57cec5SDimitry Andric     return SDValue();
37500b57cec5SDimitry Andric 
37510b57cec5SDimitry Andric   LoadSDNode *LN = cast<LoadSDNode>(N);
37525ffd83dbSDimitry Andric   if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN))
37530b57cec5SDimitry Andric     return SDValue();
37540b57cec5SDimitry Andric 
37550b57cec5SDimitry Andric   SDLoc SL(N);
37560b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
37570b57cec5SDimitry Andric   EVT VT = LN->getMemoryVT();
37580b57cec5SDimitry Andric 
37590b57cec5SDimitry Andric   unsigned Size = VT.getStoreSize();
37605ffd83dbSDimitry Andric   Align Alignment = LN->getAlign();
37615ffd83dbSDimitry Andric   if (Alignment < Size && isTypeLegal(VT)) {
3762bdd1243dSDimitry Andric     unsigned IsFast;
37630b57cec5SDimitry Andric     unsigned AS = LN->getAddressSpace();
37640b57cec5SDimitry Andric 
37650b57cec5SDimitry Andric     // Expand unaligned loads earlier than legalization. Due to visitation order
37660b57cec5SDimitry Andric     // problems during legalization, the emitted instructions to pack and unpack
37670b57cec5SDimitry Andric     // the bytes again are not eliminated in the case of an unaligned copy.
3768fe6060f1SDimitry Andric     if (!allowsMisalignedMemoryAccesses(
3769fe6060f1SDimitry Andric             VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) {
3770480093f4SDimitry Andric       if (VT.isVector())
377181ad6265SDimitry Andric         return SplitVectorLoad(SDValue(LN, 0), DAG);
377281ad6265SDimitry Andric 
377381ad6265SDimitry Andric       SDValue Ops[2];
37740b57cec5SDimitry Andric       std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
3775480093f4SDimitry Andric 
37760b57cec5SDimitry Andric       return DAG.getMergeValues(Ops, SDLoc(N));
37770b57cec5SDimitry Andric     }
37780b57cec5SDimitry Andric 
37790b57cec5SDimitry Andric     if (!IsFast)
37800b57cec5SDimitry Andric       return SDValue();
37810b57cec5SDimitry Andric   }
37820b57cec5SDimitry Andric 
37830b57cec5SDimitry Andric   if (!shouldCombineMemoryType(VT))
37840b57cec5SDimitry Andric     return SDValue();
37850b57cec5SDimitry Andric 
37860b57cec5SDimitry Andric   EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
37870b57cec5SDimitry Andric 
37880b57cec5SDimitry Andric   SDValue NewLoad
37890b57cec5SDimitry Andric     = DAG.getLoad(NewVT, SL, LN->getChain(),
37900b57cec5SDimitry Andric                   LN->getBasePtr(), LN->getMemOperand());
37910b57cec5SDimitry Andric 
37920b57cec5SDimitry Andric   SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad);
37930b57cec5SDimitry Andric   DCI.CombineTo(N, BC, NewLoad.getValue(1));
37940b57cec5SDimitry Andric   return SDValue(N, 0);
37950b57cec5SDimitry Andric }
37960b57cec5SDimitry Andric 
37970b57cec5SDimitry Andric // Replace store of an illegal type with a store of a bitcast to a friendlier
37980b57cec5SDimitry Andric // type.
performStoreCombine(SDNode * N,DAGCombinerInfo & DCI) const37990b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
38000b57cec5SDimitry Andric                                                   DAGCombinerInfo &DCI) const {
38010b57cec5SDimitry Andric   if (!DCI.isBeforeLegalize())
38020b57cec5SDimitry Andric     return SDValue();
38030b57cec5SDimitry Andric 
38040b57cec5SDimitry Andric   StoreSDNode *SN = cast<StoreSDNode>(N);
38055ffd83dbSDimitry Andric   if (!SN->isSimple() || !ISD::isNormalStore(SN))
38060b57cec5SDimitry Andric     return SDValue();
38070b57cec5SDimitry Andric 
38080b57cec5SDimitry Andric   EVT VT = SN->getMemoryVT();
38090b57cec5SDimitry Andric   unsigned Size = VT.getStoreSize();
38100b57cec5SDimitry Andric 
38110b57cec5SDimitry Andric   SDLoc SL(N);
38120b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
38135ffd83dbSDimitry Andric   Align Alignment = SN->getAlign();
38145ffd83dbSDimitry Andric   if (Alignment < Size && isTypeLegal(VT)) {
3815bdd1243dSDimitry Andric     unsigned IsFast;
38160b57cec5SDimitry Andric     unsigned AS = SN->getAddressSpace();
38170b57cec5SDimitry Andric 
38180b57cec5SDimitry Andric     // Expand unaligned stores earlier than legalization. Due to visitation
38190b57cec5SDimitry Andric     // order problems during legalization, the emitted instructions to pack and
38200b57cec5SDimitry Andric     // unpack the bytes again are not eliminated in the case of an unaligned
38210b57cec5SDimitry Andric     // copy.
3822fe6060f1SDimitry Andric     if (!allowsMisalignedMemoryAccesses(
3823fe6060f1SDimitry Andric             VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) {
38240b57cec5SDimitry Andric       if (VT.isVector())
382581ad6265SDimitry Andric         return SplitVectorStore(SDValue(SN, 0), DAG);
38260b57cec5SDimitry Andric 
38270b57cec5SDimitry Andric       return expandUnalignedStore(SN, DAG);
38280b57cec5SDimitry Andric     }
38290b57cec5SDimitry Andric 
38300b57cec5SDimitry Andric     if (!IsFast)
38310b57cec5SDimitry Andric       return SDValue();
38320b57cec5SDimitry Andric   }
38330b57cec5SDimitry Andric 
38340b57cec5SDimitry Andric   if (!shouldCombineMemoryType(VT))
38350b57cec5SDimitry Andric     return SDValue();
38360b57cec5SDimitry Andric 
38370b57cec5SDimitry Andric   EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT);
38380b57cec5SDimitry Andric   SDValue Val = SN->getValue();
38390b57cec5SDimitry Andric 
38400b57cec5SDimitry Andric   //DCI.AddToWorklist(Val.getNode());
38410b57cec5SDimitry Andric 
38420b57cec5SDimitry Andric   bool OtherUses = !Val.hasOneUse();
38430b57cec5SDimitry Andric   SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val);
38440b57cec5SDimitry Andric   if (OtherUses) {
38450b57cec5SDimitry Andric     SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal);
38460b57cec5SDimitry Andric     DAG.ReplaceAllUsesOfValueWith(Val, CastBack);
38470b57cec5SDimitry Andric   }
38480b57cec5SDimitry Andric 
38490b57cec5SDimitry Andric   return DAG.getStore(SN->getChain(), SL, CastVal,
38500b57cec5SDimitry Andric                       SN->getBasePtr(), SN->getMemOperand());
38510b57cec5SDimitry Andric }
38520b57cec5SDimitry Andric 
38530b57cec5SDimitry Andric // FIXME: This should go in generic DAG combiner with an isTruncateFree check,
38540b57cec5SDimitry Andric // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU
38550b57cec5SDimitry Andric // issues.
performAssertSZExtCombine(SDNode * N,DAGCombinerInfo & DCI) const38560b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N,
38570b57cec5SDimitry Andric                                                         DAGCombinerInfo &DCI) const {
38580b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
38590b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
38600b57cec5SDimitry Andric 
38610b57cec5SDimitry Andric   // (vt2 (assertzext (truncate vt0:x), vt1)) ->
38620b57cec5SDimitry Andric   //     (vt2 (truncate (assertzext vt0:x, vt1)))
38630b57cec5SDimitry Andric   if (N0.getOpcode() == ISD::TRUNCATE) {
38640b57cec5SDimitry Andric     SDValue N1 = N->getOperand(1);
38650b57cec5SDimitry Andric     EVT ExtVT = cast<VTSDNode>(N1)->getVT();
38660b57cec5SDimitry Andric     SDLoc SL(N);
38670b57cec5SDimitry Andric 
38680b57cec5SDimitry Andric     SDValue Src = N0.getOperand(0);
38690b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
38700b57cec5SDimitry Andric     if (SrcVT.bitsGE(ExtVT)) {
38710b57cec5SDimitry Andric       SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1);
38720b57cec5SDimitry Andric       return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg);
38730b57cec5SDimitry Andric     }
38740b57cec5SDimitry Andric   }
38750b57cec5SDimitry Andric 
38760b57cec5SDimitry Andric   return SDValue();
38770b57cec5SDimitry Andric }
38788bcb0991SDimitry Andric 
performIntrinsicWOChainCombine(SDNode * N,DAGCombinerInfo & DCI) const38798bcb0991SDimitry Andric SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
38808bcb0991SDimitry Andric   SDNode *N, DAGCombinerInfo &DCI) const {
3881647cbc5dSDimitry Andric   unsigned IID = N->getConstantOperandVal(0);
38828bcb0991SDimitry Andric   switch (IID) {
38838bcb0991SDimitry Andric   case Intrinsic::amdgcn_mul_i24:
38848bcb0991SDimitry Andric   case Intrinsic::amdgcn_mul_u24:
3885349cc55cSDimitry Andric   case Intrinsic::amdgcn_mulhi_i24:
3886349cc55cSDimitry Andric   case Intrinsic::amdgcn_mulhi_u24:
3887fe6060f1SDimitry Andric     return simplifyMul24(N, DCI);
38885ffd83dbSDimitry Andric   case Intrinsic::amdgcn_fract:
38895ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rsq:
38905ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rcp_legacy:
38915ffd83dbSDimitry Andric   case Intrinsic::amdgcn_rsq_legacy:
38925f757f3fSDimitry Andric   case Intrinsic::amdgcn_rsq_clamp: {
38935ffd83dbSDimitry Andric     // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
38945ffd83dbSDimitry Andric     SDValue Src = N->getOperand(1);
38955ffd83dbSDimitry Andric     return Src.isUndef() ? Src : SDValue();
38965ffd83dbSDimitry Andric   }
389706c3fb27SDimitry Andric   case Intrinsic::amdgcn_frexp_exp: {
389806c3fb27SDimitry Andric     // frexp_exp (fneg x) -> frexp_exp x
389906c3fb27SDimitry Andric     // frexp_exp (fabs x) -> frexp_exp x
390006c3fb27SDimitry Andric     // frexp_exp (fneg (fabs x)) -> frexp_exp x
390106c3fb27SDimitry Andric     SDValue Src = N->getOperand(1);
390206c3fb27SDimitry Andric     SDValue PeekSign = peekFPSignOps(Src);
390306c3fb27SDimitry Andric     if (PeekSign == Src)
390406c3fb27SDimitry Andric       return SDValue();
390506c3fb27SDimitry Andric     return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
390606c3fb27SDimitry Andric                    0);
390706c3fb27SDimitry Andric   }
39088bcb0991SDimitry Andric   default:
39098bcb0991SDimitry Andric     return SDValue();
39108bcb0991SDimitry Andric   }
39118bcb0991SDimitry Andric }
39128bcb0991SDimitry Andric 
39130b57cec5SDimitry Andric /// Split the 64-bit value \p LHS into two 32-bit components, and perform the
39140b57cec5SDimitry Andric /// binary operation \p Opc to it with the corresponding constant operands.
splitBinaryBitConstantOpImpl(DAGCombinerInfo & DCI,const SDLoc & SL,unsigned Opc,SDValue LHS,uint32_t ValLo,uint32_t ValHi) const39150b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
39160b57cec5SDimitry Andric   DAGCombinerInfo &DCI, const SDLoc &SL,
39170b57cec5SDimitry Andric   unsigned Opc, SDValue LHS,
39180b57cec5SDimitry Andric   uint32_t ValLo, uint32_t ValHi) const {
39190b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
39200b57cec5SDimitry Andric   SDValue Lo, Hi;
39210b57cec5SDimitry Andric   std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
39220b57cec5SDimitry Andric 
39230b57cec5SDimitry Andric   SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
39240b57cec5SDimitry Andric   SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
39250b57cec5SDimitry Andric 
39260b57cec5SDimitry Andric   SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
39270b57cec5SDimitry Andric   SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
39280b57cec5SDimitry Andric 
39290b57cec5SDimitry Andric   // Re-visit the ands. It's possible we eliminated one of them and it could
39300b57cec5SDimitry Andric   // simplify the vector.
39310b57cec5SDimitry Andric   DCI.AddToWorklist(Lo.getNode());
39320b57cec5SDimitry Andric   DCI.AddToWorklist(Hi.getNode());
39330b57cec5SDimitry Andric 
39340b57cec5SDimitry Andric   SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd});
39350b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
39360b57cec5SDimitry Andric }
39370b57cec5SDimitry Andric 
performShlCombine(SDNode * N,DAGCombinerInfo & DCI) const39380b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
39390b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
39400b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
39410b57cec5SDimitry Andric 
39420b57cec5SDimitry Andric   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
39430b57cec5SDimitry Andric   if (!RHS)
39440b57cec5SDimitry Andric     return SDValue();
39450b57cec5SDimitry Andric 
39460b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
39470b57cec5SDimitry Andric   unsigned RHSVal = RHS->getZExtValue();
39480b57cec5SDimitry Andric   if (!RHSVal)
39490b57cec5SDimitry Andric     return LHS;
39500b57cec5SDimitry Andric 
39510b57cec5SDimitry Andric   SDLoc SL(N);
39520b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
39530b57cec5SDimitry Andric 
39540b57cec5SDimitry Andric   switch (LHS->getOpcode()) {
39550b57cec5SDimitry Andric   default:
39560b57cec5SDimitry Andric     break;
39570b57cec5SDimitry Andric   case ISD::ZERO_EXTEND:
39580b57cec5SDimitry Andric   case ISD::SIGN_EXTEND:
39590b57cec5SDimitry Andric   case ISD::ANY_EXTEND: {
39600b57cec5SDimitry Andric     SDValue X = LHS->getOperand(0);
39610b57cec5SDimitry Andric 
39620b57cec5SDimitry Andric     if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 &&
39630b57cec5SDimitry Andric         isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) {
39640b57cec5SDimitry Andric       // Prefer build_vector as the canonical form if packed types are legal.
39650b57cec5SDimitry Andric       // (shl ([asz]ext i16:x), 16 -> build_vector 0, x
39660b57cec5SDimitry Andric       SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL,
39670b57cec5SDimitry Andric        { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) });
39680b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec);
39690b57cec5SDimitry Andric     }
39700b57cec5SDimitry Andric 
39710b57cec5SDimitry Andric     // shl (ext x) => zext (shl x), if shift does not overflow int
39720b57cec5SDimitry Andric     if (VT != MVT::i64)
39730b57cec5SDimitry Andric       break;
39740b57cec5SDimitry Andric     KnownBits Known = DAG.computeKnownBits(X);
39750b57cec5SDimitry Andric     unsigned LZ = Known.countMinLeadingZeros();
39760b57cec5SDimitry Andric     if (LZ < RHSVal)
39770b57cec5SDimitry Andric       break;
39780b57cec5SDimitry Andric     EVT XVT = X.getValueType();
39790b57cec5SDimitry Andric     SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0));
39800b57cec5SDimitry Andric     return DAG.getZExtOrTrunc(Shl, SL, VT);
39810b57cec5SDimitry Andric   }
39820b57cec5SDimitry Andric   }
39830b57cec5SDimitry Andric 
39840b57cec5SDimitry Andric   if (VT != MVT::i64)
39850b57cec5SDimitry Andric     return SDValue();
39860b57cec5SDimitry Andric 
39870b57cec5SDimitry Andric   // i64 (shl x, C) -> (build_pair 0, (shl x, C -32))
39880b57cec5SDimitry Andric 
39890b57cec5SDimitry Andric   // On some subtargets, 64-bit shift is a quarter rate instruction. In the
39900b57cec5SDimitry Andric   // common case, splitting this into a move and a 32-bit shift is faster and
39910b57cec5SDimitry Andric   // the same code size.
39920b57cec5SDimitry Andric   if (RHSVal < 32)
39930b57cec5SDimitry Andric     return SDValue();
39940b57cec5SDimitry Andric 
39950b57cec5SDimitry Andric   SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32);
39960b57cec5SDimitry Andric 
39970b57cec5SDimitry Andric   SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
39980b57cec5SDimitry Andric   SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt);
39990b57cec5SDimitry Andric 
40000b57cec5SDimitry Andric   const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
40010b57cec5SDimitry Andric 
40020b57cec5SDimitry Andric   SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift});
40030b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
40040b57cec5SDimitry Andric }
40050b57cec5SDimitry Andric 
performSraCombine(SDNode * N,DAGCombinerInfo & DCI) const40060b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N,
40070b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
40080b57cec5SDimitry Andric   if (N->getValueType(0) != MVT::i64)
40090b57cec5SDimitry Andric     return SDValue();
40100b57cec5SDimitry Andric 
40110b57cec5SDimitry Andric   const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
40120b57cec5SDimitry Andric   if (!RHS)
40130b57cec5SDimitry Andric     return SDValue();
40140b57cec5SDimitry Andric 
40150b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
40160b57cec5SDimitry Andric   SDLoc SL(N);
40170b57cec5SDimitry Andric   unsigned RHSVal = RHS->getZExtValue();
40180b57cec5SDimitry Andric 
40190b57cec5SDimitry Andric   // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31)
40200b57cec5SDimitry Andric   if (RHSVal == 32) {
40210b57cec5SDimitry Andric     SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
40220b57cec5SDimitry Andric     SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
40230b57cec5SDimitry Andric                                    DAG.getConstant(31, SL, MVT::i32));
40240b57cec5SDimitry Andric 
40250b57cec5SDimitry Andric     SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift});
40260b57cec5SDimitry Andric     return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
40270b57cec5SDimitry Andric   }
40280b57cec5SDimitry Andric 
40290b57cec5SDimitry Andric   // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31)
40300b57cec5SDimitry Andric   if (RHSVal == 63) {
40310b57cec5SDimitry Andric     SDValue Hi = getHiHalf64(N->getOperand(0), DAG);
40320b57cec5SDimitry Andric     SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi,
40330b57cec5SDimitry Andric                                    DAG.getConstant(31, SL, MVT::i32));
40340b57cec5SDimitry Andric     SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift});
40350b57cec5SDimitry Andric     return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec);
40360b57cec5SDimitry Andric   }
40370b57cec5SDimitry Andric 
40380b57cec5SDimitry Andric   return SDValue();
40390b57cec5SDimitry Andric }
40400b57cec5SDimitry Andric 
performSrlCombine(SDNode * N,DAGCombinerInfo & DCI) const40410b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
40420b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
40430b57cec5SDimitry Andric   auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
40440b57cec5SDimitry Andric   if (!RHS)
40450b57cec5SDimitry Andric     return SDValue();
40460b57cec5SDimitry Andric 
40470b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
40480b57cec5SDimitry Andric   SDValue LHS = N->getOperand(0);
40490b57cec5SDimitry Andric   unsigned ShiftAmt = RHS->getZExtValue();
40500b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
40510b57cec5SDimitry Andric   SDLoc SL(N);
40520b57cec5SDimitry Andric 
40530b57cec5SDimitry Andric   // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1)
40540b57cec5SDimitry Andric   // this improves the ability to match BFE patterns in isel.
40550b57cec5SDimitry Andric   if (LHS.getOpcode() == ISD::AND) {
40560b57cec5SDimitry Andric     if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) {
405781ad6265SDimitry Andric       unsigned MaskIdx, MaskLen;
405881ad6265SDimitry Andric       if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) &&
405981ad6265SDimitry Andric           MaskIdx == ShiftAmt) {
40600b57cec5SDimitry Andric         return DAG.getNode(
40610b57cec5SDimitry Andric             ISD::AND, SL, VT,
40620b57cec5SDimitry Andric             DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)),
40630b57cec5SDimitry Andric             DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1)));
40640b57cec5SDimitry Andric       }
40650b57cec5SDimitry Andric     }
40660b57cec5SDimitry Andric   }
40670b57cec5SDimitry Andric 
40680b57cec5SDimitry Andric   if (VT != MVT::i64)
40690b57cec5SDimitry Andric     return SDValue();
40700b57cec5SDimitry Andric 
40710b57cec5SDimitry Andric   if (ShiftAmt < 32)
40720b57cec5SDimitry Andric     return SDValue();
40730b57cec5SDimitry Andric 
40740b57cec5SDimitry Andric   // srl i64:x, C for C >= 32
40750b57cec5SDimitry Andric   // =>
40760b57cec5SDimitry Andric   //   build_pair (srl hi_32(x), C - 32), 0
40770b57cec5SDimitry Andric   SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
40780b57cec5SDimitry Andric 
4079349cc55cSDimitry Andric   SDValue Hi = getHiHalf64(LHS, DAG);
40800b57cec5SDimitry Andric 
40810b57cec5SDimitry Andric   SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32);
40820b57cec5SDimitry Andric   SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst);
40830b57cec5SDimitry Andric 
40840b57cec5SDimitry Andric   SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero});
40850b57cec5SDimitry Andric 
40860b57cec5SDimitry Andric   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
40870b57cec5SDimitry Andric }
40880b57cec5SDimitry Andric 
performTruncateCombine(SDNode * N,DAGCombinerInfo & DCI) const40890b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performTruncateCombine(
40900b57cec5SDimitry Andric   SDNode *N, DAGCombinerInfo &DCI) const {
40910b57cec5SDimitry Andric   SDLoc SL(N);
40920b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
40930b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
40940b57cec5SDimitry Andric   SDValue Src = N->getOperand(0);
40950b57cec5SDimitry Andric 
40960b57cec5SDimitry Andric   // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
40970b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
40980b57cec5SDimitry Andric     SDValue Vec = Src.getOperand(0);
40990b57cec5SDimitry Andric     if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
41000b57cec5SDimitry Andric       SDValue Elt0 = Vec.getOperand(0);
41010b57cec5SDimitry Andric       EVT EltVT = Elt0.getValueType();
4102e8d8bef9SDimitry Andric       if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) {
41030b57cec5SDimitry Andric         if (EltVT.isFloatingPoint()) {
41040b57cec5SDimitry Andric           Elt0 = DAG.getNode(ISD::BITCAST, SL,
41050b57cec5SDimitry Andric                              EltVT.changeTypeToInteger(), Elt0);
41060b57cec5SDimitry Andric         }
41070b57cec5SDimitry Andric 
41080b57cec5SDimitry Andric         return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
41090b57cec5SDimitry Andric       }
41100b57cec5SDimitry Andric     }
41110b57cec5SDimitry Andric   }
41120b57cec5SDimitry Andric 
41130b57cec5SDimitry Andric   // Equivalent of above for accessing the high element of a vector as an
41140b57cec5SDimitry Andric   // integer operation.
41150b57cec5SDimitry Andric   // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y)
41160b57cec5SDimitry Andric   if (Src.getOpcode() == ISD::SRL && !VT.isVector()) {
41170b57cec5SDimitry Andric     if (auto K = isConstOrConstSplat(Src.getOperand(1))) {
41180b57cec5SDimitry Andric       if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) {
41190b57cec5SDimitry Andric         SDValue BV = stripBitcast(Src.getOperand(0));
41200b57cec5SDimitry Andric         if (BV.getOpcode() == ISD::BUILD_VECTOR &&
41210b57cec5SDimitry Andric             BV.getValueType().getVectorNumElements() == 2) {
41220b57cec5SDimitry Andric           SDValue SrcElt = BV.getOperand(1);
41230b57cec5SDimitry Andric           EVT SrcEltVT = SrcElt.getValueType();
41240b57cec5SDimitry Andric           if (SrcEltVT.isFloatingPoint()) {
41250b57cec5SDimitry Andric             SrcElt = DAG.getNode(ISD::BITCAST, SL,
41260b57cec5SDimitry Andric                                  SrcEltVT.changeTypeToInteger(), SrcElt);
41270b57cec5SDimitry Andric           }
41280b57cec5SDimitry Andric 
41290b57cec5SDimitry Andric           return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt);
41300b57cec5SDimitry Andric         }
41310b57cec5SDimitry Andric       }
41320b57cec5SDimitry Andric     }
41330b57cec5SDimitry Andric   }
41340b57cec5SDimitry Andric 
41350b57cec5SDimitry Andric   // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit.
41360b57cec5SDimitry Andric   //
41370b57cec5SDimitry Andric   // i16 (trunc (srl i64:x, K)), K <= 16 ->
41380b57cec5SDimitry Andric   //     i16 (trunc (srl (i32 (trunc x), K)))
41390b57cec5SDimitry Andric   if (VT.getScalarSizeInBits() < 32) {
41400b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
41410b57cec5SDimitry Andric     if (SrcVT.getScalarSizeInBits() > 32 &&
41420b57cec5SDimitry Andric         (Src.getOpcode() == ISD::SRL ||
41430b57cec5SDimitry Andric          Src.getOpcode() == ISD::SRA ||
41440b57cec5SDimitry Andric          Src.getOpcode() == ISD::SHL)) {
41450b57cec5SDimitry Andric       SDValue Amt = Src.getOperand(1);
41460b57cec5SDimitry Andric       KnownBits Known = DAG.computeKnownBits(Amt);
4147bdd1243dSDimitry Andric 
4148bdd1243dSDimitry Andric       // - For left shifts, do the transform as long as the shift
4149bdd1243dSDimitry Andric       //   amount is still legal for i32, so when ShiftAmt < 32 (<= 31)
4150bdd1243dSDimitry Andric       // - For right shift, do it if ShiftAmt <= (32 - Size) to avoid
4151bdd1243dSDimitry Andric       //   losing information stored in the high bits when truncating.
4152bdd1243dSDimitry Andric       const unsigned MaxCstSize =
4153bdd1243dSDimitry Andric           (Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits());
4154bdd1243dSDimitry Andric       if (Known.getMaxValue().ule(MaxCstSize)) {
41550b57cec5SDimitry Andric         EVT MidVT = VT.isVector() ?
41560b57cec5SDimitry Andric           EVT::getVectorVT(*DAG.getContext(), MVT::i32,
41570b57cec5SDimitry Andric                            VT.getVectorNumElements()) : MVT::i32;
41580b57cec5SDimitry Andric 
41590b57cec5SDimitry Andric         EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout());
41600b57cec5SDimitry Andric         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT,
41610b57cec5SDimitry Andric                                     Src.getOperand(0));
41620b57cec5SDimitry Andric         DCI.AddToWorklist(Trunc.getNode());
41630b57cec5SDimitry Andric 
41640b57cec5SDimitry Andric         if (Amt.getValueType() != NewShiftVT) {
41650b57cec5SDimitry Andric           Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT);
41660b57cec5SDimitry Andric           DCI.AddToWorklist(Amt.getNode());
41670b57cec5SDimitry Andric         }
41680b57cec5SDimitry Andric 
41690b57cec5SDimitry Andric         SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT,
41700b57cec5SDimitry Andric                                           Trunc, Amt);
41710b57cec5SDimitry Andric         return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift);
41720b57cec5SDimitry Andric       }
41730b57cec5SDimitry Andric     }
41740b57cec5SDimitry Andric   }
41750b57cec5SDimitry Andric 
41760b57cec5SDimitry Andric   return SDValue();
41770b57cec5SDimitry Andric }
41780b57cec5SDimitry Andric 
41790b57cec5SDimitry Andric // We need to specifically handle i64 mul here to avoid unnecessary conversion
41800b57cec5SDimitry Andric // instructions. If we only match on the legalized i64 mul expansion,
41810b57cec5SDimitry Andric // SimplifyDemandedBits will be unable to remove them because there will be
41820b57cec5SDimitry Andric // multiple uses due to the separate mul + mulh[su].
getMul24(SelectionDAG & DAG,const SDLoc & SL,SDValue N0,SDValue N1,unsigned Size,bool Signed)41830b57cec5SDimitry Andric static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
41840b57cec5SDimitry Andric                         SDValue N0, SDValue N1, unsigned Size, bool Signed) {
41850b57cec5SDimitry Andric   if (Size <= 32) {
41860b57cec5SDimitry Andric     unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
41870b57cec5SDimitry Andric     return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
41880b57cec5SDimitry Andric   }
41890b57cec5SDimitry Andric 
4190e8d8bef9SDimitry Andric   unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
4191e8d8bef9SDimitry Andric   unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
41920b57cec5SDimitry Andric 
4193e8d8bef9SDimitry Andric   SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
4194e8d8bef9SDimitry Andric   SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
41950b57cec5SDimitry Andric 
4196e8d8bef9SDimitry Andric   return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi);
41970b57cec5SDimitry Andric }
41980b57cec5SDimitry Andric 
419906c3fb27SDimitry Andric /// If \p V is an add of a constant 1, returns the other operand. Otherwise
420006c3fb27SDimitry Andric /// return SDValue().
getAddOneOp(const SDNode * V)420106c3fb27SDimitry Andric static SDValue getAddOneOp(const SDNode *V) {
420206c3fb27SDimitry Andric   if (V->getOpcode() != ISD::ADD)
420306c3fb27SDimitry Andric     return SDValue();
420406c3fb27SDimitry Andric 
42055f757f3fSDimitry Andric   return isOneConstant(V->getOperand(1)) ? V->getOperand(0) : SDValue();
420606c3fb27SDimitry Andric }
420706c3fb27SDimitry Andric 
performMulCombine(SDNode * N,DAGCombinerInfo & DCI) const42080b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
42090b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
42100b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
42110b57cec5SDimitry Andric 
4212fe6060f1SDimitry Andric   // Don't generate 24-bit multiplies on values that are in SGPRs, since
4213fe6060f1SDimitry Andric   // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
4214fe6060f1SDimitry Andric   // unnecessarily). isDivergent() is used as an approximation of whether the
4215fe6060f1SDimitry Andric   // value is in an SGPR.
4216fe6060f1SDimitry Andric   if (!N->isDivergent())
4217fe6060f1SDimitry Andric     return SDValue();
4218fe6060f1SDimitry Andric 
42190b57cec5SDimitry Andric   unsigned Size = VT.getSizeInBits();
42200b57cec5SDimitry Andric   if (VT.isVector() || Size > 64)
42210b57cec5SDimitry Andric     return SDValue();
42220b57cec5SDimitry Andric 
42230b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
42240b57cec5SDimitry Andric   SDLoc DL(N);
42250b57cec5SDimitry Andric 
42260b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
42270b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
42280b57cec5SDimitry Andric 
422906c3fb27SDimitry Andric   // Undo InstCombine canonicalize X * (Y + 1) -> X * Y + X to enable mad
423006c3fb27SDimitry Andric   // matching.
423106c3fb27SDimitry Andric 
423206c3fb27SDimitry Andric   // mul x, (add y, 1) -> add (mul x, y), x
423306c3fb27SDimitry Andric   auto IsFoldableAdd = [](SDValue V) -> SDValue {
423406c3fb27SDimitry Andric     SDValue AddOp = getAddOneOp(V.getNode());
423506c3fb27SDimitry Andric     if (!AddOp)
423606c3fb27SDimitry Andric       return SDValue();
423706c3fb27SDimitry Andric 
423806c3fb27SDimitry Andric     if (V.hasOneUse() || all_of(V->uses(), [](const SDNode *U) -> bool {
423906c3fb27SDimitry Andric           return U->getOpcode() == ISD::MUL;
424006c3fb27SDimitry Andric         }))
424106c3fb27SDimitry Andric       return AddOp;
424206c3fb27SDimitry Andric 
424306c3fb27SDimitry Andric     return SDValue();
424406c3fb27SDimitry Andric   };
424506c3fb27SDimitry Andric 
424606c3fb27SDimitry Andric   // FIXME: The selection pattern is not properly checking for commuted
424706c3fb27SDimitry Andric   // operands, so we have to place the mul in the LHS
424806c3fb27SDimitry Andric   if (SDValue MulOper = IsFoldableAdd(N0)) {
424906c3fb27SDimitry Andric     SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);
425006c3fb27SDimitry Andric     return DAG.getNode(ISD::ADD, DL, VT, MulVal, N1);
425106c3fb27SDimitry Andric   }
425206c3fb27SDimitry Andric 
425306c3fb27SDimitry Andric   if (SDValue MulOper = IsFoldableAdd(N1)) {
425406c3fb27SDimitry Andric     SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);
425506c3fb27SDimitry Andric     return DAG.getNode(ISD::ADD, DL, VT, MulVal, N0);
425606c3fb27SDimitry Andric   }
425706c3fb27SDimitry Andric 
425806c3fb27SDimitry Andric   // Skip if already mul24.
425906c3fb27SDimitry Andric   if (N->getOpcode() != ISD::MUL)
426006c3fb27SDimitry Andric     return SDValue();
426106c3fb27SDimitry Andric 
426206c3fb27SDimitry Andric   // There are i16 integer mul/mad.
426306c3fb27SDimitry Andric   if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
426406c3fb27SDimitry Andric     return SDValue();
426506c3fb27SDimitry Andric 
42660b57cec5SDimitry Andric   // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
42670b57cec5SDimitry Andric   // in the source into any_extends if the result of the mul is truncated. Since
42680b57cec5SDimitry Andric   // we can assume the high bits are whatever we want, use the underlying value
42690b57cec5SDimitry Andric   // to avoid the unknown high bits from interfering.
42700b57cec5SDimitry Andric   if (N0.getOpcode() == ISD::ANY_EXTEND)
42710b57cec5SDimitry Andric     N0 = N0.getOperand(0);
42720b57cec5SDimitry Andric 
42730b57cec5SDimitry Andric   if (N1.getOpcode() == ISD::ANY_EXTEND)
42740b57cec5SDimitry Andric     N1 = N1.getOperand(0);
42750b57cec5SDimitry Andric 
42760b57cec5SDimitry Andric   SDValue Mul;
42770b57cec5SDimitry Andric 
42780b57cec5SDimitry Andric   if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
42790b57cec5SDimitry Andric     N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
42800b57cec5SDimitry Andric     N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
42810b57cec5SDimitry Andric     Mul = getMul24(DAG, DL, N0, N1, Size, false);
42820b57cec5SDimitry Andric   } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
42830b57cec5SDimitry Andric     N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
42840b57cec5SDimitry Andric     N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
42850b57cec5SDimitry Andric     Mul = getMul24(DAG, DL, N0, N1, Size, true);
42860b57cec5SDimitry Andric   } else {
42870b57cec5SDimitry Andric     return SDValue();
42880b57cec5SDimitry Andric   }
42890b57cec5SDimitry Andric 
42900b57cec5SDimitry Andric   // We need to use sext even for MUL_U24, because MUL_U24 is used
42910b57cec5SDimitry Andric   // for signed multiply of 8 and 16-bit types.
42920b57cec5SDimitry Andric   return DAG.getSExtOrTrunc(Mul, DL, VT);
42930b57cec5SDimitry Andric }
42940b57cec5SDimitry Andric 
42954824e7fdSDimitry Andric SDValue
performMulLoHiCombine(SDNode * N,DAGCombinerInfo & DCI) const42964824e7fdSDimitry Andric AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N,
42974824e7fdSDimitry Andric                                             DAGCombinerInfo &DCI) const {
42984824e7fdSDimitry Andric   if (N->getValueType(0) != MVT::i32)
42994824e7fdSDimitry Andric     return SDValue();
43004824e7fdSDimitry Andric 
43014824e7fdSDimitry Andric   SelectionDAG &DAG = DCI.DAG;
43024824e7fdSDimitry Andric   SDLoc DL(N);
43034824e7fdSDimitry Andric 
43044824e7fdSDimitry Andric   SDValue N0 = N->getOperand(0);
43054824e7fdSDimitry Andric   SDValue N1 = N->getOperand(1);
43064824e7fdSDimitry Andric 
43074824e7fdSDimitry Andric   // SimplifyDemandedBits has the annoying habit of turning useful zero_extends
43084824e7fdSDimitry Andric   // in the source into any_extends if the result of the mul is truncated. Since
43094824e7fdSDimitry Andric   // we can assume the high bits are whatever we want, use the underlying value
43104824e7fdSDimitry Andric   // to avoid the unknown high bits from interfering.
43114824e7fdSDimitry Andric   if (N0.getOpcode() == ISD::ANY_EXTEND)
43124824e7fdSDimitry Andric     N0 = N0.getOperand(0);
43134824e7fdSDimitry Andric   if (N1.getOpcode() == ISD::ANY_EXTEND)
43144824e7fdSDimitry Andric     N1 = N1.getOperand(0);
43154824e7fdSDimitry Andric 
43164824e7fdSDimitry Andric   // Try to use two fast 24-bit multiplies (one for each half of the result)
43174824e7fdSDimitry Andric   // instead of one slow extending multiply.
43184824e7fdSDimitry Andric   unsigned LoOpcode, HiOpcode;
43194824e7fdSDimitry Andric   if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
43204824e7fdSDimitry Andric     N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
43214824e7fdSDimitry Andric     N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
43224824e7fdSDimitry Andric     LoOpcode = AMDGPUISD::MUL_U24;
43234824e7fdSDimitry Andric     HiOpcode = AMDGPUISD::MULHI_U24;
43244824e7fdSDimitry Andric   } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
43254824e7fdSDimitry Andric     N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
43264824e7fdSDimitry Andric     N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
43274824e7fdSDimitry Andric     LoOpcode = AMDGPUISD::MUL_I24;
43284824e7fdSDimitry Andric     HiOpcode = AMDGPUISD::MULHI_I24;
43294824e7fdSDimitry Andric   } else {
43304824e7fdSDimitry Andric     return SDValue();
43314824e7fdSDimitry Andric   }
43324824e7fdSDimitry Andric 
43334824e7fdSDimitry Andric   SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1);
43344824e7fdSDimitry Andric   SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1);
43354824e7fdSDimitry Andric   DCI.CombineTo(N, Lo, Hi);
43364824e7fdSDimitry Andric   return SDValue(N, 0);
43374824e7fdSDimitry Andric }
43384824e7fdSDimitry Andric 
performMulhsCombine(SDNode * N,DAGCombinerInfo & DCI) const43390b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
43400b57cec5SDimitry Andric                                                   DAGCombinerInfo &DCI) const {
43410b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
43420b57cec5SDimitry Andric 
43430b57cec5SDimitry Andric   if (!Subtarget->hasMulI24() || VT.isVector())
43440b57cec5SDimitry Andric     return SDValue();
43450b57cec5SDimitry Andric 
4346fe6060f1SDimitry Andric   // Don't generate 24-bit multiplies on values that are in SGPRs, since
4347fe6060f1SDimitry Andric   // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
4348fe6060f1SDimitry Andric   // unnecessarily). isDivergent() is used as an approximation of whether the
4349fe6060f1SDimitry Andric   // value is in an SGPR.
4350fe6060f1SDimitry Andric   // This doesn't apply if no s_mul_hi is available (since we'll end up with a
4351fe6060f1SDimitry Andric   // valu op anyway)
4352fe6060f1SDimitry Andric   if (Subtarget->hasSMulHi() && !N->isDivergent())
4353fe6060f1SDimitry Andric     return SDValue();
4354fe6060f1SDimitry Andric 
43550b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
43560b57cec5SDimitry Andric   SDLoc DL(N);
43570b57cec5SDimitry Andric 
43580b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
43590b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
43600b57cec5SDimitry Andric 
43610b57cec5SDimitry Andric   if (!isI24(N0, DAG) || !isI24(N1, DAG))
43620b57cec5SDimitry Andric     return SDValue();
43630b57cec5SDimitry Andric 
43640b57cec5SDimitry Andric   N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
43650b57cec5SDimitry Andric   N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
43660b57cec5SDimitry Andric 
43670b57cec5SDimitry Andric   SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
43680b57cec5SDimitry Andric   DCI.AddToWorklist(Mulhi.getNode());
43690b57cec5SDimitry Andric   return DAG.getSExtOrTrunc(Mulhi, DL, VT);
43700b57cec5SDimitry Andric }
43710b57cec5SDimitry Andric 
performMulhuCombine(SDNode * N,DAGCombinerInfo & DCI) const43720b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
43730b57cec5SDimitry Andric                                                   DAGCombinerInfo &DCI) const {
43740b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
43750b57cec5SDimitry Andric 
43760b57cec5SDimitry Andric   if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
43770b57cec5SDimitry Andric     return SDValue();
43780b57cec5SDimitry Andric 
4379fe6060f1SDimitry Andric   // Don't generate 24-bit multiplies on values that are in SGPRs, since
4380fe6060f1SDimitry Andric   // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs
4381fe6060f1SDimitry Andric   // unnecessarily). isDivergent() is used as an approximation of whether the
4382fe6060f1SDimitry Andric   // value is in an SGPR.
4383fe6060f1SDimitry Andric   // This doesn't apply if no s_mul_hi is available (since we'll end up with a
4384fe6060f1SDimitry Andric   // valu op anyway)
4385fe6060f1SDimitry Andric   if (Subtarget->hasSMulHi() && !N->isDivergent())
4386fe6060f1SDimitry Andric     return SDValue();
4387fe6060f1SDimitry Andric 
43880b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
43890b57cec5SDimitry Andric   SDLoc DL(N);
43900b57cec5SDimitry Andric 
43910b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
43920b57cec5SDimitry Andric   SDValue N1 = N->getOperand(1);
43930b57cec5SDimitry Andric 
43940b57cec5SDimitry Andric   if (!isU24(N0, DAG) || !isU24(N1, DAG))
43950b57cec5SDimitry Andric     return SDValue();
43960b57cec5SDimitry Andric 
43970b57cec5SDimitry Andric   N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
43980b57cec5SDimitry Andric   N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
43990b57cec5SDimitry Andric 
44000b57cec5SDimitry Andric   SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
44010b57cec5SDimitry Andric   DCI.AddToWorklist(Mulhi.getNode());
44020b57cec5SDimitry Andric   return DAG.getZExtOrTrunc(Mulhi, DL, VT);
44030b57cec5SDimitry Andric }
44040b57cec5SDimitry Andric 
getFFBX_U32(SelectionDAG & DAG,SDValue Op,const SDLoc & DL,unsigned Opc) const44050b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
44060b57cec5SDimitry Andric                                           SDValue Op,
44070b57cec5SDimitry Andric                                           const SDLoc &DL,
44080b57cec5SDimitry Andric                                           unsigned Opc) const {
44090b57cec5SDimitry Andric   EVT VT = Op.getValueType();
44100b57cec5SDimitry Andric   EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
44110b57cec5SDimitry Andric   if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
44120b57cec5SDimitry Andric                               LegalVT != MVT::i16))
44130b57cec5SDimitry Andric     return SDValue();
44140b57cec5SDimitry Andric 
44150b57cec5SDimitry Andric   if (VT != MVT::i32)
44160b57cec5SDimitry Andric     Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
44170b57cec5SDimitry Andric 
44180b57cec5SDimitry Andric   SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op);
44190b57cec5SDimitry Andric   if (VT != MVT::i32)
44200b57cec5SDimitry Andric     FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX);
44210b57cec5SDimitry Andric 
44220b57cec5SDimitry Andric   return FFBX;
44230b57cec5SDimitry Andric }
44240b57cec5SDimitry Andric 
44250b57cec5SDimitry Andric // The native instructions return -1 on 0 input. Optimize out a select that
44260b57cec5SDimitry Andric // produces -1 on 0.
44270b57cec5SDimitry Andric //
44280b57cec5SDimitry Andric // TODO: If zero is not undef, we could also do this if the output is compared
44290b57cec5SDimitry Andric // against the bitwidth.
44300b57cec5SDimitry Andric //
44310b57cec5SDimitry Andric // TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
performCtlz_CttzCombine(const SDLoc & SL,SDValue Cond,SDValue LHS,SDValue RHS,DAGCombinerInfo & DCI) const44320b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond,
44330b57cec5SDimitry Andric                                                  SDValue LHS, SDValue RHS,
44340b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
44355f757f3fSDimitry Andric   if (!isNullConstant(Cond.getOperand(1)))
44360b57cec5SDimitry Andric     return SDValue();
44370b57cec5SDimitry Andric 
44380b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
44390b57cec5SDimitry Andric   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
44400b57cec5SDimitry Andric   SDValue CmpLHS = Cond.getOperand(0);
44410b57cec5SDimitry Andric 
44420b57cec5SDimitry Andric   // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
44430b57cec5SDimitry Andric   // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
44440b57cec5SDimitry Andric   if (CCOpcode == ISD::SETEQ &&
44450b57cec5SDimitry Andric       (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
444606c3fb27SDimitry Andric       RHS.getOperand(0) == CmpLHS && isAllOnesConstant(LHS)) {
44475ffd83dbSDimitry Andric     unsigned Opc =
44485ffd83dbSDimitry Andric         isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
44490b57cec5SDimitry Andric     return getFFBX_U32(DAG, CmpLHS, SL, Opc);
44500b57cec5SDimitry Andric   }
44510b57cec5SDimitry Andric 
44520b57cec5SDimitry Andric   // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
44530b57cec5SDimitry Andric   // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
44540b57cec5SDimitry Andric   if (CCOpcode == ISD::SETNE &&
44555ffd83dbSDimitry Andric       (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
445606c3fb27SDimitry Andric       LHS.getOperand(0) == CmpLHS && isAllOnesConstant(RHS)) {
44575ffd83dbSDimitry Andric     unsigned Opc =
44585ffd83dbSDimitry Andric         isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
44595ffd83dbSDimitry Andric 
44600b57cec5SDimitry Andric     return getFFBX_U32(DAG, CmpLHS, SL, Opc);
44610b57cec5SDimitry Andric   }
44620b57cec5SDimitry Andric 
44630b57cec5SDimitry Andric   return SDValue();
44640b57cec5SDimitry Andric }
44650b57cec5SDimitry Andric 
distributeOpThroughSelect(TargetLowering::DAGCombinerInfo & DCI,unsigned Op,const SDLoc & SL,SDValue Cond,SDValue N1,SDValue N2)44660b57cec5SDimitry Andric static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
44670b57cec5SDimitry Andric                                          unsigned Op,
44680b57cec5SDimitry Andric                                          const SDLoc &SL,
44690b57cec5SDimitry Andric                                          SDValue Cond,
44700b57cec5SDimitry Andric                                          SDValue N1,
44710b57cec5SDimitry Andric                                          SDValue N2) {
44720b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
44730b57cec5SDimitry Andric   EVT VT = N1.getValueType();
44740b57cec5SDimitry Andric 
44750b57cec5SDimitry Andric   SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond,
44760b57cec5SDimitry Andric                                   N1.getOperand(0), N2.getOperand(0));
44770b57cec5SDimitry Andric   DCI.AddToWorklist(NewSelect.getNode());
44780b57cec5SDimitry Andric   return DAG.getNode(Op, SL, VT, NewSelect);
44790b57cec5SDimitry Andric }
44800b57cec5SDimitry Andric 
44810b57cec5SDimitry Andric // Pull a free FP operation out of a select so it may fold into uses.
44820b57cec5SDimitry Andric //
44830b57cec5SDimitry Andric // select c, (fneg x), (fneg y) -> fneg (select c, x, y)
44840b57cec5SDimitry Andric // select c, (fneg x), k -> fneg (select c, x, (fneg k))
44850b57cec5SDimitry Andric //
44860b57cec5SDimitry Andric // select c, (fabs x), (fabs y) -> fabs (select c, x, y)
44870b57cec5SDimitry Andric // select c, (fabs x), +k -> fabs (select c, x, k)
448806c3fb27SDimitry Andric SDValue
foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo & DCI,SDValue N) const448906c3fb27SDimitry Andric AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
449006c3fb27SDimitry Andric                                            SDValue N) const {
44910b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
44920b57cec5SDimitry Andric   SDValue Cond = N.getOperand(0);
44930b57cec5SDimitry Andric   SDValue LHS = N.getOperand(1);
44940b57cec5SDimitry Andric   SDValue RHS = N.getOperand(2);
44950b57cec5SDimitry Andric 
44960b57cec5SDimitry Andric   EVT VT = N.getValueType();
44970b57cec5SDimitry Andric   if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
44980b57cec5SDimitry Andric       (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
449906c3fb27SDimitry Andric     if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
450006c3fb27SDimitry Andric       return SDValue();
450106c3fb27SDimitry Andric 
45020b57cec5SDimitry Andric     return distributeOpThroughSelect(DCI, LHS.getOpcode(),
45030b57cec5SDimitry Andric                                      SDLoc(N), Cond, LHS, RHS);
45040b57cec5SDimitry Andric   }
45050b57cec5SDimitry Andric 
45060b57cec5SDimitry Andric   bool Inv = false;
45070b57cec5SDimitry Andric   if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) {
45080b57cec5SDimitry Andric     std::swap(LHS, RHS);
45090b57cec5SDimitry Andric     Inv = true;
45100b57cec5SDimitry Andric   }
45110b57cec5SDimitry Andric 
45120b57cec5SDimitry Andric   // TODO: Support vector constants.
45130b57cec5SDimitry Andric   ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
451406c3fb27SDimitry Andric   if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&
451506c3fb27SDimitry Andric       !selectSupportsSourceMods(N.getNode())) {
45160b57cec5SDimitry Andric     SDLoc SL(N);
45170b57cec5SDimitry Andric     // If one side is an fneg/fabs and the other is a constant, we can push the
45180b57cec5SDimitry Andric     // fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
45190b57cec5SDimitry Andric     SDValue NewLHS = LHS.getOperand(0);
45200b57cec5SDimitry Andric     SDValue NewRHS = RHS;
45210b57cec5SDimitry Andric 
45220b57cec5SDimitry Andric     // Careful: if the neg can be folded up, don't try to pull it back down.
45230b57cec5SDimitry Andric     bool ShouldFoldNeg = true;
45240b57cec5SDimitry Andric 
45250b57cec5SDimitry Andric     if (NewLHS.hasOneUse()) {
45260b57cec5SDimitry Andric       unsigned Opc = NewLHS.getOpcode();
452706c3fb27SDimitry Andric       if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(NewLHS.getNode()))
45280b57cec5SDimitry Andric         ShouldFoldNeg = false;
45290b57cec5SDimitry Andric       if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
45300b57cec5SDimitry Andric         ShouldFoldNeg = false;
45310b57cec5SDimitry Andric     }
45320b57cec5SDimitry Andric 
45330b57cec5SDimitry Andric     if (ShouldFoldNeg) {
453406c3fb27SDimitry Andric       if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative())
453506c3fb27SDimitry Andric         return SDValue();
453606c3fb27SDimitry Andric 
453706c3fb27SDimitry Andric       // We're going to be forced to use a source modifier anyway, there's no
453806c3fb27SDimitry Andric       // point to pulling the negate out unless we can get a size reduction by
453906c3fb27SDimitry Andric       // negating the constant.
454006c3fb27SDimitry Andric       //
454106c3fb27SDimitry Andric       // TODO: Generalize to use getCheaperNegatedExpression which doesn't know
454206c3fb27SDimitry Andric       // about cheaper constants.
454306c3fb27SDimitry Andric       if (NewLHS.getOpcode() == ISD::FABS &&
454406c3fb27SDimitry Andric           getConstantNegateCost(CRHS) != NegatibleCost::Cheaper)
454506c3fb27SDimitry Andric         return SDValue();
454606c3fb27SDimitry Andric 
454706c3fb27SDimitry Andric       if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
454806c3fb27SDimitry Andric         return SDValue();
454906c3fb27SDimitry Andric 
45500b57cec5SDimitry Andric       if (LHS.getOpcode() == ISD::FNEG)
45510b57cec5SDimitry Andric         NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
45520b57cec5SDimitry Andric 
45530b57cec5SDimitry Andric       if (Inv)
45540b57cec5SDimitry Andric         std::swap(NewLHS, NewRHS);
45550b57cec5SDimitry Andric 
45560b57cec5SDimitry Andric       SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT,
45570b57cec5SDimitry Andric                                       Cond, NewLHS, NewRHS);
45580b57cec5SDimitry Andric       DCI.AddToWorklist(NewSelect.getNode());
45590b57cec5SDimitry Andric       return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect);
45600b57cec5SDimitry Andric     }
45610b57cec5SDimitry Andric   }
45620b57cec5SDimitry Andric 
45630b57cec5SDimitry Andric   return SDValue();
45640b57cec5SDimitry Andric }
45650b57cec5SDimitry Andric 
performSelectCombine(SDNode * N,DAGCombinerInfo & DCI) const45660b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
45670b57cec5SDimitry Andric                                                    DAGCombinerInfo &DCI) const {
45680b57cec5SDimitry Andric   if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
45690b57cec5SDimitry Andric     return Folded;
45700b57cec5SDimitry Andric 
45710b57cec5SDimitry Andric   SDValue Cond = N->getOperand(0);
45720b57cec5SDimitry Andric   if (Cond.getOpcode() != ISD::SETCC)
45730b57cec5SDimitry Andric     return SDValue();
45740b57cec5SDimitry Andric 
45750b57cec5SDimitry Andric   EVT VT = N->getValueType(0);
45760b57cec5SDimitry Andric   SDValue LHS = Cond.getOperand(0);
45770b57cec5SDimitry Andric   SDValue RHS = Cond.getOperand(1);
45780b57cec5SDimitry Andric   SDValue CC = Cond.getOperand(2);
45790b57cec5SDimitry Andric 
45800b57cec5SDimitry Andric   SDValue True = N->getOperand(1);
45810b57cec5SDimitry Andric   SDValue False = N->getOperand(2);
45820b57cec5SDimitry Andric 
45830b57cec5SDimitry Andric   if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
45840b57cec5SDimitry Andric     SelectionDAG &DAG = DCI.DAG;
45850b57cec5SDimitry Andric     if (DAG.isConstantValueOfAnyType(True) &&
45860b57cec5SDimitry Andric         !DAG.isConstantValueOfAnyType(False)) {
45870b57cec5SDimitry Andric       // Swap cmp + select pair to move constant to false input.
45880b57cec5SDimitry Andric       // This will allow using VOPC cndmasks more often.
45890b57cec5SDimitry Andric       // select (setcc x, y), k, x -> select (setccinv x, y), x, k
45900b57cec5SDimitry Andric 
45910b57cec5SDimitry Andric       SDLoc SL(N);
4592480093f4SDimitry Andric       ISD::CondCode NewCC =
4593480093f4SDimitry Andric           getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType());
45940b57cec5SDimitry Andric 
45950b57cec5SDimitry Andric       SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
45960b57cec5SDimitry Andric       return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
45970b57cec5SDimitry Andric     }
45980b57cec5SDimitry Andric 
45990b57cec5SDimitry Andric     if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) {
46000b57cec5SDimitry Andric       SDValue MinMax
46010b57cec5SDimitry Andric         = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
46020b57cec5SDimitry Andric       // Revisit this node so we can catch min3/max3/med3 patterns.
46030b57cec5SDimitry Andric       //DCI.AddToWorklist(MinMax.getNode());
46040b57cec5SDimitry Andric       return MinMax;
46050b57cec5SDimitry Andric     }
46060b57cec5SDimitry Andric   }
46070b57cec5SDimitry Andric 
46080b57cec5SDimitry Andric   // There's no reason to not do this if the condition has other uses.
46090b57cec5SDimitry Andric   return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI);
46100b57cec5SDimitry Andric }
46110b57cec5SDimitry Andric 
isInv2Pi(const APFloat & APF)46120b57cec5SDimitry Andric static bool isInv2Pi(const APFloat &APF) {
46130b57cec5SDimitry Andric   static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118));
46140b57cec5SDimitry Andric   static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983));
46150b57cec5SDimitry Andric   static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882));
46160b57cec5SDimitry Andric 
46170b57cec5SDimitry Andric   return APF.bitwiseIsEqual(KF16) ||
46180b57cec5SDimitry Andric          APF.bitwiseIsEqual(KF32) ||
46190b57cec5SDimitry Andric          APF.bitwiseIsEqual(KF64);
46200b57cec5SDimitry Andric }
46210b57cec5SDimitry Andric 
46220b57cec5SDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an
46230b57cec5SDimitry Andric // additional cost to negate them.
462406c3fb27SDimitry Andric TargetLowering::NegatibleCost
getConstantNegateCost(const ConstantFPSDNode * C) const462506c3fb27SDimitry Andric AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const {
462606c3fb27SDimitry Andric   if (C->isZero())
462706c3fb27SDimitry Andric     return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
46280b57cec5SDimitry Andric 
46290b57cec5SDimitry Andric   if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
463006c3fb27SDimitry Andric     return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
463106c3fb27SDimitry Andric 
463206c3fb27SDimitry Andric   return NegatibleCost::Neutral;
46330b57cec5SDimitry Andric }
46340b57cec5SDimitry Andric 
isConstantCostlierToNegate(SDValue N) const463506c3fb27SDimitry Andric bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
463606c3fb27SDimitry Andric   if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
463706c3fb27SDimitry Andric     return getConstantNegateCost(C) == NegatibleCost::Expensive;
463806c3fb27SDimitry Andric   return false;
463906c3fb27SDimitry Andric }
464006c3fb27SDimitry Andric 
isConstantCheaperToNegate(SDValue N) const464106c3fb27SDimitry Andric bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const {
464206c3fb27SDimitry Andric   if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
464306c3fb27SDimitry Andric     return getConstantNegateCost(C) == NegatibleCost::Cheaper;
46440b57cec5SDimitry Andric   return false;
46450b57cec5SDimitry Andric }
46460b57cec5SDimitry Andric 
inverseMinMax(unsigned Opc)46470b57cec5SDimitry Andric static unsigned inverseMinMax(unsigned Opc) {
46480b57cec5SDimitry Andric   switch (Opc) {
46490b57cec5SDimitry Andric   case ISD::FMAXNUM:
46500b57cec5SDimitry Andric     return ISD::FMINNUM;
46510b57cec5SDimitry Andric   case ISD::FMINNUM:
46520b57cec5SDimitry Andric     return ISD::FMAXNUM;
46530b57cec5SDimitry Andric   case ISD::FMAXNUM_IEEE:
46540b57cec5SDimitry Andric     return ISD::FMINNUM_IEEE;
46550b57cec5SDimitry Andric   case ISD::FMINNUM_IEEE:
46560b57cec5SDimitry Andric     return ISD::FMAXNUM_IEEE;
46575f757f3fSDimitry Andric   case ISD::FMAXIMUM:
46585f757f3fSDimitry Andric     return ISD::FMINIMUM;
46595f757f3fSDimitry Andric   case ISD::FMINIMUM:
46605f757f3fSDimitry Andric     return ISD::FMAXIMUM;
46610b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY:
46620b57cec5SDimitry Andric     return AMDGPUISD::FMIN_LEGACY;
46630b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY:
46640b57cec5SDimitry Andric     return  AMDGPUISD::FMAX_LEGACY;
46650b57cec5SDimitry Andric   default:
46660b57cec5SDimitry Andric     llvm_unreachable("invalid min/max opcode");
46670b57cec5SDimitry Andric   }
46680b57cec5SDimitry Andric }
46690b57cec5SDimitry Andric 
467006c3fb27SDimitry Andric /// \return true if it's profitable to try to push an fneg into its source
467106c3fb27SDimitry Andric /// instruction.
shouldFoldFNegIntoSrc(SDNode * N,SDValue N0)467206c3fb27SDimitry Andric bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) {
46730b57cec5SDimitry Andric   // If the input has multiple uses and we can either fold the negate down, or
46740b57cec5SDimitry Andric   // the other uses cannot, give up. This both prevents unprofitable
46750b57cec5SDimitry Andric   // transformations and infinite loops: we won't repeatedly try to fold around
46760b57cec5SDimitry Andric   // a negate that has no 'good' form.
46770b57cec5SDimitry Andric   if (N0.hasOneUse()) {
46780b57cec5SDimitry Andric     // This may be able to fold into the source, but at a code size cost. Don't
46790b57cec5SDimitry Andric     // fold if the fold into the user is free.
46800b57cec5SDimitry Andric     if (allUsesHaveSourceMods(N, 0))
468106c3fb27SDimitry Andric       return false;
46820b57cec5SDimitry Andric   } else {
468306c3fb27SDimitry Andric     if (fnegFoldsIntoOp(N0.getNode()) &&
46840b57cec5SDimitry Andric         (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
468506c3fb27SDimitry Andric       return false;
46860b57cec5SDimitry Andric   }
46870b57cec5SDimitry Andric 
468806c3fb27SDimitry Andric   return true;
468906c3fb27SDimitry Andric }
469006c3fb27SDimitry Andric 
performFNegCombine(SDNode * N,DAGCombinerInfo & DCI) const469106c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
469206c3fb27SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
469306c3fb27SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
469406c3fb27SDimitry Andric   SDValue N0 = N->getOperand(0);
469506c3fb27SDimitry Andric   EVT VT = N->getValueType(0);
469606c3fb27SDimitry Andric 
469706c3fb27SDimitry Andric   unsigned Opc = N0.getOpcode();
469806c3fb27SDimitry Andric 
469906c3fb27SDimitry Andric   if (!shouldFoldFNegIntoSrc(N, N0))
470006c3fb27SDimitry Andric     return SDValue();
470106c3fb27SDimitry Andric 
47020b57cec5SDimitry Andric   SDLoc SL(N);
47030b57cec5SDimitry Andric   switch (Opc) {
47040b57cec5SDimitry Andric   case ISD::FADD: {
47050b57cec5SDimitry Andric     if (!mayIgnoreSignedZero(N0))
47060b57cec5SDimitry Andric       return SDValue();
47070b57cec5SDimitry Andric 
47080b57cec5SDimitry Andric     // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y))
47090b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
47100b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(1);
47110b57cec5SDimitry Andric 
47120b57cec5SDimitry Andric     if (LHS.getOpcode() != ISD::FNEG)
47130b57cec5SDimitry Andric       LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
47140b57cec5SDimitry Andric     else
47150b57cec5SDimitry Andric       LHS = LHS.getOperand(0);
47160b57cec5SDimitry Andric 
47170b57cec5SDimitry Andric     if (RHS.getOpcode() != ISD::FNEG)
47180b57cec5SDimitry Andric       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
47190b57cec5SDimitry Andric     else
47200b57cec5SDimitry Andric       RHS = RHS.getOperand(0);
47210b57cec5SDimitry Andric 
47220b57cec5SDimitry Andric     SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags());
47230b57cec5SDimitry Andric     if (Res.getOpcode() != ISD::FADD)
47240b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
47250b57cec5SDimitry Andric     if (!N0.hasOneUse())
47260b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
47270b57cec5SDimitry Andric     return Res;
47280b57cec5SDimitry Andric   }
47290b57cec5SDimitry Andric   case ISD::FMUL:
47300b57cec5SDimitry Andric   case AMDGPUISD::FMUL_LEGACY: {
47310b57cec5SDimitry Andric     // (fneg (fmul x, y)) -> (fmul x, (fneg y))
47320b57cec5SDimitry Andric     // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y))
47330b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
47340b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(1);
47350b57cec5SDimitry Andric 
47360b57cec5SDimitry Andric     if (LHS.getOpcode() == ISD::FNEG)
47370b57cec5SDimitry Andric       LHS = LHS.getOperand(0);
47380b57cec5SDimitry Andric     else if (RHS.getOpcode() == ISD::FNEG)
47390b57cec5SDimitry Andric       RHS = RHS.getOperand(0);
47400b57cec5SDimitry Andric     else
47410b57cec5SDimitry Andric       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
47420b57cec5SDimitry Andric 
47430b57cec5SDimitry Andric     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags());
47440b57cec5SDimitry Andric     if (Res.getOpcode() != Opc)
47450b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
47460b57cec5SDimitry Andric     if (!N0.hasOneUse())
47470b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
47480b57cec5SDimitry Andric     return Res;
47490b57cec5SDimitry Andric   }
47500b57cec5SDimitry Andric   case ISD::FMA:
47510b57cec5SDimitry Andric   case ISD::FMAD: {
4752e8d8bef9SDimitry Andric     // TODO: handle llvm.amdgcn.fma.legacy
47530b57cec5SDimitry Andric     if (!mayIgnoreSignedZero(N0))
47540b57cec5SDimitry Andric       return SDValue();
47550b57cec5SDimitry Andric 
47560b57cec5SDimitry Andric     // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z))
47570b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
47580b57cec5SDimitry Andric     SDValue MHS = N0.getOperand(1);
47590b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(2);
47600b57cec5SDimitry Andric 
47610b57cec5SDimitry Andric     if (LHS.getOpcode() == ISD::FNEG)
47620b57cec5SDimitry Andric       LHS = LHS.getOperand(0);
47630b57cec5SDimitry Andric     else if (MHS.getOpcode() == ISD::FNEG)
47640b57cec5SDimitry Andric       MHS = MHS.getOperand(0);
47650b57cec5SDimitry Andric     else
47660b57cec5SDimitry Andric       MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS);
47670b57cec5SDimitry Andric 
47680b57cec5SDimitry Andric     if (RHS.getOpcode() != ISD::FNEG)
47690b57cec5SDimitry Andric       RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
47700b57cec5SDimitry Andric     else
47710b57cec5SDimitry Andric       RHS = RHS.getOperand(0);
47720b57cec5SDimitry Andric 
47730b57cec5SDimitry Andric     SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS);
47740b57cec5SDimitry Andric     if (Res.getOpcode() != Opc)
47750b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
47760b57cec5SDimitry Andric     if (!N0.hasOneUse())
47770b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
47780b57cec5SDimitry Andric     return Res;
47790b57cec5SDimitry Andric   }
47800b57cec5SDimitry Andric   case ISD::FMAXNUM:
47810b57cec5SDimitry Andric   case ISD::FMINNUM:
47820b57cec5SDimitry Andric   case ISD::FMAXNUM_IEEE:
47830b57cec5SDimitry Andric   case ISD::FMINNUM_IEEE:
47845f757f3fSDimitry Andric   case ISD::FMINIMUM:
47855f757f3fSDimitry Andric   case ISD::FMAXIMUM:
47860b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY:
47870b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY: {
47880b57cec5SDimitry Andric     // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y)
47890b57cec5SDimitry Andric     // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y)
47900b57cec5SDimitry Andric     // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y)
47910b57cec5SDimitry Andric     // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y)
47920b57cec5SDimitry Andric 
47930b57cec5SDimitry Andric     SDValue LHS = N0.getOperand(0);
47940b57cec5SDimitry Andric     SDValue RHS = N0.getOperand(1);
47950b57cec5SDimitry Andric 
47960b57cec5SDimitry Andric     // 0 doesn't have a negated inline immediate.
47970b57cec5SDimitry Andric     // TODO: This constant check should be generalized to other operations.
47980b57cec5SDimitry Andric     if (isConstantCostlierToNegate(RHS))
47990b57cec5SDimitry Andric       return SDValue();
48000b57cec5SDimitry Andric 
48010b57cec5SDimitry Andric     SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS);
48020b57cec5SDimitry Andric     SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
48030b57cec5SDimitry Andric     unsigned Opposite = inverseMinMax(Opc);
48040b57cec5SDimitry Andric 
48050b57cec5SDimitry Andric     SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags());
48060b57cec5SDimitry Andric     if (Res.getOpcode() != Opposite)
48070b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
48080b57cec5SDimitry Andric     if (!N0.hasOneUse())
48090b57cec5SDimitry Andric       DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res));
48100b57cec5SDimitry Andric     return Res;
48110b57cec5SDimitry Andric   }
48120b57cec5SDimitry Andric   case AMDGPUISD::FMED3: {
48130b57cec5SDimitry Andric     SDValue Ops[3];
48140b57cec5SDimitry Andric     for (unsigned I = 0; I < 3; ++I)
48150b57cec5SDimitry Andric       Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags());
48160b57cec5SDimitry Andric 
48170b57cec5SDimitry Andric     SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags());
48180b57cec5SDimitry Andric     if (Res.getOpcode() != AMDGPUISD::FMED3)
48190b57cec5SDimitry Andric       return SDValue(); // Op got folded away.
4820e8d8bef9SDimitry Andric 
4821e8d8bef9SDimitry Andric     if (!N0.hasOneUse()) {
4822e8d8bef9SDimitry Andric       SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res);
4823e8d8bef9SDimitry Andric       DAG.ReplaceAllUsesWith(N0, Neg);
4824e8d8bef9SDimitry Andric 
4825e8d8bef9SDimitry Andric       for (SDNode *U : Neg->uses())
4826e8d8bef9SDimitry Andric         DCI.AddToWorklist(U);
4827e8d8bef9SDimitry Andric     }
4828e8d8bef9SDimitry Andric 
48290b57cec5SDimitry Andric     return Res;
48300b57cec5SDimitry Andric   }
48310b57cec5SDimitry Andric   case ISD::FP_EXTEND:
48320b57cec5SDimitry Andric   case ISD::FTRUNC:
48330b57cec5SDimitry Andric   case ISD::FRINT:
48340b57cec5SDimitry Andric   case ISD::FNEARBYINT: // XXX - Should fround be handled?
48355f757f3fSDimitry Andric   case ISD::FROUNDEVEN:
48360b57cec5SDimitry Andric   case ISD::FSIN:
48370b57cec5SDimitry Andric   case ISD::FCANONICALIZE:
48380b57cec5SDimitry Andric   case AMDGPUISD::RCP:
48390b57cec5SDimitry Andric   case AMDGPUISD::RCP_LEGACY:
48400b57cec5SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
48410b57cec5SDimitry Andric   case AMDGPUISD::SIN_HW: {
48420b57cec5SDimitry Andric     SDValue CvtSrc = N0.getOperand(0);
48430b57cec5SDimitry Andric     if (CvtSrc.getOpcode() == ISD::FNEG) {
48440b57cec5SDimitry Andric       // (fneg (fp_extend (fneg x))) -> (fp_extend x)
48450b57cec5SDimitry Andric       // (fneg (rcp (fneg x))) -> (rcp x)
48460b57cec5SDimitry Andric       return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0));
48470b57cec5SDimitry Andric     }
48480b57cec5SDimitry Andric 
48490b57cec5SDimitry Andric     if (!N0.hasOneUse())
48500b57cec5SDimitry Andric       return SDValue();
48510b57cec5SDimitry Andric 
48520b57cec5SDimitry Andric     // (fneg (fp_extend x)) -> (fp_extend (fneg x))
48530b57cec5SDimitry Andric     // (fneg (rcp x)) -> (rcp (fneg x))
48540b57cec5SDimitry Andric     SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
48550b57cec5SDimitry Andric     return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags());
48560b57cec5SDimitry Andric   }
48570b57cec5SDimitry Andric   case ISD::FP_ROUND: {
48580b57cec5SDimitry Andric     SDValue CvtSrc = N0.getOperand(0);
48590b57cec5SDimitry Andric 
48600b57cec5SDimitry Andric     if (CvtSrc.getOpcode() == ISD::FNEG) {
48610b57cec5SDimitry Andric       // (fneg (fp_round (fneg x))) -> (fp_round x)
48620b57cec5SDimitry Andric       return DAG.getNode(ISD::FP_ROUND, SL, VT,
48630b57cec5SDimitry Andric                          CvtSrc.getOperand(0), N0.getOperand(1));
48640b57cec5SDimitry Andric     }
48650b57cec5SDimitry Andric 
48660b57cec5SDimitry Andric     if (!N0.hasOneUse())
48670b57cec5SDimitry Andric       return SDValue();
48680b57cec5SDimitry Andric 
48690b57cec5SDimitry Andric     // (fneg (fp_round x)) -> (fp_round (fneg x))
48700b57cec5SDimitry Andric     SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc);
48710b57cec5SDimitry Andric     return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1));
48720b57cec5SDimitry Andric   }
48730b57cec5SDimitry Andric   case ISD::FP16_TO_FP: {
48740b57cec5SDimitry Andric     // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal
48750b57cec5SDimitry Andric     // f16, but legalization of f16 fneg ends up pulling it out of the source.
48760b57cec5SDimitry Andric     // Put the fneg back as a legal source operation that can be matched later.
48770b57cec5SDimitry Andric     SDLoc SL(N);
48780b57cec5SDimitry Andric 
48790b57cec5SDimitry Andric     SDValue Src = N0.getOperand(0);
48800b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
48810b57cec5SDimitry Andric 
48820b57cec5SDimitry Andric     // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000)
48830b57cec5SDimitry Andric     SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src,
48840b57cec5SDimitry Andric                                   DAG.getConstant(0x8000, SL, SrcVT));
48850b57cec5SDimitry Andric     return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
48860b57cec5SDimitry Andric   }
488706c3fb27SDimitry Andric   case ISD::SELECT: {
488806c3fb27SDimitry Andric     // fneg (select c, a, b) -> select c, (fneg a), (fneg b)
488906c3fb27SDimitry Andric     // TODO: Invert conditions of foldFreeOpFromSelect
489006c3fb27SDimitry Andric     return SDValue();
489106c3fb27SDimitry Andric   }
489206c3fb27SDimitry Andric   case ISD::BITCAST: {
489306c3fb27SDimitry Andric     SDLoc SL(N);
489406c3fb27SDimitry Andric     SDValue BCSrc = N0.getOperand(0);
489506c3fb27SDimitry Andric     if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
489606c3fb27SDimitry Andric       SDValue HighBits = BCSrc.getOperand(BCSrc.getNumOperands() - 1);
489706c3fb27SDimitry Andric       if (HighBits.getValueType().getSizeInBits() != 32 ||
489806c3fb27SDimitry Andric           !fnegFoldsIntoOp(HighBits.getNode()))
489906c3fb27SDimitry Andric         return SDValue();
490006c3fb27SDimitry Andric 
490106c3fb27SDimitry Andric       // f64 fneg only really needs to operate on the high half of the
490206c3fb27SDimitry Andric       // register, so try to force it to an f32 operation to help make use of
490306c3fb27SDimitry Andric       // source modifiers.
490406c3fb27SDimitry Andric       //
490506c3fb27SDimitry Andric       //
490606c3fb27SDimitry Andric       // fneg (f64 (bitcast (build_vector x, y))) ->
490706c3fb27SDimitry Andric       // f64 (bitcast (build_vector (bitcast i32:x to f32),
490806c3fb27SDimitry Andric       //                            (fneg (bitcast i32:y to f32)))
490906c3fb27SDimitry Andric 
491006c3fb27SDimitry Andric       SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits);
491106c3fb27SDimitry Andric       SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi);
491206c3fb27SDimitry Andric       SDValue CastBack =
491306c3fb27SDimitry Andric           DAG.getNode(ISD::BITCAST, SL, HighBits.getValueType(), NegHi);
491406c3fb27SDimitry Andric 
491506c3fb27SDimitry Andric       SmallVector<SDValue, 8> Ops(BCSrc->op_begin(), BCSrc->op_end());
491606c3fb27SDimitry Andric       Ops.back() = CastBack;
491706c3fb27SDimitry Andric       DCI.AddToWorklist(NegHi.getNode());
491806c3fb27SDimitry Andric       SDValue Build =
491906c3fb27SDimitry Andric           DAG.getNode(ISD::BUILD_VECTOR, SL, BCSrc.getValueType(), Ops);
492006c3fb27SDimitry Andric       SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build);
492106c3fb27SDimitry Andric 
492206c3fb27SDimitry Andric       if (!N0.hasOneUse())
492306c3fb27SDimitry Andric         DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Result));
492406c3fb27SDimitry Andric       return Result;
492506c3fb27SDimitry Andric     }
492606c3fb27SDimitry Andric 
492706c3fb27SDimitry Andric     if (BCSrc.getOpcode() == ISD::SELECT && VT == MVT::f32 &&
492806c3fb27SDimitry Andric         BCSrc.hasOneUse()) {
492906c3fb27SDimitry Andric       // fneg (bitcast (f32 (select cond, i32:lhs, i32:rhs))) ->
493006c3fb27SDimitry Andric       //   select cond, (bitcast i32:lhs to f32), (bitcast i32:rhs to f32)
493106c3fb27SDimitry Andric 
493206c3fb27SDimitry Andric       // TODO: Cast back result for multiple uses is beneficial in some cases.
493306c3fb27SDimitry Andric 
493406c3fb27SDimitry Andric       SDValue LHS =
493506c3fb27SDimitry Andric           DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(1));
493606c3fb27SDimitry Andric       SDValue RHS =
493706c3fb27SDimitry Andric           DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(2));
493806c3fb27SDimitry Andric 
493906c3fb27SDimitry Andric       SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);
494006c3fb27SDimitry Andric       SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);
494106c3fb27SDimitry Andric 
494206c3fb27SDimitry Andric       return DAG.getNode(ISD::SELECT, SL, MVT::f32, BCSrc.getOperand(0), NegLHS,
494306c3fb27SDimitry Andric                          NegRHS);
494406c3fb27SDimitry Andric     }
494506c3fb27SDimitry Andric 
494606c3fb27SDimitry Andric     return SDValue();
494706c3fb27SDimitry Andric   }
49480b57cec5SDimitry Andric   default:
49490b57cec5SDimitry Andric     return SDValue();
49500b57cec5SDimitry Andric   }
49510b57cec5SDimitry Andric }
49520b57cec5SDimitry Andric 
performFAbsCombine(SDNode * N,DAGCombinerInfo & DCI) const49530b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N,
49540b57cec5SDimitry Andric                                                  DAGCombinerInfo &DCI) const {
49550b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
49560b57cec5SDimitry Andric   SDValue N0 = N->getOperand(0);
49570b57cec5SDimitry Andric 
49580b57cec5SDimitry Andric   if (!N0.hasOneUse())
49590b57cec5SDimitry Andric     return SDValue();
49600b57cec5SDimitry Andric 
49610b57cec5SDimitry Andric   switch (N0.getOpcode()) {
49620b57cec5SDimitry Andric   case ISD::FP16_TO_FP: {
49630b57cec5SDimitry Andric     assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal");
49640b57cec5SDimitry Andric     SDLoc SL(N);
49650b57cec5SDimitry Andric     SDValue Src = N0.getOperand(0);
49660b57cec5SDimitry Andric     EVT SrcVT = Src.getValueType();
49670b57cec5SDimitry Andric 
49680b57cec5SDimitry Andric     // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff)
49690b57cec5SDimitry Andric     SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src,
49700b57cec5SDimitry Andric                                   DAG.getConstant(0x7fff, SL, SrcVT));
49710b57cec5SDimitry Andric     return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs);
49720b57cec5SDimitry Andric   }
49730b57cec5SDimitry Andric   default:
49740b57cec5SDimitry Andric     return SDValue();
49750b57cec5SDimitry Andric   }
49760b57cec5SDimitry Andric }
49770b57cec5SDimitry Andric 
performRcpCombine(SDNode * N,DAGCombinerInfo & DCI) const49780b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N,
49790b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
49800b57cec5SDimitry Andric   const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
49810b57cec5SDimitry Andric   if (!CFP)
49820b57cec5SDimitry Andric     return SDValue();
49830b57cec5SDimitry Andric 
49840b57cec5SDimitry Andric   // XXX - Should this flush denormals?
49850b57cec5SDimitry Andric   const APFloat &Val = CFP->getValueAPF();
49860b57cec5SDimitry Andric   APFloat One(Val.getSemantics(), "1.0");
49870b57cec5SDimitry Andric   return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0));
49880b57cec5SDimitry Andric }
49890b57cec5SDimitry Andric 
PerformDAGCombine(SDNode * N,DAGCombinerInfo & DCI) const49900b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
49910b57cec5SDimitry Andric                                                 DAGCombinerInfo &DCI) const {
49920b57cec5SDimitry Andric   SelectionDAG &DAG = DCI.DAG;
49930b57cec5SDimitry Andric   SDLoc DL(N);
49940b57cec5SDimitry Andric 
49950b57cec5SDimitry Andric   switch(N->getOpcode()) {
49960b57cec5SDimitry Andric   default:
49970b57cec5SDimitry Andric     break;
49980b57cec5SDimitry Andric   case ISD::BITCAST: {
49990b57cec5SDimitry Andric     EVT DestVT = N->getValueType(0);
50000b57cec5SDimitry Andric 
50010b57cec5SDimitry Andric     // Push casts through vector builds. This helps avoid emitting a large
50020b57cec5SDimitry Andric     // number of copies when materializing floating point vector constants.
50030b57cec5SDimitry Andric     //
50040b57cec5SDimitry Andric     // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
50050b57cec5SDimitry Andric     //   vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
50060b57cec5SDimitry Andric     if (DestVT.isVector()) {
50070b57cec5SDimitry Andric       SDValue Src = N->getOperand(0);
50081db9f3b2SDimitry Andric       if (Src.getOpcode() == ISD::BUILD_VECTOR &&
50091db9f3b2SDimitry Andric           (DCI.getDAGCombineLevel() < AfterLegalizeDAG ||
50101db9f3b2SDimitry Andric            isOperationLegal(ISD::BUILD_VECTOR, DestVT))) {
50110b57cec5SDimitry Andric         EVT SrcVT = Src.getValueType();
50120b57cec5SDimitry Andric         unsigned NElts = DestVT.getVectorNumElements();
50130b57cec5SDimitry Andric 
50140b57cec5SDimitry Andric         if (SrcVT.getVectorNumElements() == NElts) {
50150b57cec5SDimitry Andric           EVT DestEltVT = DestVT.getVectorElementType();
50160b57cec5SDimitry Andric 
50170b57cec5SDimitry Andric           SmallVector<SDValue, 8> CastedElts;
50180b57cec5SDimitry Andric           SDLoc SL(N);
50190b57cec5SDimitry Andric           for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
50200b57cec5SDimitry Andric             SDValue Elt = Src.getOperand(I);
50210b57cec5SDimitry Andric             CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
50220b57cec5SDimitry Andric           }
50230b57cec5SDimitry Andric 
50240b57cec5SDimitry Andric           return DAG.getBuildVector(DestVT, SL, CastedElts);
50250b57cec5SDimitry Andric         }
50260b57cec5SDimitry Andric       }
50270b57cec5SDimitry Andric     }
50280b57cec5SDimitry Andric 
5029e8d8bef9SDimitry Andric     if (DestVT.getSizeInBits() != 64 || !DestVT.isVector())
50300b57cec5SDimitry Andric       break;
50310b57cec5SDimitry Andric 
50320b57cec5SDimitry Andric     // Fold bitcasts of constants.
50330b57cec5SDimitry Andric     //
50340b57cec5SDimitry Andric     // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k)
50350b57cec5SDimitry Andric     // TODO: Generalize and move to DAGCombiner
50360b57cec5SDimitry Andric     SDValue Src = N->getOperand(0);
50370b57cec5SDimitry Andric     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) {
50380b57cec5SDimitry Andric       SDLoc SL(N);
50390b57cec5SDimitry Andric       uint64_t CVal = C->getZExtValue();
50400b57cec5SDimitry Andric       SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
50410b57cec5SDimitry Andric                                DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
50420b57cec5SDimitry Andric                                DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
50430b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, DestVT, BV);
50440b57cec5SDimitry Andric     }
50450b57cec5SDimitry Andric 
50460b57cec5SDimitry Andric     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) {
50470b57cec5SDimitry Andric       const APInt &Val = C->getValueAPF().bitcastToAPInt();
50480b57cec5SDimitry Andric       SDLoc SL(N);
50490b57cec5SDimitry Andric       uint64_t CVal = Val.getZExtValue();
50500b57cec5SDimitry Andric       SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
50510b57cec5SDimitry Andric                                 DAG.getConstant(Lo_32(CVal), SL, MVT::i32),
50520b57cec5SDimitry Andric                                 DAG.getConstant(Hi_32(CVal), SL, MVT::i32));
50530b57cec5SDimitry Andric 
50540b57cec5SDimitry Andric       return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec);
50550b57cec5SDimitry Andric     }
50560b57cec5SDimitry Andric 
50570b57cec5SDimitry Andric     break;
50580b57cec5SDimitry Andric   }
50590b57cec5SDimitry Andric   case ISD::SHL: {
50600b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
50610b57cec5SDimitry Andric       break;
50620b57cec5SDimitry Andric 
50630b57cec5SDimitry Andric     return performShlCombine(N, DCI);
50640b57cec5SDimitry Andric   }
50650b57cec5SDimitry Andric   case ISD::SRL: {
50660b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
50670b57cec5SDimitry Andric       break;
50680b57cec5SDimitry Andric 
50690b57cec5SDimitry Andric     return performSrlCombine(N, DCI);
50700b57cec5SDimitry Andric   }
50710b57cec5SDimitry Andric   case ISD::SRA: {
50720b57cec5SDimitry Andric     if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
50730b57cec5SDimitry Andric       break;
50740b57cec5SDimitry Andric 
50750b57cec5SDimitry Andric     return performSraCombine(N, DCI);
50760b57cec5SDimitry Andric   }
50770b57cec5SDimitry Andric   case ISD::TRUNCATE:
50780b57cec5SDimitry Andric     return performTruncateCombine(N, DCI);
50790b57cec5SDimitry Andric   case ISD::MUL:
50800b57cec5SDimitry Andric     return performMulCombine(N, DCI);
508106c3fb27SDimitry Andric   case AMDGPUISD::MUL_U24:
508206c3fb27SDimitry Andric   case AMDGPUISD::MUL_I24: {
508306c3fb27SDimitry Andric     if (SDValue Simplified = simplifyMul24(N, DCI))
508406c3fb27SDimitry Andric       return Simplified;
508506c3fb27SDimitry Andric     return performMulCombine(N, DCI);
508606c3fb27SDimitry Andric   }
508706c3fb27SDimitry Andric   case AMDGPUISD::MULHI_I24:
508806c3fb27SDimitry Andric   case AMDGPUISD::MULHI_U24:
508906c3fb27SDimitry Andric     return simplifyMul24(N, DCI);
50904824e7fdSDimitry Andric   case ISD::SMUL_LOHI:
50914824e7fdSDimitry Andric   case ISD::UMUL_LOHI:
50924824e7fdSDimitry Andric     return performMulLoHiCombine(N, DCI);
50930b57cec5SDimitry Andric   case ISD::MULHS:
50940b57cec5SDimitry Andric     return performMulhsCombine(N, DCI);
50950b57cec5SDimitry Andric   case ISD::MULHU:
50960b57cec5SDimitry Andric     return performMulhuCombine(N, DCI);
50970b57cec5SDimitry Andric   case ISD::SELECT:
50980b57cec5SDimitry Andric     return performSelectCombine(N, DCI);
50990b57cec5SDimitry Andric   case ISD::FNEG:
51000b57cec5SDimitry Andric     return performFNegCombine(N, DCI);
51010b57cec5SDimitry Andric   case ISD::FABS:
51020b57cec5SDimitry Andric     return performFAbsCombine(N, DCI);
51030b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32:
51040b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
51050b57cec5SDimitry Andric     assert(!N->getValueType(0).isVector() &&
51060b57cec5SDimitry Andric            "Vector handling of BFE not implemented");
51070b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
51080b57cec5SDimitry Andric     if (!Width)
51090b57cec5SDimitry Andric       break;
51100b57cec5SDimitry Andric 
51110b57cec5SDimitry Andric     uint32_t WidthVal = Width->getZExtValue() & 0x1f;
51120b57cec5SDimitry Andric     if (WidthVal == 0)
51130b57cec5SDimitry Andric       return DAG.getConstant(0, DL, MVT::i32);
51140b57cec5SDimitry Andric 
51150b57cec5SDimitry Andric     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
51160b57cec5SDimitry Andric     if (!Offset)
51170b57cec5SDimitry Andric       break;
51180b57cec5SDimitry Andric 
51190b57cec5SDimitry Andric     SDValue BitsFrom = N->getOperand(0);
51200b57cec5SDimitry Andric     uint32_t OffsetVal = Offset->getZExtValue() & 0x1f;
51210b57cec5SDimitry Andric 
51220b57cec5SDimitry Andric     bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32;
51230b57cec5SDimitry Andric 
51240b57cec5SDimitry Andric     if (OffsetVal == 0) {
51250b57cec5SDimitry Andric       // This is already sign / zero extended, so try to fold away extra BFEs.
51260b57cec5SDimitry Andric       unsigned SignBits =  Signed ? (32 - WidthVal + 1) : (32 - WidthVal);
51270b57cec5SDimitry Andric 
51280b57cec5SDimitry Andric       unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom);
51290b57cec5SDimitry Andric       if (OpSignBits >= SignBits)
51300b57cec5SDimitry Andric         return BitsFrom;
51310b57cec5SDimitry Andric 
51320b57cec5SDimitry Andric       EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal);
51330b57cec5SDimitry Andric       if (Signed) {
51340b57cec5SDimitry Andric         // This is a sign_extend_inreg. Replace it to take advantage of existing
51350b57cec5SDimitry Andric         // DAG Combines. If not eliminated, we will match back to BFE during
51360b57cec5SDimitry Andric         // selection.
51370b57cec5SDimitry Andric 
51380b57cec5SDimitry Andric         // TODO: The sext_inreg of extended types ends, although we can could
51390b57cec5SDimitry Andric         // handle them in a single BFE.
51400b57cec5SDimitry Andric         return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom,
51410b57cec5SDimitry Andric                            DAG.getValueType(SmallVT));
51420b57cec5SDimitry Andric       }
51430b57cec5SDimitry Andric 
51440b57cec5SDimitry Andric       return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT);
51450b57cec5SDimitry Andric     }
51460b57cec5SDimitry Andric 
51470b57cec5SDimitry Andric     if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) {
51480b57cec5SDimitry Andric       if (Signed) {
51490b57cec5SDimitry Andric         return constantFoldBFE<int32_t>(DAG,
51500b57cec5SDimitry Andric                                         CVal->getSExtValue(),
51510b57cec5SDimitry Andric                                         OffsetVal,
51520b57cec5SDimitry Andric                                         WidthVal,
51530b57cec5SDimitry Andric                                         DL);
51540b57cec5SDimitry Andric       }
51550b57cec5SDimitry Andric 
51560b57cec5SDimitry Andric       return constantFoldBFE<uint32_t>(DAG,
51570b57cec5SDimitry Andric                                        CVal->getZExtValue(),
51580b57cec5SDimitry Andric                                        OffsetVal,
51590b57cec5SDimitry Andric                                        WidthVal,
51600b57cec5SDimitry Andric                                        DL);
51610b57cec5SDimitry Andric     }
51620b57cec5SDimitry Andric 
51630b57cec5SDimitry Andric     if ((OffsetVal + WidthVal) >= 32 &&
51640b57cec5SDimitry Andric         !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) {
51650b57cec5SDimitry Andric       SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32);
51660b57cec5SDimitry Andric       return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32,
51670b57cec5SDimitry Andric                          BitsFrom, ShiftVal);
51680b57cec5SDimitry Andric     }
51690b57cec5SDimitry Andric 
51700b57cec5SDimitry Andric     if (BitsFrom.hasOneUse()) {
51710b57cec5SDimitry Andric       APInt Demanded = APInt::getBitsSet(32,
51720b57cec5SDimitry Andric                                          OffsetVal,
51730b57cec5SDimitry Andric                                          OffsetVal + WidthVal);
51740b57cec5SDimitry Andric 
51750b57cec5SDimitry Andric       KnownBits Known;
51760b57cec5SDimitry Andric       TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
51770b57cec5SDimitry Andric                                             !DCI.isBeforeLegalizeOps());
51780b57cec5SDimitry Andric       const TargetLowering &TLI = DAG.getTargetLoweringInfo();
51790b57cec5SDimitry Andric       if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) ||
51800b57cec5SDimitry Andric           TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) {
51810b57cec5SDimitry Andric         DCI.CommitTargetLoweringOpt(TLO);
51820b57cec5SDimitry Andric       }
51830b57cec5SDimitry Andric     }
51840b57cec5SDimitry Andric 
51850b57cec5SDimitry Andric     break;
51860b57cec5SDimitry Andric   }
51870b57cec5SDimitry Andric   case ISD::LOAD:
51880b57cec5SDimitry Andric     return performLoadCombine(N, DCI);
51890b57cec5SDimitry Andric   case ISD::STORE:
51900b57cec5SDimitry Andric     return performStoreCombine(N, DCI);
51910b57cec5SDimitry Andric   case AMDGPUISD::RCP:
51920b57cec5SDimitry Andric   case AMDGPUISD::RCP_IFLAG:
51930b57cec5SDimitry Andric     return performRcpCombine(N, DCI);
51940b57cec5SDimitry Andric   case ISD::AssertZext:
51950b57cec5SDimitry Andric   case ISD::AssertSext:
51960b57cec5SDimitry Andric     return performAssertSZExtCombine(N, DCI);
51978bcb0991SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN:
51988bcb0991SDimitry Andric     return performIntrinsicWOChainCombine(N, DCI);
51995f757f3fSDimitry Andric   case AMDGPUISD::FMAD_FTZ: {
52005f757f3fSDimitry Andric     SDValue N0 = N->getOperand(0);
52015f757f3fSDimitry Andric     SDValue N1 = N->getOperand(1);
52025f757f3fSDimitry Andric     SDValue N2 = N->getOperand(2);
52035f757f3fSDimitry Andric     EVT VT = N->getValueType(0);
52045f757f3fSDimitry Andric 
52055f757f3fSDimitry Andric     // FMAD_FTZ is a FMAD + flush denormals to zero.
52065f757f3fSDimitry Andric     // We flush the inputs, the intermediate step, and the output.
52075f757f3fSDimitry Andric     ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
52085f757f3fSDimitry Andric     ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
52095f757f3fSDimitry Andric     ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
52105f757f3fSDimitry Andric     if (N0CFP && N1CFP && N2CFP) {
52115f757f3fSDimitry Andric       const auto FTZ = [](const APFloat &V) {
52125f757f3fSDimitry Andric         if (V.isDenormal()) {
52135f757f3fSDimitry Andric           APFloat Zero(V.getSemantics(), 0);
52145f757f3fSDimitry Andric           return V.isNegative() ? -Zero : Zero;
52155f757f3fSDimitry Andric         }
52165f757f3fSDimitry Andric         return V;
52175f757f3fSDimitry Andric       };
52185f757f3fSDimitry Andric 
52195f757f3fSDimitry Andric       APFloat V0 = FTZ(N0CFP->getValueAPF());
52205f757f3fSDimitry Andric       APFloat V1 = FTZ(N1CFP->getValueAPF());
52215f757f3fSDimitry Andric       APFloat V2 = FTZ(N2CFP->getValueAPF());
52225f757f3fSDimitry Andric       V0.multiply(V1, APFloat::rmNearestTiesToEven);
52235f757f3fSDimitry Andric       V0 = FTZ(V0);
52245f757f3fSDimitry Andric       V0.add(V2, APFloat::rmNearestTiesToEven);
52255f757f3fSDimitry Andric       return DAG.getConstantFP(FTZ(V0), DL, VT);
52265f757f3fSDimitry Andric     }
52275f757f3fSDimitry Andric     break;
52285f757f3fSDimitry Andric   }
52290b57cec5SDimitry Andric   }
52300b57cec5SDimitry Andric   return SDValue();
52310b57cec5SDimitry Andric }
52320b57cec5SDimitry Andric 
52330b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52340b57cec5SDimitry Andric // Helper functions
52350b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
52360b57cec5SDimitry Andric 
CreateLiveInRegister(SelectionDAG & DAG,const TargetRegisterClass * RC,Register Reg,EVT VT,const SDLoc & SL,bool RawReg) const52370b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
52380b57cec5SDimitry Andric                                                    const TargetRegisterClass *RC,
52395ffd83dbSDimitry Andric                                                    Register Reg, EVT VT,
52400b57cec5SDimitry Andric                                                    const SDLoc &SL,
52410b57cec5SDimitry Andric                                                    bool RawReg) const {
52420b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
52430b57cec5SDimitry Andric   MachineRegisterInfo &MRI = MF.getRegInfo();
52445ffd83dbSDimitry Andric   Register VReg;
52450b57cec5SDimitry Andric 
52460b57cec5SDimitry Andric   if (!MRI.isLiveIn(Reg)) {
52470b57cec5SDimitry Andric     VReg = MRI.createVirtualRegister(RC);
52480b57cec5SDimitry Andric     MRI.addLiveIn(Reg, VReg);
52490b57cec5SDimitry Andric   } else {
52500b57cec5SDimitry Andric     VReg = MRI.getLiveInVirtReg(Reg);
52510b57cec5SDimitry Andric   }
52520b57cec5SDimitry Andric 
52530b57cec5SDimitry Andric   if (RawReg)
52540b57cec5SDimitry Andric     return DAG.getRegister(VReg, VT);
52550b57cec5SDimitry Andric 
52560b57cec5SDimitry Andric   return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT);
52570b57cec5SDimitry Andric }
52580b57cec5SDimitry Andric 
52598bcb0991SDimitry Andric // This may be called multiple times, and nothing prevents creating multiple
52608bcb0991SDimitry Andric // objects at the same offset. See if we already defined this object.
getOrCreateFixedStackObject(MachineFrameInfo & MFI,unsigned Size,int64_t Offset)52618bcb0991SDimitry Andric static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size,
52628bcb0991SDimitry Andric                                        int64_t Offset) {
52638bcb0991SDimitry Andric   for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
52648bcb0991SDimitry Andric     if (MFI.getObjectOffset(I) == Offset) {
52658bcb0991SDimitry Andric       assert(MFI.getObjectSize(I) == Size);
52668bcb0991SDimitry Andric       return I;
52678bcb0991SDimitry Andric     }
52688bcb0991SDimitry Andric   }
52698bcb0991SDimitry Andric 
52708bcb0991SDimitry Andric   return MFI.CreateFixedObject(Size, Offset, true);
52718bcb0991SDimitry Andric }
52728bcb0991SDimitry Andric 
loadStackInputValue(SelectionDAG & DAG,EVT VT,const SDLoc & SL,int64_t Offset) const52730b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG,
52740b57cec5SDimitry Andric                                                   EVT VT,
52750b57cec5SDimitry Andric                                                   const SDLoc &SL,
52760b57cec5SDimitry Andric                                                   int64_t Offset) const {
52770b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
52780b57cec5SDimitry Andric   MachineFrameInfo &MFI = MF.getFrameInfo();
52798bcb0991SDimitry Andric   int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset);
52800b57cec5SDimitry Andric 
52810b57cec5SDimitry Andric   auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset);
52820b57cec5SDimitry Andric   SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32);
52830b57cec5SDimitry Andric 
5284e8d8bef9SDimitry Andric   return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4),
52850b57cec5SDimitry Andric                      MachineMemOperand::MODereferenceable |
52860b57cec5SDimitry Andric                          MachineMemOperand::MOInvariant);
52870b57cec5SDimitry Andric }
52880b57cec5SDimitry Andric 
storeStackInputValue(SelectionDAG & DAG,const SDLoc & SL,SDValue Chain,SDValue ArgVal,int64_t Offset) const52890b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG,
52900b57cec5SDimitry Andric                                                    const SDLoc &SL,
52910b57cec5SDimitry Andric                                                    SDValue Chain,
52920b57cec5SDimitry Andric                                                    SDValue ArgVal,
52930b57cec5SDimitry Andric                                                    int64_t Offset) const {
52940b57cec5SDimitry Andric   MachineFunction &MF = DAG.getMachineFunction();
52950b57cec5SDimitry Andric   MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset);
5296fe6060f1SDimitry Andric   const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
52970b57cec5SDimitry Andric 
52980b57cec5SDimitry Andric   SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32);
5299fe6060f1SDimitry Andric   // Stores to the argument stack area are relative to the stack pointer.
5300fe6060f1SDimitry Andric   SDValue SP =
5301fe6060f1SDimitry Andric       DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32);
5302fe6060f1SDimitry Andric   Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr);
5303e8d8bef9SDimitry Andric   SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4),
53040b57cec5SDimitry Andric                                MachineMemOperand::MODereferenceable);
53050b57cec5SDimitry Andric   return Store;
53060b57cec5SDimitry Andric }
53070b57cec5SDimitry Andric 
loadInputValue(SelectionDAG & DAG,const TargetRegisterClass * RC,EVT VT,const SDLoc & SL,const ArgDescriptor & Arg) const53080b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
53090b57cec5SDimitry Andric                                              const TargetRegisterClass *RC,
53100b57cec5SDimitry Andric                                              EVT VT, const SDLoc &SL,
53110b57cec5SDimitry Andric                                              const ArgDescriptor &Arg) const {
53120b57cec5SDimitry Andric   assert(Arg && "Attempting to load missing argument");
53130b57cec5SDimitry Andric 
53140b57cec5SDimitry Andric   SDValue V = Arg.isRegister() ?
53150b57cec5SDimitry Andric     CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) :
53160b57cec5SDimitry Andric     loadStackInputValue(DAG, VT, SL, Arg.getStackOffset());
53170b57cec5SDimitry Andric 
53180b57cec5SDimitry Andric   if (!Arg.isMasked())
53190b57cec5SDimitry Andric     return V;
53200b57cec5SDimitry Andric 
53210b57cec5SDimitry Andric   unsigned Mask = Arg.getMask();
532206c3fb27SDimitry Andric   unsigned Shift = llvm::countr_zero<unsigned>(Mask);
53230b57cec5SDimitry Andric   V = DAG.getNode(ISD::SRL, SL, VT, V,
53240b57cec5SDimitry Andric                   DAG.getShiftAmountConstant(Shift, VT, SL));
53250b57cec5SDimitry Andric   return DAG.getNode(ISD::AND, SL, VT, V,
53260b57cec5SDimitry Andric                      DAG.getConstant(Mask >> Shift, SL, VT));
53270b57cec5SDimitry Andric }
53280b57cec5SDimitry Andric 
getImplicitParameterOffset(uint64_t ExplicitKernArgSize,const ImplicitParameter Param) const53290b57cec5SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
533006c3fb27SDimitry Andric     uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const {
533106c3fb27SDimitry Andric   unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();
533206c3fb27SDimitry Andric   const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr();
533306c3fb27SDimitry Andric   uint64_t ArgOffset =
533406c3fb27SDimitry Andric       alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;
53350b57cec5SDimitry Andric   switch (Param) {
533681ad6265SDimitry Andric   case FIRST_IMPLICIT:
53370b57cec5SDimitry Andric     return ArgOffset;
533881ad6265SDimitry Andric   case PRIVATE_BASE:
533981ad6265SDimitry Andric     return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET;
534081ad6265SDimitry Andric   case SHARED_BASE:
534181ad6265SDimitry Andric     return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET;
534281ad6265SDimitry Andric   case QUEUE_PTR:
534381ad6265SDimitry Andric     return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET;
53440b57cec5SDimitry Andric   }
53450b57cec5SDimitry Andric   llvm_unreachable("unexpected implicit parameter type");
53460b57cec5SDimitry Andric }
53470b57cec5SDimitry Andric 
getImplicitParameterOffset(const MachineFunction & MF,const ImplicitParameter Param) const534806c3fb27SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
534906c3fb27SDimitry Andric     const MachineFunction &MF, const ImplicitParameter Param) const {
535006c3fb27SDimitry Andric   const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
535106c3fb27SDimitry Andric   return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param);
535206c3fb27SDimitry Andric }
535306c3fb27SDimitry Andric 
53540b57cec5SDimitry Andric #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
53550b57cec5SDimitry Andric 
getTargetNodeName(unsigned Opcode) const53560b57cec5SDimitry Andric const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
53570b57cec5SDimitry Andric   switch ((AMDGPUISD::NodeType)Opcode) {
53580b57cec5SDimitry Andric   case AMDGPUISD::FIRST_NUMBER: break;
53590b57cec5SDimitry Andric   // AMDIL DAG nodes
53600b57cec5SDimitry Andric   NODE_NAME_CASE(UMUL);
53610b57cec5SDimitry Andric   NODE_NAME_CASE(BRANCH_COND);
53620b57cec5SDimitry Andric 
53630b57cec5SDimitry Andric   // AMDGPU DAG nodes
53640b57cec5SDimitry Andric   NODE_NAME_CASE(IF)
53650b57cec5SDimitry Andric   NODE_NAME_CASE(ELSE)
53660b57cec5SDimitry Andric   NODE_NAME_CASE(LOOP)
53670b57cec5SDimitry Andric   NODE_NAME_CASE(CALL)
53680b57cec5SDimitry Andric   NODE_NAME_CASE(TC_RETURN)
536906c3fb27SDimitry Andric   NODE_NAME_CASE(TC_RETURN_GFX)
53705f757f3fSDimitry Andric   NODE_NAME_CASE(TC_RETURN_CHAIN)
53710b57cec5SDimitry Andric   NODE_NAME_CASE(TRAP)
537206c3fb27SDimitry Andric   NODE_NAME_CASE(RET_GLUE)
53735f757f3fSDimitry Andric   NODE_NAME_CASE(WAVE_ADDRESS)
53740b57cec5SDimitry Andric   NODE_NAME_CASE(RETURN_TO_EPILOG)
53750b57cec5SDimitry Andric   NODE_NAME_CASE(ENDPGM)
537606c3fb27SDimitry Andric   NODE_NAME_CASE(ENDPGM_TRAP)
53770b57cec5SDimitry Andric   NODE_NAME_CASE(DWORDADDR)
53780b57cec5SDimitry Andric   NODE_NAME_CASE(FRACT)
53790b57cec5SDimitry Andric   NODE_NAME_CASE(SETCC)
53800b57cec5SDimitry Andric   NODE_NAME_CASE(SETREG)
53818bcb0991SDimitry Andric   NODE_NAME_CASE(DENORM_MODE)
53820b57cec5SDimitry Andric   NODE_NAME_CASE(FMA_W_CHAIN)
53830b57cec5SDimitry Andric   NODE_NAME_CASE(FMUL_W_CHAIN)
53840b57cec5SDimitry Andric   NODE_NAME_CASE(CLAMP)
53850b57cec5SDimitry Andric   NODE_NAME_CASE(COS_HW)
53860b57cec5SDimitry Andric   NODE_NAME_CASE(SIN_HW)
53870b57cec5SDimitry Andric   NODE_NAME_CASE(FMAX_LEGACY)
53880b57cec5SDimitry Andric   NODE_NAME_CASE(FMIN_LEGACY)
53890b57cec5SDimitry Andric   NODE_NAME_CASE(FMAX3)
53900b57cec5SDimitry Andric   NODE_NAME_CASE(SMAX3)
53910b57cec5SDimitry Andric   NODE_NAME_CASE(UMAX3)
53920b57cec5SDimitry Andric   NODE_NAME_CASE(FMIN3)
53930b57cec5SDimitry Andric   NODE_NAME_CASE(SMIN3)
53940b57cec5SDimitry Andric   NODE_NAME_CASE(UMIN3)
53950b57cec5SDimitry Andric   NODE_NAME_CASE(FMED3)
53960b57cec5SDimitry Andric   NODE_NAME_CASE(SMED3)
53970b57cec5SDimitry Andric   NODE_NAME_CASE(UMED3)
53985f757f3fSDimitry Andric   NODE_NAME_CASE(FMAXIMUM3)
53995f757f3fSDimitry Andric   NODE_NAME_CASE(FMINIMUM3)
54000b57cec5SDimitry Andric   NODE_NAME_CASE(FDOT2)
54010b57cec5SDimitry Andric   NODE_NAME_CASE(URECIP)
54020b57cec5SDimitry Andric   NODE_NAME_CASE(DIV_SCALE)
54030b57cec5SDimitry Andric   NODE_NAME_CASE(DIV_FMAS)
54040b57cec5SDimitry Andric   NODE_NAME_CASE(DIV_FIXUP)
54050b57cec5SDimitry Andric   NODE_NAME_CASE(FMAD_FTZ)
54060b57cec5SDimitry Andric   NODE_NAME_CASE(RCP)
54070b57cec5SDimitry Andric   NODE_NAME_CASE(RSQ)
54080b57cec5SDimitry Andric   NODE_NAME_CASE(RCP_LEGACY)
54090b57cec5SDimitry Andric   NODE_NAME_CASE(RCP_IFLAG)
541006c3fb27SDimitry Andric   NODE_NAME_CASE(LOG)
541106c3fb27SDimitry Andric   NODE_NAME_CASE(EXP)
54120b57cec5SDimitry Andric   NODE_NAME_CASE(FMUL_LEGACY)
54130b57cec5SDimitry Andric   NODE_NAME_CASE(RSQ_CLAMP)
54140b57cec5SDimitry Andric   NODE_NAME_CASE(FP_CLASS)
54150b57cec5SDimitry Andric   NODE_NAME_CASE(DOT4)
54160b57cec5SDimitry Andric   NODE_NAME_CASE(CARRY)
54170b57cec5SDimitry Andric   NODE_NAME_CASE(BORROW)
54180b57cec5SDimitry Andric   NODE_NAME_CASE(BFE_U32)
54190b57cec5SDimitry Andric   NODE_NAME_CASE(BFE_I32)
54200b57cec5SDimitry Andric   NODE_NAME_CASE(BFI)
54210b57cec5SDimitry Andric   NODE_NAME_CASE(BFM)
54220b57cec5SDimitry Andric   NODE_NAME_CASE(FFBH_U32)
54230b57cec5SDimitry Andric   NODE_NAME_CASE(FFBH_I32)
54240b57cec5SDimitry Andric   NODE_NAME_CASE(FFBL_B32)
54250b57cec5SDimitry Andric   NODE_NAME_CASE(MUL_U24)
54260b57cec5SDimitry Andric   NODE_NAME_CASE(MUL_I24)
54270b57cec5SDimitry Andric   NODE_NAME_CASE(MULHI_U24)
54280b57cec5SDimitry Andric   NODE_NAME_CASE(MULHI_I24)
54290b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_U24)
54300b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_I24)
54310b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_I64_I32)
54320b57cec5SDimitry Andric   NODE_NAME_CASE(MAD_U64_U32)
54330b57cec5SDimitry Andric   NODE_NAME_CASE(PERM)
54340b57cec5SDimitry Andric   NODE_NAME_CASE(TEXTURE_FETCH)
54350b57cec5SDimitry Andric   NODE_NAME_CASE(R600_EXPORT)
54360b57cec5SDimitry Andric   NODE_NAME_CASE(CONST_ADDRESS)
54370b57cec5SDimitry Andric   NODE_NAME_CASE(REGISTER_LOAD)
54380b57cec5SDimitry Andric   NODE_NAME_CASE(REGISTER_STORE)
54390b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLE)
54400b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLEB)
54410b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLED)
54420b57cec5SDimitry Andric   NODE_NAME_CASE(SAMPLEL)
54430b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE0)
54440b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE1)
54450b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE2)
54460b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_F32_UBYTE3)
54470b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PKRTZ_F16_F32)
54480b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PKNORM_I16_F32)
54490b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PKNORM_U16_F32)
54500b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PK_I16_I32)
54510b57cec5SDimitry Andric   NODE_NAME_CASE(CVT_PK_U16_U32)
54520b57cec5SDimitry Andric   NODE_NAME_CASE(FP_TO_FP16)
54530b57cec5SDimitry Andric   NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
54540b57cec5SDimitry Andric   NODE_NAME_CASE(CONST_DATA_PTR)
54550b57cec5SDimitry Andric   NODE_NAME_CASE(PC_ADD_REL_OFFSET)
54560b57cec5SDimitry Andric   NODE_NAME_CASE(LDS)
545781ad6265SDimitry Andric   NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD)
545881ad6265SDimitry Andric   NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD)
54590b57cec5SDimitry Andric   NODE_NAME_CASE(DUMMY_CHAIN)
54600b57cec5SDimitry Andric   case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
54610b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_HI)
54620b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_LO)
54630b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_HI_I8)
54640b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_HI_U8)
54650b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_LO_I8)
54660b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_D16_LO_U8)
54670b57cec5SDimitry Andric   NODE_NAME_CASE(STORE_MSKOR)
54680b57cec5SDimitry Andric   NODE_NAME_CASE(LOAD_CONSTANT)
54690b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
54700b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
54710b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
54720b57cec5SDimitry Andric   NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
54730b57cec5SDimitry Andric   NODE_NAME_CASE(DS_ORDERED_COUNT)
54740b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_CMP_SWAP)
54750b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
54760b57cec5SDimitry Andric   NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
54770b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD)
54780b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_UBYTE)
54790b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_USHORT)
54800b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_BYTE)
54810b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_SHORT)
54820b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
5483bdd1243dSDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE)
54840b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16)
54850b57cec5SDimitry Andric   NODE_NAME_CASE(SBUFFER_LOAD)
54867a6dacacSDimitry Andric   NODE_NAME_CASE(SBUFFER_LOAD_BYTE)
54877a6dacacSDimitry Andric   NODE_NAME_CASE(SBUFFER_LOAD_UBYTE)
54887a6dacacSDimitry Andric   NODE_NAME_CASE(SBUFFER_LOAD_SHORT)
54897a6dacacSDimitry Andric   NODE_NAME_CASE(SBUFFER_LOAD_USHORT)
54900b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE)
54910b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_BYTE)
54920b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_SHORT)
54930b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_FORMAT)
54940b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16)
54950b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SWAP)
54960b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_ADD)
54970b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SUB)
54980b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SMIN)
54990b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_UMIN)
55000b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_SMAX)
55010b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_UMAX)
55020b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_AND)
55030b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_OR)
55040b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_XOR)
55058bcb0991SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_INC)
55068bcb0991SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_DEC)
55070b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP)
55085ffd83dbSDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_CSUB)
55090b57cec5SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FADD)
55107a6dacacSDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FADD_BF16)
5511fe6060f1SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FMIN)
5512fe6060f1SDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_FMAX)
55137a6dacacSDimitry Andric   NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32)
55140b57cec5SDimitry Andric 
55150b57cec5SDimitry Andric   case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
55160b57cec5SDimitry Andric   }
55170b57cec5SDimitry Andric   return nullptr;
55180b57cec5SDimitry Andric }
55190b57cec5SDimitry Andric 
getSqrtEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps,bool & UseOneConstNR,bool Reciprocal) const55200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
55210b57cec5SDimitry Andric                                               SelectionDAG &DAG, int Enabled,
55220b57cec5SDimitry Andric                                               int &RefinementSteps,
55230b57cec5SDimitry Andric                                               bool &UseOneConstNR,
55240b57cec5SDimitry Andric                                               bool Reciprocal) const {
55250b57cec5SDimitry Andric   EVT VT = Operand.getValueType();
55260b57cec5SDimitry Andric 
55270b57cec5SDimitry Andric   if (VT == MVT::f32) {
55280b57cec5SDimitry Andric     RefinementSteps = 0;
55290b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand);
55300b57cec5SDimitry Andric   }
55310b57cec5SDimitry Andric 
55320b57cec5SDimitry Andric   // TODO: There is also f64 rsq instruction, but the documentation is less
55330b57cec5SDimitry Andric   // clear on its precision.
55340b57cec5SDimitry Andric 
55350b57cec5SDimitry Andric   return SDValue();
55360b57cec5SDimitry Andric }
55370b57cec5SDimitry Andric 
getRecipEstimate(SDValue Operand,SelectionDAG & DAG,int Enabled,int & RefinementSteps) const55380b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
55390b57cec5SDimitry Andric                                                SelectionDAG &DAG, int Enabled,
55400b57cec5SDimitry Andric                                                int &RefinementSteps) const {
55410b57cec5SDimitry Andric   EVT VT = Operand.getValueType();
55420b57cec5SDimitry Andric 
55430b57cec5SDimitry Andric   if (VT == MVT::f32) {
55440b57cec5SDimitry Andric     // Reciprocal, < 1 ulp error.
55450b57cec5SDimitry Andric     //
55460b57cec5SDimitry Andric     // This reciprocal approximation converges to < 0.5 ulp error with one
55470b57cec5SDimitry Andric     // newton rhapson performed with two fused multiple adds (FMAs).
55480b57cec5SDimitry Andric 
55490b57cec5SDimitry Andric     RefinementSteps = 0;
55500b57cec5SDimitry Andric     return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand);
55510b57cec5SDimitry Andric   }
55520b57cec5SDimitry Andric 
55530b57cec5SDimitry Andric   // TODO: There is also f64 rcp instruction, but the documentation is less
55540b57cec5SDimitry Andric   // clear on its precision.
55550b57cec5SDimitry Andric 
55560b57cec5SDimitry Andric   return SDValue();
55570b57cec5SDimitry Andric }
55580b57cec5SDimitry Andric 
workitemIntrinsicDim(unsigned ID)555981ad6265SDimitry Andric static unsigned workitemIntrinsicDim(unsigned ID) {
556081ad6265SDimitry Andric   switch (ID) {
556181ad6265SDimitry Andric   case Intrinsic::amdgcn_workitem_id_x:
556281ad6265SDimitry Andric     return 0;
556381ad6265SDimitry Andric   case Intrinsic::amdgcn_workitem_id_y:
556481ad6265SDimitry Andric     return 1;
556581ad6265SDimitry Andric   case Intrinsic::amdgcn_workitem_id_z:
556681ad6265SDimitry Andric     return 2;
556781ad6265SDimitry Andric   default:
556881ad6265SDimitry Andric     llvm_unreachable("not a workitem intrinsic");
556981ad6265SDimitry Andric   }
557081ad6265SDimitry Andric }
557181ad6265SDimitry Andric 
computeKnownBitsForTargetNode(const SDValue Op,KnownBits & Known,const APInt & DemandedElts,const SelectionDAG & DAG,unsigned Depth) const55720b57cec5SDimitry Andric void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
55730b57cec5SDimitry Andric     const SDValue Op, KnownBits &Known,
55740b57cec5SDimitry Andric     const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
55750b57cec5SDimitry Andric 
55760b57cec5SDimitry Andric   Known.resetAll(); // Don't know anything.
55770b57cec5SDimitry Andric 
55780b57cec5SDimitry Andric   unsigned Opc = Op.getOpcode();
55790b57cec5SDimitry Andric 
55800b57cec5SDimitry Andric   switch (Opc) {
55810b57cec5SDimitry Andric   default:
55820b57cec5SDimitry Andric     break;
55830b57cec5SDimitry Andric   case AMDGPUISD::CARRY:
55840b57cec5SDimitry Andric   case AMDGPUISD::BORROW: {
55850b57cec5SDimitry Andric     Known.Zero = APInt::getHighBitsSet(32, 31);
55860b57cec5SDimitry Andric     break;
55870b57cec5SDimitry Andric   }
55880b57cec5SDimitry Andric 
55890b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32:
55900b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
55910b57cec5SDimitry Andric     ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2));
55920b57cec5SDimitry Andric     if (!CWidth)
55930b57cec5SDimitry Andric       return;
55940b57cec5SDimitry Andric 
55950b57cec5SDimitry Andric     uint32_t Width = CWidth->getZExtValue() & 0x1f;
55960b57cec5SDimitry Andric 
55970b57cec5SDimitry Andric     if (Opc == AMDGPUISD::BFE_U32)
55980b57cec5SDimitry Andric       Known.Zero = APInt::getHighBitsSet(32, 32 - Width);
55990b57cec5SDimitry Andric 
56000b57cec5SDimitry Andric     break;
56010b57cec5SDimitry Andric   }
5602fe6060f1SDimitry Andric   case AMDGPUISD::FP_TO_FP16: {
56030b57cec5SDimitry Andric     unsigned BitWidth = Known.getBitWidth();
56040b57cec5SDimitry Andric 
56050b57cec5SDimitry Andric     // High bits are zero.
56060b57cec5SDimitry Andric     Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
56070b57cec5SDimitry Andric     break;
56080b57cec5SDimitry Andric   }
56090b57cec5SDimitry Andric   case AMDGPUISD::MUL_U24:
56100b57cec5SDimitry Andric   case AMDGPUISD::MUL_I24: {
56110b57cec5SDimitry Andric     KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
56120b57cec5SDimitry Andric     KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
56130b57cec5SDimitry Andric     unsigned TrailZ = LHSKnown.countMinTrailingZeros() +
56140b57cec5SDimitry Andric                       RHSKnown.countMinTrailingZeros();
56150b57cec5SDimitry Andric     Known.Zero.setLowBits(std::min(TrailZ, 32u));
5616480093f4SDimitry Andric     // Skip extra check if all bits are known zeros.
5617480093f4SDimitry Andric     if (TrailZ >= 32)
5618480093f4SDimitry Andric       break;
56190b57cec5SDimitry Andric 
56200b57cec5SDimitry Andric     // Truncate to 24 bits.
56210b57cec5SDimitry Andric     LHSKnown = LHSKnown.trunc(24);
56220b57cec5SDimitry Andric     RHSKnown = RHSKnown.trunc(24);
56230b57cec5SDimitry Andric 
56240b57cec5SDimitry Andric     if (Opc == AMDGPUISD::MUL_I24) {
562504eeddc0SDimitry Andric       unsigned LHSValBits = LHSKnown.countMaxSignificantBits();
562604eeddc0SDimitry Andric       unsigned RHSValBits = RHSKnown.countMaxSignificantBits();
562704eeddc0SDimitry Andric       unsigned MaxValBits = LHSValBits + RHSValBits;
562804eeddc0SDimitry Andric       if (MaxValBits > 32)
56290b57cec5SDimitry Andric         break;
563004eeddc0SDimitry Andric       unsigned SignBits = 32 - MaxValBits + 1;
56310b57cec5SDimitry Andric       bool LHSNegative = LHSKnown.isNegative();
5632480093f4SDimitry Andric       bool LHSNonNegative = LHSKnown.isNonNegative();
5633480093f4SDimitry Andric       bool LHSPositive = LHSKnown.isStrictlyPositive();
56340b57cec5SDimitry Andric       bool RHSNegative = RHSKnown.isNegative();
5635480093f4SDimitry Andric       bool RHSNonNegative = RHSKnown.isNonNegative();
5636480093f4SDimitry Andric       bool RHSPositive = RHSKnown.isStrictlyPositive();
5637480093f4SDimitry Andric 
5638480093f4SDimitry Andric       if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative))
563904eeddc0SDimitry Andric         Known.Zero.setHighBits(SignBits);
5640480093f4SDimitry Andric       else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative))
564104eeddc0SDimitry Andric         Known.One.setHighBits(SignBits);
56420b57cec5SDimitry Andric     } else {
564304eeddc0SDimitry Andric       unsigned LHSValBits = LHSKnown.countMaxActiveBits();
564404eeddc0SDimitry Andric       unsigned RHSValBits = RHSKnown.countMaxActiveBits();
564504eeddc0SDimitry Andric       unsigned MaxValBits = LHSValBits + RHSValBits;
56460b57cec5SDimitry Andric       if (MaxValBits >= 32)
56470b57cec5SDimitry Andric         break;
564804eeddc0SDimitry Andric       Known.Zero.setBitsFrom(MaxValBits);
56490b57cec5SDimitry Andric     }
56500b57cec5SDimitry Andric     break;
56510b57cec5SDimitry Andric   }
56520b57cec5SDimitry Andric   case AMDGPUISD::PERM: {
56530b57cec5SDimitry Andric     ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2));
56540b57cec5SDimitry Andric     if (!CMask)
56550b57cec5SDimitry Andric       return;
56560b57cec5SDimitry Andric 
56570b57cec5SDimitry Andric     KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
56580b57cec5SDimitry Andric     KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
56590b57cec5SDimitry Andric     unsigned Sel = CMask->getZExtValue();
56600b57cec5SDimitry Andric 
56610b57cec5SDimitry Andric     for (unsigned I = 0; I < 32; I += 8) {
56620b57cec5SDimitry Andric       unsigned SelBits = Sel & 0xff;
56630b57cec5SDimitry Andric       if (SelBits < 4) {
56640b57cec5SDimitry Andric         SelBits *= 8;
56650b57cec5SDimitry Andric         Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
56660b57cec5SDimitry Andric         Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
56670b57cec5SDimitry Andric       } else if (SelBits < 7) {
56680b57cec5SDimitry Andric         SelBits = (SelBits & 3) * 8;
56690b57cec5SDimitry Andric         Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I;
56700b57cec5SDimitry Andric         Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I;
56710b57cec5SDimitry Andric       } else if (SelBits == 0x0c) {
56728bcb0991SDimitry Andric         Known.Zero |= 0xFFull << I;
56730b57cec5SDimitry Andric       } else if (SelBits > 0x0c) {
56748bcb0991SDimitry Andric         Known.One |= 0xFFull << I;
56750b57cec5SDimitry Andric       }
56760b57cec5SDimitry Andric       Sel >>= 8;
56770b57cec5SDimitry Andric     }
56780b57cec5SDimitry Andric     break;
56790b57cec5SDimitry Andric   }
56800b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_UBYTE:  {
56810b57cec5SDimitry Andric     Known.Zero.setHighBits(24);
56820b57cec5SDimitry Andric     break;
56830b57cec5SDimitry Andric   }
56840b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_USHORT: {
56850b57cec5SDimitry Andric     Known.Zero.setHighBits(16);
56860b57cec5SDimitry Andric     break;
56870b57cec5SDimitry Andric   }
56880b57cec5SDimitry Andric   case AMDGPUISD::LDS: {
56890b57cec5SDimitry Andric     auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode());
56905ffd83dbSDimitry Andric     Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout());
56910b57cec5SDimitry Andric 
56920b57cec5SDimitry Andric     Known.Zero.setHighBits(16);
56935ffd83dbSDimitry Andric     Known.Zero.setLowBits(Log2(Alignment));
56940b57cec5SDimitry Andric     break;
56950b57cec5SDimitry Andric   }
569606c3fb27SDimitry Andric   case AMDGPUISD::SMIN3:
569706c3fb27SDimitry Andric   case AMDGPUISD::SMAX3:
569806c3fb27SDimitry Andric   case AMDGPUISD::SMED3:
569906c3fb27SDimitry Andric   case AMDGPUISD::UMIN3:
570006c3fb27SDimitry Andric   case AMDGPUISD::UMAX3:
570106c3fb27SDimitry Andric   case AMDGPUISD::UMED3: {
570206c3fb27SDimitry Andric     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
570306c3fb27SDimitry Andric     if (Known2.isUnknown())
570406c3fb27SDimitry Andric       break;
570506c3fb27SDimitry Andric 
570606c3fb27SDimitry Andric     KnownBits Known1 = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
570706c3fb27SDimitry Andric     if (Known1.isUnknown())
570806c3fb27SDimitry Andric       break;
570906c3fb27SDimitry Andric 
571006c3fb27SDimitry Andric     KnownBits Known0 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
571106c3fb27SDimitry Andric     if (Known0.isUnknown())
571206c3fb27SDimitry Andric       break;
571306c3fb27SDimitry Andric 
571406c3fb27SDimitry Andric     // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
571506c3fb27SDimitry Andric     Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
571606c3fb27SDimitry Andric     Known.One = Known0.One & Known1.One & Known2.One;
571706c3fb27SDimitry Andric     break;
571806c3fb27SDimitry Andric   }
57190b57cec5SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
5720647cbc5dSDimitry Andric     unsigned IID = Op.getConstantOperandVal(0);
57210b57cec5SDimitry Andric     switch (IID) {
572281ad6265SDimitry Andric     case Intrinsic::amdgcn_workitem_id_x:
572381ad6265SDimitry Andric     case Intrinsic::amdgcn_workitem_id_y:
572481ad6265SDimitry Andric     case Intrinsic::amdgcn_workitem_id_z: {
572581ad6265SDimitry Andric       unsigned MaxValue = Subtarget->getMaxWorkitemID(
572681ad6265SDimitry Andric           DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID));
572706c3fb27SDimitry Andric       Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
572881ad6265SDimitry Andric       break;
572981ad6265SDimitry Andric     }
57300b57cec5SDimitry Andric     default:
57310b57cec5SDimitry Andric       break;
57320b57cec5SDimitry Andric     }
57330b57cec5SDimitry Andric   }
57340b57cec5SDimitry Andric   }
57350b57cec5SDimitry Andric }
57360b57cec5SDimitry Andric 
ComputeNumSignBitsForTargetNode(SDValue Op,const APInt & DemandedElts,const SelectionDAG & DAG,unsigned Depth) const57370b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
57380b57cec5SDimitry Andric     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
57390b57cec5SDimitry Andric     unsigned Depth) const {
57400b57cec5SDimitry Andric   switch (Op.getOpcode()) {
57410b57cec5SDimitry Andric   case AMDGPUISD::BFE_I32: {
57420b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
57430b57cec5SDimitry Andric     if (!Width)
57440b57cec5SDimitry Andric       return 1;
57450b57cec5SDimitry Andric 
57460b57cec5SDimitry Andric     unsigned SignBits = 32 - Width->getZExtValue() + 1;
57470b57cec5SDimitry Andric     if (!isNullConstant(Op.getOperand(1)))
57480b57cec5SDimitry Andric       return SignBits;
57490b57cec5SDimitry Andric 
57500b57cec5SDimitry Andric     // TODO: Could probably figure something out with non-0 offsets.
57510b57cec5SDimitry Andric     unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
57520b57cec5SDimitry Andric     return std::max(SignBits, Op0SignBits);
57530b57cec5SDimitry Andric   }
57540b57cec5SDimitry Andric 
57550b57cec5SDimitry Andric   case AMDGPUISD::BFE_U32: {
57560b57cec5SDimitry Andric     ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2));
57570b57cec5SDimitry Andric     return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1;
57580b57cec5SDimitry Andric   }
57590b57cec5SDimitry Andric 
57600b57cec5SDimitry Andric   case AMDGPUISD::CARRY:
57610b57cec5SDimitry Andric   case AMDGPUISD::BORROW:
57620b57cec5SDimitry Andric     return 31;
57630b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_BYTE:
57640b57cec5SDimitry Andric     return 25;
57650b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_SHORT:
57660b57cec5SDimitry Andric     return 17;
57670b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_UBYTE:
57680b57cec5SDimitry Andric     return 24;
57690b57cec5SDimitry Andric   case AMDGPUISD::BUFFER_LOAD_USHORT:
57700b57cec5SDimitry Andric     return 16;
57710b57cec5SDimitry Andric   case AMDGPUISD::FP_TO_FP16:
57720b57cec5SDimitry Andric     return 16;
577306c3fb27SDimitry Andric   case AMDGPUISD::SMIN3:
577406c3fb27SDimitry Andric   case AMDGPUISD::SMAX3:
577506c3fb27SDimitry Andric   case AMDGPUISD::SMED3:
577606c3fb27SDimitry Andric   case AMDGPUISD::UMIN3:
577706c3fb27SDimitry Andric   case AMDGPUISD::UMAX3:
577806c3fb27SDimitry Andric   case AMDGPUISD::UMED3: {
577906c3fb27SDimitry Andric     unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1);
578006c3fb27SDimitry Andric     if (Tmp2 == 1)
578106c3fb27SDimitry Andric       return 1; // Early out.
578206c3fb27SDimitry Andric 
578306c3fb27SDimitry Andric     unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
578406c3fb27SDimitry Andric     if (Tmp1 == 1)
578506c3fb27SDimitry Andric       return 1; // Early out.
578606c3fb27SDimitry Andric 
578706c3fb27SDimitry Andric     unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
578806c3fb27SDimitry Andric     if (Tmp0 == 1)
578906c3fb27SDimitry Andric       return 1; // Early out.
579006c3fb27SDimitry Andric 
579106c3fb27SDimitry Andric     return std::min(Tmp0, std::min(Tmp1, Tmp2));
579206c3fb27SDimitry Andric   }
57930b57cec5SDimitry Andric   default:
57940b57cec5SDimitry Andric     return 1;
57950b57cec5SDimitry Andric   }
57960b57cec5SDimitry Andric }
57970b57cec5SDimitry Andric 
computeNumSignBitsForTargetInstr(GISelKnownBits & Analysis,Register R,const APInt & DemandedElts,const MachineRegisterInfo & MRI,unsigned Depth) const57985ffd83dbSDimitry Andric unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
57995ffd83dbSDimitry Andric   GISelKnownBits &Analysis, Register R,
58005ffd83dbSDimitry Andric   const APInt &DemandedElts, const MachineRegisterInfo &MRI,
58015ffd83dbSDimitry Andric   unsigned Depth) const {
58025ffd83dbSDimitry Andric   const MachineInstr *MI = MRI.getVRegDef(R);
58035ffd83dbSDimitry Andric   if (!MI)
58045ffd83dbSDimitry Andric     return 1;
58055ffd83dbSDimitry Andric 
58065ffd83dbSDimitry Andric   // TODO: Check range metadata on MMO.
58075ffd83dbSDimitry Andric   switch (MI->getOpcode()) {
58085ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE:
58095ffd83dbSDimitry Andric     return 25;
58105ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT:
58115ffd83dbSDimitry Andric     return 17;
58125ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
58135ffd83dbSDimitry Andric     return 24;
58145ffd83dbSDimitry Andric   case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
58155ffd83dbSDimitry Andric     return 16;
581606c3fb27SDimitry Andric   case AMDGPU::G_AMDGPU_SMED3:
581706c3fb27SDimitry Andric   case AMDGPU::G_AMDGPU_UMED3: {
581806c3fb27SDimitry Andric     auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
581906c3fb27SDimitry Andric     unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
582006c3fb27SDimitry Andric     if (Tmp2 == 1)
582106c3fb27SDimitry Andric       return 1;
582206c3fb27SDimitry Andric     unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
582306c3fb27SDimitry Andric     if (Tmp1 == 1)
582406c3fb27SDimitry Andric       return 1;
582506c3fb27SDimitry Andric     unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
582606c3fb27SDimitry Andric     if (Tmp0 == 1)
582706c3fb27SDimitry Andric       return 1;
582806c3fb27SDimitry Andric     return std::min(Tmp0, std::min(Tmp1, Tmp2));
582906c3fb27SDimitry Andric   }
58305ffd83dbSDimitry Andric   default:
58315ffd83dbSDimitry Andric     return 1;
58325ffd83dbSDimitry Andric   }
58335ffd83dbSDimitry Andric }
58345ffd83dbSDimitry Andric 
isKnownNeverNaNForTargetNode(SDValue Op,const SelectionDAG & DAG,bool SNaN,unsigned Depth) const58350b57cec5SDimitry Andric bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
58360b57cec5SDimitry Andric                                                         const SelectionDAG &DAG,
58370b57cec5SDimitry Andric                                                         bool SNaN,
58380b57cec5SDimitry Andric                                                         unsigned Depth) const {
58390b57cec5SDimitry Andric   unsigned Opcode = Op.getOpcode();
58400b57cec5SDimitry Andric   switch (Opcode) {
58410b57cec5SDimitry Andric   case AMDGPUISD::FMIN_LEGACY:
58420b57cec5SDimitry Andric   case AMDGPUISD::FMAX_LEGACY: {
58430b57cec5SDimitry Andric     if (SNaN)
58440b57cec5SDimitry Andric       return true;
58450b57cec5SDimitry Andric 
58460b57cec5SDimitry Andric     // TODO: Can check no nans on one of the operands for each one, but which
58470b57cec5SDimitry Andric     // one?
58480b57cec5SDimitry Andric     return false;
58490b57cec5SDimitry Andric   }
58500b57cec5SDimitry Andric   case AMDGPUISD::FMUL_LEGACY:
58510b57cec5SDimitry Andric   case AMDGPUISD::CVT_PKRTZ_F16_F32: {
58520b57cec5SDimitry Andric     if (SNaN)
58530b57cec5SDimitry Andric       return true;
58540b57cec5SDimitry Andric     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
58550b57cec5SDimitry Andric            DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
58560b57cec5SDimitry Andric   }
58570b57cec5SDimitry Andric   case AMDGPUISD::FMED3:
58580b57cec5SDimitry Andric   case AMDGPUISD::FMIN3:
58590b57cec5SDimitry Andric   case AMDGPUISD::FMAX3:
58605f757f3fSDimitry Andric   case AMDGPUISD::FMINIMUM3:
58615f757f3fSDimitry Andric   case AMDGPUISD::FMAXIMUM3:
58620b57cec5SDimitry Andric   case AMDGPUISD::FMAD_FTZ: {
58630b57cec5SDimitry Andric     if (SNaN)
58640b57cec5SDimitry Andric       return true;
58650b57cec5SDimitry Andric     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) &&
58660b57cec5SDimitry Andric            DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
58670b57cec5SDimitry Andric            DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
58680b57cec5SDimitry Andric   }
58690b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE0:
58700b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE1:
58710b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE2:
58720b57cec5SDimitry Andric   case AMDGPUISD::CVT_F32_UBYTE3:
58730b57cec5SDimitry Andric     return true;
58740b57cec5SDimitry Andric 
58750b57cec5SDimitry Andric   case AMDGPUISD::RCP:
58760b57cec5SDimitry Andric   case AMDGPUISD::RSQ:
58770b57cec5SDimitry Andric   case AMDGPUISD::RCP_LEGACY:
58780b57cec5SDimitry Andric   case AMDGPUISD::RSQ_CLAMP: {
58790b57cec5SDimitry Andric     if (SNaN)
58800b57cec5SDimitry Andric       return true;
58810b57cec5SDimitry Andric 
58820b57cec5SDimitry Andric     // TODO: Need is known positive check.
58830b57cec5SDimitry Andric     return false;
58840b57cec5SDimitry Andric   }
588506c3fb27SDimitry Andric   case ISD::FLDEXP:
58860b57cec5SDimitry Andric   case AMDGPUISD::FRACT: {
58870b57cec5SDimitry Andric     if (SNaN)
58880b57cec5SDimitry Andric       return true;
58890b57cec5SDimitry Andric     return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
58900b57cec5SDimitry Andric   }
58910b57cec5SDimitry Andric   case AMDGPUISD::DIV_SCALE:
58920b57cec5SDimitry Andric   case AMDGPUISD::DIV_FMAS:
58930b57cec5SDimitry Andric   case AMDGPUISD::DIV_FIXUP:
58940b57cec5SDimitry Andric     // TODO: Refine on operands.
58950b57cec5SDimitry Andric     return SNaN;
58960b57cec5SDimitry Andric   case AMDGPUISD::SIN_HW:
58970b57cec5SDimitry Andric   case AMDGPUISD::COS_HW: {
58980b57cec5SDimitry Andric     // TODO: Need check for infinity
58990b57cec5SDimitry Andric     return SNaN;
59000b57cec5SDimitry Andric   }
59010b57cec5SDimitry Andric   case ISD::INTRINSIC_WO_CHAIN: {
5902647cbc5dSDimitry Andric     unsigned IntrinsicID = Op.getConstantOperandVal(0);
59030b57cec5SDimitry Andric     // TODO: Handle more intrinsics
59040b57cec5SDimitry Andric     switch (IntrinsicID) {
59050b57cec5SDimitry Andric     case Intrinsic::amdgcn_cubeid:
59060b57cec5SDimitry Andric       return true;
59070b57cec5SDimitry Andric 
59080b57cec5SDimitry Andric     case Intrinsic::amdgcn_frexp_mant: {
59090b57cec5SDimitry Andric       if (SNaN)
59100b57cec5SDimitry Andric         return true;
59110b57cec5SDimitry Andric       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1);
59120b57cec5SDimitry Andric     }
59130b57cec5SDimitry Andric     case Intrinsic::amdgcn_cvt_pkrtz: {
59140b57cec5SDimitry Andric       if (SNaN)
59150b57cec5SDimitry Andric         return true;
59160b57cec5SDimitry Andric       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
59170b57cec5SDimitry Andric              DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1);
59180b57cec5SDimitry Andric     }
59195ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rcp:
59205ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rsq:
59215ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rcp_legacy:
59225ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rsq_legacy:
59235ffd83dbSDimitry Andric     case Intrinsic::amdgcn_rsq_clamp: {
59245ffd83dbSDimitry Andric       if (SNaN)
59255ffd83dbSDimitry Andric         return true;
59265ffd83dbSDimitry Andric 
59275ffd83dbSDimitry Andric       // TODO: Need is known positive check.
59285ffd83dbSDimitry Andric       return false;
59295ffd83dbSDimitry Andric     }
59305ffd83dbSDimitry Andric     case Intrinsic::amdgcn_trig_preop:
59310b57cec5SDimitry Andric     case Intrinsic::amdgcn_fdot2:
59320b57cec5SDimitry Andric       // TODO: Refine on operand
59330b57cec5SDimitry Andric       return SNaN;
5934e8d8bef9SDimitry Andric     case Intrinsic::amdgcn_fma_legacy:
5935e8d8bef9SDimitry Andric       if (SNaN)
5936e8d8bef9SDimitry Andric         return true;
5937e8d8bef9SDimitry Andric       return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) &&
5938e8d8bef9SDimitry Andric              DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) &&
5939e8d8bef9SDimitry Andric              DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1);
59400b57cec5SDimitry Andric     default:
59410b57cec5SDimitry Andric       return false;
59420b57cec5SDimitry Andric     }
59430b57cec5SDimitry Andric   }
59440b57cec5SDimitry Andric   default:
59450b57cec5SDimitry Andric     return false;
59460b57cec5SDimitry Andric   }
59470b57cec5SDimitry Andric }
59480b57cec5SDimitry Andric 
isReassocProfitable(MachineRegisterInfo & MRI,Register N0,Register N1) const594906c3fb27SDimitry Andric bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
595006c3fb27SDimitry Andric                                                Register N0, Register N1) const {
595106c3fb27SDimitry Andric   return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
595206c3fb27SDimitry Andric }
595306c3fb27SDimitry Andric 
59540b57cec5SDimitry Andric TargetLowering::AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst * RMW) const59550b57cec5SDimitry Andric AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
59560b57cec5SDimitry Andric   switch (RMW->getOperation()) {
59570b57cec5SDimitry Andric   case AtomicRMWInst::Nand:
59580b57cec5SDimitry Andric   case AtomicRMWInst::FAdd:
59590b57cec5SDimitry Andric   case AtomicRMWInst::FSub:
5960753f127fSDimitry Andric   case AtomicRMWInst::FMax:
5961753f127fSDimitry Andric   case AtomicRMWInst::FMin:
59620b57cec5SDimitry Andric     return AtomicExpansionKind::CmpXChg;
5963bdd1243dSDimitry Andric   default: {
5964bdd1243dSDimitry Andric     if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
5965bdd1243dSDimitry Andric       unsigned Size = IntTy->getBitWidth();
5966bdd1243dSDimitry Andric       if (Size == 32 || Size == 64)
59670b57cec5SDimitry Andric         return AtomicExpansionKind::None;
59680b57cec5SDimitry Andric     }
5969bdd1243dSDimitry Andric 
5970bdd1243dSDimitry Andric     return AtomicExpansionKind::CmpXChg;
5971bdd1243dSDimitry Andric   }
5972bdd1243dSDimitry Andric   }
59730b57cec5SDimitry Andric }
5974fe6060f1SDimitry Andric 
597506c3fb27SDimitry Andric /// Whether it is profitable to sink the operands of an
597606c3fb27SDimitry Andric /// Instruction I to the basic block of I.
597706c3fb27SDimitry Andric /// This helps using several modifiers (like abs and neg) more often.
shouldSinkOperands(Instruction * I,SmallVectorImpl<Use * > & Ops) const597806c3fb27SDimitry Andric bool AMDGPUTargetLowering::shouldSinkOperands(
597906c3fb27SDimitry Andric     Instruction *I, SmallVectorImpl<Use *> &Ops) const {
598006c3fb27SDimitry Andric   using namespace PatternMatch;
598106c3fb27SDimitry Andric 
598206c3fb27SDimitry Andric   for (auto &Op : I->operands()) {
598306c3fb27SDimitry Andric     // Ensure we are not already sinking this operand.
598406c3fb27SDimitry Andric     if (any_of(Ops, [&](Use *U) { return U->get() == Op.get(); }))
598506c3fb27SDimitry Andric       continue;
598606c3fb27SDimitry Andric 
598706c3fb27SDimitry Andric     if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value())))
598806c3fb27SDimitry Andric       Ops.push_back(&Op);
598906c3fb27SDimitry Andric   }
599006c3fb27SDimitry Andric 
599106c3fb27SDimitry Andric   return !Ops.empty();
599206c3fb27SDimitry Andric }
5993