//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp ----------------*- C++ -*-===//
206f32e7eSjoerg //
306f32e7eSjoerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
406f32e7eSjoerg // See https://llvm.org/LICENSE.txt for license information.
506f32e7eSjoerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
606f32e7eSjoerg //
706f32e7eSjoerg //===----------------------------------------------------------------------===//
806f32e7eSjoerg //
906f32e7eSjoerg /// Provides analysis for querying information about KnownBits during GISel
1006f32e7eSjoerg /// passes.
1106f32e7eSjoerg //
//===----------------------------------------------------------------------===//
1306f32e7eSjoerg #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
1406f32e7eSjoerg #include "llvm/Analysis/ValueTracking.h"
1506f32e7eSjoerg #include "llvm/CodeGen/GlobalISel/Utils.h"
1606f32e7eSjoerg #include "llvm/CodeGen/MachineFrameInfo.h"
1706f32e7eSjoerg #include "llvm/CodeGen/MachineRegisterInfo.h"
1806f32e7eSjoerg #include "llvm/CodeGen/TargetLowering.h"
1906f32e7eSjoerg #include "llvm/CodeGen/TargetOpcodes.h"
2006f32e7eSjoerg 
#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;

// Unique address used by the LLVM pass infrastructure to identify this
// analysis; the value itself is irrelevant.
char llvm::GISelKnownBitsAnalysis::ID = 0;

// Register the analysis with the pass registry. It is an analysis pass
// (last argument true) that does not modify the CFG (second-to-last false).
INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)
2906f32e7eSjoerg 
// Construct the known-bits analysis for \p MF. \p MaxDepth bounds how far
// computeKnownBitsImpl / computeNumSignBits recurse through def chains.
GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getParent()->getDataLayout()), MaxDepth(MaxDepth) {}
3306f32e7eSjoerg 
computeKnownAlignment(Register R,unsigned Depth)34*da58b97aSjoerg Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
35*da58b97aSjoerg   const MachineInstr *MI = MRI.getVRegDef(R);
36*da58b97aSjoerg   switch (MI->getOpcode()) {
37*da58b97aSjoerg   case TargetOpcode::COPY:
38*da58b97aSjoerg     return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
39*da58b97aSjoerg   case TargetOpcode::G_FRAME_INDEX: {
40*da58b97aSjoerg     int FrameIdx = MI->getOperand(1).getIndex();
41*da58b97aSjoerg     return MF.getFrameInfo().getObjectAlign(FrameIdx);
4206f32e7eSjoerg   }
43*da58b97aSjoerg   case TargetOpcode::G_INTRINSIC:
44*da58b97aSjoerg   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
45*da58b97aSjoerg   default:
46*da58b97aSjoerg     return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
4706f32e7eSjoerg   }
4806f32e7eSjoerg }
4906f32e7eSjoerg 
getKnownBits(MachineInstr & MI)5006f32e7eSjoerg KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
51*da58b97aSjoerg   assert(MI.getNumExplicitDefs() == 1 &&
52*da58b97aSjoerg          "expected single return generic instruction");
5306f32e7eSjoerg   return getKnownBits(MI.getOperand(0).getReg());
5406f32e7eSjoerg }
5506f32e7eSjoerg 
getKnownBits(Register R)5606f32e7eSjoerg KnownBits GISelKnownBits::getKnownBits(Register R) {
57*da58b97aSjoerg   const LLT Ty = MRI.getType(R);
5806f32e7eSjoerg   APInt DemandedElts =
5906f32e7eSjoerg       Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
60*da58b97aSjoerg   return getKnownBits(R, DemandedElts);
61*da58b97aSjoerg }
62*da58b97aSjoerg 
getKnownBits(Register R,const APInt & DemandedElts,unsigned Depth)63*da58b97aSjoerg KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
64*da58b97aSjoerg                                        unsigned Depth) {
65*da58b97aSjoerg   // For now, we only maintain the cache during one request.
66*da58b97aSjoerg   assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");
67*da58b97aSjoerg 
68*da58b97aSjoerg   KnownBits Known;
6906f32e7eSjoerg   computeKnownBitsImpl(R, Known, DemandedElts);
70*da58b97aSjoerg   ComputeKnownBitsCache.clear();
7106f32e7eSjoerg   return Known;
7206f32e7eSjoerg }
7306f32e7eSjoerg 
signBitIsZero(Register R)7406f32e7eSjoerg bool GISelKnownBits::signBitIsZero(Register R) {
7506f32e7eSjoerg   LLT Ty = MRI.getType(R);
7606f32e7eSjoerg   unsigned BitWidth = Ty.getScalarSizeInBits();
7706f32e7eSjoerg   return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
7806f32e7eSjoerg }
7906f32e7eSjoerg 
getKnownZeroes(Register R)8006f32e7eSjoerg APInt GISelKnownBits::getKnownZeroes(Register R) {
8106f32e7eSjoerg   return getKnownBits(R).Zero;
8206f32e7eSjoerg }
8306f32e7eSjoerg 
getKnownOnes(Register R)8406f32e7eSjoerg APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
8506f32e7eSjoerg 
86*da58b97aSjoerg LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr & MI,const KnownBits & Known,unsigned Depth)87*da58b97aSjoerg dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
88*da58b97aSjoerg   dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
89*da58b97aSjoerg          << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
90*da58b97aSjoerg          << (Known.Zero | Known.One).toString(16, false) << "\n"
91*da58b97aSjoerg          << "[" << Depth << "] Zero: 0x" << Known.Zero.toString(16, false)
92*da58b97aSjoerg          << "\n"
93*da58b97aSjoerg          << "[" << Depth << "] One:  0x" << Known.One.toString(16, false)
94*da58b97aSjoerg          << "\n";
95*da58b97aSjoerg }
96*da58b97aSjoerg 
97*da58b97aSjoerg /// Compute known bits for the intersection of \p Src0 and \p Src1
computeKnownBitsMin(Register Src0,Register Src1,KnownBits & Known,const APInt & DemandedElts,unsigned Depth)98*da58b97aSjoerg void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
99*da58b97aSjoerg                                          KnownBits &Known,
100*da58b97aSjoerg                                          const APInt &DemandedElts,
101*da58b97aSjoerg                                          unsigned Depth) {
102*da58b97aSjoerg   // Test src1 first, since we canonicalize simpler expressions to the RHS.
103*da58b97aSjoerg   computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);
104*da58b97aSjoerg 
105*da58b97aSjoerg   // If we don't know any bits, early out.
106*da58b97aSjoerg   if (Known.isUnknown())
107*da58b97aSjoerg     return;
108*da58b97aSjoerg 
109*da58b97aSjoerg   KnownBits Known2;
110*da58b97aSjoerg   computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
111*da58b97aSjoerg 
112*da58b97aSjoerg   // Only known if known in both the LHS and RHS.
113*da58b97aSjoerg   Known = KnownBits::commonBits(Known, Known2);
114*da58b97aSjoerg }
115*da58b97aSjoerg 
computeKnownBitsImpl(Register R,KnownBits & Known,const APInt & DemandedElts,unsigned Depth)11606f32e7eSjoerg void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
11706f32e7eSjoerg                                           const APInt &DemandedElts,
11806f32e7eSjoerg                                           unsigned Depth) {
11906f32e7eSjoerg   MachineInstr &MI = *MRI.getVRegDef(R);
12006f32e7eSjoerg   unsigned Opcode = MI.getOpcode();
12106f32e7eSjoerg   LLT DstTy = MRI.getType(R);
12206f32e7eSjoerg 
12306f32e7eSjoerg   // Handle the case where this is called on a register that does not have a
12406f32e7eSjoerg   // type constraint (i.e. it has a register class constraint instead). This is
12506f32e7eSjoerg   // unlikely to occur except by looking through copies but it is possible for
12606f32e7eSjoerg   // the initial register being queried to be in this state.
12706f32e7eSjoerg   if (!DstTy.isValid()) {
12806f32e7eSjoerg     Known = KnownBits();
12906f32e7eSjoerg     return;
13006f32e7eSjoerg   }
13106f32e7eSjoerg 
132*da58b97aSjoerg   unsigned BitWidth = DstTy.getScalarSizeInBits();
133*da58b97aSjoerg   auto CacheEntry = ComputeKnownBitsCache.find(R);
134*da58b97aSjoerg   if (CacheEntry != ComputeKnownBitsCache.end()) {
135*da58b97aSjoerg     Known = CacheEntry->second;
136*da58b97aSjoerg     LLVM_DEBUG(dbgs() << "Cache hit at ");
137*da58b97aSjoerg     LLVM_DEBUG(dumpResult(MI, Known, Depth));
138*da58b97aSjoerg     assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
139*da58b97aSjoerg     return;
140*da58b97aSjoerg   }
14106f32e7eSjoerg   Known = KnownBits(BitWidth); // Don't know anything
14206f32e7eSjoerg 
143*da58b97aSjoerg   // Depth may get bigger than max depth if it gets passed to a different
144*da58b97aSjoerg   // GISelKnownBits object.
145*da58b97aSjoerg   // This may happen when say a generic part uses a GISelKnownBits object
146*da58b97aSjoerg   // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
147*da58b97aSjoerg   // which creates a new GISelKnownBits object with a different and smaller
148*da58b97aSjoerg   // depth. If we just check for equality, we would never exit if the depth
149*da58b97aSjoerg   // that is passed down to the target specific GISelKnownBits object is
150*da58b97aSjoerg   // already bigger than its max depth.
151*da58b97aSjoerg   if (Depth >= getMaxDepth())
15206f32e7eSjoerg     return;
15306f32e7eSjoerg 
15406f32e7eSjoerg   if (!DemandedElts)
15506f32e7eSjoerg     return; // No demanded elts, better to assume we don't know anything.
15606f32e7eSjoerg 
15706f32e7eSjoerg   KnownBits Known2;
15806f32e7eSjoerg 
15906f32e7eSjoerg   switch (Opcode) {
16006f32e7eSjoerg   default:
16106f32e7eSjoerg     TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
16206f32e7eSjoerg                                       Depth);
16306f32e7eSjoerg     break;
164*da58b97aSjoerg   case TargetOpcode::G_BUILD_VECTOR: {
165*da58b97aSjoerg     // Collect the known bits that are shared by every demanded vector element.
166*da58b97aSjoerg     Known.Zero.setAllBits(); Known.One.setAllBits();
167*da58b97aSjoerg     for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
168*da58b97aSjoerg       if (!DemandedElts[i])
169*da58b97aSjoerg         continue;
170*da58b97aSjoerg 
171*da58b97aSjoerg       computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, DemandedElts,
172*da58b97aSjoerg                            Depth + 1);
173*da58b97aSjoerg 
174*da58b97aSjoerg       // Known bits are the values that are shared by every demanded element.
175*da58b97aSjoerg       Known = KnownBits::commonBits(Known, Known2);
176*da58b97aSjoerg 
177*da58b97aSjoerg       // If we don't know any bits, early out.
178*da58b97aSjoerg       if (Known.isUnknown())
179*da58b97aSjoerg         break;
180*da58b97aSjoerg     }
181*da58b97aSjoerg     break;
182*da58b97aSjoerg   }
183*da58b97aSjoerg   case TargetOpcode::COPY:
184*da58b97aSjoerg   case TargetOpcode::G_PHI:
185*da58b97aSjoerg   case TargetOpcode::PHI: {
186*da58b97aSjoerg     Known.One = APInt::getAllOnesValue(BitWidth);
187*da58b97aSjoerg     Known.Zero = APInt::getAllOnesValue(BitWidth);
188*da58b97aSjoerg     // Destination registers should not have subregisters at this
189*da58b97aSjoerg     // point of the pipeline, otherwise the main live-range will be
190*da58b97aSjoerg     // defined more than once, which is against SSA.
191*da58b97aSjoerg     assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
192*da58b97aSjoerg     // Record in the cache that we know nothing for MI.
193*da58b97aSjoerg     // This will get updated later and in the meantime, if we reach that
194*da58b97aSjoerg     // phi again, because of a loop, we will cut the search thanks to this
195*da58b97aSjoerg     // cache entry.
196*da58b97aSjoerg     // We could actually build up more information on the phi by not cutting
197*da58b97aSjoerg     // the search, but that additional information is more a side effect
198*da58b97aSjoerg     // than an intended choice.
199*da58b97aSjoerg     // Therefore, for now, save on compile time until we derive a proper way
200*da58b97aSjoerg     // to derive known bits for PHIs within loops.
201*da58b97aSjoerg     ComputeKnownBitsCache[R] = KnownBits(BitWidth);
202*da58b97aSjoerg     // PHI's operand are a mix of registers and basic blocks interleaved.
203*da58b97aSjoerg     // We only care about the register ones.
204*da58b97aSjoerg     for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
205*da58b97aSjoerg       const MachineOperand &Src = MI.getOperand(Idx);
206*da58b97aSjoerg       Register SrcReg = Src.getReg();
207*da58b97aSjoerg       // Look through trivial copies and phis but don't look through trivial
208*da58b97aSjoerg       // copies or phis of the form `%1:(s32) = OP %0:gpr32`, known-bits
209*da58b97aSjoerg       // analysis is currently unable to determine the bit width of a
210*da58b97aSjoerg       // register class.
21106f32e7eSjoerg       //
21206f32e7eSjoerg       // We can't use NoSubRegister by name as it's defined by each target but
21306f32e7eSjoerg       // it's always defined to be 0 by tablegen.
214*da58b97aSjoerg       if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
215*da58b97aSjoerg           MRI.getType(SrcReg).isValid()) {
216*da58b97aSjoerg         // For COPYs we don't do anything, don't increase the depth.
217*da58b97aSjoerg         computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
218*da58b97aSjoerg                              Depth + (Opcode != TargetOpcode::COPY));
219*da58b97aSjoerg         Known = KnownBits::commonBits(Known, Known2);
220*da58b97aSjoerg         // If we reach a point where we don't know anything
221*da58b97aSjoerg         // just stop looking through the operands.
222*da58b97aSjoerg         if (Known.One == 0 && Known.Zero == 0)
223*da58b97aSjoerg           break;
224*da58b97aSjoerg       } else {
225*da58b97aSjoerg         // We know nothing.
226*da58b97aSjoerg         Known = KnownBits(BitWidth);
227*da58b97aSjoerg         break;
228*da58b97aSjoerg       }
22906f32e7eSjoerg     }
23006f32e7eSjoerg     break;
23106f32e7eSjoerg   }
23206f32e7eSjoerg   case TargetOpcode::G_CONSTANT: {
23306f32e7eSjoerg     auto CstVal = getConstantVRegVal(R, MRI);
23406f32e7eSjoerg     if (!CstVal)
23506f32e7eSjoerg       break;
236*da58b97aSjoerg     Known = KnownBits::makeConstant(*CstVal);
23706f32e7eSjoerg     break;
23806f32e7eSjoerg   }
23906f32e7eSjoerg   case TargetOpcode::G_FRAME_INDEX: {
240*da58b97aSjoerg     int FrameIdx = MI.getOperand(1).getIndex();
241*da58b97aSjoerg     TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
24206f32e7eSjoerg     break;
24306f32e7eSjoerg   }
24406f32e7eSjoerg   case TargetOpcode::G_SUB: {
245*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
24606f32e7eSjoerg                          Depth + 1);
24706f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
24806f32e7eSjoerg                          Depth + 1);
249*da58b97aSjoerg     Known = KnownBits::computeForAddSub(/*Add*/ false, /*NSW*/ false, Known,
250*da58b97aSjoerg                                         Known2);
25106f32e7eSjoerg     break;
25206f32e7eSjoerg   }
25306f32e7eSjoerg   case TargetOpcode::G_XOR: {
25406f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
25506f32e7eSjoerg                          Depth + 1);
25606f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
25706f32e7eSjoerg                          Depth + 1);
25806f32e7eSjoerg 
259*da58b97aSjoerg     Known ^= Known2;
26006f32e7eSjoerg     break;
26106f32e7eSjoerg   }
262*da58b97aSjoerg   case TargetOpcode::G_PTR_ADD: {
263*da58b97aSjoerg     if (DstTy.isVector())
264*da58b97aSjoerg       break;
265*da58b97aSjoerg     // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
26606f32e7eSjoerg     LLT Ty = MRI.getType(MI.getOperand(1).getReg());
26706f32e7eSjoerg     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
26806f32e7eSjoerg       break;
26906f32e7eSjoerg     LLVM_FALLTHROUGH;
27006f32e7eSjoerg   }
27106f32e7eSjoerg   case TargetOpcode::G_ADD: {
272*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
27306f32e7eSjoerg                          Depth + 1);
27406f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
27506f32e7eSjoerg                          Depth + 1);
276*da58b97aSjoerg     Known =
277*da58b97aSjoerg         KnownBits::computeForAddSub(/*Add*/ true, /*NSW*/ false, Known, Known2);
27806f32e7eSjoerg     break;
27906f32e7eSjoerg   }
28006f32e7eSjoerg   case TargetOpcode::G_AND: {
28106f32e7eSjoerg     // If either the LHS or the RHS are Zero, the result is zero.
28206f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
28306f32e7eSjoerg                          Depth + 1);
28406f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
28506f32e7eSjoerg                          Depth + 1);
28606f32e7eSjoerg 
287*da58b97aSjoerg     Known &= Known2;
28806f32e7eSjoerg     break;
28906f32e7eSjoerg   }
29006f32e7eSjoerg   case TargetOpcode::G_OR: {
29106f32e7eSjoerg     // If either the LHS or the RHS are Zero, the result is zero.
29206f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
29306f32e7eSjoerg                          Depth + 1);
29406f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
29506f32e7eSjoerg                          Depth + 1);
29606f32e7eSjoerg 
297*da58b97aSjoerg     Known |= Known2;
29806f32e7eSjoerg     break;
29906f32e7eSjoerg   }
30006f32e7eSjoerg   case TargetOpcode::G_MUL: {
30106f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
30206f32e7eSjoerg                          Depth + 1);
30306f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
30406f32e7eSjoerg                          Depth + 1);
305*da58b97aSjoerg     Known = KnownBits::mul(Known, Known2);
30606f32e7eSjoerg     break;
30706f32e7eSjoerg   }
30806f32e7eSjoerg   case TargetOpcode::G_SELECT: {
309*da58b97aSjoerg     computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
310*da58b97aSjoerg                         Known, DemandedElts, Depth + 1);
31106f32e7eSjoerg     break;
312*da58b97aSjoerg   }
313*da58b97aSjoerg   case TargetOpcode::G_SMIN: {
314*da58b97aSjoerg     // TODO: Handle clamp pattern with number of sign bits
315*da58b97aSjoerg     KnownBits KnownRHS;
316*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
31706f32e7eSjoerg                          Depth + 1);
318*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
319*da58b97aSjoerg                          Depth + 1);
320*da58b97aSjoerg     Known = KnownBits::smin(Known, KnownRHS);
321*da58b97aSjoerg     break;
322*da58b97aSjoerg   }
323*da58b97aSjoerg   case TargetOpcode::G_SMAX: {
324*da58b97aSjoerg     // TODO: Handle clamp pattern with number of sign bits
325*da58b97aSjoerg     KnownBits KnownRHS;
326*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
327*da58b97aSjoerg                          Depth + 1);
328*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
329*da58b97aSjoerg                          Depth + 1);
330*da58b97aSjoerg     Known = KnownBits::smax(Known, KnownRHS);
331*da58b97aSjoerg     break;
332*da58b97aSjoerg   }
333*da58b97aSjoerg   case TargetOpcode::G_UMIN: {
334*da58b97aSjoerg     KnownBits KnownRHS;
335*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
336*da58b97aSjoerg                          DemandedElts, Depth + 1);
337*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
338*da58b97aSjoerg                          DemandedElts, Depth + 1);
339*da58b97aSjoerg     Known = KnownBits::umin(Known, KnownRHS);
340*da58b97aSjoerg     break;
341*da58b97aSjoerg   }
342*da58b97aSjoerg   case TargetOpcode::G_UMAX: {
343*da58b97aSjoerg     KnownBits KnownRHS;
344*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known,
345*da58b97aSjoerg                          DemandedElts, Depth + 1);
346*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS,
347*da58b97aSjoerg                          DemandedElts, Depth + 1);
348*da58b97aSjoerg     Known = KnownBits::umax(Known, KnownRHS);
34906f32e7eSjoerg     break;
35006f32e7eSjoerg   }
35106f32e7eSjoerg   case TargetOpcode::G_FCMP:
35206f32e7eSjoerg   case TargetOpcode::G_ICMP: {
353*da58b97aSjoerg     if (DstTy.isVector())
354*da58b97aSjoerg       break;
35506f32e7eSjoerg     if (TL.getBooleanContents(DstTy.isVector(),
35606f32e7eSjoerg                               Opcode == TargetOpcode::G_FCMP) ==
35706f32e7eSjoerg             TargetLowering::ZeroOrOneBooleanContent &&
35806f32e7eSjoerg         BitWidth > 1)
35906f32e7eSjoerg       Known.Zero.setBitsFrom(1);
36006f32e7eSjoerg     break;
36106f32e7eSjoerg   }
36206f32e7eSjoerg   case TargetOpcode::G_SEXT: {
36306f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
36406f32e7eSjoerg                          Depth + 1);
36506f32e7eSjoerg     // If the sign bit is known to be zero or one, then sext will extend
36606f32e7eSjoerg     // it to the top bits, else it will just zext.
36706f32e7eSjoerg     Known = Known.sext(BitWidth);
36806f32e7eSjoerg     break;
36906f32e7eSjoerg   }
370*da58b97aSjoerg   case TargetOpcode::G_ASSERT_SEXT:
371*da58b97aSjoerg   case TargetOpcode::G_SEXT_INREG: {
372*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
373*da58b97aSjoerg                          Depth + 1);
374*da58b97aSjoerg     Known = Known.sextInReg(MI.getOperand(2).getImm());
375*da58b97aSjoerg     break;
376*da58b97aSjoerg   }
37706f32e7eSjoerg   case TargetOpcode::G_ANYEXT: {
37806f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
37906f32e7eSjoerg                          Depth + 1);
380*da58b97aSjoerg     Known = Known.anyext(BitWidth);
38106f32e7eSjoerg     break;
38206f32e7eSjoerg   }
38306f32e7eSjoerg   case TargetOpcode::G_LOAD: {
38406f32e7eSjoerg     const MachineMemOperand *MMO = *MI.memoperands_begin();
38506f32e7eSjoerg     if (const MDNode *Ranges = MMO->getRanges()) {
38606f32e7eSjoerg       computeKnownBitsFromRangeMetadata(*Ranges, Known);
38706f32e7eSjoerg     }
388*da58b97aSjoerg 
38906f32e7eSjoerg     break;
39006f32e7eSjoerg   }
39106f32e7eSjoerg   case TargetOpcode::G_ZEXTLOAD: {
392*da58b97aSjoerg     if (DstTy.isVector())
393*da58b97aSjoerg       break;
39406f32e7eSjoerg     // Everything above the retrieved bits is zero
39506f32e7eSjoerg     Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
39606f32e7eSjoerg     break;
39706f32e7eSjoerg   }
398*da58b97aSjoerg   case TargetOpcode::G_ASHR: {
399*da58b97aSjoerg     KnownBits LHSKnown, RHSKnown;
400*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
401*da58b97aSjoerg                          Depth + 1);
40206f32e7eSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
40306f32e7eSjoerg                          Depth + 1);
404*da58b97aSjoerg     Known = KnownBits::ashr(LHSKnown, RHSKnown);
40506f32e7eSjoerg     break;
40606f32e7eSjoerg   }
407*da58b97aSjoerg   case TargetOpcode::G_LSHR: {
408*da58b97aSjoerg     KnownBits LHSKnown, RHSKnown;
409*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
41006f32e7eSjoerg                          Depth + 1);
411*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
412*da58b97aSjoerg                          Depth + 1);
413*da58b97aSjoerg     Known = KnownBits::lshr(LHSKnown, RHSKnown);
41406f32e7eSjoerg     break;
41506f32e7eSjoerg   }
416*da58b97aSjoerg   case TargetOpcode::G_SHL: {
417*da58b97aSjoerg     KnownBits LHSKnown, RHSKnown;
418*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
419*da58b97aSjoerg                          Depth + 1);
420*da58b97aSjoerg     computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
421*da58b97aSjoerg                          Depth + 1);
422*da58b97aSjoerg     Known = KnownBits::shl(LHSKnown, RHSKnown);
42306f32e7eSjoerg     break;
42406f32e7eSjoerg   }
42506f32e7eSjoerg   case TargetOpcode::G_INTTOPTR:
42606f32e7eSjoerg   case TargetOpcode::G_PTRTOINT:
427*da58b97aSjoerg     if (DstTy.isVector())
428*da58b97aSjoerg       break;
42906f32e7eSjoerg     // Fall through and handle them the same as zext/trunc.
43006f32e7eSjoerg     LLVM_FALLTHROUGH;
431*da58b97aSjoerg   case TargetOpcode::G_ASSERT_ZEXT:
43206f32e7eSjoerg   case TargetOpcode::G_ZEXT:
43306f32e7eSjoerg   case TargetOpcode::G_TRUNC: {
43406f32e7eSjoerg     Register SrcReg = MI.getOperand(1).getReg();
43506f32e7eSjoerg     LLT SrcTy = MRI.getType(SrcReg);
436*da58b97aSjoerg     unsigned SrcBitWidth;
437*da58b97aSjoerg 
438*da58b97aSjoerg     // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
439*da58b97aSjoerg     if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
440*da58b97aSjoerg       SrcBitWidth = MI.getOperand(2).getImm();
441*da58b97aSjoerg     else {
442*da58b97aSjoerg       SrcBitWidth = SrcTy.isPointer()
44306f32e7eSjoerg                         ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
44406f32e7eSjoerg                         : SrcTy.getSizeInBits();
445*da58b97aSjoerg     }
44606f32e7eSjoerg     assert(SrcBitWidth && "SrcBitWidth can't be zero");
447*da58b97aSjoerg     Known = Known.zextOrTrunc(SrcBitWidth);
44806f32e7eSjoerg     computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
449*da58b97aSjoerg     Known = Known.zextOrTrunc(BitWidth);
45006f32e7eSjoerg     if (BitWidth > SrcBitWidth)
45106f32e7eSjoerg       Known.Zero.setBitsFrom(SrcBitWidth);
45206f32e7eSjoerg     break;
45306f32e7eSjoerg   }
454*da58b97aSjoerg   case TargetOpcode::G_MERGE_VALUES: {
455*da58b97aSjoerg     unsigned NumOps = MI.getNumOperands();
456*da58b97aSjoerg     unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
457*da58b97aSjoerg 
458*da58b97aSjoerg     for (unsigned I = 0; I != NumOps - 1; ++I) {
459*da58b97aSjoerg       KnownBits SrcOpKnown;
460*da58b97aSjoerg       computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
461*da58b97aSjoerg                            DemandedElts, Depth + 1);
462*da58b97aSjoerg       Known.insertBits(SrcOpKnown, I * OpSize);
463*da58b97aSjoerg     }
464*da58b97aSjoerg     break;
465*da58b97aSjoerg   }
466*da58b97aSjoerg   case TargetOpcode::G_UNMERGE_VALUES: {
467*da58b97aSjoerg     if (DstTy.isVector())
468*da58b97aSjoerg       break;
469*da58b97aSjoerg     unsigned NumOps = MI.getNumOperands();
470*da58b97aSjoerg     Register SrcReg = MI.getOperand(NumOps - 1).getReg();
471*da58b97aSjoerg     if (MRI.getType(SrcReg).isVector())
472*da58b97aSjoerg       return; // TODO: Handle vectors.
473*da58b97aSjoerg 
474*da58b97aSjoerg     KnownBits SrcOpKnown;
475*da58b97aSjoerg     computeKnownBitsImpl(SrcReg, SrcOpKnown, DemandedElts, Depth + 1);
476*da58b97aSjoerg 
477*da58b97aSjoerg     // Figure out the result operand index
478*da58b97aSjoerg     unsigned DstIdx = 0;
479*da58b97aSjoerg     for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
480*da58b97aSjoerg          ++DstIdx)
481*da58b97aSjoerg       ;
482*da58b97aSjoerg 
483*da58b97aSjoerg     Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
484*da58b97aSjoerg     break;
485*da58b97aSjoerg   }
486*da58b97aSjoerg   case TargetOpcode::G_BSWAP: {
487*da58b97aSjoerg     Register SrcReg = MI.getOperand(1).getReg();
488*da58b97aSjoerg     computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
489*da58b97aSjoerg     Known.byteSwap();
490*da58b97aSjoerg     break;
491*da58b97aSjoerg   }
492*da58b97aSjoerg   case TargetOpcode::G_BITREVERSE: {
493*da58b97aSjoerg     Register SrcReg = MI.getOperand(1).getReg();
494*da58b97aSjoerg     computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
495*da58b97aSjoerg     Known.reverseBits();
496*da58b97aSjoerg     break;
497*da58b97aSjoerg   }
49806f32e7eSjoerg   }
49906f32e7eSjoerg 
50006f32e7eSjoerg   assert(!Known.hasConflict() && "Bits known to be one AND zero?");
501*da58b97aSjoerg   LLVM_DEBUG(dumpResult(MI, Known, Depth));
502*da58b97aSjoerg 
503*da58b97aSjoerg   // Update the cache.
504*da58b97aSjoerg   ComputeKnownBitsCache[R] = Known;
505*da58b97aSjoerg }
506*da58b97aSjoerg 
507*da58b97aSjoerg /// Compute number of sign bits for the intersection of \p Src0 and \p Src1
computeNumSignBitsMin(Register Src0,Register Src1,const APInt & DemandedElts,unsigned Depth)508*da58b97aSjoerg unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
509*da58b97aSjoerg                                                const APInt &DemandedElts,
510*da58b97aSjoerg                                                unsigned Depth) {
511*da58b97aSjoerg   // Test src1 first, since we canonicalize simpler expressions to the RHS.
512*da58b97aSjoerg   unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
513*da58b97aSjoerg   if (Src1SignBits == 1)
514*da58b97aSjoerg     return 1;
515*da58b97aSjoerg   return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
516*da58b97aSjoerg }
517*da58b97aSjoerg 
computeNumSignBits(Register R,const APInt & DemandedElts,unsigned Depth)518*da58b97aSjoerg unsigned GISelKnownBits::computeNumSignBits(Register R,
519*da58b97aSjoerg                                             const APInt &DemandedElts,
520*da58b97aSjoerg                                             unsigned Depth) {
521*da58b97aSjoerg   MachineInstr &MI = *MRI.getVRegDef(R);
522*da58b97aSjoerg   unsigned Opcode = MI.getOpcode();
523*da58b97aSjoerg 
524*da58b97aSjoerg   if (Opcode == TargetOpcode::G_CONSTANT)
525*da58b97aSjoerg     return MI.getOperand(1).getCImm()->getValue().getNumSignBits();
526*da58b97aSjoerg 
527*da58b97aSjoerg   if (Depth == getMaxDepth())
528*da58b97aSjoerg     return 1;
529*da58b97aSjoerg 
530*da58b97aSjoerg   if (!DemandedElts)
531*da58b97aSjoerg     return 1; // No demanded elts, better to assume we don't know anything.
532*da58b97aSjoerg 
533*da58b97aSjoerg   LLT DstTy = MRI.getType(R);
534*da58b97aSjoerg   const unsigned TyBits = DstTy.getScalarSizeInBits();
535*da58b97aSjoerg 
536*da58b97aSjoerg   // Handle the case where this is called on a register that does not have a
537*da58b97aSjoerg   // type constraint. This is unlikely to occur except by looking through copies
538*da58b97aSjoerg   // but it is possible for the initial register being queried to be in this
539*da58b97aSjoerg   // state.
540*da58b97aSjoerg   if (!DstTy.isValid())
541*da58b97aSjoerg     return 1;
542*da58b97aSjoerg 
543*da58b97aSjoerg   unsigned FirstAnswer = 1;
544*da58b97aSjoerg   switch (Opcode) {
545*da58b97aSjoerg   case TargetOpcode::COPY: {
546*da58b97aSjoerg     MachineOperand &Src = MI.getOperand(1);
547*da58b97aSjoerg     if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
548*da58b97aSjoerg         MRI.getType(Src.getReg()).isValid()) {
549*da58b97aSjoerg       // Don't increment Depth for this one since we didn't do any work.
550*da58b97aSjoerg       return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
551*da58b97aSjoerg     }
552*da58b97aSjoerg 
553*da58b97aSjoerg     return 1;
554*da58b97aSjoerg   }
555*da58b97aSjoerg   case TargetOpcode::G_SEXT: {
556*da58b97aSjoerg     Register Src = MI.getOperand(1).getReg();
557*da58b97aSjoerg     LLT SrcTy = MRI.getType(Src);
558*da58b97aSjoerg     unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
559*da58b97aSjoerg     return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
560*da58b97aSjoerg   }
561*da58b97aSjoerg   case TargetOpcode::G_ASSERT_SEXT:
562*da58b97aSjoerg   case TargetOpcode::G_SEXT_INREG: {
563*da58b97aSjoerg     // Max of the input and what this extends.
564*da58b97aSjoerg     Register Src = MI.getOperand(1).getReg();
565*da58b97aSjoerg     unsigned SrcBits = MI.getOperand(2).getImm();
566*da58b97aSjoerg     unsigned InRegBits = TyBits - SrcBits + 1;
567*da58b97aSjoerg     return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1), InRegBits);
568*da58b97aSjoerg   }
569*da58b97aSjoerg   case TargetOpcode::G_SEXTLOAD: {
570*da58b97aSjoerg     // FIXME: We need an in-memory type representation.
571*da58b97aSjoerg     if (DstTy.isVector())
572*da58b97aSjoerg       return 1;
573*da58b97aSjoerg 
574*da58b97aSjoerg     // e.g. i16->i32 = '17' bits known.
575*da58b97aSjoerg     const MachineMemOperand *MMO = *MI.memoperands_begin();
576*da58b97aSjoerg     return TyBits - MMO->getSizeInBits() + 1;
577*da58b97aSjoerg   }
578*da58b97aSjoerg   case TargetOpcode::G_ZEXTLOAD: {
579*da58b97aSjoerg     // FIXME: We need an in-memory type representation.
580*da58b97aSjoerg     if (DstTy.isVector())
581*da58b97aSjoerg       return 1;
582*da58b97aSjoerg 
583*da58b97aSjoerg     // e.g. i16->i32 = '16' bits known.
584*da58b97aSjoerg     const MachineMemOperand *MMO = *MI.memoperands_begin();
585*da58b97aSjoerg     return TyBits - MMO->getSizeInBits();
586*da58b97aSjoerg   }
587*da58b97aSjoerg   case TargetOpcode::G_TRUNC: {
588*da58b97aSjoerg     Register Src = MI.getOperand(1).getReg();
589*da58b97aSjoerg     LLT SrcTy = MRI.getType(Src);
590*da58b97aSjoerg 
591*da58b97aSjoerg     // Check if the sign bits of source go down as far as the truncated value.
592*da58b97aSjoerg     unsigned DstTyBits = DstTy.getScalarSizeInBits();
593*da58b97aSjoerg     unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
594*da58b97aSjoerg     unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
595*da58b97aSjoerg     if (NumSrcSignBits > (NumSrcBits - DstTyBits))
596*da58b97aSjoerg       return NumSrcSignBits - (NumSrcBits - DstTyBits);
597*da58b97aSjoerg     break;
598*da58b97aSjoerg   }
599*da58b97aSjoerg   case TargetOpcode::G_SELECT: {
600*da58b97aSjoerg     return computeNumSignBitsMin(MI.getOperand(2).getReg(),
601*da58b97aSjoerg                                  MI.getOperand(3).getReg(), DemandedElts,
602*da58b97aSjoerg                                  Depth + 1);
603*da58b97aSjoerg   }
604*da58b97aSjoerg   case TargetOpcode::G_INTRINSIC:
605*da58b97aSjoerg   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
606*da58b97aSjoerg   default: {
607*da58b97aSjoerg     unsigned NumBits =
608*da58b97aSjoerg       TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
609*da58b97aSjoerg     if (NumBits > 1)
610*da58b97aSjoerg       FirstAnswer = std::max(FirstAnswer, NumBits);
611*da58b97aSjoerg     break;
612*da58b97aSjoerg   }
613*da58b97aSjoerg   }
614*da58b97aSjoerg 
615*da58b97aSjoerg   // Finally, if we can prove that the top bits of the result are 0's or 1's,
616*da58b97aSjoerg   // use this information.
617*da58b97aSjoerg   KnownBits Known = getKnownBits(R, DemandedElts, Depth);
618*da58b97aSjoerg   APInt Mask;
619*da58b97aSjoerg   if (Known.isNonNegative()) {        // sign bit is 0
620*da58b97aSjoerg     Mask = Known.Zero;
621*da58b97aSjoerg   } else if (Known.isNegative()) {  // sign bit is 1;
622*da58b97aSjoerg     Mask = Known.One;
623*da58b97aSjoerg   } else {
624*da58b97aSjoerg     // Nothing known.
625*da58b97aSjoerg     return FirstAnswer;
626*da58b97aSjoerg   }
627*da58b97aSjoerg 
628*da58b97aSjoerg   // Okay, we know that the sign bit in Mask is set.  Use CLO to determine
629*da58b97aSjoerg   // the number of identical bits in the top of the input value.
630*da58b97aSjoerg   Mask <<= Mask.getBitWidth() - TyBits;
631*da58b97aSjoerg   return std::max(FirstAnswer, Mask.countLeadingOnes());
632*da58b97aSjoerg }
633*da58b97aSjoerg 
computeNumSignBits(Register R,unsigned Depth)634*da58b97aSjoerg unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
635*da58b97aSjoerg   LLT Ty = MRI.getType(R);
636*da58b97aSjoerg   APInt DemandedElts = Ty.isVector()
637*da58b97aSjoerg                            ? APInt::getAllOnesValue(Ty.getNumElements())
638*da58b97aSjoerg                            : APInt(1, 1);
639*da58b97aSjoerg   return computeNumSignBits(R, DemandedElts, Depth);
64006f32e7eSjoerg }
64106f32e7eSjoerg 
/// Declare this pass's analysis dependencies and preservation behavior.
///
/// The known-bits analysis is a pure query facility: it never mutates the
/// MachineFunction, so it preserves all other analyses. The base-class hook
/// is still invoked to pick up the standard MachineFunctionPass requirements.
void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}
64606f32e7eSjoerg 
/// Pass entry point. Intentionally does no work and reports no IR change.
///
/// NOTE(review): the GISelKnownBits object appears to be constructed lazily
/// when clients query the analysis (accessor not visible in this chunk —
/// confirm against the header); the pass run itself never modifies \p MF,
/// hence the unconditional `return false`.
bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}
650